- Update to Xen 4.4.0 RC1 c/s 28233

- Drop 32bit support from spec file
- Dropped 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=282
This commit is contained in:
Charles Arnold 2014-01-02 19:09:07 +00:00 committed by Git OBS Bridge
parent 7057c8e7c9
commit e09562d587
139 changed files with 8426 additions and 10071 deletions

View File

@ -0,0 +1,59 @@
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 18 May 2009 12:05:44 +0100
Subject: net: move the tap buffer into TAPState
Patch-mainline: v0.11.0-rc0
Git-commit: 5b01e886d9eb4d5e94384a79634dcb43848e7bbf
References: bnc#840196
KVM uses a 64k buffer for reading from tapfd (for GSO support)
and allocates the buffer with TAPState rather than on the stack.
Not allocating it on the stack probably makes sense for qemu
anyway, so merge it in advance of GSO support.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/net.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/net.c b/tools/qemu-xen-traditional-dir-remote/net.c
index 0e7c77c..2ca85a3 100644
--- a/tools/qemu-xen-traditional-dir-remote/net.c
+++ b/tools/qemu-xen-traditional-dir-remote/net.c
@@ -700,6 +700,7 @@ typedef struct TAPState {
char down_script[1024];
char down_script_arg[128];
char script_arg[1024];
+ uint8_t buf[4096];
} TAPState;
#ifndef CONFIG_STUBDOM
@@ -735,20 +736,19 @@ static void tap_receive(void *opaque, const uint8_t *buf, int size)
static void tap_send(void *opaque)
{
TAPState *s = opaque;
- uint8_t buf[4096];
int size;
#ifdef __sun__
struct strbuf sbuf;
int f = 0;
- sbuf.maxlen = sizeof(buf);
- sbuf.buf = buf;
+ sbuf.maxlen = sizeof(s->buf);
+ sbuf.buf = s->buf;
size = getmsg(s->fd, NULL, &sbuf, &f) >=0 ? sbuf.len : -1;
#else
- size = read(s->fd, buf, sizeof(buf));
+ size = read(s->fd, s->buf, sizeof(s->buf));
#endif
if (size > 0) {
- qemu_send_packet(s->vc, buf, size);
+ qemu_send_packet(s->vc, s->buf, size);
}
}
--
1.8.1.4

View File

@ -0,0 +1,47 @@
From: Michal Kubecek <mkubecek@suse.cz>
Date: Fri, 27 Sep 2013 19:05:45 +0200
Subject: net: increase tap buffer size
Patch-mainline: v0.12.0-rc0
Git-commit: 8e0f8e5bf8fd483dd28329055336cf895b74c89f (partial)
References: bnc#840196
Increase size of buffere embedded in struct TAPState to allow
jumbo frames longer then 4096 bytes.
Part of upstream qemu commit
8e0f8e5b net: enable IFF_VNET_HDR on tap fds if available
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/net.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/net.c b/tools/qemu-xen-traditional-dir-remote/net.c
index 2ca85a3..502a691 100644
--- a/tools/qemu-xen-traditional-dir-remote/net.c
+++ b/tools/qemu-xen-traditional-dir-remote/net.c
@@ -693,6 +693,11 @@ static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
#if !defined(_WIN32)
+/* Maximum GSO packet size (64k) plus plenty of room for
+ * the ethernet and virtio_net headers
+ */
+#define TAP_BUFSIZE (4096 + 65536)
+
typedef struct TAPState {
VLANClientState *vc;
int fd;
@@ -700,7 +705,7 @@ typedef struct TAPState {
char down_script[1024];
char down_script_arg[128];
char script_arg[1024];
- uint8_t buf[4096];
+ uint8_t buf[TAP_BUFSIZE];
} TAPState;
#ifndef CONFIG_STUBDOM
--
1.8.1.4

View File

@ -0,0 +1,41 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 12 Jul 2010 20:24:59 +0300
Subject: e1000: fix access 4 bytes beyond buffer end
Patch-mainline: v0.13.0-rc0
Git-commit: b0b900070c7cb29bbefb732ec00397abe5de6d73
References: bnc#840196
We do range check for size, and get size as buffer,
but copy size + 4 bytes (4 is for FCS).
Let's copy size bytes but put size + 4 in length.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index c75bc5e..9b062db 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -659,7 +659,6 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
}
rdh_start = s->mac_reg[RDH];
- size += 4; // for the header
do {
if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
set_ics(s, 0, E1000_ICS_RXO);
@@ -673,7 +672,7 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
if (desc.buffer_addr) {
cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
(void *)(buf + vlan_offset), size);
- desc.length = cpu_to_le16(size);
+ desc.length = cpu_to_le16(size + 4 /* for FCS */);
desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
--
1.8.1.4

View File

@ -0,0 +1,47 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 12 Jul 2010 20:41:02 +0300
Subject: e1000: secrc support
Patch-mainline: v0.13.0-rc0
Git-commit: 55e8d1ce6b09300cc5f3adcd9a705156d168381d
References: bnc#840196
Add support for secrc field. Reportedly needed by old RHEL guests.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 9b062db..07e681d 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -338,6 +338,15 @@ is_vlan_txd(uint32_t txd_lower)
return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
}
+/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
+ * fill it in, just pad descriptor length by 4 bytes unless guest
+ * told us to trip it off the packet. */
+static inline int
+fcs_len(E1000State *s)
+{
+ return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
+}
+
static void
xmit_seg(E1000State *s)
{
@@ -672,7 +681,7 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
if (desc.buffer_addr) {
cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
(void *)(buf + vlan_offset), size);
- desc.length = cpu_to_le16(size + 4 /* for FCS */);
+ desc.length = cpu_to_le16(size + fcs_len(s));
desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
--
1.8.1.4

View File

@ -0,0 +1,104 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Feb 2011 18:27:48 +0200
Subject: e1000: multi-buffer packet support
Patch-mainline: v0.15.0-rc0
Git-commit: b19487e27ed3009df7f555998a454ba19aefd4b8
References: bnc#840196
e1000 supports multi-buffer packets larger than rxbuf_size.
This fixes the following (on linux):
- in guest: ifconfig eth1 mtu 16110
- in host: ifconfig tap0 mtu 16110
ping -s 16082 <guest-ip>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 39 +++++++++++++++++-------
1 file changed, 28 insertions(+), 11 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 07e681d..34818e0 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -632,16 +632,13 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
uint32_t rdh_start;
uint16_t vlan_special = 0;
uint8_t vlan_status = 0, vlan_offset = 0;
+ size_t desc_offset;
+ size_t desc_size;
+ size_t total_size;
if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
return;
- if (size > s->rxbuf_size) {
- DBGOUT(RX, "packet too large for buffers (%d > %d)\n", size,
- s->rxbuf_size);
- return;
- }
-
/* Discard oversized packets if !LPE and !SBP. */
if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
(size > MAXIMUM_ETHERNET_VLAN_SIZE
@@ -668,8 +665,16 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
}
rdh_start = s->mac_reg[RDH];
+ desc_offset = 0;
+ total_size = size + fcs_len(s);
do {
+ desc_size = total_size - desc_offset;
+ if (desc_size > s->rxbuf_size) {
+ desc_size = s->rxbuf_size;
+ }
if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
+ /* Discard all data written so far */
+ s->mac_reg[RDH] = rdh_start;
set_ics(s, 0, E1000_ICS_RXO);
return;
}
@@ -679,10 +684,22 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
desc.special = vlan_special;
desc.status |= (vlan_status | E1000_RXD_STAT_DD);
if (desc.buffer_addr) {
- cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
- (void *)(buf + vlan_offset), size);
- desc.length = cpu_to_le16(size + fcs_len(s));
- desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
+ if (desc_offset < size) {
+ size_t copy_size = size - desc_offset;
+ if (copy_size > s->rxbuf_size) {
+ copy_size = s->rxbuf_size;
+ }
+ cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
+ (void *)(buf + desc_offset + vlan_offset),
+ copy_size);
+ }
+ desc_offset += desc_size;
+ if (desc_offset >= total_size) {
+ desc.length = cpu_to_le16(desc_size);
+ desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
+ } else {
+ desc.length = cpu_to_le16(desc_size);
+ }
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
cpu_physical_memory_write(base, (void *)&desc, sizeof(desc));
@@ -697,7 +714,7 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
set_ics(s, 0, E1000_ICS_RXO);
return;
}
- } while (desc.buffer_addr == 0);
+ } while (desc_offset < total_size);
s->mac_reg[GPRC]++;
s->mac_reg[TPR]++;
--
1.8.1.4

View File

@ -0,0 +1,55 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Feb 2011 18:27:52 +0200
Subject: e1000: clear EOP for multi-buffer descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Patch-mainline: v0.15.0-rc0
Git-commit: ee912ccfa007351a62ba42bd60499769f6c02c1e
References: bnc#840196
The e1000 spec says: if software statically allocates
buffers, and uses memory read to check for completed descriptors, it
simply has to zero the status byte in the descriptor to make it ready
for reuse by hardware. This is not a hardware requirement (moving the
hardware tail pointer is), but is necessary for performing an inmemory
scan.
Thus the guest does not have to clear the status byte. In case it
doesn't we need to clear EOP for all descriptors
except the last. While I don't know of any such guests,
it's probably a good idea to stick to the spec.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reported-by: Juan Quintela <quintela@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 34818e0..7e791dc 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -694,11 +694,13 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
copy_size);
}
desc_offset += desc_size;
+ desc.length = cpu_to_le16(desc_size);
if (desc_offset >= total_size) {
- desc.length = cpu_to_le16(desc_size);
desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
} else {
- desc.length = cpu_to_le16(desc_size);
+ /* Guest zeroing out status is not a hardware requirement.
+ Clear EOP in case guest didn't do it. */
+ desc.status &= ~E1000_RXD_STAT_EOP;
}
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
--
1.8.1.4

View File

@ -0,0 +1,83 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Feb 2011 18:27:55 +0200
Subject: e1000: verify we have buffers, upfront
Patch-mainline: v0.15.0-rc0
Git-commit: 322fd48afbed1ef7b834ac343a0c8687bcb33695
References: bnc#840196
The spec says: Any descriptor with a non-zero status byte has been
processed by the hardware, and is ready to be handled by the software.
Thus, once we change a descriptor status to non-zero we should
never move the head backwards and try to reuse this
descriptor from hardware.
This actually happened with a multibuffer packet
that arrives when we don't have enough buffers.
Fix by checking that we have enough buffers upfront
so we never need to discard the packet midway through.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 28 +++++++++++++++++++-----
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 7e791dc..18d7597 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -622,6 +622,24 @@ e1000_can_receive(void *opaque)
return (s->mac_reg[RCTL] & E1000_RCTL_EN && s->mac_reg[RDLEN] != 0);
}
+static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
+{
+ int bufs;
+ /* Fast-path short packets */
+ if (total_size <= s->rxbuf_size) {
+ return s->mac_reg[RDH] != s->mac_reg[RDT] || !s->check_rxov;
+ }
+ if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
+ bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
+ } else if (s->mac_reg[RDH] > s->mac_reg[RDT] || !s->check_rxov) {
+ bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
+ s->mac_reg[RDT] - s->mac_reg[RDH];
+ } else {
+ return false;
+ }
+ return total_size <= bufs * s->rxbuf_size;
+}
+
static void
e1000_receive(void *opaque, const uint8_t *buf, int size)
{
@@ -667,17 +685,15 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
rdh_start = s->mac_reg[RDH];
desc_offset = 0;
total_size = size + fcs_len(s);
+ if (!e1000_has_rxbufs(s, total_size)) {
+ set_ics(s, 0, E1000_ICS_RXO);
+ return;
+ }
do {
desc_size = total_size - desc_offset;
if (desc_size > s->rxbuf_size) {
desc_size = s->rxbuf_size;
}
- if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
- /* Discard all data written so far */
- s->mac_reg[RDH] = rdh_start;
- set_ics(s, 0, E1000_ICS_RXO);
- return;
- }
base = ((uint64_t)s->mac_reg[RDBAH] << 32) + s->mac_reg[RDBAL] +
sizeof(desc) * s->mac_reg[RDH];
cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));
--
1.8.1.4

View File

@ -0,0 +1,55 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 27 Mar 2011 13:37:35 +0200
Subject: e1000: check buffer availability
Patch-mainline: v0.15.0-rc0
Git-commit: 6cdfab2868dd593902e2b7db3ba9f49f2cc03e3f
References: bnc#840196
Reduce spurious packet drops on RX ring empty
by verifying that we have at least 1 buffer
ahead of the time.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 18d7597..b07c6cb 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -614,14 +614,6 @@ e1000_set_link_status(VLANClientState *vc)
set_ics(s, 0, E1000_ICR_LSC);
}
-static int
-e1000_can_receive(void *opaque)
-{
- E1000State *s = opaque;
-
- return (s->mac_reg[RCTL] & E1000_RCTL_EN && s->mac_reg[RDLEN] != 0);
-}
-
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
int bufs;
@@ -640,6 +632,15 @@ static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
return total_size <= bufs * s->rxbuf_size;
}
+static int
+e1000_can_receive(void *opaque)
+{
+ E1000State *s = opaque;
+
+ return (s->mac_reg[RCTL] & E1000_RCTL_EN) && (s->mac_reg[RDLEN] != 0) &&
+ e1000_has_rxbufs(s, 1);
+}
+
static void
e1000_receive(void *opaque, const uint8_t *buf, int size)
{
--
1.8.1.4

View File

@ -1,664 +0,0 @@
# Commit 2ca9fbd739b8a72b16dd790d0fff7b75f5488fb8
# Date 2013-07-16 11:52:38 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD IOMMU: allocate IRTE entries instead of using a static mapping
For multi-vector MSI, where we surely don't want to allocate
contiguous vectors and be able to set affinities of the individual
vectors separately, we need to drop the use of the tuple of vector and
delivery mode to determine the IRTE to use, and instead allocate IRTEs
(which imo should have been done from the beginning).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
# Commit dcbff3aeac6020cdf1f5bd0f0eb0d329fc55d939
# Date 2013-08-28 10:11:19 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD IOMMU: also allocate IRTEs for HPET MSI
Omitting this was a blatant oversight of mine in commit 2ca9fbd7 ("AMD
IOMMU: allocate IRTE entries instead of using a static mapping").
This also changes a bogus inequality check into a sensible one, even
though it is already known that this will make HPET MSI unusable on
certain systems (having respective broken firmware). This, however,
seems better than failing on systems with consistent ACPI tables.
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Index: xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_acpi.c
===================================================================
--- xen-4.3.1-testing.orig/xen/drivers/passthrough/amd/iommu_acpi.c
+++ xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
/* allocate per-device interrupt remapping table */
if ( amd_iommu_perdev_intremap )
ivrs_mappings[alias_id].intremap_table =
- amd_iommu_alloc_intremap_table();
+ amd_iommu_alloc_intremap_table(
+ &ivrs_mappings[alias_id].intremap_inuse);
else
{
if ( shared_intremap_table == NULL )
- shared_intremap_table = amd_iommu_alloc_intremap_table();
+ shared_intremap_table = amd_iommu_alloc_intremap_table(
+ &shared_intremap_inuse);
ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+ ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
}
}
/* assgin iommu hardware */
@@ -678,7 +681,7 @@ static u16 __init parse_ivhd_device_spec
return 0;
}
- if ( ioapic_sbdf[special->handle].pin_setup )
+ if ( ioapic_sbdf[special->handle].pin_2_idx )
{
if ( ioapic_sbdf[special->handle].bdf == bdf &&
ioapic_sbdf[special->handle].seg == seg )
@@ -698,14 +701,17 @@ static u16 __init parse_ivhd_device_spec
ioapic_sbdf[special->handle].bdf = bdf;
ioapic_sbdf[special->handle].seg = seg;
- ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+ ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
if ( nr_ioapic_entries[apic] &&
- !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
return 0;
}
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic] *
+ sizeof(*ioapic_sbdf->pin_2_idx));
}
break;
}
@@ -933,7 +939,7 @@ static int __init parse_ivrs_table(struc
for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
{
if ( !nr_ioapic_entries[apic] ||
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
continue;
printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -942,9 +948,12 @@ static int __init parse_ivrs_table(struc
error = -ENXIO;
else
{
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
- if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
+ if ( ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic] * sizeof(*ioapic_sbdf->pin_2_idx));
+ else
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
error = -ENOMEM;
Index: xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_intr.c
===================================================================
--- xen-4.3.1-testing.orig/xen/drivers/passthrough/amd/iommu_intr.c
+++ xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_intr.c
@@ -31,6 +31,7 @@
struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
struct hpet_sbdf hpet_sbdf;
void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
static DEFINE_SPINLOCK(shared_intremap_lock);
static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
}
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
{
- int offset = 0;
- offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
- offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) &
- INT_REMAP_INDEX_VECTOR_MASK;
- return offset;
+ unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+ unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+ if ( slot < INTREMAP_ENTRIES )
+ __set_bit(slot, inuse);
+ return slot;
}
-static u8 *get_intremap_entry(int seg, int bdf, int offset)
+static u32 *get_intremap_entry(int seg, int bdf, int offset)
{
- u8 *table;
+ u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
- table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
- return (u8*) (table + offset);
+ return table + offset;
}
static void free_intremap_entry(int seg, int bdf, int offset)
{
- u32* entry;
- entry = (u32*)get_intremap_entry(seg, bdf, offset);
+ u32 *entry = get_intremap_entry(seg, bdf, offset);
+
memset(entry, 0, sizeof(u32));
+ __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
}
static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -98,18 +100,30 @@ static void update_intremap_entry(u32* e
INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
}
-static void update_intremap_entry_from_ioapic(
+static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
+{
+ return rte->vector | (rte->delivery_mode << 8);
+}
+
+static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+ rte->vector = (u8)offset;
+ rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
- const struct IO_APIC_route_entry *rte,
- const struct IO_APIC_route_entry *old_rte)
+ struct IO_APIC_route_entry *rte,
+ bool_t lo_update,
+ u16 *index)
{
unsigned long flags;
u32* entry;
u8 delivery_mode, dest, vector, dest_mode;
int req_id;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_intremap_requestor_id(iommu->seg, bdf);
lock = get_intremap_lock(iommu->seg, req_id);
@@ -121,16 +135,35 @@ static void update_intremap_entry_from_i
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- if ( old_rte )
+ offset = *index;
+ if ( offset >= INTREMAP_ENTRIES )
{
- int old_offset = get_intremap_offset(old_rte->vector,
- old_rte->delivery_mode);
+ offset = alloc_intremap_entry(iommu->seg, req_id);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ rte->mask = 1;
+ return -ENOSPC;
+ }
+ *index = offset;
+ lo_update = 1;
+ }
- if ( offset != old_offset )
- free_intremap_entry(iommu->seg, bdf, old_offset);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
+ if ( !lo_update )
+ {
+ /*
+ * Low half of incoming RTE is already in remapped format,
+ * so need to recover vector and delivery mode from IRTE.
+ */
+ ASSERT(get_rte_index(rte) == offset);
+ vector = get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
+ delivery_mode = get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT);
}
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
@@ -141,6 +174,10 @@ static void update_intremap_entry_from_i
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ set_rte_index(rte, offset);
+
+ return 0;
}
int __init amd_iommu_setup_ioapic_remapping(void)
@@ -153,7 +190,7 @@ int __init amd_iommu_setup_ioapic_remapp
u16 seg, bdf, req_id;
struct amd_iommu *iommu;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
/* Read ioapic entries and update interrupt remapping table accordingly */
for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -184,19 +221,23 @@ int __init amd_iommu_setup_ioapic_remapp
dest = rte.dest.logical.logical_dest;
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ offset = alloc_intremap_entry(seg, req_id);
+ BUG_ON(offset >= INTREMAP_ENTRIES);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector,
delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ set_rte_index(&rte, offset);
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+ __ioapic_write_entry(apic, pin, 1, rte);
+
if ( iommu->enabled )
{
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
- set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
}
}
return 0;
@@ -209,7 +250,7 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf;
+ int saved_mask, seg, bdf, rc;
struct amd_iommu *iommu;
if ( !iommu_intremap )
@@ -247,7 +288,7 @@ void amd_iommu_ioapic_update_ire(
}
if ( new_rte.mask &&
- !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
{
ASSERT(saved_mask);
__io_apic_write(apic, reg, value);
@@ -262,14 +303,19 @@ void amd_iommu_ioapic_update_ire(
}
/* Update interrupt remapping entry */
- update_intremap_entry_from_ioapic(
- bdf, iommu, &new_rte,
- test_and_set_bit(pin,
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
- : NULL);
+ rc = update_intremap_entry_from_ioapic(
+ bdf, iommu, &new_rte, reg == rte_lo,
+ &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
- /* Forward write access to IO-APIC RTE */
- __io_apic_write(apic, reg, value);
+ __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+ if ( rc )
+ {
+ /* Keep the entry masked. */
+ printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+ IO_APIC_ID(apic), pin, rc);
+ return;
+ }
/* For lower bits access, return directly to avoid double writes */
if ( reg == rte_lo )
@@ -283,16 +329,41 @@ void amd_iommu_ioapic_update_ire(
}
}
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg)
+{
+ unsigned int val = __io_apic_read(apic, reg);
+
+ if ( !(reg & 1) )
+ {
+ unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+ u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+ u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+ u16 req_id = get_intremap_requestor_id(seg, bdf);
+ const u32 *entry = get_intremap_entry(seg, req_id, offset);
+
+ val &= ~(INTREMAP_ENTRIES - 1);
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
+ }
+
+ return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
struct amd_iommu *iommu, u16 bdf,
- int *remap_index, const struct msi_msg *msg)
+ int *remap_index, const struct msi_msg *msg, u32 *data)
{
unsigned long flags;
u32* entry;
u16 req_id, alias_id;
u8 delivery_mode, dest, vector, dest_mode;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_dma_requestor_id(iommu->seg, bdf);
alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -303,15 +374,6 @@ static void update_intremap_entry_from_m
spin_lock_irqsave(lock, flags);
free_intremap_entry(iommu->seg, req_id, *remap_index);
spin_unlock_irqrestore(lock, flags);
-
- if ( ( req_id != alias_id ) &&
- get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
- {
- lock = get_intremap_lock(iommu->seg, alias_id);
- spin_lock_irqsave(lock, flags);
- free_intremap_entry(iommu->seg, alias_id, *remap_index);
- spin_unlock_irqrestore(lock, flags);
- }
goto done;
}
@@ -322,16 +384,24 @@ static void update_intremap_entry_from_m
delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
- offset = get_intremap_offset(vector, delivery_mode);
- if ( *remap_index < 0)
+ offset = *remap_index;
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ offset = alloc_intremap_entry(iommu->seg, bdf);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ return -ENOSPC;
+ }
*remap_index = offset;
- else
- BUG_ON(*remap_index != offset);
+ }
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
/*
* In some special cases, a pci-e device(e.g SATA controller in IDE mode)
* will use alias id to index interrupt remapping table.
@@ -343,10 +413,8 @@ static void update_intremap_entry_from_m
if ( ( req_id != alias_id ) &&
get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
{
- spin_lock_irqsave(lock, flags);
- entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
- update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
- spin_unlock_irqrestore(lock, flags);
+ BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
}
done:
@@ -358,19 +426,22 @@ done:
amd_iommu_flush_intremap(iommu, alias_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ return 0;
}
static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
{
- struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-
- if ( iommu )
- return iommu;
+ struct amd_iommu *iommu;
list_for_each_entry ( iommu, &amd_iommu_head, list )
if ( iommu->seg == seg && iommu->bdf == bdf )
return NULL;
+ iommu = find_iommu_for_device(seg, bdf);
+ if ( iommu )
+ return iommu;
+
AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
return ERR_PTR(-EINVAL);
@@ -380,8 +451,9 @@ int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
- int bdf, seg;
+ int bdf, seg, rc;
struct amd_iommu *iommu;
+ u32 data;
bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -390,11 +462,12 @@ int amd_iommu_msi_msg_update_ire(
if ( IS_ERR_OR_NULL(iommu) )
return PTR_ERR(iommu);
- if ( msi_desc->remap_index >= 0 )
+ if ( msi_desc->remap_index >= 0 && !msg )
{
do {
update_intremap_entry_from_msi_msg(iommu, bdf,
- &msi_desc->remap_index, NULL);
+ &msi_desc->remap_index,
+ NULL, NULL);
if ( !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
@@ -409,19 +482,39 @@ int amd_iommu_msi_msg_update_ire(
return 0;
do {
- update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
- msg);
- if ( !pdev || !pdev->phantom_stride )
+ rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+ &msi_desc->remap_index,
+ msg, &data);
+ if ( rc || !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
} while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
- return 0;
+ msg->data = data;
+ return rc;
}
void amd_iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
+ unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+ const struct pci_dev *pdev = msi_desc->dev;
+ u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+ u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+ const u32 *entry;
+
+ if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+ return;
+
+ entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+
+ msg->data &= ~(INTREMAP_ENTRIES - 1);
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
}
int __init amd_iommu_free_intremap_table(
@@ -438,23 +531,42 @@ int __init amd_iommu_free_intremap_table
return 0;
}
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
{
void *tb;
tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
BUG_ON(tb == NULL);
memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+ *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+ BUG_ON(*inuse_map == NULL);
return tb;
}
int __init amd_setup_hpet_msi(struct msi_desc *msi_desc)
{
- if ( (!msi_desc->hpet_id != hpet_sbdf.id) ||
- (hpet_sbdf.iommu == NULL) )
+ spinlock_t *lock;
+ unsigned long flags;
+ int rc = 0;
+
+ if ( msi_desc->hpet_id != hpet_sbdf.id || !hpet_sbdf.iommu )
{
- AMD_IOMMU_DEBUG("Fail to setup HPET MSI remapping\n");
- return 1;
+ AMD_IOMMU_DEBUG("Failed to setup HPET MSI remapping: %s\n",
+ hpet_sbdf.iommu ? "Wrong HPET" : "No IOMMU");
+ return -ENODEV;
}
- return 0;
+ lock = get_intremap_lock(hpet_sbdf.seg, hpet_sbdf.bdf);
+ spin_lock_irqsave(lock, flags);
+
+ msi_desc->remap_index = alloc_intremap_entry(hpet_sbdf.seg,
+ hpet_sbdf.bdf);
+ if ( msi_desc->remap_index >= INTREMAP_ENTRIES )
+ {
+ msi_desc->remap_index = -1;
+ rc = -ENXIO;
+ }
+
+ spin_unlock_irqrestore(lock, flags);
+
+ return rc;
}
Index: xen-4.3.1-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c
===================================================================
--- xen-4.3.1-testing.orig/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ xen-4.3.1-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -637,7 +637,7 @@ const struct iommu_ops amd_iommu_ops = {
.get_device_group_id = amd_iommu_group_id,
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
- .read_apic_from_ire = __io_apic_read,
+ .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
.read_msi_from_ire = amd_iommu_read_msi_from_ire,
.setup_hpet_msi = amd_setup_hpet_msi,
.suspend = amd_iommu_suspend,
Index: xen-4.3.1-testing/xen/include/asm-x86/amd-iommu.h
===================================================================
--- xen-4.3.1-testing.orig/xen/include/asm-x86/amd-iommu.h
+++ xen-4.3.1-testing/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
/* per device interrupt remapping table */
void *intremap_table;
+ unsigned long *intremap_inuse;
spinlock_t intremap_lock;
/* ivhd device data settings */
Index: xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
===================================================================
--- xen-4.3.1-testing.orig/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -470,10 +470,6 @@
#define MAX_AMD_IOMMUS 32
/* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK 0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT 10
-#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT 2
#define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001
#define INT_REMAP_ENTRY_REMAPEN_SHIFT 0
#define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002
Index: xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
===================================================================
--- xen-4.3.1-testing.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
/* interrupt remapping */
int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
void amd_iommu_ioapic_update_ire(
unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg);
int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
extern struct ioapic_sbdf {
u16 bdf, seg;
- unsigned long *pin_setup;
+ u16 *pin_2_idx;
} ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
extern struct hpet_sbdf {
u16 bdf, seg, id;
struct amd_iommu *iommu;
} hpet_sbdf;
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
/* power management support */
void amd_iommu_resume(void);
void amd_iommu_suspend(void);

View File

@ -1,68 +0,0 @@
# Commit 561e0f86660f10db492c1ead1cd772013a6cc32d
# Date 2013-07-16 11:54:07 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD IOMMU: untie remap and vector maps
With the specific IRTEs used for an interrupt no longer depending on
the vector, there's no need to tie the remap sharing model to the
vector sharing one.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -207,50 +207,6 @@ int __init amd_iov_detect(void)
init_done = 1;
- /*
- * AMD IOMMUs don't distinguish between vectors destined for
- * different cpus when doing interrupt remapping. This means
- * that interrupts going through the same intremap table
- * can't share the same vector.
- *
- * If irq_vector_map isn't specified, choose a sensible default:
- * - If we're using per-device interemap tables, per-device
- * vector non-sharing maps
- * - If we're using a global interemap table, global vector
- * non-sharing map
- */
- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
- {
- if ( amd_iommu_perdev_intremap )
- {
- /* Per-device vector map logic is broken for devices with multiple
- * MSI-X interrupts (and would also be for multiple MSI, if Xen
- * supported it).
- *
- * Until this is fixed, use global vector tables as far as the irq
- * logic is concerned to avoid the buggy behaviour of per-device
- * maps in map_domain_pirq(), and use per-device tables as far as
- * intremap code is concerned to avoid the security issue.
- */
- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. "
- "Using per-device-global maps instead until a fix is found.\n");
-
- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
- }
- else
- {
- printk("AMD-Vi: Enabling global vector map\n");
- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
- }
- }
- else
- {
- printk("AMD-Vi: Not overriding irq_vector_map setting\n");
-
- if ( opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_GLOBAL )
- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. "
- "Use irq_vector_map=global to work around.\n");
- }
if ( !amd_iommu_perdev_intremap )
printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
return scan_pci_devices();

View File

@ -1,83 +0,0 @@
# Commit 303066fdb1e4fe816e48acd665453f58b8399e81
# Date 2013-07-17 08:47:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VMX: fix interaction of APIC-V and Viridian emulation
Viridian using a synthetic MSR for issuing EOI notifications bypasses
the normal in-processor handling, which would clear
GUEST_INTR_STATUS.SVI. Hence we need to do this in software in order
for future interrupts to get delivered.
Based on analysis by Yang Z Zhang <yang.z.zhang@intel.com>.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Yang Zhang <yang.z.zhang@intel.com>
Index: xen-4.3.1-testing/xen/arch/x86/hvm/vlapic.c
===================================================================
--- xen-4.3.1-testing.orig/xen/arch/x86/hvm/vlapic.c
+++ xen-4.3.1-testing/xen/arch/x86/hvm/vlapic.c
@@ -395,6 +395,9 @@ void vlapic_EOI_set(struct vlapic *vlapi
vlapic_clear_vector(vector, &vlapic->regs->data[APIC_ISR]);
+ if ( hvm_funcs.handle_eoi )
+ hvm_funcs.handle_eoi(vector);
+
if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) )
vioapic_update_EOI(vlapic_domain(vlapic), vector);
Index: xen-4.3.1-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-4.3.1-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-4.3.1-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -1507,6 +1507,15 @@ static void vmx_sync_pir_to_irr(struct v
vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]);
}
+static void vmx_handle_eoi(u8 vector)
+{
+ unsigned long status = __vmread(GUEST_INTR_STATUS);
+
+ /* We need to clear the SVI field. */
+ status &= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
+ __vmwrite(GUEST_INTR_STATUS, status);
+}
+
static struct hvm_function_table __initdata vmx_function_table = {
.name = "VMX",
.cpu_up_prepare = vmx_cpu_up_prepare,
@@ -1559,6 +1568,7 @@ static struct hvm_function_table __initd
.process_isr = vmx_process_isr,
.deliver_posted_intr = vmx_deliver_posted_intr,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
+ .handle_eoi = vmx_handle_eoi,
.nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
};
@@ -1585,7 +1595,10 @@ const struct hvm_function_table * __init
setup_ept_dump();
}
-
+
+ if ( !cpu_has_vmx_virtual_intr_delivery )
+ vmx_function_table.handle_eoi = NULL;
+
if ( cpu_has_vmx_posted_intr_processing )
alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
else
Index: xen-4.3.1-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-4.3.1-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-4.3.1-testing/xen/include/asm-x86/hvm/hvm.h
@@ -186,6 +186,7 @@ struct hvm_function_table {
void (*process_isr)(int isr, struct vcpu *v);
void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
void (*sync_pir_to_irr)(struct vcpu *v);
+ void (*handle_eoi)(u8 vector);
/*Walk nested p2m */
int (*nhvm_hap_walk_L1_p2m)(struct vcpu *v, paddr_t L2_gpa,

View File

@ -1,23 +0,0 @@
# Commit d838ac2539cf1987bea6e15662fd6a80a58fe26d
# Date 2013-08-27 11:12:12 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: don't allow Dom0 access to the HT address range
In particular, MMIO assignments should not be done using this area.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -1126,6 +1126,10 @@ int __init construct_dom0(
rc |= iomem_deny_access(dom0, paddr_to_pfn(MSI_ADDR_BASE_LO),
paddr_to_pfn(MSI_ADDR_BASE_LO +
MSI_ADDR_DEST_ID_MASK));
+ /* HyperTransport range. */
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+ rc |= iomem_deny_access(dom0, paddr_to_pfn(0xfdULL << 32),
+ paddr_to_pfn((1ULL << 40) - 1));
/* Remove access to E820_UNUSABLE I/O regions above 1MB. */
for ( i = 0; i < e820.nr_map; i++ )

View File

@ -1,632 +0,0 @@
# Commit eedd60391610629b4e8a2e8278b857ff884f750d
# Date 2013-10-14 08:57:56 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
scheduler: adjust internal locking interface
Make the locking functions return the lock pointers, so they can be
passed to the unlocking functions (which in turn can check that the
lock is still actually providing the intended protection, i.e. the
parameters determining which lock is the right one didn't change).
Further use proper spin lock primitives rather than open coded
local_irq_...() constructs, so that interrupts can be re-enabled as
appropriate while spinning.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1170,6 +1170,7 @@ csched_runq_sort(struct csched_private *
struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
struct list_head *runq, *elem, *next, *last_under;
struct csched_vcpu *svc_elem;
+ spinlock_t *lock;
unsigned long flags;
int sort_epoch;
@@ -1179,7 +1180,7 @@ csched_runq_sort(struct csched_private *
spc->runq_sort_last = sort_epoch;
- pcpu_schedule_lock_irqsave(cpu, flags);
+ lock = pcpu_schedule_lock_irqsave(cpu, &flags);
runq = &spc->runq;
elem = runq->next;
@@ -1204,7 +1205,7 @@ csched_runq_sort(struct csched_private *
elem = next;
}
- pcpu_schedule_unlock_irqrestore(cpu, flags);
+ pcpu_schedule_unlock_irqrestore(lock, flags, cpu);
}
static void
@@ -1568,7 +1569,9 @@ csched_load_balance(struct csched_privat
* could cause a deadlock if the peer CPU is also load
* balancing and trying to lock this CPU.
*/
- if ( !pcpu_schedule_trylock(peer_cpu) )
+ spinlock_t *lock = pcpu_schedule_trylock(peer_cpu);
+
+ if ( !lock )
{
SCHED_STAT_CRANK(steal_trylock_failed);
peer_cpu = cpumask_cycle(peer_cpu, &workers);
@@ -1578,7 +1581,7 @@ csched_load_balance(struct csched_privat
/* Any work over there to steal? */
speer = cpumask_test_cpu(peer_cpu, online) ?
csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL;
- pcpu_schedule_unlock(peer_cpu);
+ pcpu_schedule_unlock(lock, peer_cpu);
/* As soon as one vcpu is found, balancing ends */
if ( speer != NULL )
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -881,15 +881,17 @@ csched_vcpu_insert(const struct schedule
*/
if ( ! is_idle_vcpu(vc) )
{
+ spinlock_t *lock;
+
/* FIXME: Do we need the private lock here? */
list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu);
/* Add vcpu to runqueue of initial processor */
- vcpu_schedule_lock_irq(vc);
+ lock = vcpu_schedule_lock_irq(vc);
runq_assign(ops, vc);
- vcpu_schedule_unlock_irq(vc);
+ vcpu_schedule_unlock_irq(lock, vc);
sdom->nr_vcpus++;
}
@@ -916,14 +918,16 @@ csched_vcpu_remove(const struct schedule
if ( ! is_idle_vcpu(vc) )
{
+ spinlock_t *lock;
+
SCHED_STAT_CRANK(vcpu_destroy);
/* Remove from runqueue */
- vcpu_schedule_lock_irq(vc);
+ lock = vcpu_schedule_lock_irq(vc);
runq_deassign(ops, vc);
- vcpu_schedule_unlock_irq(vc);
+ vcpu_schedule_unlock_irq(lock, vc);
/* Remove from sdom list. Don't need a lock for this, as it's called
* syncronously when nothing else can happen. */
@@ -1010,8 +1014,7 @@ csched_context_saved(const struct schedu
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
s_time_t now = NOW();
-
- vcpu_schedule_lock_irq(vc);
+ spinlock_t *lock = vcpu_schedule_lock_irq(vc);
BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor));
@@ -1037,7 +1040,7 @@ csched_context_saved(const struct schedu
else if ( !is_idle_vcpu(vc) )
update_load(ops, svc->rqd, svc, -1, now);
- vcpu_schedule_unlock_irq(vc);
+ vcpu_schedule_unlock_irq(lock, vc);
}
#define MAX_LOAD (1ULL<<60);
@@ -1454,14 +1457,14 @@ csched_dom_cntl(
* must never lock csched_priv.lock if we're holding a runqueue lock.
* Also, calling vcpu_schedule_lock() is enough, since IRQs have already
* been disabled. */
- vcpu_schedule_lock(svc->vcpu);
+ spinlock_t *lock = vcpu_schedule_lock(svc->vcpu);
BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor));
svc->weight = sdom->weight;
update_max_weight(svc->rqd, svc->weight, old_weight);
- vcpu_schedule_unlock(svc->vcpu);
+ vcpu_schedule_unlock(lock, svc->vcpu);
}
}
}
@@ -1991,6 +1994,7 @@ static void init_pcpu(const struct sched
cpumask_set_cpu(cpu, &rqd->idle);
cpumask_set_cpu(cpu, &rqd->active);
+ /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */
spin_unlock(old_lock);
cpumask_set_cpu(cpu, &prv->initialized);
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -1350,14 +1350,16 @@ static int sedf_adjust_weights(struct cp
if ( EDOM_INFO(p)->weight )
{
/* Interrupts already off */
- vcpu_schedule_lock(p);
+ spinlock_t *lock = vcpu_schedule_lock(p);
+
EDOM_INFO(p)->period_orig =
EDOM_INFO(p)->period = WEIGHT_PERIOD;
EDOM_INFO(p)->slice_orig =
EDOM_INFO(p)->slice =
(EDOM_INFO(p)->weight *
(WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
- vcpu_schedule_unlock(p);
+
+ vcpu_schedule_unlock(lock, p);
}
}
}
@@ -1418,21 +1420,24 @@ static int sedf_adjust(const struct sche
{
/* (Here and everywhere in the following) IRQs are already off,
* hence vcpu_spin_lock() is the one. */
- vcpu_schedule_lock(v);
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->extraweight = op->u.sedf.weight;
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->slice = 0;
EDOM_INFO(v)->period = WEIGHT_PERIOD;
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
else
{
/* Weight-driven domains with real-time execution */
- for_each_vcpu ( p, v ) {
- vcpu_schedule_lock(v);
+ for_each_vcpu ( p, v )
+ {
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->weight = op->u.sedf.weight;
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
}
@@ -1454,14 +1459,15 @@ static int sedf_adjust(const struct sche
/* Time-driven domains */
for_each_vcpu ( p, v )
{
- vcpu_schedule_lock(v);
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->extraweight = 0;
EDOM_INFO(v)->period_orig =
EDOM_INFO(v)->period = op->u.sedf.period;
EDOM_INFO(v)->slice_orig =
EDOM_INFO(v)->slice = op->u.sedf.slice;
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
@@ -1471,13 +1477,14 @@ static int sedf_adjust(const struct sche
for_each_vcpu ( p, v )
{
- vcpu_schedule_lock(v);
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->status =
(EDOM_INFO(v)->status &
~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
EDOM_INFO(v)->latency = op->u.sedf.latency;
extraq_check(v);
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -160,18 +160,16 @@ static inline void vcpu_runstate_change(
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
{
+ spinlock_t *lock = likely(v == current) ? NULL : vcpu_schedule_lock_irq(v);
s_time_t delta;
- if ( unlikely(v != current) )
- vcpu_schedule_lock_irq(v);
-
memcpy(runstate, &v->runstate, sizeof(*runstate));
delta = NOW() - runstate->state_entry_time;
if ( delta > 0 )
runstate->time[runstate->state] += delta;
- if ( unlikely(v != current) )
- vcpu_schedule_unlock_irq(v);
+ if ( unlikely(lock != NULL) )
+ vcpu_schedule_unlock_irq(lock, v);
}
uint64_t get_cpu_idle_time(unsigned int cpu)
@@ -333,8 +331,7 @@ void sched_destroy_domain(struct domain
void vcpu_sleep_nosync(struct vcpu *v)
{
unsigned long flags;
-
- vcpu_schedule_lock_irqsave(v, flags);
+ spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
if ( likely(!vcpu_runnable(v)) )
{
@@ -344,7 +341,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
SCHED_OP(VCPU2OP(v), sleep, v);
}
- vcpu_schedule_unlock_irqrestore(v, flags);
+ vcpu_schedule_unlock_irqrestore(lock, flags, v);
TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
}
@@ -362,8 +359,7 @@ void vcpu_sleep_sync(struct vcpu *v)
void vcpu_wake(struct vcpu *v)
{
unsigned long flags;
-
- vcpu_schedule_lock_irqsave(v, flags);
+ spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
if ( likely(vcpu_runnable(v)) )
{
@@ -377,7 +373,7 @@ void vcpu_wake(struct vcpu *v)
vcpu_runstate_change(v, RUNSTATE_offline, NOW());
}
- vcpu_schedule_unlock_irqrestore(v, flags);
+ vcpu_schedule_unlock_irqrestore(lock, flags, v);
TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}
@@ -528,10 +524,11 @@ static void vcpu_migrate(struct vcpu *v)
*/
void vcpu_force_reschedule(struct vcpu *v)
{
- vcpu_schedule_lock_irq(v);
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
+
if ( v->is_running )
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
if ( test_bit(_VPF_migrating, &v->pause_flags) )
{
@@ -546,7 +543,7 @@ void restore_vcpu_affinity(struct domain
for_each_vcpu ( d, v )
{
- vcpu_schedule_lock_irq(v);
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
if ( v->affinity_broken )
{
@@ -559,13 +556,13 @@ void restore_vcpu_affinity(struct domain
if ( v->processor == smp_processor_id() )
{
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
else
{
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
}
}
@@ -592,7 +589,7 @@ int cpu_disable_scheduler(unsigned int c
{
for_each_vcpu ( d, v )
{
- vcpu_schedule_lock_irq(v);
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid);
if ( cpumask_empty(&online_affinity) &&
@@ -613,13 +610,13 @@ int cpu_disable_scheduler(unsigned int c
if ( v->processor == cpu )
{
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
else
{
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
}
/*
@@ -646,6 +643,7 @@ int vcpu_set_affinity(struct vcpu *v, co
{
cpumask_t online_affinity;
cpumask_t *online;
+ spinlock_t *lock;
if ( v->domain->is_pinned )
return -EINVAL;
@@ -654,7 +652,7 @@ int vcpu_set_affinity(struct vcpu *v, co
if ( cpumask_empty(&online_affinity) )
return -EINVAL;
- vcpu_schedule_lock_irq(v);
+ lock = vcpu_schedule_lock_irq(v);
cpumask_copy(v->cpu_affinity, affinity);
@@ -662,7 +660,7 @@ int vcpu_set_affinity(struct vcpu *v, co
* when changing the affinity */
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
domain_update_node_affinity(v->domain);
@@ -776,10 +774,10 @@ static long do_poll(struct sched_poll *s
static long do_yield(void)
{
struct vcpu * v=current;
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
- vcpu_schedule_lock_irq(v);
SCHED_OP(VCPU2OP(v), yield, v);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
raise_softirq(SCHEDULE_SOFTIRQ);
@@ -1140,6 +1138,7 @@ static void schedule(void)
unsigned long *tasklet_work = &this_cpu(tasklet_work_to_do);
bool_t tasklet_work_scheduled = 0;
struct schedule_data *sd;
+ spinlock_t *lock;
struct task_slice next_slice;
int cpu = smp_processor_id();
@@ -1166,7 +1165,7 @@ static void schedule(void)
BUG();
}
- pcpu_schedule_lock_irq(cpu);
+ lock = pcpu_schedule_lock_irq(cpu);
stop_timer(&sd->s_timer);
@@ -1183,7 +1182,7 @@ static void schedule(void)
if ( unlikely(prev == next) )
{
- pcpu_schedule_unlock_irq(cpu);
+ pcpu_schedule_unlock_irq(lock, cpu);
trace_continue_running(next);
return continue_running(prev);
}
@@ -1221,7 +1220,7 @@ static void schedule(void)
ASSERT(!next->is_running);
next->is_running = 1;
- pcpu_schedule_unlock_irq(cpu);
+ pcpu_schedule_unlock_irq(lock, cpu);
SCHED_STAT_CRANK(sched_ctx);
@@ -1408,6 +1407,7 @@ int schedule_cpu_switch(unsigned int cpu
{
unsigned long flags;
struct vcpu *idle;
+ spinlock_t *lock;
void *ppriv, *ppriv_old, *vpriv, *vpriv_old;
struct scheduler *old_ops = per_cpu(scheduler, cpu);
struct scheduler *new_ops = (c == NULL) ? &ops : c->sched;
@@ -1426,7 +1426,7 @@ int schedule_cpu_switch(unsigned int cpu
return -ENOMEM;
}
- pcpu_schedule_lock_irqsave(cpu, flags);
+ lock = pcpu_schedule_lock_irqsave(cpu, &flags);
SCHED_OP(old_ops, tick_suspend, cpu);
vpriv_old = idle->sched_priv;
@@ -1437,7 +1437,7 @@ int schedule_cpu_switch(unsigned int cpu
SCHED_OP(new_ops, tick_resume, cpu);
SCHED_OP(new_ops, insert_vcpu, idle);
- pcpu_schedule_unlock_irqrestore(cpu, flags);
+ pcpu_schedule_unlock_irqrestore(lock, flags, cpu);
SCHED_OP(old_ops, free_vdata, vpriv_old);
SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
@@ -1495,10 +1495,11 @@ void schedule_dump(struct cpupool *c)
for_each_cpu (i, cpus)
{
- pcpu_schedule_lock(i);
+ spinlock_t *lock = pcpu_schedule_lock(i);
+
printk("CPU[%02d] ", i);
SCHED_OP(sched, dump_cpu_state, i);
- pcpu_schedule_unlock(i);
+ pcpu_schedule_unlock(lock, i);
}
}
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc
DECLARE_PER_CPU(struct scheduler *, scheduler);
DECLARE_PER_CPU(struct cpupool *, cpupool);
-static inline spinlock_t * pcpu_schedule_lock(int cpu)
-{
- spinlock_t * lock=NULL;
-
- for ( ; ; )
- {
- /* The per_cpu(v->processor) may also change, if changing
- * cpu pool also changes the scheduler lock. Retry
- * until they match.
- */
- lock=per_cpu(schedule_data, cpu).schedule_lock;
-
- spin_lock(lock);
- if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) )
- break;
- spin_unlock(lock);
- }
- return lock;
+#define sched_lock(kind, param, cpu, irq, arg...) \
+static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+{ \
+ for ( ; ; ) \
+ { \
+ spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
+ /* \
+ * v->processor may change when grabbing the lock; but \
+ * per_cpu(v->processor) may also change, if changing cpu pool \
+ * also changes the scheduler lock. Retry until they match. \
+ * \
+ * It may also be the case that v->processor may change but the \
+ * lock may be the same; this will succeed in that case. \
+ */ \
+ spin_lock##irq(lock, ## arg); \
+ if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
+ return lock; \
+ spin_unlock##irq(lock, ## arg); \
+ } \
}
-static inline int pcpu_schedule_trylock(int cpu)
-{
- spinlock_t * lock=NULL;
-
- lock=per_cpu(schedule_data, cpu).schedule_lock;
- if ( ! spin_trylock(lock) )
- return 0;
- if ( lock == per_cpu(schedule_data, cpu).schedule_lock )
- return 1;
- else
- {
- spin_unlock(lock);
- return 0;
- }
+#define sched_unlock(kind, param, cpu, irq, arg...) \
+static inline void kind##_schedule_unlock##irq(spinlock_t *lock \
+ EXTRA_TYPE(arg), param) \
+{ \
+ ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \
+ spin_unlock##irq(lock, ## arg); \
}
-#define pcpu_schedule_lock_irq(p) \
- do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 )
-#define pcpu_schedule_lock_irqsave(p, flags) \
- do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 )
+#define EXTRA_TYPE(arg)
+sched_lock(pcpu, unsigned int cpu, cpu, )
+sched_lock(vcpu, const struct vcpu *v, v->processor, )
+sched_lock(pcpu, unsigned int cpu, cpu, _irq)
+sched_lock(vcpu, const struct vcpu *v, v->processor, _irq)
+sched_unlock(pcpu, unsigned int cpu, cpu, )
+sched_unlock(vcpu, const struct vcpu *v, v->processor, )
+sched_unlock(pcpu, unsigned int cpu, cpu, _irq)
+sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq)
+#undef EXTRA_TYPE
+
+#define EXTRA_TYPE(arg) , unsigned long arg
+#define spin_unlock_irqsave spin_unlock_irqrestore
+sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags)
+sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags)
+#undef spin_unlock_irqsave
+sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags)
+sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags)
+#undef EXTRA_TYPE
+
+#undef sched_unlock
+#undef sched_lock
-static inline void pcpu_schedule_unlock(int cpu)
+static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu)
{
- spin_unlock(per_cpu(schedule_data, cpu).schedule_lock);
-}
+ spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock;
-#define pcpu_schedule_unlock_irq(p) \
- do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 )
-#define pcpu_schedule_unlock_irqrestore(p, flags) \
- do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 )
-
-static inline void vcpu_schedule_lock(struct vcpu *v)
-{
- spinlock_t * lock;
-
- for ( ; ; )
- {
- /* v->processor may change when grabbing the lock; but
- * per_cpu(v->processor) may also change, if changing
- * cpu pool also changes the scheduler lock. Retry
- * until they match.
- *
- * It may also be the case that v->processor may change
- * but the lock may be the same; this will succeed
- * in that case.
- */
- lock=per_cpu(schedule_data, v->processor).schedule_lock;
-
- spin_lock(lock);
- if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) )
- break;
- spin_unlock(lock);
- }
-}
-
-#define vcpu_schedule_lock_irq(v) \
- do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 )
-#define vcpu_schedule_lock_irqsave(v, flags) \
- do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 )
-
-static inline void vcpu_schedule_unlock(struct vcpu *v)
-{
- spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock);
+ if ( !spin_trylock(lock) )
+ return NULL;
+ if ( lock == per_cpu(schedule_data, cpu).schedule_lock )
+ return lock;
+ spin_unlock(lock);
+ return NULL;
}
-#define vcpu_schedule_unlock_irq(v) \
- do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 )
-#define vcpu_schedule_unlock_irqrestore(v, flags) \
- do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 )
-
struct task_slice {
struct vcpu *task;
s_time_t time;

View File

@ -1,63 +0,0 @@
# Commit ef55257bc81204e34691f1c2aa9e01f2d0768bdd
# Date 2013-10-14 08:58:31 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
sched: fix race between sched_move_domain() and vcpu_wake()
From: David Vrabel <david.vrabel@citrix.com>
sched_move_domain() changes v->processor for all the domain's VCPUs.
If another domain, softirq etc. triggers a simultaneous call to
vcpu_wake() (e.g., by setting an event channel as pending), then
vcpu_wake() may lock one schedule lock and try to unlock another.
vcpu_schedule_lock() attempts to handle this but only does so for the
window between reading the schedule_lock from the per-CPU data and the
spin_lock() call. This does not help with sched_move_domain()
changing v->processor between the calls to vcpu_schedule_lock() and
vcpu_schedule_unlock().
Fix the race by taking the schedule_lock for v->processor in
sched_move_domain().
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Use vcpu_schedule_lock_irq() (which now returns the lock) to properly
retry the locking should the to be used lock have changed in the course
of acquiring it (issue pointed out by George Dunlap).
Add a comment explaining the state after the v->processor adjustment.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -276,6 +276,8 @@ int sched_move_domain(struct domain *d,
new_p = cpumask_first(c->cpu_valid);
for_each_vcpu ( d, v )
{
+ spinlock_t *lock;
+
vcpudata = v->sched_priv;
migrate_timer(&v->periodic_timer, new_p);
@@ -283,7 +285,16 @@ int sched_move_domain(struct domain *d,
migrate_timer(&v->poll_timer, new_p);
cpumask_setall(v->cpu_affinity);
+
+ lock = vcpu_schedule_lock_irq(v);
v->processor = new_p;
+ /*
+ * With v->processor modified we must not
+ * - make any further changes assuming we hold the scheduler lock,
+ * - use vcpu_schedule_unlock_irq().
+ */
+ spin_unlock_irq(lock);
+
v->sched_priv = vcpu_priv[v->vcpu_id];
evtchn_move_pirqs(v);

View File

@ -1,27 +0,0 @@
# Commit d38a668b6ef8c84d1d3fda9947ffb0056d01fe3a
# Date 2013-10-16 12:26:48 +0200
# Author Juergen Gross <juergen.gross@ts.fujitsu.com>
# Committer Jan Beulich <jbeulich@suse.com>
credit: unpause parked vcpu before destroying it
A capped out vcpu must be unpaused in case of moving it to another cpupool,
otherwise it will be paused forever.
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -931,6 +931,12 @@ csched_vcpu_remove(const struct schedule
SCHED_STAT_CRANK(vcpu_destroy);
+ if ( test_and_clear_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
+ {
+ SCHED_STAT_CRANK(vcpu_unpark);
+ vcpu_unpause(svc->vcpu);
+ }
+
if ( __vcpu_on_runq(svc) )
__runq_remove(svc);

View File

@ -1,77 +0,0 @@
# Commit f72cb6bbc10348f4f7671428e5db509731e9e6a5
# Date 2013-10-17 11:35:26 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: print relevant (tail) part of filename for warnings and crashes
In particular when the origin construct is in a header file (and
hence the file name is an absolute path instead of just the file name
portion) the information can otherwise become rather useless when the
build tree isn't sitting relatively close to the file system root.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -953,7 +953,7 @@ void do_invalid_op(struct cpu_user_regs
{
struct bug_frame bug;
struct bug_frame_str bug_str;
- const char *p, *filename, *predicate, *eip = (char *)regs->eip;
+ const char *p, *prefix = "", *filename, *predicate, *eip = (char *)regs->eip;
unsigned long fixup;
int id, lineno;
@@ -995,12 +995,19 @@ void do_invalid_op(struct cpu_user_regs
}
/* WARN, BUG or ASSERT: decode the filename pointer and line number. */
- filename = p;
+ fixup = strlen(p);
+ if ( fixup > 50 )
+ {
+ filename = p + fixup - 47;
+ prefix = "...";
+ }
+ else
+ filename = p;
lineno = bug.id >> 2;
if ( id == BUGFRAME_warn )
{
- printk("Xen WARN at %.50s:%d\n", filename, lineno);
+ printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno);
show_execution_state(regs);
regs->eip = (unsigned long)eip;
return;
@@ -1008,10 +1015,10 @@ void do_invalid_op(struct cpu_user_regs
if ( id == BUGFRAME_bug )
{
- printk("Xen BUG at %.50s:%d\n", filename, lineno);
+ printk("Xen BUG at %s%s:%d\n", prefix, filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
- panic("Xen BUG at %.50s:%d\n", filename, lineno);
+ panic("Xen BUG at %s%s:%d\n", prefix, filename, lineno);
}
/* ASSERT: decode the predicate string pointer. */
@@ -1025,12 +1032,12 @@ void do_invalid_op(struct cpu_user_regs
if ( !is_kernel(predicate) )
predicate = "<unknown>";
- printk("Assertion '%s' failed at %.50s:%d\n",
- predicate, filename, lineno);
+ printk("Assertion '%s' failed at %s%s:%d\n",
+ predicate, prefix, filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
- panic("Assertion '%s' failed at %.50s:%d\n",
- predicate, filename, lineno);
+ panic("Assertion '%s' failed at %s%s:%d\n",
+ predicate, prefix, filename, lineno);
die:
if ( (fixup = search_exception_table(regs->eip)) != 0 )

View File

@ -1,46 +0,0 @@
# Commit e47a90e6dca491c0ceea6ffa18055e7e32565e8e
# Date 2013-10-21 17:26:16 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/xsave: also save/restore XCR0 across suspend (ACPI S3)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -13,12 +13,14 @@
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/i387.h>
+#include <asm/xstate.h>
#include <xen/hypercall.h>
static unsigned long saved_lstar, saved_cstar;
static unsigned long saved_sysenter_esp, saved_sysenter_eip;
static unsigned long saved_fs_base, saved_gs_base, saved_kernel_gs_base;
static uint16_t saved_segs[4];
+static uint64_t saved_xcr0;
void save_rest_processor_state(void)
{
@@ -38,6 +40,8 @@ void save_rest_processor_state(void)
rdmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp);
rdmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip);
}
+ if ( cpu_has_xsave )
+ saved_xcr0 = get_xcr0();
}
@@ -77,6 +81,9 @@ void restore_rest_processor_state(void)
do_set_segment_base(SEGBASE_GS_USER_SEL, saved_segs[3]);
}
+ if ( cpu_has_xsave && !set_xcr0(saved_xcr0) )
+ BUG();
+
/* Maybe load the debug registers. */
BUG_ON(is_hvm_vcpu(curr));
if ( !is_idle_vcpu(curr) && curr->arch.debugreg[7] )

View File

@ -1,62 +0,0 @@
# Commit 343cad8c70585c4dba8afc75e1ec1b7610605ab2
# Date 2013-10-28 12:00:36 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: refine address validity checks before accessing page tables
In commit 40d66baa ("x86: correct LDT checks") and d06a0d71 ("x86: add
address validity check to guest_map_l1e()") I didn't really pay
attention to the fact that these checks would better be done before the
paging_mode_translate() ones, as there's also no equivalent check down
the shadow code paths involved here (at least not up to the first use
of the address), and such generic checks shouldn't really be done by
particular backend functions anyway.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -356,12 +356,14 @@ guest_map_l1e(struct vcpu *v, unsigned l
{
l2_pgentry_t l2e;
+ if ( unlikely(!__addr_ok(addr)) )
+ return NULL;
+
if ( unlikely(paging_mode_translate(v->domain)) )
return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn);
/* Find this l1e and its enclosing l1mfn in the linear map */
- if ( !__addr_ok(addr) ||
- __copy_from_user(&l2e,
+ if ( __copy_from_user(&l2e,
&__linear_l2_table[l2_linear_offset(addr)],
sizeof(l2_pgentry_t)) != 0 )
return NULL;
@@ -382,16 +384,21 @@ guest_unmap_l1e(struct vcpu *v, void *p)
/* Read the guest's l1e that maps this address. */
static inline void
-guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
+guest_get_eff_l1e(struct vcpu *v, unsigned long addr, l1_pgentry_t *eff_l1e)
{
+ if ( unlikely(!__addr_ok(addr)) )
+ {
+ *eff_l1e = l1e_empty();
+ return;
+ }
+
if ( likely(!paging_mode_translate(v->domain)) )
{
ASSERT(!paging_mode_external(v->domain));
- if ( !__addr_ok(addr) ||
- __copy_from_user(eff_l1e,
+ if ( __copy_from_user(eff_l1e,
&__linear_l1_table[l1_linear_offset(addr)],
sizeof(l1_pgentry_t)) != 0 )
- *(l1_pgentry_t *)eff_l1e = l1e_empty();
+ *eff_l1e = l1e_empty();
return;
}

View File

@ -1,74 +0,0 @@
# Commit 41a0cc9e26160a89245c9ba3233e3f70bf9cd4b4
# Date 2013-10-29 09:57:14 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
fix locking in cpu_disable_scheduler()
So commit eedd6039 ("scheduler: adjust internal locking interface")
uncovered - by now using proper spin lock constructs - a bug after all:
When bringing down a CPU, cpu_disable_scheduler() gets called with
interrupts disabled, and hence the use of vcpu_schedule_lock_irq() was
never really correct (i.e. the caller ended up with interrupts enabled
despite having disabled them explicitly).
Fixing this however surfaced another problem: The call path
vcpu_migrate() -> evtchn_move_pirqs() wants to acquire the event lock,
which however is a non-IRQ-safe once, and hence check_lock() doesn't
like this lock to be acquired when interrupts are already off. As we're
in stop-machine context here, getting things wrong wrt interrupt state
management during lock acquire/release is out of question though, so
the simple solution to this appears to be to just suppress spin lock
debugging for the period of time while the stop machine callback gets
run.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -600,7 +600,8 @@ int cpu_disable_scheduler(unsigned int c
{
for_each_vcpu ( d, v )
{
- spinlock_t *lock = vcpu_schedule_lock_irq(v);
+ unsigned long flags;
+ spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid);
if ( cpumask_empty(&online_affinity) &&
@@ -621,14 +622,12 @@ int cpu_disable_scheduler(unsigned int c
if ( v->processor == cpu )
{
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(lock, v);
+ vcpu_schedule_unlock_irqrestore(lock, flags, v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
else
- {
- vcpu_schedule_unlock_irq(lock, v);
- }
+ vcpu_schedule_unlock_irqrestore(lock, flags, v);
/*
* A vcpu active in the hypervisor will not be migratable.
--- a/xen/common/stop_machine.c
+++ b/xen/common/stop_machine.c
@@ -110,6 +110,7 @@ int stop_machine_run(int (*fn)(void *),
local_irq_disable();
stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
stopmachine_wait_state();
+ spin_debug_disable();
stopmachine_set_state(STOPMACHINE_INVOKE);
if ( (cpu == smp_processor_id()) || (cpu == NR_CPUS) )
@@ -117,6 +118,7 @@ int stop_machine_run(int (*fn)(void *),
stopmachine_wait_state();
ret = stopmachine_data.fn_result;
+ spin_debug_enable();
stopmachine_set_state(STOPMACHINE_EXIT);
stopmachine_wait_state();
local_irq_enable();

View File

@ -1,101 +0,0 @@
References: bnc#848657 CVE-2013-4494 XSA-73
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@citrix.com>
# Date 1383556439 -3600
# Node ID f63cb4c06a991a69b0f11789c88ef069eb39f64c
# Parent c30539bc5b235c9ce657f483c2305212ad1cdfba
gnttab: correct locking order reversal
Coverity ID 1087189
Correct a lock order reversal between a domains page allocation and grant
table locks.
This is CVE-2013-4494 / XSA-73.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Consolidate error handling.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Keir Fraser <keir@xen.org>
Tested-by: Matthew Daley <mattjd@gmail.com>
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1518,6 +1518,8 @@ gnttab_transfer(
for ( i = 0; i < count; i++ )
{
+ bool_t okay;
+
if (i && hypercall_preempt_check())
return i;
@@ -1626,16 +1628,18 @@ gnttab_transfer(
* pages when it is dying.
*/
if ( unlikely(e->is_dying) ||
- unlikely(e->tot_pages >= e->max_pages) ||
- unlikely(!gnttab_prepare_for_transfer(e, d, gop.ref)) )
+ unlikely(e->tot_pages >= e->max_pages) )
{
- if ( !e->is_dying )
- gdprintk(XENLOG_INFO, "gnttab_transfer: "
- "Transferee has no reservation "
- "headroom (%d,%d) or provided a bad grant ref (%08x) "
- "or is dying (%d)\n",
- e->tot_pages, e->max_pages, gop.ref, e->is_dying);
spin_unlock(&e->page_alloc_lock);
+
+ if ( e->is_dying )
+ gdprintk(XENLOG_INFO, "gnttab_transfer: "
+ "Transferee (d%d) is dying\n", e->domain_id);
+ else
+ gdprintk(XENLOG_INFO, "gnttab_transfer: "
+ "Transferee (d%d) has no headroom (tot %u, max %u)\n",
+ e->domain_id, e->tot_pages, e->max_pages);
+
rcu_unlock_domain(e);
put_gfn(d, gop.mfn);
page->count_info &= ~(PGC_count_mask|PGC_allocated);
@@ -1647,6 +1651,38 @@ gnttab_transfer(
/* Okay, add the page to 'e'. */
if ( unlikely(domain_adjust_tot_pages(e, 1) == 1) )
get_knownalive_domain(e);
+
+ /*
+ * We must drop the lock to avoid a possible deadlock in
+ * gnttab_prepare_for_transfer. We have reserved a page in e so can
+ * safely drop the lock and re-aquire it later to add page to the
+ * pagelist.
+ */
+ spin_unlock(&e->page_alloc_lock);
+ okay = gnttab_prepare_for_transfer(e, d, gop.ref);
+ spin_lock(&e->page_alloc_lock);
+
+ if ( unlikely(!okay) || unlikely(e->is_dying) )
+ {
+ bool_t drop_dom_ref = !domain_adjust_tot_pages(e, -1);
+
+ spin_unlock(&e->page_alloc_lock);
+
+ if ( okay /* i.e. e->is_dying due to the surrounding if() */ )
+ gdprintk(XENLOG_INFO, "gnttab_transfer: "
+ "Transferee (d%d) is now dying\n", e->domain_id);
+
+ if ( drop_dom_ref )
+ put_domain(e);
+ rcu_unlock_domain(e);
+
+ put_gfn(d, gop.mfn);
+ page->count_info &= ~(PGC_count_mask|PGC_allocated);
+ free_domheap_page(page);
+ gop.status = GNTST_general_error;
+ goto copyback;
+ }
+
page_list_add_tail(page, &e->page_list);
page_set_owner(page, e);

View File

@ -1,39 +0,0 @@
References: bnc#848014
# Commit 2c24cdcce3269f3286790c63821951a1de93c66a
# Date 2013-11-04 10:10:04 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/ACPI/x2APIC: guard against out of range ACPI or APIC IDs
Other than for the legacy APIC, the x2APIC MADT entries have valid
ranges possibly extending beyond what our internal arrays can handle,
and hence we need to guard ourselves against corrupting memory here.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -97,7 +97,20 @@ acpi_parse_x2apic(struct acpi_subtable_h
acpi_table_print_madt_entry(header);
- /* Record local apic id only when enabled */
+ /* Record local apic id only when enabled and fitting. */
+ if (processor->local_apic_id >= MAX_APICS ||
+ processor->uid >= MAX_MADT_ENTRIES) {
+ printk("%sAPIC ID %#x and/or ACPI ID %#x beyond limit"
+ " - processor ignored\n",
+ processor->lapic_flags & ACPI_MADT_ENABLED ?
+ KERN_WARNING "WARNING: " : KERN_INFO,
+ processor->local_apic_id, processor->uid);
+ /*
+ * Must not return an error here, to prevent
+ * acpi_table_parse_entries() from terminating early.
+ */
+ return 0 /* -ENOSPC */;
+ }
if (processor->lapic_flags & ACPI_MADT_ENABLED) {
x86_acpiid_to_apicid[processor->uid] =
processor->local_apic_id;

View File

@ -1,27 +0,0 @@
References: bnc#842417
# Commit 227258983401b7e6091967ffaf22ad83f4ebaf6f
# Date 2013-11-04 14:29:24 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: make sure memory block is RAM before passing to the allocator
Memory blocks outside of the always visible 1:1 mapping range get
passed to the allocator separately (once enough other setup was done).
Skipping non-RAM regions, however, was forgotten in adc5afbf ("x86:
support up to 16Tb").
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1154,6 +1154,8 @@ void __init __start_xen(unsigned long mb
{
uint64_t s, e;
+ if ( boot_e820.map[i].type != E820_RAM )
+ continue;
s = (boot_e820.map[i].addr + mask) & ~mask;
e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
if ( PFN_DOWN(e) <= limit )

View File

@ -1,82 +0,0 @@
# Commit 9d89100ba8b7b02adb7c2e89ef7c81e734942e7c
# Date 2013-11-05 14:51:53 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: 32-bit IN result must be zero-extended to 64 bits
Just like for all other operations with 32-bit operand size.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
# Commit 1e521eddeb51a9f1bf0e4dd1d17efc873eafae41
# Date 2013-11-15 11:01:49 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: 32-bit IN result must be zero-extended to 64 bits (part 2)
Just spotted a counterpart of what commit 9d89100b (same title) dealt
with.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -221,13 +221,15 @@ int handle_mmio_with_translation(unsigne
return handle_mmio();
}
-int handle_pio(uint16_t port, int size, int dir)
+int handle_pio(uint16_t port, unsigned int size, int dir)
{
struct vcpu *curr = current;
struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
unsigned long data, reps = 1;
int rc;
+ ASSERT((size - 1) < 4 && size != 3);
+
if ( dir == IOREQ_WRITE )
data = guest_cpu_user_regs()->eax;
@@ -237,7 +239,12 @@ int handle_pio(uint16_t port, int size,
{
case X86EMUL_OKAY:
if ( dir == IOREQ_READ )
- memcpy(&guest_cpu_user_regs()->eax, &data, vio->io_size);
+ {
+ if ( size == 4 ) /* Needs zero extension. */
+ guest_cpu_user_regs()->rax = (uint32_t)data;
+ else
+ memcpy(&guest_cpu_user_regs()->rax, &data, size);
+ }
break;
case X86EMUL_RETRY:
if ( vio->io_state != HVMIO_awaiting_completion )
@@ -281,8 +288,10 @@ void hvm_io_assist(void)
(void)handle_mmio();
break;
case HVMIO_handle_pio_awaiting_completion:
- memcpy(&guest_cpu_user_regs()->eax,
- &p->data, vio->io_size);
+ if ( vio->io_size == 4 ) /* Needs zero extension. */
+ guest_cpu_user_regs()->rax = (uint32_t)p->data;
+ else
+ memcpy(&guest_cpu_user_regs()->rax, &p->data, vio->io_size);
break;
default:
break;
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -119,7 +119,7 @@ void send_timeoffset_req(unsigned long t
void send_invalidate_req(void);
int handle_mmio(void);
int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn);
-int handle_pio(uint16_t port, int size, int dir);
+int handle_pio(uint16_t port, unsigned int size, int dir);
void hvm_interrupt_post(struct vcpu *v, int vector, int type);
void hvm_io_assist(void);
void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,

View File

@ -1,38 +0,0 @@
# Commit 117f67350fd18b11ab09d628b4edea3364b09441
# Date 2013-11-06 10:21:09 +0100
# Author Nathan Studer <nate.studer@dornerworks.com>
# Committer Jan Beulich <jbeulich@suse.com>
call sched_destroy_domain before cpupool_rm_domain
The domain destruction code, removes a domain from its cpupool
before attempting to destroy its scheduler information. Since
the scheduler framework uses the domain's cpupool information
to decide on which scheduler ops to use, this results in the
the wrong scheduler's destroy domain function being called
when the cpupool scheduler and the initial scheduler are
different.
Correct this by destroying the domain's scheduling information
before removing it from the pool.
Signed-off-by: Nathan Studer <nate.studer@dornerworks.com>
Reviewed-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -720,10 +720,10 @@ static void complete_domain_destroy(stru
rangeset_domain_destroy(d);
- cpupool_rm_domain(d);
-
sched_destroy_domain(d);
+ cpupool_rm_domain(d);
+
/* Free page used by xen oprofile buffer. */
#ifdef CONFIG_XENOPROF
free_xenoprof_pages(d);

View File

@ -1,29 +0,0 @@
# Commit 48535f5798e3e237d9920a74c1ce3802958136c0
# Date 2013-11-08 11:07:14 +0100
# Author Kouya Shimura <kouya@jp.fujitsu.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/hvm: fix restart of RTC periodic timer with vpt_align=1
The commit 58afa7ef "x86/hvm: Run the RTC periodic timer on a
consistent time series" aligns the RTC periodic timer to the VM's boot time.
However, it's aligned later again to the system time in create_periodic_time()
with vpt_align=1. The next tick might be skipped.
Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -130,7 +130,10 @@ static void rtc_timer_update(RTCState *s
s->pt_code = period_code;
period = 1 << (period_code - 1); /* period in 32 Khz cycles */
period = DIV_ROUND(period * 1000000000ULL, 32768); /* in ns */
- delta = period - ((NOW() - s->start_time) % period);
+ if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VPT_ALIGN] )
+ delta = 0;
+ else
+ delta = period - ((NOW() - s->start_time) % period);
create_periodic_time(v, &s->pt, delta, period,
RTC_IRQ, NULL, s);
}

View File

@ -1,107 +0,0 @@
References: bnc#833483
# Commit c1f2dfe8f6a559bc28935f24e31bb33d17d9713d
# Date 2013-11-08 11:08:32 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/EFI: make trampoline allocation more flexible
Certain UEFI implementations reserve all memory below 1Mb at boot time,
making it impossible to properly allocate the chunk necessary for the
trampoline. Fall back to simply grabbing a chunk from EfiBootServices*
regions immediately prior to calling ExitBootServices().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -746,6 +746,22 @@ static void __init relocate_image(unsign
extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
extern const s32 __trampoline_seg_start[], __trampoline_seg_stop[];
+static void __init relocate_trampoline(unsigned long phys)
+{
+ const s32 *trampoline_ptr;
+
+ trampoline_phys = phys;
+ /* Apply relocations to trampoline. */
+ for ( trampoline_ptr = __trampoline_rel_start;
+ trampoline_ptr < __trampoline_rel_stop;
+ ++trampoline_ptr )
+ *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys;
+ for ( trampoline_ptr = __trampoline_seg_start;
+ trampoline_ptr < __trampoline_seg_stop;
+ ++trampoline_ptr )
+ *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) = phys >> 4;
+}
+
void EFIAPI __init __attribute__((__noreturn__))
efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
{
@@ -765,7 +781,6 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info;
EFI_FILE_HANDLE dir_handle;
union string section = { NULL }, name;
- const s32 *trampoline_ptr;
struct e820entry *e;
u64 efer;
bool_t base_video = 0;
@@ -1268,23 +1283,13 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
cfg.size = trampoline_end - trampoline_start;
status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData,
PFN_UP(cfg.size), &cfg.addr);
- if ( EFI_ERROR(status) )
+ if ( status == EFI_SUCCESS )
+ relocate_trampoline(cfg.addr);
+ else
{
cfg.addr = 0;
- blexit(L"No memory for trampoline\r\n");
+ PrintStr(L"Trampoline space cannot be allocated; will try fallback.\r\n");
}
- trampoline_phys = cfg.addr;
- /* Apply relocations to trampoline. */
- for ( trampoline_ptr = __trampoline_rel_start;
- trampoline_ptr < __trampoline_rel_stop;
- ++trampoline_ptr )
- *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) +=
- trampoline_phys;
- for ( trampoline_ptr = __trampoline_seg_start;
- trampoline_ptr < __trampoline_seg_stop;
- ++trampoline_ptr )
- *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) =
- trampoline_phys >> 4;
/* Initialise L2 identity-map and boot-map page table entries (16MB). */
for ( i = 0; i < 8; ++i )
@@ -1400,10 +1405,14 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
type = E820_RESERVED;
break;
case EfiConventionalMemory:
- case EfiLoaderCode:
- case EfiLoaderData:
case EfiBootServicesCode:
case EfiBootServicesData:
+ if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
+ len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
+ cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
+ /* fall through */
+ case EfiLoaderCode:
+ case EfiLoaderData:
if ( desc->Attribute & EFI_MEMORY_WB )
type = E820_RAM;
else
@@ -1431,6 +1440,12 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
++e820nr;
}
}
+ if ( !trampoline_phys )
+ {
+ if ( !cfg.addr )
+ blexit(L"No memory for trampoline");
+ relocate_trampoline(cfg.addr);
+ }
status = efi_bs->ExitBootServices(ImageHandle, map_key);
if ( EFI_ERROR(status) )

View File

@ -1,62 +0,0 @@
References: bnc#849665 CVE-2013-4551 XSA-75
# Commit 4e87bc5b03e05123ba5c888f77969140c8ebd1bf
# Date 2013-11-11 09:15:04 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
nested VMX: VMLANUCH/VMRESUME emulation must check permission first thing
Otherwise uninitialized data may be used, leading to crashes.
This is CVE-2013-4551 / XSA-75.
Reported-and-tested-by: Jeff Zimmerman <Jeff_Zimmerman@McAfee.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-and-tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1508,15 +1508,10 @@ static void clear_vvmcs_launched(struct
}
}
-int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs)
+static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs)
{
struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
- int rc;
-
- rc = vmx_inst_check_privilege(regs, 0);
- if ( rc != X86EMUL_OKAY )
- return rc;
/* check VMCS is valid and IO BITMAP is set */
if ( (nvcpu->nv_vvmcxaddr != VMCX_EADDR) &&
@@ -1535,6 +1530,10 @@ int nvmx_handle_vmresume(struct cpu_user
struct vcpu *v = current;
struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+ int rc = vmx_inst_check_privilege(regs, 0);
+
+ if ( rc != X86EMUL_OKAY )
+ return rc;
if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR )
{
@@ -1554,10 +1553,13 @@ int nvmx_handle_vmresume(struct cpu_user
int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
{
bool_t launched;
- int rc;
struct vcpu *v = current;
struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+ int rc = vmx_inst_check_privilege(regs, 0);
+
+ if ( rc != X86EMUL_OKAY )
+ return rc;
if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR )
{

View File

@ -1,101 +0,0 @@
References: bnc#842417
# Commit 178fd279dc138243b514b4ecd48509e4bf5d1ede
# Date 2013-11-11 11:01:04 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/idle: reduce contention on ACPI register accesses
Other than when they're located in I/O port space, accessing them when
in MMIO space (currently) implies usage of some sort of global lock: In
-unstable this would be due to the use of vmap(), is older trees the
necessary locking was introduced by 2ee9cbf9 ("ACPI: fix
acpi_os_map_memory()"). This contention was observed to result in Dom0
kernel soft lockups during the loading of the ACPI processor driver
there on systems with very many CPU cores.
There are a couple of things being done for this:
- re-order elements of an if() condition so that the register access
only happens when we really need it
- turn off arbitration disabling only when the first CPU leaves C3
(paralleling how arbitration disabling gets turned on)
- only set the (global) bus master reload flag once (when the first
target CPU gets processed)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -439,8 +439,8 @@ static void acpi_processor_idle(void)
(next_state = cpuidle_current_governor->select(power)) > 0 )
{
cx = &power->states[next_state];
- if ( power->flags.bm_check && acpi_idle_bm_check()
- && cx->type == ACPI_STATE_C3 )
+ if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check &&
+ acpi_idle_bm_check() )
cx = power->safe_state;
if ( cx->idx > max_cstate )
cx = &power->states[max_cstate];
@@ -563,8 +563,8 @@ static void acpi_processor_idle(void)
{
/* Enable bus master arbitration */
spin_lock(&c3_cpu_status.lock);
- acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
- c3_cpu_status.count--;
+ if ( c3_cpu_status.count-- == num_online_cpus() )
+ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
spin_unlock(&c3_cpu_status.lock);
}
@@ -821,12 +821,10 @@ static int check_cx(struct acpi_processo
return -EINVAL;
/* All the logic here assumes flags.bm_check is same across all CPUs */
- if ( bm_check_flag == -1 )
+ if ( bm_check_flag < 0 )
{
/* Determine whether bm_check is needed based on CPU */
acpi_processor_power_init_bm_check(&(power->flags));
- bm_check_flag = power->flags.bm_check;
- bm_control_flag = power->flags.bm_control;
}
else
{
@@ -853,14 +851,13 @@ static int check_cx(struct acpi_processo
}
}
/*
- * On older chipsets, BM_RLD needs to be set
- * in order for Bus Master activity to wake the
- * system from C3. Newer chipsets handle DMA
- * during C3 automatically and BM_RLD is a NOP.
- * In either case, the proper way to
- * handle BM_RLD is to set it and leave it set.
+ * On older chipsets, BM_RLD needs to be set in order for Bus
+ * Master activity to wake the system from C3, hence
+ * acpi_set_register() is always being called once below. Newer
+ * chipsets handle DMA during C3 automatically and BM_RLD is a
+ * NOP. In either case, the proper way to handle BM_RLD is to
+ * set it and leave it set.
*/
- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
}
else
{
@@ -875,7 +872,13 @@ static int check_cx(struct acpi_processo
" for C3 to be enabled on SMP systems\n"));
return -EINVAL;
}
- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+ }
+
+ if ( bm_check_flag < 0 )
+ {
+ bm_check_flag = power->flags.bm_check;
+ bm_control_flag = power->flags.bm_control;
+ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, bm_check_flag);
}
break;

View File

@ -1,75 +0,0 @@
# Commit 67348c3ac700b8bc9147638c719c3035c5ef20f5
# Date 2013-11-12 10:54:28 +0100
# Author Dario Faggioli <dario.faggioli@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
numa-sched: leave node-affinity alone if not in "auto" mode
If the domain's NUMA node-affinity is being specified by the
user/toolstack (instead of being automatically computed by Xen),
we really should stick to that. This means domain_update_node_affinity()
is wrong when it filters out some stuff from there even in "!auto"
mode.
This commit fixes that. Of course, this does not mean node-affinity
is always honoured (e.g., a vcpu won't run on a pcpu of a different
cpupool) but the necessary logic for taking into account all the
possible situations lives in the scheduler code, where it belongs.
What could happen without this change is that, under certain
circumstances, the node-affinity of a domain may change when the
user modifies the vcpu-affinity of the domain's vcpus. This, even
if probably not a real bug, is at least something the user does
not expect, so let's avoid it.
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -345,7 +345,6 @@ void domain_update_node_affinity(struct
cpumask_var_t cpumask;
cpumask_var_t online_affinity;
const cpumask_t *online;
- nodemask_t nodemask = NODE_MASK_NONE;
struct vcpu *v;
unsigned int node;
@@ -367,28 +366,19 @@ void domain_update_node_affinity(struct
cpumask_or(cpumask, cpumask, online_affinity);
}
+ /*
+ * If d->auto_node_affinity is true, the domain's node-affinity mask
+ * (d->node_affinity) is automaically computed from all the domain's
+ * vcpus' vcpu-affinity masks (the union of which we have just built
+ * above in cpumask). OTOH, if d->auto_node_affinity is false, we
+ * must leave the node-affinity of the domain alone.
+ */
if ( d->auto_node_affinity )
{
- /* Node-affinity is automaically computed from all vcpu-affinities */
+ nodes_clear(d->node_affinity);
for_each_online_node ( node )
if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
- node_set(node, nodemask);
-
- d->node_affinity = nodemask;
- }
- else
- {
- /* Node-affinity is provided by someone else, just filter out cpus
- * that are either offline or not in the affinity of any vcpus. */
- nodemask = d->node_affinity;
- for_each_node_mask ( node, d->node_affinity )
- if ( !cpumask_intersects(&node_to_cpumask(node), cpumask) )
- node_clear(node, nodemask);//d->node_affinity);
-
- /* Avoid loosing track of node-affinity because of a bad
- * vcpu-affinity has been specified. */
- if ( !nodes_empty(nodemask) )
- d->node_affinity = nodemask;
+ node_set(node, d->node_affinity);
}
sched_set_node_affinity(d, &d->node_affinity);

View File

@ -1,132 +0,0 @@
# Commit b1e87805bf37b446dade93a7eb922bb7d1269756
# Date 2013-11-12 11:51:15 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
nested SVM: adjust guest handling of structure mappings
For one, nestedsvm_vmcb_map() error checking must not consist of using
assertions: Global (permanent) mappings can fail, and hence failure
needs to be dealt with properly. And non-global (transient) mappings
can't fail anyway.
And then the I/O port access bitmap handling was broken: It checked
only to first of the accessed ports rather than each of them.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Christoph Egger <chegger@amazon.de>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -342,7 +342,7 @@ static int nsvm_vmrun_permissionmap(stru
unsigned int i;
enum hvm_copy_result ret;
unsigned long *ns_viomap;
- bool_t ioport_80, ioport_ed;
+ bool_t ioport_80 = 1, ioport_ed = 1;
ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
@@ -360,10 +360,12 @@ static int nsvm_vmrun_permissionmap(stru
svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT, 0);
- ASSERT(ns_viomap != NULL);
- ioport_80 = test_bit(0x80, ns_viomap);
- ioport_ed = test_bit(0xed, ns_viomap);
- hvm_unmap_guest_frame(ns_viomap, 0);
+ if ( ns_viomap )
+ {
+ ioport_80 = test_bit(0x80, ns_viomap);
+ ioport_ed = test_bit(0xed, ns_viomap);
+ hvm_unmap_guest_frame(ns_viomap, 0);
+ }
svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
@@ -866,40 +868,45 @@ nsvm_vmcb_guest_intercepts_msr(unsigned
static int
nsvm_vmcb_guest_intercepts_ioio(paddr_t iopm_pa, uint64_t exitinfo1)
{
- unsigned long iopm_gfn = iopm_pa >> PAGE_SHIFT;
- unsigned long *io_bitmap = NULL;
+ unsigned long gfn = iopm_pa >> PAGE_SHIFT;
+ unsigned long *io_bitmap;
ioio_info_t ioinfo;
uint16_t port;
+ unsigned int size;
bool_t enabled;
- unsigned long gfn = 0; /* gcc ... */
ioinfo.bytes = exitinfo1;
port = ioinfo.fields.port;
+ size = ioinfo.fields.sz32 ? 4 : ioinfo.fields.sz16 ? 2 : 1;
- switch (port) {
- case 0 ... 32767: /* first 4KB page */
- gfn = iopm_gfn;
+ switch ( port )
+ {
+ case 0 ... 8 * PAGE_SIZE - 1: /* first 4KB page */
break;
- case 32768 ... 65535: /* second 4KB page */
- port -= 32768;
- gfn = iopm_gfn + 1;
+ case 8 * PAGE_SIZE ... 2 * 8 * PAGE_SIZE - 1: /* second 4KB page */
+ port -= 8 * PAGE_SIZE;
+ ++gfn;
break;
default:
BUG();
break;
}
- io_bitmap = hvm_map_guest_frame_ro(gfn, 0);
- if (io_bitmap == NULL) {
- gdprintk(XENLOG_ERR,
- "IOIO intercept: mapping of permission map failed\n");
- return NESTEDHVM_VMEXIT_ERROR;
+ for ( io_bitmap = hvm_map_guest_frame_ro(gfn, 0); ; )
+ {
+ enabled = io_bitmap && test_bit(port, io_bitmap);
+ if ( !enabled || !--size )
+ break;
+ if ( unlikely(++port == 8 * PAGE_SIZE) )
+ {
+ hvm_unmap_guest_frame(io_bitmap, 0);
+ io_bitmap = hvm_map_guest_frame_ro(++gfn, 0);
+ port -= 8 * PAGE_SIZE;
+ }
}
-
- enabled = test_bit(port, io_bitmap);
hvm_unmap_guest_frame(io_bitmap, 0);
- if (!enabled)
+ if ( !enabled )
return NESTEDHVM_VMEXIT_HOST;
return NESTEDHVM_VMEXIT_INJECT;
@@ -966,8 +973,8 @@ nsvm_vmcb_guest_intercepts_exitcode(stru
switch (exitcode) {
case VMEXIT_MSR:
ASSERT(regs != NULL);
- nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr);
- ASSERT(nv->nv_vvmcx != NULL);
+ if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
+ break;
ns_vmcb = nv->nv_vvmcx;
vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
regs->ecx, ns_vmcb->exitinfo1 != 0);
@@ -975,8 +982,8 @@ nsvm_vmcb_guest_intercepts_exitcode(stru
return 0;
break;
case VMEXIT_IOIO:
- nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr);
- ASSERT(nv->nv_vvmcx != NULL);
+ if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
+ break;
ns_vmcb = nv->nv_vvmcx;
vmexits = nsvm_vmcb_guest_intercepts_ioio(ns_vmcb->_iopm_base_pa,
ns_vmcb->exitinfo1);

View File

@ -1,105 +0,0 @@
References: bnc#846849
# Commit 58929248461ecadce13e92eb5a5d9ef718a7c88e
# Date 2013-11-12 11:52:19 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VMX: don't crash processing 'd' debug key
There's a window during scheduling where "current" and the active VMCS
may disagree: The former gets set much earlier than the latter. Since
both vmx_vmcs_enter() and vmx_vmcs_exit() immediately return when the
subject vCPU is "current", accessing VMCS fields would, depending on
whether there is any currently active VMCS, either read wrong data, or
cause a crash.
Going forward we might want to consider reducing the window during
which vmx_vmcs_enter() might fail (e.g. doing a plain __vmptrld() when
v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) but arch_vmx->active_cpu
== -1), but that would add complexities (acquiring and - more
importantly - properly dropping v->arch.hvm_vmx.vmcs_lock) that don't
look worthwhile adding right now.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -591,16 +591,16 @@ struct foreign_vmcs {
};
static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
-void vmx_vmcs_enter(struct vcpu *v)
+bool_t vmx_vmcs_try_enter(struct vcpu *v)
{
struct foreign_vmcs *fv;
/*
* NB. We must *always* run an HVM VCPU on its own VMCS, except for
- * vmx_vmcs_enter/exit critical regions.
+ * vmx_vmcs_enter/exit and scheduling tail critical regions.
*/
if ( likely(v == current) )
- return;
+ return v->arch.hvm_vmx.vmcs == this_cpu(current_vmcs);
fv = &this_cpu(foreign_vmcs);
@@ -623,6 +623,15 @@ void vmx_vmcs_enter(struct vcpu *v)
}
fv->count++;
+
+ return 1;
+}
+
+void vmx_vmcs_enter(struct vcpu *v)
+{
+ bool_t okay = vmx_vmcs_try_enter(v);
+
+ ASSERT(okay);
}
void vmx_vmcs_exit(struct vcpu *v)
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -669,7 +669,27 @@ void vmx_get_segment_register(struct vcp
{
uint32_t attr = 0;
- vmx_vmcs_enter(v);
+ /*
+ * We may get here in the context of dump_execstate(), which may have
+ * interrupted context switching between setting "current" and
+ * vmx_do_resume() reaching the end of vmx_load_vmcs(). That would make
+ * all the VMREADs below fail if we don't bail right away.
+ */
+ if ( unlikely(!vmx_vmcs_try_enter(v)) )
+ {
+ static bool_t warned;
+
+ if ( !warned )
+ {
+ warned = 1;
+ printk(XENLOG_WARNING "Segment register inaccessible for d%dv%d\n"
+ "(If you see this outside of debugging activity,"
+ " please report to xen-devel@lists.xenproject.org)\n",
+ v->domain->domain_id, v->vcpu_id);
+ }
+ memset(reg, 0, sizeof(*reg));
+ return;
+ }
switch ( seg )
{
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -144,6 +144,7 @@ struct arch_vmx_struct {
int vmx_create_vmcs(struct vcpu *v);
void vmx_destroy_vmcs(struct vcpu *v);
void vmx_vmcs_enter(struct vcpu *v);
+bool_t __must_check vmx_vmcs_try_enter(struct vcpu *v);
void vmx_vmcs_exit(struct vcpu *v);
#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004

View File

@ -1,84 +0,0 @@
# Commit 79233938ab2a8f273fd5dcdbf8e8381b9eb3a461
# Date 2013-11-12 16:28:47 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: eliminate has_arch_mmios()
... as being generally insufficient: Either has_arch_pdevs() or
cache_flush_permitted() should be used (in particular, it is
insufficient to consider MMIO ranges alone - I/O port ranges have the
same requirements if available to a guest).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -40,6 +40,7 @@
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
+#include <asm/iocap.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -1792,7 +1793,7 @@ int hvm_set_cr0(unsigned long value)
}
}
- if ( has_arch_mmios(v->domain) )
+ if ( cache_flush_permitted(v->domain) )
{
if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
{
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -40,6 +40,7 @@
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/i387.h>
+#include <asm/iocap.h>
#include <asm/spinlock.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
@@ -1973,7 +1974,7 @@ static void wbinvd_ipi(void *info)
static void svm_wbinvd_intercept(void)
{
- if ( has_arch_mmios(current->domain) )
+ if ( cache_flush_permitted(current->domain) )
on_each_cpu(wbinvd_ipi, NULL, 1);
}
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -28,6 +28,7 @@
#include <xen/perfc.h>
#include <asm/current.h>
#include <asm/io.h>
+#include <asm/iocap.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -2173,10 +2174,7 @@ static void wbinvd_ipi(void *info)
static void vmx_wbinvd_intercept(void)
{
- if ( !has_arch_mmios(current->domain) )
- return;
-
- if ( iommu_snoop )
+ if ( !cache_flush_permitted(current->domain) || iommu_snoop )
return;
if ( cpu_has_wbinvd_exiting )
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -316,7 +316,6 @@ struct arch_domain
} __cacheline_aligned;
#define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list))
-#define has_arch_mmios(d) (!rangeset_is_empty((d)->iomem_caps))
#define gdt_ldt_pt_idx(v) \
((v)->vcpu_id >> (PAGETABLE_ORDER - GDT_LDT_VCPU_SHIFT))

View File

@ -1,63 +0,0 @@
# Commit 1320b8100c2ed390fc640557a050f5c700d8338d
# Date 2013-11-15 17:38:10 +0100
# Author Nate Studer <nate.studer@dornerworks.com>
# Committer Jan Beulich <jbeulich@suse.com>
credit: Update other parameters when setting tslice_ms
Add a utility function to update the rest of the timeslice
accounting fields when updating the timeslice of the
credit scheduler, so that capped CPUs behave correctly.
Before this patch changing the timeslice to a value higher
than the default would result in a domain not utilizing
its full capacity and changing the timeslice to a value
lower than the default would result in a domain exceeding
its capacity.
Signed-off-by: Nate Studer <nate.studer@dornerworks.com>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1073,6 +1073,17 @@ csched_dom_cntl(
return 0;
}
+static inline void
+__csched_set_tslice(struct csched_private *prv, unsigned timeslice)
+{
+ prv->tslice_ms = timeslice;
+ prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE;
+ if ( prv->tslice_ms < prv->ticks_per_tslice )
+ prv->ticks_per_tslice = 1;
+ prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice;
+ prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms;
+}
+
static int
csched_sys_cntl(const struct scheduler *ops,
struct xen_sysctl_scheduler_op *sc)
@@ -1091,7 +1102,7 @@ csched_sys_cntl(const struct scheduler *
|| params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN))
|| MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) )
goto out;
- prv->tslice_ms = params->tslice_ms;
+ __csched_set_tslice(prv, params->tslice_ms);
prv->ratelimit_us = params->ratelimit_us;
/* FALLTHRU */
case XEN_SYSCTL_SCHEDOP_getinfo:
@@ -1903,12 +1914,7 @@ csched_init(struct scheduler *ops)
sched_credit_tslice_ms = CSCHED_DEFAULT_TSLICE_MS;
}
- prv->tslice_ms = sched_credit_tslice_ms;
- prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE;
- if ( prv->tslice_ms < prv->ticks_per_tslice )
- prv->ticks_per_tslice = 1;
- prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice;
- prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms;
+ __csched_set_tslice(prv, sched_credit_tslice_ms);
if ( MICROSECS(sched_ratelimit_us) > MILLISECS(sched_credit_tslice_ms) )
{

View File

@ -1,21 +0,0 @@
# Commit 6757efe1bf50ac7ff68fa4dd7d9333529f70ae9a
# Date 2013-11-15 17:43:28 +0100
# Author Dario Faggioli <dario.faggioli@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
fix leaking of v->cpu_affinity_saved on domain destruction
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -726,6 +726,7 @@ static void complete_domain_destroy(stru
{
free_cpumask_var(v->cpu_affinity);
free_cpumask_var(v->cpu_affinity_tmp);
+ free_cpumask_var(v->cpu_affinity_saved);
free_cpumask_var(v->vcpu_dirty_cpumask);
free_vcpu_struct(v);
}

View File

@ -1,115 +0,0 @@
# Commit e02b14e531a95399fc9d8647ec3cc6f310a7d455
# Date 2013-11-18 09:39:01 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
nested VMX: don't ignore mapping errors
Rather than ignoring failures to map the virtual VMCS as well as MSR or
I/O port bitmaps, convert those into failures of the respective
instructions (avoiding to dereference NULL pointers). Ultimately such
failures should be handled transparently (by using transient mappings
when they actually need to be accessed, just like nested SVM does).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Eddie Dong <eddie.dong@intel.com>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -746,7 +746,7 @@ static void __clear_current_vvmcs(struct
__vmpclear(virt_to_maddr(nvcpu->nv_n2vmcx));
}
-static void __map_msr_bitmap(struct vcpu *v)
+static bool_t __must_check _map_msr_bitmap(struct vcpu *v)
{
struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
unsigned long gpa;
@@ -755,9 +755,11 @@ static void __map_msr_bitmap(struct vcpu
hvm_unmap_guest_frame(nvmx->msrbitmap, 1);
gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, MSR_BITMAP);
nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
+
+ return nvmx->msrbitmap != NULL;
}
-static void __map_io_bitmap(struct vcpu *v, u64 vmcs_reg)
+static bool_t __must_check _map_io_bitmap(struct vcpu *v, u64 vmcs_reg)
{
struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
unsigned long gpa;
@@ -768,12 +770,14 @@ static void __map_io_bitmap(struct vcpu
hvm_unmap_guest_frame(nvmx->iobitmap[index], 1);
gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
+
+ return nvmx->iobitmap[index] != NULL;
}
-static inline void map_io_bitmap_all(struct vcpu *v)
+static inline bool_t __must_check map_io_bitmap_all(struct vcpu *v)
{
- __map_io_bitmap (v, IO_BITMAP_A);
- __map_io_bitmap (v, IO_BITMAP_B);
+ return _map_io_bitmap(v, IO_BITMAP_A) &&
+ _map_io_bitmap(v, IO_BITMAP_B);
}
static void nvmx_purge_vvmcs(struct vcpu *v)
@@ -1609,9 +1613,15 @@ int nvmx_handle_vmptrld(struct cpu_user_
if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
{
nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, 1);
- nvcpu->nv_vvmcxaddr = gpa;
- map_io_bitmap_all (v);
- __map_msr_bitmap(v);
+ if ( nvcpu->nv_vvmcx )
+ nvcpu->nv_vvmcxaddr = gpa;
+ if ( !nvcpu->nv_vvmcx ||
+ !map_io_bitmap_all(v) ||
+ !_map_msr_bitmap(v) )
+ {
+ vmreturn(regs, VMFAIL_VALID);
+ goto out;
+ }
}
if ( cpu_has_vmx_vmcs_shadowing )
@@ -1723,6 +1733,7 @@ int nvmx_handle_vmwrite(struct cpu_user_
struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
unsigned long operand;
u64 vmcs_encoding;
+ bool_t okay = 1;
if ( decode_vmx_inst(regs, &decode, &operand, 0)
!= X86EMUL_OKAY )
@@ -1731,16 +1742,21 @@ int nvmx_handle_vmwrite(struct cpu_user_
vmcs_encoding = reg_read(regs, decode.reg2);
__set_vvmcs(nvcpu->nv_vvmcx, vmcs_encoding, operand);
- if ( vmcs_encoding == IO_BITMAP_A || vmcs_encoding == IO_BITMAP_A_HIGH )
- __map_io_bitmap (v, IO_BITMAP_A);
- else if ( vmcs_encoding == IO_BITMAP_B ||
- vmcs_encoding == IO_BITMAP_B_HIGH )
- __map_io_bitmap (v, IO_BITMAP_B);
+ switch ( vmcs_encoding )
+ {
+ case IO_BITMAP_A: case IO_BITMAP_A_HIGH:
+ okay = _map_io_bitmap(v, IO_BITMAP_A);
+ break;
+ case IO_BITMAP_B: case IO_BITMAP_B_HIGH:
+ okay = _map_io_bitmap(v, IO_BITMAP_B);
+ break;
+ case MSR_BITMAP: case MSR_BITMAP_HIGH:
+ okay = _map_msr_bitmap(v);
+ break;
+ }
- if ( vmcs_encoding == MSR_BITMAP || vmcs_encoding == MSR_BITMAP_HIGH )
- __map_msr_bitmap(v);
+ vmreturn(regs, okay ? VMSUCCEED : VMFAIL_VALID);
- vmreturn(regs, VMSUCCEED);
return X86EMUL_OKAY;
}

View File

@ -1,32 +0,0 @@
References: bnc#851386 CVE-2013-6375 XSA-78
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1384779355 -3600
# Node ID 81fec8e36840041ca5779a4c4f2eed98180eda2e
# Parent de9b11c80e2d3bd795d6329e0979c4734c3b4f96
VT-d: fix TLB flushing in dma_pte_clear_one()
The third parameter of __intel_iommu_iotlb_flush() is to indicate
whether the to be flushed entry was a present one. A few lines before,
we bailed if !dma_pte_present(*pte), so there's no need to check the
flag here again - we can simply always pass TRUE here.
This is CVE-2013-6375 / XSA-78.
Suggested-by: Cheng Yueqiang <yqcheng.2008@phdis.smu.edu.sg>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -646,7 +646,7 @@ static void dma_pte_clear_one(struct dom
iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
if ( !this_cpu(iommu_dont_flush_iotlb) )
- __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K , 0, 1);
+ __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1);
unmap_vtd_domain_page(page);

View File

@ -1,40 +0,0 @@
References: bnc#848014
# Commit a5db2c7aab7a638d84f22ac8fe5089d81175438b
# Date 2013-11-18 13:57:20 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: consider modules when cutting off memory
The code in question runs after module ranges got already removed from
the E820 table, so when determining the new maximum page/PDX we need to
explicitly take them into account.
Furthermore we need to round up the ending addresses here, in order to
fully cover eventual partial trailing pages.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1012,9 +1012,17 @@ void __init __start_xen(unsigned long mb
ASSERT(j);
}
map_e = boot_e820.map[j].addr + boot_e820.map[j].size;
- if ( (map_e >> PAGE_SHIFT) < max_page )
+ for ( j = 0; j < mbi->mods_count; ++j )
{
- max_page = map_e >> PAGE_SHIFT;
+ uint64_t end = pfn_to_paddr(mod[j].mod_start) +
+ mod[j].mod_end;
+
+ if ( map_e < end )
+ map_e = end;
+ }
+ if ( PFN_UP(map_e) < max_page )
+ {
+ max_page = PFN_UP(map_e);
max_pdx = pfn_to_pdx(max_page - 1) + 1;
}
printk(XENLOG_WARNING "Ignoring inaccessible memory range"

View File

@ -1,30 +0,0 @@
# Commit e95dc6ba69daef6468b3ae5912710727244d6e2f
# Date 2013-11-22 14:47:24 +0100
# Author Tomasz Wroblewski <tomasz.wroblewski@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/hvm: reset TSC to 0 after domain resume from S3
Host S3 implicitly resets the host TSC to 0, but the tsc offset for hvm
domains is not recalculated when they resume, causing it to go into
negative values. In Linux guest using tsc clocksource, this results in
a hang after wrap back to positive values since the tsc clocksource
implementation expects it reset.
Signed-off-by: Tomasz Wroblewski <tomasz.wroblewski@citrix.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3607,7 +3607,13 @@ static void hvm_s3_suspend(struct domain
static void hvm_s3_resume(struct domain *d)
{
if ( test_and_clear_bool(d->arch.hvm_domain.is_s3_suspended) )
+ {
+ struct vcpu *v;
+
+ for_each_vcpu( d, v )
+ hvm_set_guest_tsc(v, 0);
domain_unpause(d);
+ }
}
static int hvmop_set_isa_irq_level(

View File

@ -1,60 +0,0 @@
# Commit 2a16fcd5ba0244fef764886211452acc69c0ed00
# Date 2013-11-22 14:48:12 +0100
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/crash: disable the watchdog NMIs on the crashing cpu
nmi_shootdown_cpus() is called during a crash to park all the other
CPUs. This changes the NMI trap handlers which means there's no point
in having the watchdog still running.
This also disables the watchdog before executing any crash kexec image
and prevents the image from receiving unexpected NMIs.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
PVOps Linux as a kexec image shoots itself in the foot otherwise.
On a Core2 system, Linux declares a firmware bug and tries to invert some bits
in the performance counter register. It ends up setting the number of retired
instructions to generate another NMI to fewer instructions than the NMI
interrupt path itself, and ceases to make any useful progress.
The call to disable_lapic_nmi_watchdog() must be this late into the kexec path
to be sure that this cpu is the one which will execute the kexec image.
Otherwise there are race conditions where the NMIs might be disabled on the
wrong cpu, resulting in the kexec image still receiving NMIs.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -117,6 +117,7 @@ static void nmi_shootdown_cpus(void)
unsigned long msecs;
int i, cpu = smp_processor_id();
+ disable_lapic_nmi_watchdog();
local_irq_disable();
crashing_cpu = cpu;
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -165,7 +165,7 @@ static void nmi_timer_fn(void *unused)
set_timer(&this_cpu(nmi_timer), NOW() + MILLISECS(1000));
}
-static void disable_lapic_nmi_watchdog(void)
+void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
return;
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -200,6 +200,7 @@ extern void smp_local_timer_interrupt (s
extern void setup_boot_APIC_clock (void);
extern void setup_secondary_APIC_clock (void);
extern void setup_apic_nmi_watchdog (void);
+extern void disable_lapic_nmi_watchdog(void);
extern int reserve_lapic_nmi(void);
extern void release_lapic_nmi(void);
extern void self_nmi(void);

View File

@ -1,89 +0,0 @@
# Commit 7d8b5dd98463524686bdee8b973b53c00c232122
# Date 2013-11-25 11:19:04 +0100
# Author Liu Jinsong <jinsong.liu@intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/xsave: fix nonlazy state handling
Nonlazy xstates should be xsaved each time when vcpu_save_fpu.
Operation to nonlazy xstates will not trigger #NM exception, so
whenever vcpu scheduled in it got restored and whenever scheduled
out it should get saved.
Currently this bug affects AMD LWP feature, and later Intel MPX
feature. With the bugfix both LWP and MPX will work fine.
Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
Furthermore, during restore we also need to set nonlazy_xstate_used
according to the incoming accumulated XCR0.
Also adjust the changes to i387.c such that there won't be a pointless
clts()/stts() pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1146,6 +1146,8 @@ long arch_do_domctl(
{
v->arch.xcr0 = _xcr0;
v->arch.xcr0_accum = _xcr0_accum;
+ if ( _xcr0_accum & XSTATE_NONLAZY )
+ v->arch.nonlazy_xstate_used = 1;
memcpy(v->arch.xsave_area, _xsave_area,
evc->size - 2 * sizeof(uint64_t));
}
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1073,6 +1073,8 @@ static int hvm_load_cpu_xsave_states(str
v->arch.xcr0 = ctxt->xcr0;
v->arch.xcr0_accum = ctxt->xcr0_accum;
+ if ( ctxt->xcr0_accum & XSTATE_NONLAZY )
+ v->arch.nonlazy_xstate_used = 1;
memcpy(v->arch.xsave_area, &ctxt->save_area,
desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area));
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -120,11 +120,22 @@ static inline void fpu_frstor(struct vcp
/*******************************/
/* FPU Save Functions */
/*******************************/
+
+static inline uint64_t vcpu_xsave_mask(const struct vcpu *v)
+{
+ if ( v->fpu_dirtied )
+ return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY;
+
+ return v->arch.nonlazy_xstate_used ? XSTATE_NONLAZY : 0;
+}
+
/* Save x87 extended state */
static inline void fpu_xsave(struct vcpu *v)
{
bool_t ok;
+ uint64_t mask = vcpu_xsave_mask(v);
+ ASSERT(mask);
ASSERT(v->arch.xsave_area);
/*
* XCR0 normally represents what guest OS set. In case of Xen itself,
@@ -132,7 +143,7 @@ static inline void fpu_xsave(struct vcpu
*/
ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
ASSERT(ok);
- xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY);
+ xsave(v, mask);
ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
ASSERT(ok);
}
@@ -263,7 +274,7 @@ void vcpu_restore_fpu_lazy(struct vcpu *
*/
void vcpu_save_fpu(struct vcpu *v)
{
- if ( !v->fpu_dirtied )
+ if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used )
return;
ASSERT(!is_idle_vcpu(v));

View File

@ -1,43 +0,0 @@
References: bnc#849667 CVE-2013-4553 XSA-74
x86: restrict XEN_DOMCTL_getmemlist
Coverity ID 1055652
(See the code comment.)
This is CVE-2013-4553 / XSA-74.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -329,6 +329,26 @@ long arch_do_domctl(
break;
}
+ /*
+ * XSA-74: This sub-hypercall is broken in several ways:
+ * - lock order inversion (p2m locks inside page_alloc_lock)
+ * - no preemption on huge max_pfns input
+ * - not (re-)checking d->is_dying with page_alloc_lock held
+ * - not honoring start_pfn input (which libxc also doesn't set)
+ * Additionally it is rather useless, as the result is stale by the
+ * time the caller gets to look at it.
+ * As it only has a single, non-production consumer (xen-mceinj),
+ * rather than trying to fix it we restrict it for the time being.
+ */
+ if ( /* No nested locks inside copy_to_guest_offset(). */
+ paging_mode_external(current->domain) ||
+ /* Arbitrary limit capping processing time. */
+ max_pfns > GB(4) / PAGE_SIZE )
+ {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
spin_lock(&d->page_alloc_lock);
ret = i = 0;

View File

@ -1,22 +0,0 @@
References: bnc#849668 CVE-2013-4554 XSA-76
x86/HVM: only allow ring 0 guest code to make hypercalls
Anything else would allow for privilege escalation.
This is CVE-2013-4554 / XSA-76.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3345,7 +3345,7 @@ int hvm_do_hypercall(struct cpu_user_reg
case 4:
case 2:
hvm_get_segment_register(curr, x86_seg_ss, &sreg);
- if ( unlikely(sreg.attr.fields.dpl == 3) )
+ if ( unlikely(sreg.attr.fields.dpl) )
{
default:
regs->eax = -EPERM;

View File

@ -0,0 +1,157 @@
From 9ca313aa0824f2d350a7a6c9b1ef6c47e0408f1d Mon Sep 17 00:00:00 2001
From: aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Date: Sat, 23 Aug 2008 23:27:37 +0000
Subject: [PATCH] VNC: Support for ExtendedKeyEvent client message
This patch adds support for the ExtendedKeyEvent client message. This message
allows a client to send raw scan codes directly to the server. If the client
and server are using the same keymap, then it's unnecessary to use the '-k'
option with QEMU when this extension is supported.
This is extension is currently only implemented by gtk-vnc based clients
(gvncviewer, virt-manager, vinagre, etc.).
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5076 c046a42c-6fe2-441c-8c8c-71466251a162
---
vnc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 50 insertions(+), 9 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1285,35 +1285,22 @@ static void press_key_altgr_down(VncStat
}
}
-static void do_key_event(VncState *vs, int down, uint32_t sym)
+static void do_key_event(VncState *vs, int down, int keycode, int sym, int shift)
{
- int keycode;
int shift_keys = 0;
- int shift = 0;
int keypad = 0;
int altgr = 0;
int altgr_keys = 0;
if (is_graphic_console()) {
- if (sym >= 'A' && sym <= 'Z') {
- sym = sym - 'A' + 'a';
- shift = 1;
- }
- else {
+ if (!shift)
shift = keysym_is_shift(vs->kbd_layout, sym & 0xFFFF);
- }
altgr = keysym_is_altgr(vs->kbd_layout, sym & 0xFFFF);
}
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];
- keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
- if (keycode == 0) {
- fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);
- return;
- }
-
/* QEMU console switch */
switch(keycode) {
case 0x2a: /* Left Shift */
@@ -1445,7 +1432,25 @@ static void do_key_event(VncState *vs, i
static void key_event(VncState *vs, int down, uint32_t sym)
{
- do_key_event(vs, down, sym);
+ int keycode;
+ int shift = 0;
+
+ if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
+ sym = sym - 'A' + 'a';
+ shift = 1;
+ }
+ keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
+ do_key_event(vs, down, keycode, sym, shift);
+}
+
+static void ext_key_event(VncState *vs, int down,
+ uint32_t sym, uint16_t keycode)
+{
+ /* if the user specifies a keyboard layout, always use it */
+ if (keyboard_layout)
+ key_event(vs, down, sym);
+ else
+ do_key_event(vs, down, keycode, sym, 0);
}
static void framebuffer_set_updated(VncState *vs, int x, int y, int w, int h)
@@ -1534,6 +1539,15 @@ static void framebuffer_update_request(V
qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock));
}
+static void send_ext_key_event_ack(VncState *vs)
+{
+ vnc_write_u8(vs, 0);
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1);
+ vnc_framebuffer_update(vs, 0, 0, ds_get_width(vs->ds), ds_get_height(vs->ds), -258);
+ vnc_flush(vs);
+}
+
static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings)
{
int i;
@@ -1562,6 +1576,9 @@ static void set_encodings(VncState *vs,
case -257:
vs->has_pointer_type_change = 1;
break;
+ case -258:
+ send_ext_key_event_ack(vs);
+ break;
case 0x574D5669:
vs->has_WMVi = 1;
default:
@@ -1774,6 +1791,24 @@ static int protocol_client_msg(VncState
client_cut_text(vs, read_u32(data, 4), (char *)(data + 8));
break;
+ case 255:
+ if (len == 1)
+ return 2;
+
+ switch (read_u8(data, 1)) {
+ case 0:
+ if (len == 2)
+ return 12;
+
+ ext_key_event(vs, read_u16(data, 2),
+ read_u32(data, 4), read_u32(data, 8));
+ break;
+ default:
+ printf("Msg: %d\n", read_u16(data, 0));
+ vnc_client_error(vs);
+ break;
+ }
+ break;
default:
printf("Msg: %d\n", data[0]);
vnc_client_error(vs);
@@ -2445,10 +2480,11 @@ void vnc_display_init(DisplayState *ds)
vs->ds = ds;
- if (!keyboard_layout)
- keyboard_layout = "en-us";
+ if (keyboard_layout)
+ vs->kbd_layout = init_keyboard_layout(keyboard_layout);
+ else
+ vs->kbd_layout = init_keyboard_layout("en-us");
- vs->kbd_layout = init_keyboard_layout(keyboard_layout);
if (!vs->kbd_layout)
exit(1);
vs->modifiers_state[0x45] = 1; /* NumLock on - on boot */

45
altgr_2.patch Normal file
View File

@ -0,0 +1,45 @@
When access domU from Windows VNC client, spanish keyboard altgr key
doesn't work. According to log info, we found that the keycodes passed
from vncclient to qemu vncserver have something wrong. When altgr and "2"
pressed, keycodes vncserver receives are:
ALT_R down,
CTRL_L down,
CTRL_L up,
ATL_R up,
"2" down,
"2" up,
...
Since when send "2" down, there is no altgr modifier, the char displayed
on screen will be "2" but not "@".
To solve this problem, there is another patch applied by upstream which
sends an additional altgr modifier before "2" down in the above case.
It works well when domU is windows, but on sles10 sp3 domU, sometimes it
display "@" and sometimes it still displays "2", especially when press
altgr+2 continuously.
For the sles10 sp3 domU problem, maybe because there are two many alt_r (same
keycode as altgr on "es") up and down events and the domU OS couldn't handle
it well.
To furtherly solve this problem, I write this patch, when vncserver
is "es" and receives a alt_r keysym (this is already abnormal since "es" has
no alt_r), then treat the alt_r as alt_l. This can avoid too many altgr
keycodes up and down events and make sure the intentionally added altgr keycode can take effect.
Signed-off by Chunyan Liu (cyliu@novell.com)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1440,6 +1440,9 @@ static void key_event(VncState *vs, int
int keycode;
int shift = 0;
+ if ( sym == 0xffea && keyboard_layout && !strcmp(keyboard_layout,"es") )
+ sym = 0xffe9;
+
if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
sym = sym - 'A' + 'a';
shift = 1;

32
bdrv_default_rwflag.patch Normal file
View File

@ -0,0 +1,32 @@
Subject: modify default read/write flag in bdrv_init.
Signed-off by Chunyan Liu <cyliu@novell.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -2627,6 +2627,8 @@ int drive_init(struct drive_opt *arg, in
strncpy(drives_table[nb_drives].serial, serial, sizeof(serial));
nb_drives++;
+ bdrv_flags = BDRV_O_RDWR;
+
switch(type) {
case IF_IDE:
case IF_XEN:
@@ -2640,6 +2642,7 @@ int drive_init(struct drive_opt *arg, in
break;
case MEDIA_CDROM:
bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM);
+ bdrv_flags &= ~BDRV_O_RDWR;
break;
}
break;
@@ -2660,7 +2663,6 @@ int drive_init(struct drive_opt *arg, in
}
if (!file[0])
return -2;
- bdrv_flags = 0;
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
cache = 2; /* always use write-back with snapshot */

129
bdrv_open2_fix_flags.patch Normal file
View File

@ -0,0 +1,129 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
@@ -350,7 +350,7 @@ int bdrv_file_open(BlockDriverState **pb
int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
{
- return bdrv_open2(bs, filename, flags, NULL);
+ return bdrv_open2(bs, filename, flags|BDRV_O_RDWR, NULL);
}
int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
@@ -419,12 +419,13 @@ int bdrv_open2(BlockDriverState *bs, con
}
bs->drv = drv;
bs->opaque = qemu_mallocz(drv->instance_size);
- /* Note: for compatibility, we open disk image files as RDWR, and
- RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
- open_flags = (flags & BDRV_O_ACCESS) | (flags & BDRV_O_CACHE_MASK);
+ open_flags = flags;
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
+ if (!(open_flags & BDRV_O_RDWR))
+ bs->read_only = 1;
+
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
@@ -551,7 +551,7 @@ USBDevice *usb_msd_init(const char *file
s = qemu_mallocz(sizeof(MSDState));
bdrv = bdrv_new("usb");
- if (bdrv_open2(bdrv, filename, 0, drv) < 0)
+ if (bdrv_open2(bdrv, filename, BDRV_O_RDWR, drv) < 0)
goto fail;
s->bs = bdrv;
*pbs = bdrv;
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-img.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-img.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-img.c
@@ -32,7 +32,7 @@
#endif
/* Default to cache=writeback as data integrity is not important for qemu-tcg. */
-#define BRDV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_O_FLAGS BDRV_O_CACHE_WB
static void QEMU_NORETURN error(const char *fmt, ...)
{
@@ -185,7 +185,7 @@ static int read_password(char *buf, int
#endif
static BlockDriverState *bdrv_new_open(const char *filename,
- const char *fmt)
+ const char *fmt, int flags)
{
BlockDriverState *bs;
BlockDriver *drv;
@@ -201,7 +201,7 @@ static BlockDriverState *bdrv_new_open(c
} else {
drv = &bdrv_raw;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, flags, drv) < 0) {
error("Could not open '%s'", filename);
}
if (bdrv_is_encrypted(bs)) {
@@ -253,7 +253,7 @@ static int img_create(int argc, char **a
size = 0;
if (base_filename) {
BlockDriverState *bs;
- bs = bdrv_new_open(base_filename, NULL);
+ bs = bdrv_new_open(base_filename, NULL, BDRV_O_RDWR);
bdrv_get_geometry(bs, &size);
size *= 512;
bdrv_delete(bs);
@@ -332,7 +332,7 @@ static int img_commit(int argc, char **a
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
ret = bdrv_commit(bs);
@@ -455,7 +455,8 @@ static int img_convert(int argc, char **
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
- bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt);
+ bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt,
+ BDRV_O_CACHE_WB|BDRV_O_RDONLY);
if (!bs[bs_i])
error("Could not open '%s'", argv[optind + bs_i]);
bdrv_get_geometry(bs[bs_i], &bs_sectors);
@@ -483,7 +484,7 @@ static int img_convert(int argc, char **
}
}
- out_bs = bdrv_new_open(out_filename, out_fmt);
+ out_bs = bdrv_new_open(out_filename, out_fmt, BDRV_O_CACHE_WB|BDRV_O_RDWR);
bs_i = 0;
bs_offset = 0;
@@ -706,7 +707,7 @@ static int img_info(int argc, char **arg
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_FLAGS|BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
@@ -810,7 +811,7 @@ static void img_snapshot(int argc, char
if (!bs)
error("Not enough memory");
- if (bdrv_open2(bs, filename, 0, NULL) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, NULL) < 0) {
error("Could not open '%s'", filename);
}

72
bdrv_open2_flags_2.patch Normal file
View File

@ -0,0 +1,72 @@
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -225,6 +225,7 @@ static int open_disk(struct td_state *s,
BlockDriver* drv;
char* devname;
static int devnumber = 0;
+ int flags = readonly ? BDRV_O_RDONLY : BDRV_O_RDWR;
int i;
DPRINTF("Opening %s as blktap%d\n", path, devnumber);
@@ -247,7 +248,7 @@ static int open_disk(struct td_state *s,
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
/* Open the image */
- if (bdrv_open2(bs, path, 0, drv) != 0) {
+ if (bdrv_open2(bs, path, flags, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -135,7 +135,8 @@ static void insert_media(void *opaque)
else
format = &bdrv_raw;
- bdrv_open2(bs, media_filename[i], 0, format);
+ /* Temporary BDRV_O_RDWR */
+ bdrv_open2(bs, media_filename[i], BDRV_O_RDWR, format);
#ifdef CONFIG_STUBDOM
{
char *buf, *backend, *params_path, *params;
@@ -510,7 +511,8 @@ void xenstore_parse_domain_config(int hv
}
for (i = 0; i < num; i++) {
- format = NULL; /* don't know what the format is yet */
+ flags = 0;
+ format = NULL; /* don't know what the format is yet */
/* read the backend path */
xenstore_get_backend_path(&bpath, "vbd", danger_path, hvm_domid, e_danger[i]);
if (bpath == NULL)
@@ -596,6 +598,17 @@ void xenstore_parse_domain_config(int hv
format = &bdrv_raw;
}
+ /* read the mode of the device */
+ if (pasprintf(&buf, "%s/mode", bpath) == -1)
+ continue;
+ free(mode);
+ mode = xs_read(xsh, XBT_NULL, buf, &len);
+
+ if (!strcmp(mode, "r") || !strcmp(mode, "ro"))
+ flags |= BDRV_O_RDONLY;
+ if (!strcmp(mode, "w") || !strcmp(mode, "rw"))
+ flags |= BDRV_O_RDWR;
+
#if 0
/* Phantom VBDs are disabled because the use of paths
* from guest-controlled areas in xenstore is unsafe.
@@ -663,7 +676,7 @@ void xenstore_parse_domain_config(int hv
#ifdef CONFIG_STUBDOM
if (pasprintf(&danger_buf, "%s/device/vbd/%s", danger_path, e_danger[i]) == -1)
continue;
- if (bdrv_open2(bs, danger_buf, BDRV_O_CACHE_WB /* snapshot and write-back */, &bdrv_raw) == 0) {
+ if (bdrv_open2(bs, danger_buf, flags|BDRV_O_CACHE_WB /* snapshot and write-back */, &bdrv_raw) == 0) {
if (pasprintf(&buf, "%s/params", bpath) == -1)
continue;
free(params);

View File

@ -8,11 +8,11 @@
xen/include/public/io/cdromif.h | 122 ++++
7 files changed, 726 insertions(+), 3 deletions(-)
Index: xen-4.3.0-testing/tools/blktap/drivers/Makefile
Index: xen-4.4.0-testing/tools/blktap/drivers/Makefile
===================================================================
--- xen-4.3.0-testing.orig/tools/blktap/drivers/Makefile
+++ xen-4.3.0-testing/tools/blktap/drivers/Makefile
@@ -38,8 +38,9 @@ endif
--- xen-4.4.0-testing.orig/tools/blktap/drivers/Makefile
+++ xen-4.4.0-testing/tools/blktap/drivers/Makefile
@@ -32,8 +32,9 @@ AIOLIBS := -laio
CFLAGS += $(PTHREAD_CFLAGS)
LDFLAGS += $(PTHREAD_LDFLAGS)
@ -24,7 +24,7 @@ Index: xen-4.3.0-testing/tools/blktap/drivers/Makefile
BLK-OBJS-y := block-aio.o
BLK-OBJS-y += block-sync.o
@@ -47,6 +48,7 @@ BLK-OBJS-y += block-vmdk.o
@@ -41,6 +42,7 @@ BLK-OBJS-y += block-vmdk.o
BLK-OBJS-y += block-ram.o
BLK-OBJS-y += block-qcow.o
BLK-OBJS-y += block-qcow2.o
@ -32,10 +32,10 @@ Index: xen-4.3.0-testing/tools/blktap/drivers/Makefile
BLK-OBJS-y += aes.o
BLK-OBJS-y += tapaio.o
BLK-OBJS-$(CONFIG_Linux) += blk_linux.o
Index: xen-4.3.0-testing/tools/blktap/drivers/block-cdrom.c
Index: xen-4.4.0-testing/tools/blktap/drivers/block-cdrom.c
===================================================================
--- /dev/null
+++ xen-4.3.0-testing/tools/blktap/drivers/block-cdrom.c
+++ xen-4.4.0-testing/tools/blktap/drivers/block-cdrom.c
@@ -0,0 +1,568 @@
+/* block-cdrom.c
+ *
@ -605,10 +605,10 @@ Index: xen-4.3.0-testing/tools/blktap/drivers/block-cdrom.c
+ .td_get_parent_id = tdcdrom_get_parent_id,
+ .td_validate_parent = tdcdrom_validate_parent
+};
Index: xen-4.3.0-testing/tools/blktap/drivers/tapdisk.c
Index: xen-4.4.0-testing/tools/blktap/drivers/tapdisk.c
===================================================================
--- xen-4.3.0-testing.orig/tools/blktap/drivers/tapdisk.c
+++ xen-4.3.0-testing/tools/blktap/drivers/tapdisk.c
--- xen-4.4.0-testing.orig/tools/blktap/drivers/tapdisk.c
+++ xen-4.4.0-testing/tools/blktap/drivers/tapdisk.c
@@ -735,6 +735,22 @@ static void get_io_request(struct td_sta
goto out;
}
@ -632,10 +632,10 @@ Index: xen-4.3.0-testing/tools/blktap/drivers/tapdisk.c
default:
DPRINTF("Unknown block operation\n");
break;
Index: xen-4.3.0-testing/tools/blktap/drivers/tapdisk.h
Index: xen-4.4.0-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-4.3.0-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-4.3.0-testing/tools/blktap/drivers/tapdisk.h
--- xen-4.4.0-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-4.4.0-testing/tools/blktap/drivers/tapdisk.h
@@ -137,6 +137,9 @@ struct tap_disk {
int (*td_get_parent_id) (struct disk_driver *dd, struct disk_id *id);
int (*td_validate_parent)(struct disk_driver *dd,
@ -680,10 +680,10 @@ Index: xen-4.3.0-testing/tools/blktap/drivers/tapdisk.h
};
typedef struct driver_list_entry {
Index: xen-4.3.0-testing/tools/blktap/lib/blktaplib.h
Index: xen-4.4.0-testing/tools/blktap/lib/blktaplib.h
===================================================================
--- xen-4.3.0-testing.orig/tools/blktap/lib/blktaplib.h
+++ xen-4.3.0-testing/tools/blktap/lib/blktaplib.h
--- xen-4.4.0-testing.orig/tools/blktap/lib/blktaplib.h
+++ xen-4.4.0-testing/tools/blktap/lib/blktaplib.h
@@ -219,6 +219,7 @@ typedef struct msg_pid {
#define DISK_TYPE_RAM 3
#define DISK_TYPE_QCOW 4
@ -692,10 +692,10 @@ Index: xen-4.3.0-testing/tools/blktap/lib/blktaplib.h
/* xenstore/xenbus: */
#define DOMNAME "Domain-0"
Index: xen-4.3.0-testing/xen/include/public/io/blkif.h
Index: xen-4.4.0-testing/xen/include/public/io/blkif.h
===================================================================
--- xen-4.3.0-testing.orig/xen/include/public/io/blkif.h
+++ xen-4.3.0-testing/xen/include/public/io/blkif.h
--- xen-4.4.0-testing.orig/xen/include/public/io/blkif.h
+++ xen-4.4.0-testing/xen/include/public/io/blkif.h
@@ -444,7 +444,7 @@
* Used in SLES sources for device specific command packet
* contained within the request. Reserved for that purpose.
@ -705,10 +705,10 @@ Index: xen-4.3.0-testing/xen/include/public/io/blkif.h
/*
* Indicate to the backend device that a region of storage is no longer in
* use, and may be discarded at any time without impact to the client. If
Index: xen-4.3.0-testing/xen/include/public/io/cdromif.h
Index: xen-4.4.0-testing/xen/include/public/io/cdromif.h
===================================================================
--- /dev/null
+++ xen-4.3.0-testing/xen/include/public/io/cdromif.h
+++ xen-4.4.0-testing/xen/include/public/io/cdromif.h
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * cdromif.h

55
blktap.patch Normal file
View File

@ -0,0 +1,55 @@
bug #239173
bug #242953
Index: xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3281,7 +3281,7 @@ class XendDomainInfo:
(fn, BOOTLOADER_LOOPBACK_DEVICE))
vbd = {
- 'mode': 'RO',
+ 'mode': 'RW',
'device': BOOTLOADER_LOOPBACK_DEVICE,
}
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -447,9 +447,9 @@ void xenstore_parse_domain_config(int hv
{
char **e_danger = NULL;
char *buf = NULL;
- char *fpath = NULL, *bpath = NULL,
+ char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *drv = NULL;
- int i, ret;
+ int i, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -486,6 +486,14 @@ void xenstore_parse_domain_config(int hv
e_danger[i]);
if (bpath == NULL)
continue;
+ /* check to see if type is tap or not */
+ if (pasprintf(&buf, "%s/type", bpath) == -1)
+ continue;
+ free(btype);
+ btype = xs_read(xsh, XBT_NULL, buf, &len);
+ if (btype == NULL)
+ continue;
+ is_tap = !strncmp(btype, "tap", 3);
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -762,6 +770,7 @@ void xenstore_parse_domain_config(int hv
free(mode);
free(params);
free(dev);
+ free(btype);
free(bpath);
free(buf);
free(danger_buf);

View File

@ -1,7 +1,7 @@
Index: xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
Index: xen-4.4.0-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
--- xen-4.4.0-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.4.0-testing/tools/blktap/drivers/blktapctrl.c
@@ -282,7 +282,7 @@ static int del_disktype(blkif_t *blkif)
* qemu-dm instance. We may close the file handle only if there is
* no other disk left for this domain.

View File

@ -12,3 +12,16 @@ Index: xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
#define PIDFILE "/var/run/blktapctrl.pid"
#define NUM_POLL_FDS 2
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -46,7 +46,7 @@
#define BLKTAP_CTRL_DIR "/var/run/tap"
/* If enabled, print debug messages to stderr */
-#if 1
+#if 0
#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a)
#else
#define DPRINTF(_f, _a...) ((void)0)

View File

@ -2,22 +2,13 @@
# Usage: block-dmmd [add args | remove args]
#
# the dmmd device syntax (in xm commands/configs) is something like:
# the xm config file should have something like:
# dmmd:md;/dev/md0;md;/dev/md1;lvm;/dev/vg1/lv1
# or
# dmmd:lvm;/dev/vg1/lv1;lvm;/dev/vg1/lv2;md;/dev/md0
# device pairs (type;dev) are processed in order, with the last device
# assigned to the VM
#
# md devices can optionally:
# specify a config file through:
# md;/dev/md100(/var/xen/config/mdadm.conf)
# use an array name (mdadm -N option):
# dmmd:md;My-MD-name;lvm;/dev/vg1/lv1
#
# note the last device will be used for VM
# History:
# 2013-07-03, loic.devulder@mpsa.com:
# Partial rewrite of the script for supporting MD activation by name
# 2009-06-09, mh@novell.com:
# Emit debugging messages into a temporary file; if no longer needed,
# just comment the exec I/O redirection below
@ -48,7 +39,7 @@ function run_mdadm()
local msg
local rc
msg="$(/sbin/mdadm $mdadm_cmd 2>&1)"
msg="`/sbin/mdadm $mdadm_cmd 2>&1`"
rc=$?
case "$msg" in
*"has been started"* | *"already active"* )
@ -68,12 +59,11 @@ function run_mdadm()
function activate_md()
{
# Make it explicitly local
local par=$1
local cfg dev dev_path rc t mdadm_opts
local already_active=0 cfg dev rc t
if [ ${par} = ${par%%(*} ]; then
# No configuration file specified
# No configuration file specified:
dev=$par
cfg=
else
@ -81,50 +71,23 @@ function activate_md()
t=${par#*(}
cfg="-c ${t%%)*}"
fi
# Looking for device name or aliase
if [ ${dev:0:1} = / ]; then
dev_path=${dev%/*}
mdadm_opts=
else
dev_path=/dev/md
mdadm_opts="-s -N"
if /sbin/mdadm -Q -D $dev; then
already_active=1
fi
# Is md device already active?
# We need to use full path name, aliase is not possible...
/sbin/mdadm -Q -D $dev_path/${dev##*/} > /dev/null 2>&1 \
&& return 0
run_mdadm "-A $mdadm_opts $dev $cfg"
run_mdadm "-A $dev $cfg"
rc=$?
[ $rc -eq 2 ] && return 0
if [ $already_active -eq 1 ] && [ $rc -eq 2 ]; then
return 0
fi
return $rc
}
function deactivate_md()
{
local par=$1
local dev
if [ ${par} = ${par%%(*} ]; then
# No configuration file specified
dev=${par}
else
dev=${par%%(*}
fi
# Looking for device name or aliase
if [ ${dev:0:1} = / ]; then
dev_path=${dev%/*}
else
dev_path=/dev/md
fi
# We need the device name only while deactivating
/sbin/mdadm -S ${dev_path}/${dev##*/} > /dev/null 2>&1
local par=$1 # Make it explicitly local
## We need the device name only while deactivating
/sbin/mdadm -S ${par%%(*}
return $?
}
@ -136,20 +99,14 @@ function activate_lvm()
# Parse device-create-timeout from /etc/xen/xend-config.sxp
# If not set, use default timeout of 90s
parsed_timeout=$(grep -v "^[ \t]*#.*" /etc/xen/xend-config.sxp \
| sed -n 's/(device-create-timeout \+\([0-9]\+\))/\1/p')
[ ! -z $parsed_timeout ] \
&& run_timeout=$((${parsed_timeout}*9/10))
# First scan for PVs and VGs
# We need this for using md device as PV
/sbin/pvscan > /dev/null 2>&1
# /sbin/vgscan --mknodes > /dev/null 2>&1
parsed_timeout=$(grep -v "^[ \t]*#.*" /etc/xen/xend-config.sxp|sed -n 's/(device-create-timeout \+\([0-9]\+\))/\1/p')
if [ ! -z $parsed_timeout ]; then
run_timeout=$((${parsed_timeout}*9/10))
fi
end_time=$(($(date +%s)+${run_timeout}))
while true; do
/sbin/lvchange -aey $1 > /dev/null 2>&1
/sbin/lvchange -aey $1
if [ $? -eq 0 -a -e $1 ]; then
return 0
fi
@ -165,8 +122,7 @@ function activate_lvm()
function deactivate_lvm()
{
/sbin/lvchange -aen $1 > /dev/null 2>&1
/sbin/lvchange -aen $1
if [ $? -eq 0 ]; then
# We may have to deactivate the VG now, but can ignore errors:
# /sbin/vgchange -an ${1%/*} || :
@ -271,6 +227,7 @@ function parse_par()
fi
fi
push "$t $s"
done
}

140
build-tapdisk-ioemu.patch Normal file
View File

@ -0,0 +1,140 @@
From f1ebeae7802a5775422004f62630c42e46dcf664 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:32:40 +0100
Subject: [PATCH 3/6] ioemu: Build tapdisk-ioemu binary
When changing away from the old ioemu, changes in the Makefiles
resulted in tapdisk-ioemu appearing there, but actually not
being built. This patch re-enables the build of tapdisk-ioemu.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
Makefile | 22 +++++++++++++++-------
configure | 2 +-
qemu-tool.c | 2 +-
tapdisk-ioemu.c | 17 -----------------
4 files changed, 17 insertions(+), 26 deletions(-)
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/Makefile
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
@@ -46,14 +46,6 @@ $(filter %-user,$(SUBDIR_RULES)): libqem
recurse-all: $(SUBDIR_RULES)
-CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
-CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
-CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
-CPPFLAGS += -I$(XEN_ROOT)/tools/include
-
-tapdisk-ioemu: tapdisk-ioemu.c cutils.c block.c block-raw.c block-cow.c block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c block-vpc.c block-vvfat.c block-qcow2.c hw/xen_blktap.c osdep.c
- $(CC) -DQEMU_TOOL $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) $(LDFLAGS) $(BASE_LDFLAGS) -o $@ $^ -lz $(LIBS)
-
#######################################################################
# BLOCK_OBJS is code used by both qemu system emulation and qemu-img
@@ -72,6 +64,21 @@ endif
BLOCK_OBJS += block-raw-posix.o
endif
+#######################################################################
+# tapdisk-ioemu
+
+hw/tapdisk-xen_blktap.o: hw/xen_blktap.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+tapdisk-ioemu.o: tapdisk-ioemu.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/include
+tapdisk-ioemu: tapdisk-ioemu.o $(BLOCK_OBJS) qemu-tool.o hw/tapdisk-xen_blktap.o
+ $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
+
######################################################################
# libqemu_common.a: Target independent part of system emulation. The
# long term path is to suppress *all* target specific code in case of
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/configure
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/configure
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/configure
@@ -1512,7 +1512,7 @@ bsd)
;;
esac
-tools=
+tools="tapdisk-ioemu"
if test `expr "$target_list" : ".*softmmu.*"` != 0 ; then
tools="qemu-img\$(EXESUF) $tools"
if [ "$linux" = "yes" ] ; then
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
@@ -68,7 +68,7 @@ void qemu_bh_delete(QEMUBH *bh)
qemu_free(bh);
}
-int qemu_set_fd_handler2(int fd,
+int __attribute__((weak)) qemu_set_fd_handler2(int fd,
IOCanRWHandler *fd_read_poll,
IOHandler *fd_read,
IOHandler *fd_write,
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -12,34 +12,12 @@
extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);
-extern void bdrv_init(void);
-
-extern void *qemu_mallocz(size_t size);
-extern void qemu_free(void *ptr);
extern void *fd_start;
int domid = 0;
FILE* logfile;
-void term_printf(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-void term_print_filename(const char *filename)
-{
- term_printf(filename);
-}
-
-
-typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
-typedef int IOCanRWHandler(void *opaque);
-typedef void IOHandler(void *opaque);
-
typedef struct IOHandlerRecord {
int fd;
IOCanRWHandler *fd_read_poll;
@@ -103,7 +81,6 @@ int main(void)
logfile = stderr;
bdrv_init();
- qemu_aio_init();
init_blktap();
/* Daemonize */
@@ -115,8 +92,6 @@ int main(void)
* completed aio operations.
*/
while (1) {
- qemu_aio_poll();
-
max_fd = -1;
FD_ZERO(&rfds);
for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next)

16
capslock_enable.patch Normal file
View File

@ -0,0 +1,16 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1329,6 +1329,11 @@ static void do_key_event(VncState *vs, i
}
break;
case 0x3a: /* CapsLock */
+ if(!down){
+ vs->modifiers_state[keycode] ^= 1;
+ kbd_put_keycode(keycode | 0x80);
+ }
+ return;
case 0x45: /* NumLock */
if (down) {
kbd_put_keycode(keycode & 0x7f);

496
cdrom-removable.patch Normal file
View File

@ -0,0 +1,496 @@
Index: xen-4.4.0-testing/tools/python/xen/xend/server/HalDaemon.py
===================================================================
--- /dev/null
+++ xen-4.4.0-testing/tools/python/xen/xend/server/HalDaemon.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+# -*- mode: python; -*-
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@novell.com>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+"""hald (Hardware Abstraction Layer Daemon) watcher for Xen management
+ of removable block device media.
+
+"""
+
+import gobject
+import dbus
+import dbus.glib
+import os
+import types
+import sys
+import signal
+import traceback
+from xen.xend.xenstore.xstransact import xstransact, complete
+from xen.xend.xenstore.xsutil import xshandle
+from xen.xend import PrettyPrint
+from xen.xend import XendLogging
+from xen.xend.XendLogging import log
+
+DEVICE_TYPES = ['vbd', 'tap']
+
+class HalDaemon:
+ """The Hald block device watcher for XEN
+ """
+
+ """Default path to the log file. """
+ logfile_default = "/var/log/xen/hald.log"
+
+ """Default level of information to be logged."""
+ loglevel_default = 'INFO'
+
+
+ def __init__(self):
+
+ XendLogging.init(self.logfile_default, self.loglevel_default)
+ log.debug( "%s", "__init__")
+
+ self.udi_dict = {}
+ self.debug = 0
+ self.dbpath = "/local/domain/0/backend"
+ self.bus = dbus.SystemBus()
+ self.hal_manager_obj = self.bus.get_object('org.freedesktop.Hal', '/org/freedesktop/Hal/Manager')
+ self.hal_manager = dbus.Interface( self.hal_manager_obj, 'org.freedesktop.Hal.Manager')
+ self.gatherBlockDevices()
+ self.registerDeviceCallbacks()
+
+ def run(self):
+ log.debug( "%s", "In new run" );
+ try:
+ self.mainloop = gobject.MainLoop()
+ self.mainloop.run()
+ except KeyboardInterrupt, ex:
+ log.debug('Keyboard exception handler: %s', ex )
+ self.mainloop.quit()
+ except Exception, ex:
+ log.debug('Generic exception handler: %s', ex )
+ self.mainloop.quit()
+
+ def __del__(self):
+ log.debug( "%s", "In del " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def shutdown(self):
+ log.debug( "%s", "In shutdown now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def stop(self):
+ log.debug( "%s", "In stop now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def gatherBlockDevices(self):
+
+ # Get all the current devices from hal and save in a dictionary
+ try:
+ device_names = self.hal_manager.GetAllDevices()
+ i = 0;
+ for name in device_names:
+ #log.debug("device name, device=%s",name)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', name)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ dev_properties = dev_obj.GetAllProperties(dbus_interface="org.freedesktop.Hal.Device")
+ if dev_properties.has_key('block.device'):
+ dev_str = dev_properties['block.device']
+ dev_major = dev_properties['block.major']
+ dev_minor = dev_properties['block.minor']
+ udi_info = {}
+ udi_info['device'] = dev_str
+ udi_info['major'] = dev_major
+ udi_info['minor'] = dev_minor
+ udi_info['udi'] = name
+ self.udi_dict[i] = udi_info
+ i = i + 1
+ except Exception, ex:
+ print >>sys.stderr, 'Exception gathering block devices:', ex
+ log.warn("Exception gathering block devices (%s)",ex)
+
+ #
+ def registerDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+ self.hal_manager.connect_to_signal('DeviceAdded', self.device_added_callback)
+ self.hal_manager.connect_to_signal('DeviceRemoved', self.device_removed_callback)
+
+ #
+ def unRegisterDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+ self.hal_manager.remove_signal_receiver(self.device_added_callback,'DeviceAdded')
+ self.hal_manager.remove_signal_receiver(self.device_removed_callback,'DeviceRemoved')
+
+ #
+ def device_removed_callback(self,udi):
+ log.debug('UDI %s was removed',udi)
+ self.show_dict(self.udi_dict)
+ for key in self.udi_dict:
+ udi_info = self.udi_dict[key]
+ if udi_info['udi'] == udi:
+ device = udi_info['device']
+ major = udi_info['major']
+ minor = udi_info['minor']
+ self.change_xenstore( "remove", device, major, minor)
+
+ # Adds device to dictionary if not already there
+ def device_added_callback(self,udi):
+ log.debug('UDI %s was added', udi)
+ self.show_dict(self.udi_dict)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', udi)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ device = dev.GetProperty ('block.device')
+ major = dev.GetProperty ('block.major')
+ minor = dev.GetProperty ('block.minor')
+ udi_info = {}
+ udi_info['device'] = device
+ udi_info['major'] = major
+ udi_info['minor'] = minor
+ udi_info['udi'] = udi
+ already = 0
+ cnt = 0;
+ for key in self.udi_dict:
+ info = self.udi_dict[key]
+ if info['udi'] == udi:
+ already = 1
+ break
+ cnt = cnt + 1
+ if already == 0:
+ self.udi_dict[cnt] = udi_info;
+ log.debug('UDI %s was added, device:%s major:%s minor:%s index:%d\n', udi, device, major, minor, cnt)
+ self.change_xenstore( "add", device, major, minor)
+
+ # Debug helper, shows dictionary contents
+ def show_dict(self,dict=None):
+ if self.debug == 0 :
+ return
+ if dict == None :
+ dict = self.udi_dict
+ for key in dict:
+ log.debug('udi_info %s udi_info:%s',key,dict[key])
+
+ # Set or clear xenstore media-present depending on the action argument
+ # for every vbd that has this block device
+ def change_xenstore(self,action, device, major, minor):
+ for type in DEVICE_TYPES:
+ path = self.dbpath + '/' + type
+ domains = xstransact.List(path)
+ log.debug('domains: %s', domains)
+ for domain in domains: # for each domain
+ devices = xstransact.List( path + '/' + domain)
+ log.debug('devices: %s',devices)
+ for device in devices: # for each vbd device
+ str = device.split('/')
+ vbd_type = None;
+ vbd_physical_device = None
+ vbd_media = None
+ vbd_device_path = path + '/' + domain + '/' + device
+ listing = xstransact.List(vbd_device_path)
+ for entry in listing: # for each entry
+ item = path + '/' + entry
+ value = xstransact.Read( vbd_device_path + '/' + entry)
+ log.debug('%s=%s',item,value)
+ if item.find('media-present') != -1:
+ vbd_media = item;
+ vbd_media_path = item
+ if item.find('physical-device') != -1:
+ vbd_physical_device = value;
+ if item.find('type') != -1:
+ vbd_type = value;
+ if vbd_type is not None and vbd_physical_device is not None and vbd_media is not None :
+ inode = vbd_physical_device.split(':')
+ imajor = parse_hex(inode[0])
+ iminor = parse_hex(inode[1])
+ log.debug("action:%s major:%s- minor:%s- imajor:%s- iminor:%s- inode: %s",
+ action,major,minor, imajor, iminor, inode)
+ if int(imajor) == int(major) and int(iminor) == int(minor):
+ if action == "add":
+ xs_dict = {'media': "1"}
+ xstransact.Write(vbd_device_path, 'media-present', "1" )
+ log.debug("wrote xenstore media-present 1 path:%s",vbd_media_path)
+ else:
+ xstransact.Write(vbd_device_path, 'media-present', "0" )
+ log.debug("wrote xenstore media 0 path:%s",vbd_media_path)
+
+def mylog( fmt, *args):
+ f = open('/tmp/haldaemon.log', 'a')
+ print >>f, "HalDaemon ", fmt % args
+ f.close()
+
+
+def parse_hex(val):
+ try:
+ if isinstance(val, types.StringTypes):
+ return int(val, 16)
+ else:
+ return val
+ except ValueError:
+ return None
+
+if __name__ == "__main__":
+ watcher = HalDaemon()
+ watcher.run()
+ print 'Falling off end'
+
+
Index: xen-4.4.0-testing/tools/python/xen/xend/server/Hald.py
===================================================================
--- /dev/null
+++ xen-4.4.0-testing/tools/python/xen/xend/server/Hald.py
@@ -0,0 +1,125 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@novell.com>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+import errno
+import types
+import os
+import sys
+import time
+import signal
+from traceback import print_exc
+
+from xen.xend.XendLogging import log
+
+class Hald:
+ def __init__(self):
+ self.ready = False
+ self.running = True
+
+ def run(self):
+ """Starts the HalDaemon process
+ """
+ self.ready = True
+ try:
+ myfile = self.find("xen/xend/server/HalDaemon.py")
+ args = (["python", myfile ])
+ self.pid = self.daemonize("python", args )
+ #log.debug( "%s %s pid:%d", "Hald.py starting ", args, self.pid )
+ except:
+ self.pid = -1
+ log.debug("Unable to start HalDaemon process")
+
+ def shutdown(self):
+ """Shutdown the HalDaemon process
+ """
+ log.debug("%s pid:%d", "Hald.shutdown()", self.pid)
+ self.running = False
+ self.ready = False
+ if self.pid != -1:
+ try:
+ os.kill(self.pid, signal.SIGINT)
+ except:
+ print_exc()
+
+ def daemonize(self,prog, args):
+ """Runs a program as a daemon with the list of arguments. Returns the PID
+ of the daemonized program, or returns 0 on error.
+ Copied from xm/create.py instead of importing to reduce coupling
+ """
+ r, w = os.pipe()
+ pid = os.fork()
+
+ if pid == 0:
+ os.close(r)
+ w = os.fdopen(w, 'w')
+ os.setsid()
+ try:
+ pid2 = os.fork()
+ except:
+ pid2 = None
+ if pid2 == 0:
+ os.chdir("/")
+ env = os.environ.copy()
+ env['PYTHONPATH'] = self.getpythonpath()
+ for fd in range(0, 256):
+ try:
+ os.close(fd)
+ except:
+ pass
+ os.open("/dev/null", os.O_RDWR)
+ os.dup2(0, 1)
+ os.dup2(0, 2)
+ os.execvpe(prog, args, env)
+ os._exit(1)
+ else:
+ w.write(str(pid2 or 0))
+ w.close()
+ os._exit(0)
+ os.close(w)
+ r = os.fdopen(r)
+ daemon_pid = int(r.read())
+ r.close()
+ os.waitpid(pid, 0)
+ #log.debug( "daemon_pid: %d", daemon_pid )
+ return daemon_pid
+
+ def getpythonpath(self):
+ str = " "
+ for p in sys.path:
+ if str != " ":
+ str = str + ":" + p
+ else:
+ if str != "":
+ str = p
+ return str
+
+ def find(self,path, matchFunc=os.path.isfile):
+ """Find a module in the sys.path
+ From web page: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224
+ """
+ for dirname in sys.path:
+ candidate = os.path.join(dirname, path)
+ if matchFunc(candidate):
+ return candidate
+ raise Error("Can't find file %s" % path)
+
+if __name__ == "__main__":
+ watcher = Hald()
+ watcher.run()
+ time.sleep(10)
+ watcher.shutdown()
Index: xen-4.4.0-testing/tools/python/xen/xend/server/SrvServer.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/server/SrvServer.py
+++ xen-4.4.0-testing/tools/python/xen/xend/server/SrvServer.py
@@ -57,6 +57,7 @@ from xen.web.SrvDir import SrvDir
from SrvRoot import SrvRoot
from XMLRPCServer import XMLRPCServer
+from xen.xend.server.Hald import Hald
xoptions = XendOptions.instance()
@@ -252,6 +253,8 @@ def _loadConfig(servers, root, reload):
if xoptions.get_xend_unix_xmlrpc_server():
servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False))
+ servers.add(Hald())
+
def create():
root = SrvDir()
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -18,6 +18,7 @@
#include "exec-all.h"
#include "sysemu.h"
+#include "console.h"
#include "hw.h"
#include "pci.h"
#include "qemu-timer.h"
@@ -604,6 +605,21 @@ void xenstore_parse_domain_config(int hv
#endif
bs = bdrv_new(dev);
+
+ /* if cdrom physical put a watch on media-present */
+ if (bdrv_get_type_hint(bs) == BDRV_TYPE_CDROM) {
+ if (drv && !strcmp(drv, "phy")) {
+ if (pasprintf(&buf, "%s/media-present", bpath) != -1) {
+ if (bdrv_is_inserted(bs))
+ xs_write(xsh, XBT_NULL, buf, "1", strlen("1"));
+ else {
+ xs_write(xsh, XBT_NULL, buf, "0", strlen("0"));
+ }
+ xs_watch(xsh, buf, "media-present");
+ }
+ }
+ }
+
/* check if it is a cdrom */
if (danger_type && !strcmp(danger_type, "cdrom")) {
bdrv_set_type_hint(bs, BDRV_TYPE_CDROM);
@@ -1095,6 +1111,50 @@ static void xenstore_process_vcpu_set_ev
return;
}
+static void xenstore_process_media_change_event(char **vec)
+{
+ char *media_present = NULL;
+ unsigned int len;
+
+ media_present = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
+
+ if (media_present) {
+ BlockDriverState *bs;
+ char *buf = NULL, *cp = NULL, *path = NULL, *dev = NULL;
+
+ path = strdup(vec[XS_WATCH_PATH]);
+ cp = strstr(path, "media-present");
+ if (cp){
+ *(cp-1) = '\0';
+ pasprintf(&buf, "%s/dev", path);
+ dev = xs_read(xsh, XBT_NULL, buf, &len);
+ if (dev) {
+ if ( !strncmp(dev, "xvd", 3)) {
+ memmove(dev, dev+1, strlen(dev));
+ dev[0] = 'h';
+ dev[1] = 'd';
+ }
+ bs = bdrv_find(dev);
+ if (!bs) {
+ term_printf("device not found\n");
+ return;
+ }
+ if (strcmp(media_present, "0") == 0 && bs) {
+ bdrv_close(bs);
+ }
+ else if (strcmp(media_present, "1") == 0 &&
+ bs != NULL && bs->drv == NULL) {
+ if (bdrv_open(bs, bs->filename, 0 /* snapshot */) < 0) {
+ fprintf(logfile, "%s() qemu: could not open cdrom disk '%s'\n",
+ __func__, bs->filename);
+ }
+ bs->media_changed = 1;
+ }
+ }
+ }
+ }
+}
+
void xenstore_process_event(void *opaque)
{
char **vec, *offset, *bpath = NULL, *buf = NULL, *drv = NULL, *image = NULL;
@@ -1130,6 +1190,11 @@ void xenstore_process_event(void *opaque
xenstore_watch_callbacks[i].cb(vec[XS_WATCH_TOKEN],
xenstore_watch_callbacks[i].opaque);
+ if (!strcmp(vec[XS_WATCH_TOKEN], "media-present")) {
+ xenstore_process_media_change_event(vec);
+ goto out;
+ }
+
hd_index = drive_name_to_index(vec[XS_WATCH_TOKEN]);
if (hd_index == -1) {
fprintf(stderr,"medium change watch on `%s' -"

157
change-vnc-passwd.patch Normal file
View File

@ -0,0 +1,157 @@
Add support of change-vnc-password while vm is running.
Signed-off-by: Chunyan Liu <cyliu@novell.com>
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -200,7 +200,7 @@ DriveInfo drives_table[MAX_DRIVES+1];
int nb_drives;
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int vga_ram_size;
-static DisplayState *display_state;
+DisplayState *display_state;
int nographic;
static int curses;
static int sdl;
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -2600,6 +2600,7 @@ int vnc_display_password(DisplayState *d
if (password && password[0]) {
if (!(vs->password = qemu_strdup(password)))
return -1;
+ vs->auth = VNC_AUTH_VNC;
}
return 0;
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -25,6 +25,7 @@
#include "qemu-xen.h"
#include "xen_backend.h"
+extern DisplayState *display_state;
struct xs_handle *xsh = NULL;
static char *media_filename[MAX_DRIVES+1];
static QEMUTimer *insert_timer = NULL;
@@ -897,6 +898,19 @@ static void xenstore_process_dm_command_
} else if (!strncmp(command, "continue", len)) {
fprintf(logfile, "dm-command: continue after state save\n");
xen_pause_requested = 0;
+ } else if (!strncmp(command, "chgvncpasswd", len)) {
+ fprintf(logfile, "dm-command: change vnc passwd\n");
+ if (pasprintf(&path,
+ "/local/domain/0/backend/vfb/%u/0/vncpasswd", domid) == -1) {
+ fprintf(logfile, "out of memory reading dm command parameter\n");
+ goto out;
+ }
+ par = xs_read(xsh, XBT_NULL, path, &len);
+ if (!par)
+ goto out;
+ if (vnc_display_password(display_state, par) == 0)
+ xenstore_record_dm_state("vncpasswdchged");
+ free(par);
} else if (!strncmp(command, "usb-add", len)) {
fprintf(logfile, "dm-command: usb-add a usb device\n");
if (pasprintf(&path,
Index: xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1461,6 +1461,20 @@ class XendDomainInfo:
pci_conf = self.info['devices'][dev_uuid][1]
return map(pci_dict_to_bdf_str, pci_conf['devs'])
+ def chgvncpasswd(self, passwd):
+ if self._stateGet() != DOM_STATE_HALTED:
+ path = '/local/domain/0/backend/vfb/%u/0/' % self.getDomid()
+ xstransact.Write(path, 'vncpasswd', passwd)
+ self.image.signalDeviceModel("chgvncpasswd", "vncpasswdchged")
+
+ for dev_uuid, (dev_type, dev_info) in self.info['devices'].items():
+ if dev_type == 'vfb':
+ dev_info['vncpasswd'] = passwd
+ dev_info['other_config']['vncpasswd'] = passwd
+ self.info.device_update(dev_uuid, cfg_xenapi = dev_info)
+ break
+ xen.xend.XendDomain.instance().managed_config_save(self)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
Index: xen-4.4.0-testing/tools/python/xen/xend/server/XMLRPCServer.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/server/XMLRPCServer.py
+++ xen-4.4.0-testing/tools/python/xen/xend/server/XMLRPCServer.py
@@ -95,7 +95,7 @@ methods = ['device_create', 'device_conf
'destroyDevice','getDeviceSxprs',
'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
'send_sysrq', 'getVCPUInfo', 'waitForDevices',
- 'getRestartCount', 'getBlockDeviceClass']
+ 'getRestartCount', 'getBlockDeviceClass', 'chgvncpasswd']
exclude = ['domain_create', 'domain_restore']
Index: xen-4.4.0-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.4.0-testing/tools/python/xen/xm/main.py
@@ -21,6 +21,7 @@
"""Grand unified management application for Xen.
"""
+import getpass
import atexit
import cmd
import os
@@ -280,6 +281,9 @@ SUBCOMMAND_HELP = {
'getenforce' : ('', 'Returns the current enforcing mode for the Flask XSM module (Enforcing,Permissive)'),
'setenforce' : ('[ (Enforcing|1) | (Permissive|0) ]',
'Modifies the current enforcing mode for the Flask XSM module'),
+ #change vnc password
+ 'change-vnc-passwd' : ('<Domain>',\
+ 'Change vnc password'),
}
SUBCOMMAND_OPTIONS = {
@@ -404,6 +408,7 @@ common_commands = [
"usb-del",
"domstate",
"vcpu-set",
+ "change-vnc-passwd",
]
domain_commands = [
@@ -441,6 +446,7 @@ domain_commands = [
"vcpu-list",
"vcpu-pin",
"vcpu-set",
+ "change-vnc-passwd",
]
host_commands = [
@@ -3751,6 +3757,10 @@ def xm_cpupool_migrate(args):
else:
server.xend.cpu_pool.migrate(domname, poolname)
+def xm_chgvncpasswd(args):
+ arg_check(args, "change-vnc-passwd", 1)
+ pwd = getpass.getpass("Enter new password: ")
+ server.xend.domain.chgvncpasswd(args[0], pwd)
commands = {
"shell": xm_shell,
@@ -3857,6 +3867,8 @@ commands = {
"usb-del": xm_usb_del,
#domstate
"domstate": xm_domstate,
+ #change vnc password:
+ "change-vnc-passwd": xm_chgvncpasswd,
}
## The commands supported by a separate argument parser in xend.xm.

View File

@ -1,8 +1,8 @@
Index: xen-4.3.0-testing/tools/configure
Index: xen-4.4.0-testing/tools/configure
===================================================================
--- xen-4.3.0-testing.orig/tools/configure
+++ xen-4.3.0-testing/tools/configure
@@ -605,9 +605,6 @@ libgcrypt
--- xen-4.4.0-testing.orig/tools/configure
+++ xen-4.4.0-testing/tools/configure
@@ -633,9 +633,6 @@ libgcrypt
EXTFS_LIBS
system_aio
zlib
@ -12,14 +12,14 @@ Index: xen-4.3.0-testing/tools/configure
glib_LIBS
glib_CFLAGS
PKG_CONFIG_LIBDIR
@@ -7068,104 +7065,104 @@ $as_echo "yes" >&6; }
@@ -7202,104 +7199,104 @@ $as_echo "yes" >&6; }
fi
# Extract the first word of "wget", so it can be a program name with args.
-set dummy wget; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_WGET+set}" = set; then :
-if ${ac_cv_path_WGET+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $WGET in
@ -33,7 +33,7 @@ Index: xen-4.3.0-testing/tools/configure
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_path_WGET="$as_dir/$ac_word$ac_exec_ext"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
@ -66,7 +66,7 @@ Index: xen-4.3.0-testing/tools/configure
-set dummy ftp; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_FTP+set}" = set; then :
-if ${ac_cv_path_FTP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $FTP in
@ -80,7 +80,7 @@ Index: xen-4.3.0-testing/tools/configure
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_path_FTP="$as_dir/$ac_word$ac_exec_ext"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
@ -117,7 +117,7 @@ Index: xen-4.3.0-testing/tools/configure
+#set dummy wget; ac_word=$2
+#{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+#$as_echo_n "checking for $ac_word... " >&6; }
+#if test "${ac_cv_path_WGET+set}" = set; then :
+#if ${ac_cv_path_WGET+:} false; then :
+# $as_echo_n "(cached) " >&6
+#else
+# case $WGET in
@ -131,7 +131,7 @@ Index: xen-4.3.0-testing/tools/configure
+# IFS=$as_save_IFS
+# test -z "$as_dir" && as_dir=.
+# for ac_exec_ext in '' $ac_executable_extensions; do
+# if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+# if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+# ac_cv_path_WGET="$as_dir/$ac_word$ac_exec_ext"
+# $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+# break 2
@ -164,7 +164,7 @@ Index: xen-4.3.0-testing/tools/configure
+#set dummy ftp; ac_word=$2
+#{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+#$as_echo_n "checking for $ac_word... " >&6; }
+#if test "${ac_cv_path_FTP+set}" = set; then :
+#if ${ac_cv_path_FTP+:} false; then :
+# $as_echo_n "(cached) " >&6
+#else
+# case $FTP in
@ -178,7 +178,7 @@ Index: xen-4.3.0-testing/tools/configure
+# IFS=$as_save_IFS
+# test -z "$as_dir" && as_dir=.
+# for ac_exec_ext in '' $ac_executable_extensions; do
+# if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+# if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+# ac_cv_path_FTP="$as_dir/$ac_word$ac_exec_ext"
+# $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+# break 2
@ -215,11 +215,11 @@ Index: xen-4.3.0-testing/tools/configure
Index: xen-4.3.0-testing/stubdom/configure
Index: xen-4.4.0-testing/stubdom/configure
===================================================================
--- xen-4.3.0-testing.orig/stubdom/configure
+++ xen-4.3.0-testing/stubdom/configure
@@ -594,8 +594,6 @@ LDFLAGS
--- xen-4.4.0-testing.orig/stubdom/configure
+++ xen-4.4.0-testing/stubdom/configure
@@ -623,8 +623,6 @@ LDFLAGS
CFLAGS
CC
FETCHER
@ -228,14 +228,14 @@ Index: xen-4.3.0-testing/stubdom/configure
CMAKE
extfiles
debug
@@ -2165,104 +2163,104 @@ extfiles=$ax_cv_extfiles
@@ -2300,104 +2298,104 @@ extfiles=$ax_cv_extfiles
# Extract the first word of "wget", so it can be a program name with args.
-set dummy wget; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_WGET+set}" = set; then :
-if ${ac_cv_path_WGET+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $WGET in
@ -249,7 +249,7 @@ Index: xen-4.3.0-testing/stubdom/configure
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_path_WGET="$as_dir/$ac_word$ac_exec_ext"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
@ -282,7 +282,7 @@ Index: xen-4.3.0-testing/stubdom/configure
-set dummy ftp; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_FTP+set}" = set; then :
-if ${ac_cv_path_FTP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $FTP in
@ -296,7 +296,7 @@ Index: xen-4.3.0-testing/stubdom/configure
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_path_FTP="$as_dir/$ac_word$ac_exec_ext"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
@ -333,7 +333,7 @@ Index: xen-4.3.0-testing/stubdom/configure
+#set dummy wget; ac_word=$2
+#{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+#$as_echo_n "checking for $ac_word... " >&6; }
+#if test "${ac_cv_path_WGET+set}" = set; then :
+#if ${ac_cv_path_WGET+:} false; then :
+# $as_echo_n "(cached) " >&6
+#else
+# case $WGET in
@ -347,7 +347,7 @@ Index: xen-4.3.0-testing/stubdom/configure
+# IFS=$as_save_IFS
+# test -z "$as_dir" && as_dir=.
+# for ac_exec_ext in '' $ac_executable_extensions; do
+# if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+# if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+# ac_cv_path_WGET="$as_dir/$ac_word$ac_exec_ext"
+# $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+# break 2
@ -380,7 +380,7 @@ Index: xen-4.3.0-testing/stubdom/configure
+#set dummy ftp; ac_word=$2
+#{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+#$as_echo_n "checking for $ac_word... " >&6; }
+#if test "${ac_cv_path_FTP+set}" = set; then :
+#if ${ac_cv_path_FTP+:} false; then :
+# $as_echo_n "(cached) " >&6
+#else
+# case $FTP in
@ -394,7 +394,7 @@ Index: xen-4.3.0-testing/stubdom/configure
+# IFS=$as_save_IFS
+# test -z "$as_dir" && as_dir=.
+# for ac_exec_ext in '' $ac_executable_extensions; do
+# if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+# if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+# ac_cv_path_FTP="$as_dir/$ac_word$ac_exec_ext"
+# $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+# break 2

View File

@ -0,0 +1,32 @@
qcow2 corruption: Fix alloc_cluster_link_l2 (Kevin Wolf)
This patch fixes a qcow2 corruption bug introduced in SVN Rev 5861. L2 tables
are big endian, so entries must be converted before being passed to functions.
This bug is easy to trigger. The following script will create and destroy a
qcow2 image (the header is gone after three loop iterations):
#!/bin/bash
qemu-img create -f qcow2 test.qcow 1M
for i in $(seq 1 10); do
qemu-system-x86_64 -hda test.qcow -monitor stdio > /dev/null 2>&1 <<EOF
savevm test-$i
quit
EOF
done
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
@@ -916,7 +916,7 @@ static int alloc_cluster_link_l2(BlockDr
goto err;
for (i = 0; i < j; i++)
- free_any_clusters(bs, old_cluster[i], 1);
+ free_any_clusters(bs, be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
ret = 0;
err:

View File

@ -0,0 +1,18 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -247,8 +247,11 @@ static int open_disk(struct td_state *s,
drv = blktap_drivers[i].drv;
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
- /* Open the image */
- if (bdrv_open2(bs, path, flags, drv) != 0) {
+ /* Open the image
+ * Use BDRV_O_CACHE_WB for write-through caching,
+ * no flags for write-back caching
+ */
+ if (bdrv_open2(bs, path, flags|BDRV_O_CACHE_WB, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}

View File

@ -0,0 +1,73 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -360,6 +360,15 @@ static void qemu_send_responses(void* op
}
/**
+ * Callback function for AIO flush
+ */
+static void qemu_flush_response(void* opaque, int ret) {
+ if (ret != 0) {
+ DPRINTF("aio_flush: ret = %d (%s)\n", ret, strerror(-ret));
+ }
+}
+
+/**
* Callback function for the IO message pipe. Reads requests from the ring
* and processes them (call qemu read/write functions).
*
@@ -378,6 +387,7 @@ static void handle_blktap_iomsg(void* pr
blkif_t *blkif = s->blkif;
tapdev_info_t *info = s->ring_info;
int page_size = getpagesize();
+ int sync;
struct aiocb_info *aiocb_info;
@@ -410,7 +420,7 @@ static void handle_blktap_iomsg(void* pr
/* Don't allow writes on readonly devices */
if ((s->flags & TD_RDONLY) &&
- (req->operation == BLKIF_OP_WRITE)) {
+ (req->operation != BLKIF_OP_READ)) {
blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
goto send_response;
}
@@ -431,7 +441,7 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("Sector request failed:\n");
DPRINTF("%s request, idx [%d,%d] size [%llu], "
"sector [%llu,%llu]\n",
- (req->operation == BLKIF_OP_WRITE ?
+ (req->operation != BLKIF_OP_READ ?
"WRITE" : "READ"),
idx,i,
(long long unsigned)
@@ -444,8 +454,14 @@ static void handle_blktap_iomsg(void* pr
blkif->pending_list[idx].secs_pending += nsects;
- switch (req->operation)
+ sync = 0;
+ switch (req->operation)
{
+ case BLKIF_OP_WRITE_BARRIER:
+ sync = 1;
+ bdrv_aio_flush(s->bs, qemu_flush_response, NULL);
+ /* fall through */
+
case BLKIF_OP_WRITE:
aiocb_info = malloc(sizeof(*aiocb_info));
@@ -465,6 +481,10 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("ERROR: bdrv_write() == NULL\n");
goto send_response;
}
+
+ if (sync)
+ bdrv_aio_flush(s->bs, qemu_flush_response, NULL);
+
break;
case BLKIF_OP_READ:

View File

@ -0,0 +1,24 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
@@ -270,6 +270,7 @@ void qemu_invalidate_entry(uint8_t *buff
#endif /* defined(MAPCACHE) */
+extern void init_blktap(void);
static void xen_init_fv(ram_addr_t ram_size, int vga_ram_size,
const char *boot_device,
@@ -295,6 +296,11 @@ static void xen_init_fv(ram_addr_t ram_s
}
#endif
+#ifndef CONFIG_STUBDOM
+ /* Initialize tapdisk client */
+ init_blktap();
+#endif
+
#ifdef CONFIG_STUBDOM /* the hvmop is not supported on older hypervisors */
xc_set_hvm_param(xc_handle, domid, HVM_PARAM_DM_DOMAIN, DOMID_SELF);
#endif

View File

@ -0,0 +1,89 @@
From 5ac882a6d7499e4a36103db071203bf4d1ddfe1f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:26:45 +0100
Subject: [PATCH 2/6] ioemu: Use the image format sent by blktapctrl
Currently the blktap backend in ioemu lets qemu guess which format an
image is in. This was a security problem and the blktap backend
doesn't work any more since this was fixed in qemu.
This patch changes ioemu to respect the format it gets from blktapctrl.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
hw/xen_blktap.c | 22 +++++++++++++++++++---
hw/xen_blktap.h | 14 ++++++++++++++
2 files changed, 33 insertions(+), 3 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -218,9 +218,10 @@ static int map_new_dev(struct td_state *
return -1;
}
-static int open_disk(struct td_state *s, char *path, int readonly)
+static int open_disk(struct td_state *s, char *path, int driver, int readonly)
{
BlockDriverState* bs;
+ BlockDriver* drv;
char* devname;
static int devnumber = 0;
int i;
@@ -230,7 +231,22 @@ static int open_disk(struct td_state *s,
bs = bdrv_new(devname);
free(devname);
- if (bdrv_open(bs, path, 0) != 0) {
+ /* Search for disk driver */
+ for (i = 0; blktap_drivers[i].idnum >= 0; i++) {
+ if (blktap_drivers[i].idnum == driver)
+ break;
+ }
+
+ if (blktap_drivers[i].idnum < 0) {
+ fprintf(stderr, "Could not find image format id %d\n", driver);
+ return -ENOMEM;
+ }
+
+ drv = blktap_drivers[i].drv;
+ DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
+
+ /* Open the image */
+ if (bdrv_open2(bs, path, 0, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
@@ -521,7 +537,7 @@ static void handle_blktap_ctrlmsg(void*
s = state_init();
/*Open file*/
- if (s == NULL || open_disk(s, path, msg->readonly)) {
+ if (s == NULL || open_disk(s, path, msg->drivertype, msg->readonly)) {
msglen = sizeof(msg_hdr_t);
msg->type = CTLMSG_IMG_FAIL;
msg->len = msglen;
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
@@ -52,4 +52,18 @@ typedef struct fd_list_entry {
int init_blktap(void);
+typedef struct disk_info {
+ int idnum;
+ struct BlockDriver *drv;
+} disk_info_t;
+
+static disk_info_t blktap_drivers[] = {
+ { DISK_TYPE_AIO, &bdrv_raw },
+ { DISK_TYPE_SYNC, &bdrv_raw },
+ { DISK_TYPE_VMDK, &bdrv_vmdk },
+ { DISK_TYPE_QCOW, &bdrv_qcow },
+ { DISK_TYPE_QCOW2, &bdrv_qcow2 },
+ { -1, NULL }
+};
+
#endif /*XEN_BLKTAP_H_*/

View File

@ -0,0 +1,44 @@
From cb982fd919a52ff86f01025d0f92225bc7b2a956 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:44:31 +0100
Subject: [PATCH 5/6] ioemu: Fail on too small blktap disks
The blktap infrastructure doesn't seems to be able to cope with images
that are smaller than a sector, it produced hangs for me. Such an
image isn't really useful anyway, so just fail gracefully.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
hw/xen_blktap.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -256,6 +256,12 @@ static int open_disk(struct td_state *s,
s->size = bs->total_sectors;
s->sector_size = 512;
+ if (s->size == 0) {
+ fprintf(stderr, "Error: Disk image %s is too small\n",
+ path);
+ return -ENOMEM;
+ }
+
s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);
#ifndef QEMU_TOOL
Index: xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/DevController.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
@@ -155,7 +155,7 @@ class DevController:
(devid, self.deviceClass))
elif status == Error:
- self.destroyDevice(devid, False)
+ self.destroyDevice(devid, True)
if err is None:
raise VmError("Device %s (%s) could not be connected. "
"Backend device not found." %

View File

@ -0,0 +1,76 @@
Index: xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
===================================================================
--- xen-4.2.3-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
+++ xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
@@ -1,6 +1,8 @@
#ifndef QEMU_XEN_H
#define QEMU_XEN_H
+#include "hw/boards.h"
+
/* vl.c */
extern int restore;
extern int vga_ram_size;
@@ -65,7 +67,7 @@ void handle_buffered_pio(void);
/* xenstore.c */
void xenstore_init(void);
uint32_t xenstore_read_target(void);
-void xenstore_parse_domain_config(int domid);
+void xenstore_parse_domain_config(int domid, QEMUMachine *machine);
int xenstore_parse_disable_pf_config(void);
int xenstore_fd(void);
void xenstore_process_event(void *opaque);
Index: xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.3-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -5862,9 +5862,9 @@ int main(int argc, char **argv, char **e
if ((msg = xenbus_read(XBT_NIL, "domid", &domid_s)))
fprintf(stderr,"Can not read our own domid: %s\n", msg);
else
- xenstore_parse_domain_config(atoi(domid_s));
+ xenstore_parse_domain_config(atoi(domid_s), machine);
#else
- xenstore_parse_domain_config(domid);
+ xenstore_parse_domain_config(domid, machine);
#endif /* CONFIG_STUBDOM */
}
Index: xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.2.3-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -446,7 +446,7 @@ void xenstore_init(void)
}
}
-void xenstore_parse_domain_config(int hvm_domid)
+void xenstore_parse_domain_config(int hvm_domid, QEMUMachine *machine)
{
char **e_danger = NULL;
char *buf = NULL;
@@ -740,11 +740,19 @@ void xenstore_parse_domain_config(int hv
#endif
- drives_table[nb_drives].bdrv = bs;
- drives_table[nb_drives].used = 1;
- media_filename[nb_drives] = strdup(bs->filename);
- nb_drives++;
-
+ if (machine == &xenfv_machine) {
+ drives_table[nb_drives].bdrv = bs;
+ drives_table[nb_drives].used = 1;
+#ifdef CONFIG_STUBDOM
+ media_filename[nb_drives] = strdup(danger_buf);
+#else
+ media_filename[nb_drives] = strdup(bs->filename);
+#endif
+ nb_drives++;
+ } else {
+ qemu_aio_flush();
+ bdrv_close(bs);
+ }
}
#ifdef CONFIG_STUBDOM

80
ioemu-disable-scsi.patch Normal file
View File

@ -0,0 +1,80 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -359,6 +359,8 @@ static void platform_ioport_write(void *
case 4:
fprintf(logfile, "Disconnect IDE hard disk...\n");
ide_unplug_harddisks();
+ fprintf(logfile, "Disconnect SCSI hard disk...\n");
+ pci_unplug_scsi();
fprintf(logfile, "Disconnect netifs...\n");
pci_unplug_netifs();
fprintf(logfile, "Shutdown taps...\n");
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
@@ -47,6 +47,7 @@ void unset_vram_mapping(void *opaque);
#endif
void pci_unplug_netifs(void);
+void pci_unplug_scsi(void);
void destroy_hvm_domain(void);
void unregister_iomem(target_phys_addr_t start);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pci.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pci.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pci.c
@@ -871,6 +871,50 @@ void pci_unplug_netifs(void)
}
}
+void pci_unplug_scsi(void)
+{
+ PCIBus *bus;
+ PCIDevice *dev;
+ PCIIORegion *region;
+ int x;
+ int i;
+
+ /* We only support one PCI bus */
+ for (bus = first_bus; bus; bus = NULL) {
+ for (x = 0; x < 256; x++) {
+ dev = bus->devices[x];
+ if (dev &&
+ dev->config[0xa] == 0 &&
+ dev->config[0xb] == 1
+#ifdef CONFIG_PASSTHROUGH
+ && test_pci_devfn(x) != 1
+#endif
+ ) {
+ /* Found a scsi disk. Remove it from the bus. Note that
+ we don't free it here, since there could still be
+ references to it floating around. There are only
+ ever one or two structures leaked, and it's not
+ worth finding them all. */
+ bus->devices[x] = NULL;
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ region = &dev->io_regions[i];
+ if (region->addr == (uint32_t)-1 ||
+ region->size == 0)
+ continue;
+ fprintf(logfile, "region type %d at [%x,%x).\n",
+ region->type, region->addr,
+ region->addr+region->size);
+ if (region->type == PCI_ADDRESS_SPACE_IO) {
+ isa_unassign_ioport(region->addr, region->size);
+ } else if (region->type == PCI_ADDRESS_SPACE_MEM) {
+ unregister_iomem(region->addr);
+ }
+ }
+ }
+ }
+ }
+}
+
typedef struct {
PCIDevice dev;
PCIBus *bus;

View File

@ -0,0 +1,65 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -30,6 +30,8 @@
#include "qemu-xen.h"
#include "net.h"
#include "xen_platform.h"
+#include "sysemu.h"
+#include <xc_private.h>
#include <assert.h>
#include <xenguest.h>
@@ -335,11 +337,51 @@ static void xen_platform_ioport_writeb(v
}
}
+static uint32_t ioport_base;
+
+static void platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ DECLARE_DOMCTL;
+ int rc;
+
+ if (val == 0)
+ qemu_invalidate_map_cache();
+
+ switch (addr - ioport_base) {
+ case 0:
+ fprintf(logfile, "Init hypercall page %x, addr %x.\n", val, addr);
+ domctl.domain = (domid_t)domid;
+ domctl.u.hypercall_init.gmfn = val;
+ domctl.cmd = XEN_DOMCTL_hypercall_init;
+ rc = xc_domctl(xc_handle, &domctl);
+ fprintf(logfile, "result -> %d.\n", rc);
+ break;
+ case 4:
+ fprintf(logfile, "Disconnect IDE hard disk...\n");
+ ide_unplug_harddisks();
+ fprintf(logfile, "Disconnect netifs...\n");
+ pci_unplug_netifs();
+ fprintf(logfile, "Shutdown taps...\n");
+ net_tap_shutdown_all();
+ fprintf(logfile, "Done.\n");
+ break;
+ default:
+ fprintf(logfile, "Write to bad port %x (base %x) on evtchn device.\n",
+ addr, ioport_base);
+ break;
+ }
+}
+
static void platform_ioport_map(PCIDevice *pci_dev, int region_num, uint32_t addr, uint32_t size, int type)
{
+ ioport_base = addr;
+
+ register_ioport_write(addr, 16, 4, platform_ioport_write, NULL);
+/*
PCIXenPlatformState *d = (PCIXenPlatformState *)pci_dev;
register_ioport_write(addr, size, 1, xen_platform_ioport_writeb, d);
register_ioport_read(addr, size, 1, xen_platform_ioport_readb, d);
+*/
}
static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr)

30
ioemu-vnc-resize.patch Normal file
View File

@ -0,0 +1,30 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1751,6 +1751,25 @@ static int protocol_client_msg(VncState
}
set_encodings(vs, (int32_t *)(data + 4), limit);
+
+ /*
+ * The initialization of a VNC connection can race with xenfb changing
+ * the resolution. This happens when the VNC connection is already
+ * established, but the client has not yet advertised has_resize, so it
+ * won't get notified of the switch.
+ *
+ * Therefore we resend the resolution as soon as the client has sent its
+ * encodings.
+ */
+ if (vs->has_resize) {
+ /* Resize the VNC window */
+ vnc_write_u8(vs, 0); /* msg id */
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1); /* number of rects */
+ vnc_framebuffer_update(vs, 0, 0, vs->serverds.width, vs->serverds.height, -223);
+
+ vnc_flush(vs);
+ }
break;
case 3:
if (len == 1)

View File

@ -0,0 +1,34 @@
Subject: qdev: convert watchdogs
From: Markus Armbruster armbru@redhat.com Fri Aug 21 10:31:34 2009 +0200
Date: Thu Aug 27 20:35:24 2009 -0500:
Git: 09aaa1602f9381c0e0fb539390b1793e51bdfc7b
* THIS IS ONLY THE BUG FIX PART OF THE UPSTREAM PATCH *
Fixes ib700 not to use vm_clock before it is initialized: in
wdt_ib700_init(), called from register_watchdogs(), which runs before
init_timers(). The bug made ib700_write_enable_reg() crash in
qemu_del_timer().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
@@ -93,6 +93,7 @@ static int ib700_load(QEMUFile *f, void
/* Create and initialize a virtual IB700 during PC creation. */
static void ib700_pc_init(PCIBus *unused)
{
+ timer = qemu_new_timer(vm_clock, ib700_timer_expired, NULL);
register_savevm("ib700_wdt", -1, 0, ib700_save, ib700_load, NULL);
register_ioport_write(0x441, 2, 1, ib700_write_disable_reg, NULL);
@@ -108,5 +109,4 @@ static WatchdogTimerModel model = {
void wdt_ib700_init(void)
{
watchdog_add_model(&model);
- timer = qemu_new_timer(vm_clock, ib700_timer_expired, NULL);
}

View File

@ -0,0 +1,72 @@
Subject: Move watchdog, watchdog_action, give them internal linkage
From: Markus Armbruster armbru@redhat.com Fri Aug 21 10:31:32 2009 +0200
Date: Thu Aug 27 20:30:23 2009 -0500:
Git: 88b3be201acf64e0bd19782bebd533901c951c87
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
@@ -26,6 +26,16 @@
#include "sysemu.h"
#include "hw/watchdog.h"
+/* Possible values for action parameter. */
+#define WDT_RESET 1 /* Hard reset. */
+#define WDT_SHUTDOWN 2 /* Shutdown. */
+#define WDT_POWEROFF 3 /* Quit. */
+#define WDT_PAUSE 4 /* Pause. */
+#define WDT_DEBUG 5 /* Prints a message and continues running. */
+#define WDT_NONE 6 /* Do nothing. */
+
+static WatchdogTimerModel *watchdog;
+static int watchdog_action = WDT_RESET;
static LIST_HEAD(watchdog_list, WatchdogTimerModel) watchdog_list;
void watchdog_add_model(WatchdogTimerModel *model)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
@@ -27,13 +27,6 @@
extern void wdt_i6300esb_init(void);
extern void wdt_ib700_init(void);
-/* Possible values for action parameter. */
-#define WDT_RESET 1 /* Hard reset. */
-#define WDT_SHUTDOWN 2 /* Shutdown. */
-#define WDT_POWEROFF 3 /* Quit. */
-#define WDT_PAUSE 4 /* Pause. */
-#define WDT_DEBUG 5 /* Prints a message and continues running. */
-#define WDT_NONE 6 /* Do nothing. */
struct WatchdogTimerModel {
LIST_ENTRY(WatchdogTimerModel) entry;
@@ -50,10 +43,6 @@ struct WatchdogTimerModel {
};
typedef struct WatchdogTimerModel WatchdogTimerModel;
-/* in vl.c */
-extern WatchdogTimerModel *watchdog;
-extern int watchdog_action;
-
/* in hw/watchdog.c */
extern int select_watchdog(const char *p);
extern int select_watchdog_action(const char *action);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -250,8 +250,6 @@ int no_shutdown = 0;
int cursor_hide = 1;
int graphic_rotate = 0;
int daemonize = 0;
-WatchdogTimerModel *watchdog = NULL;
-int watchdog_action = WDT_RESET;
const char *option_rom[MAX_OPTION_ROMS];
int nb_option_roms;
int semihosting_enabled = 0;

View File

@ -0,0 +1,963 @@
Subject: Hardware watchdog
From: Richard W.M. Jones rjones@redhat.com Sat Apr 25 13:56:19 2009 +0100
Date: Fri May 1 09:44:11 2009 -0500:
Git: 9dd986ccf68f142aaafe543d80cf877716d91d4e
Here is an updated hardware watchdog patch, which should fix
everything that was raised about the previous version ...
Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile.target
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/Makefile.target
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile.target
@@ -580,6 +580,10 @@ OBJS += e1000.o
# Serial mouse
OBJS += msmouse.o
+# Generic watchdog support and some watchdog devices
+OBJS += watchdog.o
+OBJS += wdt_ib700.o wdt_i6300esb.o
+
ifeq ($(TARGET_BASE_ARCH), i386)
# Hardware support
ifdef CONFIG_AUDIO
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
@@ -41,6 +41,7 @@
#include "virtio-balloon.h"
#include "virtio-console.h"
#include "hpet_emul.h"
+#include "watchdog.h"
#ifdef CONFIG_PASSTHROUGH
#include "pass-through.h"
@@ -1050,6 +1051,8 @@ vga_bios_error:
}
}
+ watchdog_pc_init(pci_bus);
+
for(i = 0; i < nb_nics; i++) {
NICInfo *nd = &nd_table[i];
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
@@ -0,0 +1,136 @@
+/*
+ * Virtual hardware watchdog.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ *
+ * By Richard W.M. Jones (rjones@redhat.com).
+ */
+
+#include "qemu-common.h"
+#include "sys-queue.h"
+#include "sysemu.h"
+#include "hw/watchdog.h"
+
+static LIST_HEAD(watchdog_list, WatchdogTimerModel) watchdog_list;
+
+void watchdog_add_model(WatchdogTimerModel *model)
+{
+ LIST_INSERT_HEAD(&watchdog_list, model, entry);
+}
+
+/* Returns:
+ * 0 = continue
+ * 1 = exit program with error
+ * 2 = exit program without error
+ */
+int select_watchdog(const char *p)
+{
+ WatchdogTimerModel *model;
+
+ if (watchdog) {
+ fprintf(stderr,
+ "qemu: only one watchdog option may be given\n");
+ return 1;
+ }
+
+ /* -watchdog ? lists available devices and exits cleanly. */
+ if (strcmp(p, "?") == 0) {
+ LIST_FOREACH(model, &watchdog_list, entry) {
+ fprintf(stderr, "\t%s\t%s\n",
+ model->wdt_name, model->wdt_description);
+ }
+ return 2;
+ }
+
+ LIST_FOREACH(model, &watchdog_list, entry) {
+ if (strcasecmp(model->wdt_name, p) == 0) {
+ watchdog = model;
+ return 0;
+ }
+ }
+
+ fprintf(stderr, "Unknown -watchdog device. Supported devices are:\n");
+ LIST_FOREACH(model, &watchdog_list, entry) {
+ fprintf(stderr, "\t%s\t%s\n",
+ model->wdt_name, model->wdt_description);
+ }
+ return 1;
+}
+
+int select_watchdog_action(const char *p)
+{
+ if (strcasecmp(p, "reset") == 0)
+ watchdog_action = WDT_RESET;
+ else if (strcasecmp(p, "shutdown") == 0)
+ watchdog_action = WDT_SHUTDOWN;
+ else if (strcasecmp(p, "poweroff") == 0)
+ watchdog_action = WDT_POWEROFF;
+ else if (strcasecmp(p, "pause") == 0)
+ watchdog_action = WDT_PAUSE;
+ else if (strcasecmp(p, "debug") == 0)
+ watchdog_action = WDT_DEBUG;
+ else if (strcasecmp(p, "none") == 0)
+ watchdog_action = WDT_NONE;
+ else
+ return -1;
+
+ return 0;
+}
+
+/* This actually performs the "action" once a watchdog has expired,
+ * ie. reboot, shutdown, exit, etc.
+ */
+void watchdog_perform_action(void)
+{
+ switch(watchdog_action) {
+ case WDT_RESET: /* same as 'system_reset' in monitor */
+ qemu_system_reset_request();
+ break;
+
+ case WDT_SHUTDOWN: /* same as 'system_powerdown' in monitor */
+ qemu_system_powerdown_request();
+ break;
+
+ case WDT_POWEROFF: /* same as 'quit' command in monitor */
+ exit(0);
+ break;
+
+ case WDT_PAUSE: /* same as 'stop' command in monitor */
+ vm_stop(0);
+ break;
+
+ case WDT_DEBUG:
+ fprintf(stderr, "watchdog: timer fired\n");
+ break;
+
+ case WDT_NONE:
+ break;
+ }
+}
+
+void watchdog_pc_init(PCIBus *pci_bus)
+{
+ if (watchdog)
+ watchdog->wdt_pc_init(pci_bus);
+}
+
+void register_watchdogs(void)
+{
+ wdt_ib700_init();
+ wdt_i6300esb_init();
+}
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
@@ -0,0 +1,65 @@
+/*
+ * Virtual hardware watchdog.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ *
+ * By Richard W.M. Jones (rjones@redhat.com).
+ */
+
+#ifndef QEMU_WATCHDOG_H
+#define QEMU_WATCHDOG_H
+
+extern void wdt_i6300esb_init(void);
+extern void wdt_ib700_init(void);
+
+/* Possible values for action parameter. */
+#define WDT_RESET 1 /* Hard reset. */
+#define WDT_SHUTDOWN 2 /* Shutdown. */
+#define WDT_POWEROFF 3 /* Quit. */
+#define WDT_PAUSE 4 /* Pause. */
+#define WDT_DEBUG 5 /* Prints a message and continues running. */
+#define WDT_NONE 6 /* Do nothing. */
+
+struct WatchdogTimerModel {
+ LIST_ENTRY(WatchdogTimerModel) entry;
+
+ /* Short name of the device - used to select it on the command line. */
+ const char *wdt_name;
+ /* Longer description (eg. manufacturer and full model number). */
+ const char *wdt_description;
+
+ /* This callback should create/register the device. It is called
+ * indirectly from hw/pc.c when the virtual PC is being set up.
+ */
+ void (*wdt_pc_init)(PCIBus *pci_bus);
+};
+typedef struct WatchdogTimerModel WatchdogTimerModel;
+
+/* in vl.c */
+extern WatchdogTimerModel *watchdog;
+extern int watchdog_action;
+
+/* in hw/watchdog.c */
+extern int select_watchdog(const char *p);
+extern int select_watchdog_action(const char *action);
+extern void watchdog_add_model(WatchdogTimerModel *model);
+extern void watchdog_perform_action(void);
+extern void watchdog_pc_init(PCIBus *pci_bus);
+extern void register_watchdogs(void);
+
+#endif /* QEMU_WATCHDOG_H */
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_i6300esb.c
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_i6300esb.c
@@ -0,0 +1,470 @@
+/*
+ * Virtual hardware watchdog.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ *
+ * By Richard W.M. Jones (rjones@redhat.com).
+ */
+
+#include <inttypes.h>
+
+#include "qemu-common.h"
+#include "qemu-timer.h"
+#include "watchdog.h"
+#include "hw.h"
+#include "isa.h"
+#include "pc.h"
+#include "pci.h"
+
+/*#define I6300ESB_DEBUG 1*/
+
+#ifdef I6300ESB_DEBUG
+#define i6300esb_debug(fs,...) \
+ fprintf(stderr,"i6300esb: %s: "fs,__func__,##__VA_ARGS__)
+#else
+#define i6300esb_debug(fs,...)
+#endif
+
+#ifndef PCI_DEVICE_ID_INTEL_ESB_9
+#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab
+#endif
+
+/* PCI configuration registers */
+#define ESB_CONFIG_REG 0x60 /* Config register */
+#define ESB_LOCK_REG 0x68 /* WDT lock register */
+
+/* Memory mapped registers (offset from base address) */
+#define ESB_TIMER1_REG 0x00 /* Timer1 value after each reset */
+#define ESB_TIMER2_REG 0x04 /* Timer2 value after each reset */
+#define ESB_GINTSR_REG 0x08 /* General Interrupt Status Register */
+#define ESB_RELOAD_REG 0x0c /* Reload register */
+
+/* Lock register bits */
+#define ESB_WDT_FUNC (0x01 << 2) /* Watchdog functionality */
+#define ESB_WDT_ENABLE (0x01 << 1) /* Enable WDT */
+#define ESB_WDT_LOCK (0x01 << 0) /* Lock (nowayout) */
+
+/* Config register bits */
+#define ESB_WDT_REBOOT (0x01 << 5) /* Enable reboot on timeout */
+#define ESB_WDT_FREQ (0x01 << 2) /* Decrement frequency */
+#define ESB_WDT_INTTYPE (0x11 << 0) /* Interrupt type on timer1 timeout */
+
+/* Reload register bits */
+#define ESB_WDT_RELOAD (0x01 << 8) /* prevent timeout */
+
+/* Magic constants */
+#define ESB_UNLOCK1 0x80 /* Step 1 to unlock reset registers */
+#define ESB_UNLOCK2 0x86 /* Step 2 to unlock reset registers */
+
+/* Device state. */
+struct I6300State {
+ PCIDevice dev; /* PCI device state, must be first field. */
+
+ int reboot_enabled; /* "Reboot" on timer expiry. The real action
+ * performed depends on the -watchdog-action
+ * param passed on QEMU command line.
+ */
+ int clock_scale; /* Clock scale. */
+#define CLOCK_SCALE_1KHZ 0
+#define CLOCK_SCALE_1MHZ 1
+
+ int int_type; /* Interrupt type generated. */
+#define INT_TYPE_IRQ 0 /* APIC 1, INT 10 */
+#define INT_TYPE_SMI 2
+#define INT_TYPE_DISABLED 3
+
+ int free_run; /* If true, reload timer on expiry. */
+ int locked; /* If true, enabled field cannot be changed. */
+ int enabled; /* If true, watchdog is enabled. */
+
+ QEMUTimer *timer; /* The actual watchdog timer. */
+
+ uint32_t timer1_preload; /* Values preloaded into timer1, timer2. */
+ uint32_t timer2_preload;
+ int stage; /* Stage (1 or 2). */
+
+ int unlock_state; /* Guest writes 0x80, 0x86 to unlock the
+ * registers, and we transition through
+ * states 0 -> 1 -> 2 when this happens.
+ */
+
+ int previous_reboot_flag; /* If the watchdog caused the previous
+ * reboot, this flag will be set.
+ */
+};
+
+typedef struct I6300State I6300State;
+
+/* This function is called when the watchdog has either been enabled
+ * (hence it starts counting down) or has been keep-alived.
+ */
+static void i6300esb_restart_timer(I6300State *d, int stage)
+{
+ int64_t timeout;
+
+ if (!d->enabled)
+ return;
+
+ d->stage = stage;
+
+ if (d->stage <= 1)
+ timeout = d->timer1_preload;
+ else
+ timeout = d->timer2_preload;
+
+ if (d->clock_scale == CLOCK_SCALE_1KHZ)
+ timeout <<= 15;
+ else
+ timeout <<= 5;
+
+ /* Get the timeout in units of ticks_per_sec. */
+ timeout = ticks_per_sec * timeout / 33000000;
+
+ i6300esb_debug("stage %d, timeout %" PRIi64 "\n", d->stage, timeout);
+
+ qemu_mod_timer(d->timer, qemu_get_clock(vm_clock) + timeout);
+}
+
+/* This is called when the guest disables the watchdog. */
+static void i6300esb_disable_timer(I6300State *d)
+{
+ i6300esb_debug("timer disabled\n");
+
+ qemu_del_timer(d->timer);
+}
+
+static void i6300esb_reset(I6300State *d)
+{
+ /* XXX We should probably reset other parts of the state here,
+ * but we should also reset our state on general machine reset
+ * too. For now just disable the timer so it doesn't fire
+ * again after the reboot.
+ */
+ i6300esb_disable_timer(d);
+}
+
+/* This function is called when the watchdog expires. Note that
+ * the hardware has two timers, and so expiry happens in two stages.
+ * If d->stage == 1 then we perform the first stage action (usually,
+ * sending an interrupt) and then restart the timer again for the
+ * second stage. If the second stage expires then the watchdog
+ * really has run out.
+ */
+static void i6300esb_timer_expired(void *vp)
+{
+ I6300State *d = (I6300State *) vp;
+
+ i6300esb_debug("stage %d\n", d->stage);
+
+ if (d->stage == 1) {
+ /* What to do at the end of stage 1? */
+ switch (d->int_type) {
+ case INT_TYPE_IRQ:
+ fprintf(stderr, "i6300esb_timer_expired: I would send APIC 1 INT 10 here if I knew how (XXX)\n");
+ break;
+ case INT_TYPE_SMI:
+ fprintf(stderr, "i6300esb_timer_expired: I would send SMI here if I knew how (XXX)\n");
+ break;
+ }
+
+ /* Start the second stage. */
+ i6300esb_restart_timer(d, 2);
+ } else {
+ /* Second stage expired, reboot for real. */
+ if (d->reboot_enabled) {
+ d->previous_reboot_flag = 1;
+ watchdog_perform_action(); /* This reboots, exits, etc */
+ i6300esb_reset(d);
+ }
+
+ /* In "free running mode" we start stage 1 again. */
+ if (d->free_run)
+ i6300esb_restart_timer(d, 1);
+ }
+}
+
+static void i6300esb_config_write(PCIDevice *dev, uint32_t addr,
+ uint32_t data, int len)
+{
+ I6300State *d = (I6300State *) dev;
+ int old;
+
+ i6300esb_debug("addr = %x, data = %x, len = %d\n", addr, data, len);
+
+ if (addr == ESB_CONFIG_REG && len == 2) {
+ d->reboot_enabled = (data & ESB_WDT_REBOOT) == 0;
+ d->clock_scale =
+ (data & ESB_WDT_FREQ) != 0 ? CLOCK_SCALE_1MHZ : CLOCK_SCALE_1KHZ;
+ d->int_type = (data & ESB_WDT_INTTYPE);
+ } else if (addr == ESB_LOCK_REG && len == 1) {
+ if (!d->locked) {
+ d->locked = (data & ESB_WDT_LOCK) != 0;
+ d->free_run = (data & ESB_WDT_FUNC) != 0;
+ old = d->enabled;
+ d->enabled = (data & ESB_WDT_ENABLE) != 0;
+ if (!old && d->enabled) /* Enabled transitioned from 0 -> 1 */
+ i6300esb_restart_timer(d, 1);
+ else if (!d->enabled)
+ i6300esb_disable_timer(d);
+ }
+ } else {
+ pci_default_write_config(dev, addr, data, len);
+ }
+}
+
+static uint32_t i6300esb_config_read(PCIDevice *dev, uint32_t addr, int len)
+{
+ I6300State *d = (I6300State *) dev;
+ uint32_t data;
+
+ i6300esb_debug ("addr = %x, len = %d\n", addr, len);
+
+ if (addr == ESB_CONFIG_REG && len == 2) {
+ data =
+ (d->reboot_enabled ? 0 : ESB_WDT_REBOOT) |
+ (d->clock_scale == CLOCK_SCALE_1MHZ ? ESB_WDT_FREQ : 0) |
+ d->int_type;
+ return data;
+ } else if (addr == ESB_LOCK_REG && len == 1) {
+ data =
+ (d->free_run ? ESB_WDT_FUNC : 0) |
+ (d->locked ? ESB_WDT_LOCK : 0) |
+ (d->enabled ? ESB_WDT_ENABLE : 0);
+ return data;
+ } else {
+ return pci_default_read_config(dev, addr, len);
+ }
+}
+
+static uint32_t i6300esb_mem_readb(void *vp, target_phys_addr_t addr)
+{
+ i6300esb_debug ("addr = %x\n", (int) addr);
+
+ return 0;
+}
+
+static uint32_t i6300esb_mem_readw(void *vp, target_phys_addr_t addr)
+{
+ uint32_t data = 0;
+ I6300State *d = (I6300State *) vp;
+
+ i6300esb_debug("addr = %x\n", (int) addr);
+
+ if (addr == 0xc) {
+ /* The previous reboot flag is really bit 9, but there is
+ * a bug in the Linux driver where it thinks it's bit 12.
+ * Set both.
+ */
+ data = d->previous_reboot_flag ? 0x1200 : 0;
+ }
+
+ return data;
+}
+
+static uint32_t i6300esb_mem_readl(void *vp, target_phys_addr_t addr)
+{
+ i6300esb_debug("addr = %x\n", (int) addr);
+
+ return 0;
+}
+
+static void i6300esb_mem_writeb(void *vp, target_phys_addr_t addr, uint32_t val)
+{
+ I6300State *d = (I6300State *) vp;
+
+ i6300esb_debug("addr = %x, val = %x\n", (int) addr, val);
+
+ if (addr == 0xc && val == 0x80)
+ d->unlock_state = 1;
+ else if (addr == 0xc && val == 0x86 && d->unlock_state == 1)
+ d->unlock_state = 2;
+}
+
+static void i6300esb_mem_writew(void *vp, target_phys_addr_t addr, uint32_t val)
+{
+ I6300State *d = (I6300State *) vp;
+
+ i6300esb_debug("addr = %x, val = %x\n", (int) addr, val);
+
+ if (addr == 0xc && val == 0x80)
+ d->unlock_state = 1;
+ else if (addr == 0xc && val == 0x86 && d->unlock_state == 1)
+ d->unlock_state = 2;
+ else {
+ if (d->unlock_state == 2) {
+ if (addr == 0xc) {
+ if ((val & 0x100) != 0)
+ /* This is the "ping" from the userspace watchdog in
+ * the guest ...
+ */
+ i6300esb_restart_timer(d, 1);
+
+ /* Setting bit 9 resets the previous reboot flag.
+ * There's a bug in the Linux driver where it sets
+ * bit 12 instead.
+ */
+ if ((val & 0x200) != 0 || (val & 0x1000) != 0) {
+ d->previous_reboot_flag = 0;
+ }
+ }
+
+ d->unlock_state = 0;
+ }
+ }
+}
+
+static void i6300esb_mem_writel(void *vp, target_phys_addr_t addr, uint32_t val)
+{
+ I6300State *d = (I6300State *) vp;
+
+ i6300esb_debug ("addr = %x, val = %x\n", (int) addr, val);
+
+ if (addr == 0xc && val == 0x80)
+ d->unlock_state = 1;
+ else if (addr == 0xc && val == 0x86 && d->unlock_state == 1)
+ d->unlock_state = 2;
+ else {
+ if (d->unlock_state == 2) {
+ if (addr == 0)
+ d->timer1_preload = val & 0xfffff;
+ else if (addr == 4)
+ d->timer2_preload = val & 0xfffff;
+
+ d->unlock_state = 0;
+ }
+ }
+}
+
+static void i6300esb_map(PCIDevice *dev, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ static CPUReadMemoryFunc *mem_read[3] = {
+ i6300esb_mem_readb,
+ i6300esb_mem_readw,
+ i6300esb_mem_readl,
+ };
+ static CPUWriteMemoryFunc *mem_write[3] = {
+ i6300esb_mem_writeb,
+ i6300esb_mem_writew,
+ i6300esb_mem_writel,
+ };
+ I6300State *d = (I6300State *) dev;
+ int io_mem;
+
+ i6300esb_debug("addr = %x, size = %x, type = %d\n", addr, size, type);
+
+ io_mem = cpu_register_io_memory (0, mem_read, mem_write, d);
+ cpu_register_physical_memory (addr, 0x10, io_mem);
+ /* qemu_register_coalesced_mmio (addr, 0x10); ? */
+}
+
+static void i6300esb_save(QEMUFile *f, void *vp)
+{
+ I6300State *d = (I6300State *) vp;
+
+ pci_device_save(&d->dev, f);
+ qemu_put_be32(f, d->reboot_enabled);
+ qemu_put_be32(f, d->clock_scale);
+ qemu_put_be32(f, d->int_type);
+ qemu_put_be32(f, d->free_run);
+ qemu_put_be32(f, d->locked);
+ qemu_put_be32(f, d->enabled);
+ qemu_put_timer(f, d->timer);
+ qemu_put_be32(f, d->timer1_preload);
+ qemu_put_be32(f, d->timer2_preload);
+ qemu_put_be32(f, d->stage);
+ qemu_put_be32(f, d->unlock_state);
+ qemu_put_be32(f, d->previous_reboot_flag);
+}
+
+static int i6300esb_load(QEMUFile *f, void *vp, int version)
+{
+ I6300State *d = (I6300State *) vp;
+
+ if (version != sizeof (I6300State))
+ return -EINVAL;
+
+ pci_device_load(&d->dev, f);
+ d->reboot_enabled = qemu_get_be32(f);
+ d->clock_scale = qemu_get_be32(f);
+ d->int_type = qemu_get_be32(f);
+ d->free_run = qemu_get_be32(f);
+ d->locked = qemu_get_be32(f);
+ d->enabled = qemu_get_be32(f);
+ qemu_get_timer(f, d->timer);
+ d->timer1_preload = qemu_get_be32(f);
+ d->timer2_preload = qemu_get_be32(f);
+ d->stage = qemu_get_be32(f);
+ d->unlock_state = qemu_get_be32(f);
+ d->previous_reboot_flag = qemu_get_be32(f);
+
+ return 0;
+}
+
+/* Create and initialize a virtual Intel 6300ESB during PC creation. */
+static void i6300esb_pc_init(PCIBus *pci_bus)
+{
+ I6300State *d;
+ uint8_t *pci_conf;
+
+ if (!pci_bus) {
+ fprintf(stderr, "wdt_i6300esb: no PCI bus in this machine\n");
+ return;
+ }
+
+ d = (I6300State *)
+ pci_register_device (pci_bus, "i6300esb_wdt", sizeof (I6300State),
+ -1,
+ i6300esb_config_read, i6300esb_config_write);
+
+ d->reboot_enabled = 1;
+ d->clock_scale = CLOCK_SCALE_1KHZ;
+ d->int_type = INT_TYPE_IRQ;
+ d->free_run = 0;
+ d->locked = 0;
+ d->enabled = 0;
+ d->timer = qemu_new_timer(vm_clock, i6300esb_timer_expired, d);
+ d->timer1_preload = 0xfffff;
+ d->timer2_preload = 0xfffff;
+ d->stage = 1;
+ d->unlock_state = 0;
+ d->previous_reboot_flag = 0;
+
+ pci_conf = d->dev.config;
+ pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+ pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_ESB_9);
+ pci_config_set_class(pci_conf, PCI_CLASS_SYSTEM_OTHER);
+ pci_conf[0x0e] = 0x00;
+
+ pci_register_io_region(&d->dev, 0, 0x10,
+ PCI_ADDRESS_SPACE_MEM, i6300esb_map);
+
+ register_savevm("i6300esb_wdt", -1, sizeof(I6300State),
+ i6300esb_save, i6300esb_load, d);
+}
+
+static WatchdogTimerModel model = {
+ .wdt_name = "i6300esb",
+ .wdt_description = "Intel 6300ESB",
+ .wdt_pc_init = i6300esb_pc_init,
+};
+
+void wdt_i6300esb_init(void)
+{
+ watchdog_add_model(&model);
+}
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
@@ -0,0 +1,112 @@
+/*
+ * Virtual hardware watchdog.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ *
+ * By Richard W.M. Jones (rjones@redhat.com).
+ */
+
+#include "qemu-common.h"
+#include "qemu-timer.h"
+#include "watchdog.h"
+#include "hw.h"
+#include "isa.h"
+#include "pc.h"
+
+/*#define IB700_DEBUG 1*/
+
+#ifdef IB700_DEBUG
+#define ib700_debug(fs,...) \
+ fprintf(stderr,"ib700: %s: "fs,__func__,##__VA_ARGS__)
+#else
+#define ib700_debug(fs,...)
+#endif
+
+/* This is the timer. We use a global here because the watchdog
+ * code ensures there is only one watchdog (it is located at a fixed,
+ * unchangable IO port, so there could only ever be one anyway).
+ */
+static QEMUTimer *timer = NULL;
+
+/* A write to this register enables the timer. */
+static void ib700_write_enable_reg(void *vp, uint32_t addr, uint32_t data)
+{
+ static int time_map[] = {
+ 30, 28, 26, 24, 22, 20, 18, 16,
+ 14, 12, 10, 8, 6, 4, 2, 0
+ };
+ int64 timeout;
+
+ ib700_debug("addr = %x, data = %x\n", addr, data);
+
+ timeout = (int64_t) time_map[data & 0xF] * ticks_per_sec;
+ qemu_mod_timer(timer, qemu_get_clock (vm_clock) + timeout);
+}
+
+/* A write (of any value) to this register disables the timer. */
+static void ib700_write_disable_reg(void *vp, uint32_t addr, uint32_t data)
+{
+ ib700_debug("addr = %x, data = %x\n", addr, data);
+
+ qemu_del_timer(timer);
+}
+
+/* This is called when the watchdog expires. */
+static void ib700_timer_expired(void *vp)
+{
+ ib700_debug("watchdog expired\n");
+
+ watchdog_perform_action();
+ qemu_del_timer(timer);
+}
+
+static void ib700_save(QEMUFile *f, void *vp)
+{
+ qemu_put_timer(f, timer);
+}
+
+static int ib700_load(QEMUFile *f, void *vp, int version)
+{
+ if (version != 0)
+ return -EINVAL;
+
+ qemu_get_timer(f, timer);
+
+ return 0;
+}
+
+/* Create and initialize a virtual IB700 during PC creation. */
+static void ib700_pc_init(PCIBus *unused)
+{
+ register_savevm("ib700_wdt", -1, 0, ib700_save, ib700_load, NULL);
+
+ register_ioport_write(0x441, 2, 1, ib700_write_disable_reg, NULL);
+ register_ioport_write(0x443, 2, 1, ib700_write_enable_reg, NULL);
+}
+
+static WatchdogTimerModel model = {
+ .wdt_name = "ib700",
+ .wdt_description = "iBASE 700",
+ .wdt_pc_init = ib700_pc_init,
+};
+
+void wdt_ib700_init(void)
+{
+ watchdog_add_model(&model);
+ timer = qemu_new_timer(vm_clock, ib700_timer_expired, NULL);
+}
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/monitor.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/monitor.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/monitor.c
@@ -26,6 +26,7 @@
#include "hw/pcmcia.h"
#include "hw/pc.h"
#include "hw/pci.h"
+#include "hw/watchdog.h"
#include "gdbstub.h"
#include "net.h"
#include "qemu-char.h"
@@ -531,6 +532,13 @@ static void do_gdbserver(const char *por
}
#endif
+static void do_watchdog_action(const char *action)
+{
+ if (select_watchdog_action(action) == -1) {
+ qemu_printf("Unknown watchdog action '%s'\n", action);
+ }
+}
+
static void term_printc(int c)
{
term_printf("'");
@@ -1605,6 +1613,8 @@ static const term_cmd_t term_cmds[] = {
"target", "request VM to change it's memory allocation (in MB)" },
{ "set_link", "ss", do_set_link,
"name [up|down]", "change the link status of a network adapter" },
+ { "watchdog_action", "s", do_watchdog_action,
+ "[reset|shutdown|poweroff|pause|debug|none]", "change watchdog action" },
{ "cpu_set", "is", do_cpu_set_nr,
"cpu [online|offline]", "change cpu state" },
{ NULL, NULL, },
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -30,6 +30,7 @@
#include "hw/isa.h"
#include "hw/baum.h"
#include "hw/bt.h"
+#include "hw/watchdog.h"
#include "net.h"
#include "console.h"
#include "sysemu.h"
@@ -249,6 +250,8 @@ int no_shutdown = 0;
int cursor_hide = 1;
int graphic_rotate = 0;
int daemonize = 0;
+WatchdogTimerModel *watchdog = NULL;
+int watchdog_action = WDT_RESET;
const char *option_rom[MAX_OPTION_ROMS];
int nb_option_roms;
int semihosting_enabled = 0;
@@ -4177,6 +4180,10 @@ static void help(int exitcode)
"-startdate select initial date of the clock\n"
"-icount [N|auto]\n"
" enable virtual instruction counter with 2^N clock ticks per instruction\n"
+ "-watchdog i6300esb|ib700\n"
+ " enable virtual hardware watchdog [default=none]\n"
+ "-watchdog-action reset|shutdown|poweroff|pause|debug|none\n"
+ " action when watchdog fires [default=reset]\n"
"-echr chr set terminal escape character instead of ctrl-a\n"
"-virtioconsole c\n"
" set virtio console\n"
@@ -4324,6 +4331,8 @@ enum {
QEMU_OPTION_localtime,
QEMU_OPTION_startdate,
QEMU_OPTION_icount,
+ QEMU_OPTION_watchdog,
+ QEMU_OPTION_watchdog_action,
QEMU_OPTION_echr,
QEMU_OPTION_virtiocon,
QEMU_OPTION_show_cursor,
@@ -4450,6 +4459,8 @@ static const QEMUOption qemu_options[] =
{ "localtime", 0, QEMU_OPTION_localtime },
{ "startdate", HAS_ARG, QEMU_OPTION_startdate },
{ "icount", HAS_ARG, QEMU_OPTION_icount },
+ { "watchdog", HAS_ARG, QEMU_OPTION_watchdog },
+ { "watchdog-action", HAS_ARG, QEMU_OPTION_watchdog_action },
{ "echr", HAS_ARG, QEMU_OPTION_echr },
{ "virtioconsole", HAS_ARG, QEMU_OPTION_virtiocon },
{ "show-cursor", 0, QEMU_OPTION_show_cursor },
@@ -4951,6 +4962,8 @@ int main(int argc, char **argv, char **e
tb_size = 0;
autostart= 1;
+ register_watchdogs();
+
optind = 1;
for(;;) {
if (optind >= argc)
@@ -5325,6 +5338,17 @@ int main(int argc, char **argv, char **e
serial_devices[serial_device_index] = optarg;
serial_device_index++;
break;
+ case QEMU_OPTION_watchdog:
+ i = select_watchdog(optarg);
+ if (i > 0)
+ exit (i == 1 ? 1 : 0);
+ break;
+ case QEMU_OPTION_watchdog_action:
+ if (select_watchdog_action(optarg) == -1) {
+ fprintf(stderr, "Unknown -watchdog-action parameter\n");
+ exit(1);
+ }
+ break;
case QEMU_OPTION_virtiocon:
if (virtio_console_index >= MAX_VIRTIO_CONSOLES) {
fprintf(stderr, "qemu: too many virtio consoles\n");

245
kernel-boot-hvm.patch Normal file
View File

@ -0,0 +1,245 @@
Direct kernel boot to HVM guests has regression from xen-3.3 to xen-4.0.
Foreport this feature to latest qemu-xen. Make a fake boot sector with given
kernel and initrd, which could be accessed by hvmloader.
Signed-off-by: Chunyan Liu <cyliu@novell.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
@@ -596,6 +596,16 @@ int bdrv_read(BlockDriverState *bs, int6
if (bdrv_check_request(bs, sector_num, nb_sectors))
return -EIO;
+
+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(buf, bs->boot_sector_data, 512);
+ sector_num++;
+ nb_sectors--;
+ buf += 512;
+ if (nb_sectors == 0)
+ return 0;
+ }
+
if (drv->bdrv_pread) {
int ret, len;
len = nb_sectors * 512;
@@ -631,6 +641,10 @@ int bdrv_write(BlockDriverState *bs, int
if (bdrv_check_request(bs, sector_num, nb_sectors))
return -EIO;
+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(bs->boot_sector_data, buf, 512);
+ }
+
if (drv->bdrv_pwrite) {
int ret, len, count = 0;
len = nb_sectors * 512;
@@ -934,6 +948,16 @@ void bdrv_guess_geometry(BlockDriverStat
}
}
+/* force a given boot sector. */
+void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size)
+{
+ bs->boot_sector_enabled = 1;
+ if (size > 512)
+ size = 512;
+ memcpy(bs->boot_sector_data, data, size);
+ memset(bs->boot_sector_data + size, 0, 512 - size);
+}
+
void bdrv_set_geometry_hint(BlockDriverState *bs,
int cyls, int heads, int secs)
{
@@ -1464,6 +1488,14 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDri
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
+ /* XXX: we assume that nb_sectors == 0 is suppored by the async read */
+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(buf, bs->boot_sector_data, 512);
+ sector_num++;
+ nb_sectors--;
+ buf += 512;
+ }
+
ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
if (ret) {
@@ -1489,6 +1521,10 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDr
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(bs->boot_sector_data, buf, 512);
+ }
+
ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
if (ret) {
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block_int.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block_int.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block_int.h
@@ -122,6 +122,9 @@ struct BlockDriverState {
BlockDriver *drv; /* NULL means no media */
void *opaque;
+ int boot_sector_enabled;
+ uint8_t boot_sector_data[512];
+
char filename[1024];
char backing_file[1024]; /* if non zero, the image is a diff of
this file image */
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
@@ -474,45 +474,28 @@ static void bochs_bios_init(void)
/* Generate an initial boot sector which sets state and jump to
a specified vector */
-static void generate_bootsect(uint8_t *option_rom,
- uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
+static void generate_bootsect(uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
{
- uint8_t rom[512], *p, *reloc;
- uint8_t sum;
+ uint8_t bootsect[512], *p;
int i;
+ int hda;
+
+ hda = drive_get_index(IF_IDE, 0, 0);
+ if (hda == -1) {
+ fprintf(stderr, "A disk image must be given for 'hda' when booting "
+ "a Linux kernel\n(if you really don't want it, use /dev/zero)\n");
+ exit(1);
+ }
+ memset(bootsect, 0, sizeof(bootsect));
- memset(rom, 0, sizeof(rom));
-
- p = rom;
- /* Make sure we have an option rom signature */
- *p++ = 0x55;
- *p++ = 0xaa;
-
- /* ROM size in sectors*/
- *p++ = 1;
-
- /* Hook int19 */
-
- *p++ = 0x50; /* push ax */
- *p++ = 0x1e; /* push ds */
- *p++ = 0x31; *p++ = 0xc0; /* xor ax, ax */
- *p++ = 0x8e; *p++ = 0xd8; /* mov ax, ds */
-
- *p++ = 0xc7; *p++ = 0x06; /* movvw _start,0x64 */
- *p++ = 0x64; *p++ = 0x00;
- reloc = p;
- *p++ = 0x00; *p++ = 0x00;
-
- *p++ = 0x8c; *p++ = 0x0e; /* mov cs,0x66 */
- *p++ = 0x66; *p++ = 0x00;
-
- *p++ = 0x1f; /* pop ds */
- *p++ = 0x58; /* pop ax */
- *p++ = 0xcb; /* lret */
-
- /* Actual code */
- *reloc = (p - rom);
+ /* Copy the MSDOS partition table if possible */
+ bdrv_read(drives_table[hda].bdrv, 0, bootsect, 1);
+ /* Make sure we have a partition signature */
+ bootsect[510] = 0x55;
+ bootsect[511] = 0xaa;
+ /* Actual code */
+ p = bootsect;
*p++ = 0xfa; /* CLI */
*p++ = 0xfc; /* CLD */
@@ -542,13 +525,7 @@ static void generate_bootsect(uint8_t *o
*p++ = segs[1]; /* CS */
*p++ = segs[1] >> 8;
- /* sign rom */
- sum = 0;
- for (i = 0; i < (sizeof(rom) - 1); i++)
- sum += rom[i];
- rom[sizeof(rom) - 1] = -sum;
-
- memcpy(option_rom, rom, sizeof(rom));
+ bdrv_set_boot_sector(drives_table[hda].bdrv, bootsect, sizeof(bootsect));
}
static long get_file_size(FILE *f)
@@ -565,8 +542,7 @@ static long get_file_size(FILE *f)
return size;
}
-static void load_linux(uint8_t *option_rom,
- const char *kernel_filename,
+static void load_linux(const char *kernel_filename,
const char *initrd_filename,
const char *kernel_cmdline)
{
@@ -632,7 +608,9 @@ static void load_linux(uint8_t *option_r
/* Special pages are placed at end of low RAM: pick an arbitrary one and
* subtract a suitably large amount of padding (64kB) to skip BIOS data. */
- xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &end_low_ram);
+ //xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &end_low_ram);
+ /* BUFIO Page beyond last_pfn, use 0x7ffc instead. Fix ME. */
+ end_low_ram = 0x7ffc;
end_low_ram = (end_low_ram << 12) - (64*1024);
/* highest address for loading the initrd */
@@ -721,7 +699,7 @@ static void load_linux(uint8_t *option_r
memset(gpr, 0, sizeof gpr);
gpr[4] = cmdline_addr-real_addr-16; /* SP (-16 is paranoia) */
- generate_bootsect(option_rom, gpr, seg, 0);
+ generate_bootsect(gpr, seg, 0);
#endif
}
@@ -932,14 +910,6 @@ vga_bios_error:
int size, offset;
offset = 0;
- if (linux_boot) {
- option_rom_offset = qemu_ram_alloc(TARGET_PAGE_SIZE);
- load_linux(phys_ram_base + option_rom_offset,
- kernel_filename, initrd_filename, kernel_cmdline);
- cpu_register_physical_memory(0xd0000, TARGET_PAGE_SIZE,
- option_rom_offset | IO_MEM_ROM);
- offset = TARGET_PAGE_SIZE;
- }
for (i = 0; i < nb_option_roms; i++) {
size = get_image_size(option_rom[i]);
@@ -973,6 +943,9 @@ vga_bios_error:
bochs_bios_init();
+ if (linux_boot)
+ load_linux(kernel_filename, initrd_filename, kernel_cmdline);
+
cpu_irq = qemu_allocate_irqs(pic_irq_request, NULL, 1);
i8259 = i8259_init(cpu_irq[0]);
ferr_irq = i8259[13];
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.h
@@ -82,6 +82,7 @@ int64_t bdrv_getlength(BlockDriverState
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs);
int bdrv_commit(BlockDriverState *bs);
+void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size);
/* async block I/O */
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);

142
log-guest-console.patch Normal file
View File

@ -0,0 +1,142 @@
Add code to support logging xen-domU console, as what xenconsoled does. Log info
will be saved in /var/log/xen/console/guest-domUname.log.
Signed-off-by: Chunyan Liu <cyliu@novell.com>
---
hw/xen_console.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 71 insertions(+), 0 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
@@ -38,6 +38,8 @@
#include "qemu-char.h"
#include "xen_backend.h"
+static int log_guest = 0;
+
struct buffer {
uint8_t *data;
size_t consumed;
@@ -54,8 +56,24 @@ struct XenConsole {
void *sring;
CharDriverState *chr;
int backlog;
+ int log_fd;
};
+static int write_all(int fd, const char* buf, size_t len)
+{
+ while (len) {
+ ssize_t ret = write(fd, buf, len);
+ if (ret == -1 && errno == EINTR)
+ continue;
+ if (ret < 0)
+ return -1;
+ len -= ret;
+ buf += ret;
+ }
+
+ return 0;
+}
+
static void buffer_append(struct XenConsole *con)
{
struct buffer *buffer = &con->buffer;
@@ -83,6 +101,15 @@ static void buffer_append(struct XenCons
intf->out_cons = cons;
xen_be_send_notify(&con->xendev);
+ if (con->log_fd != -1) {
+ int logret;
+ logret = write_all(con->log_fd, buffer->data + buffer->size - size, size);
+ if (logret < 0) {
+ xen_be_printf(&con->xendev, 1, "Write to log failed on domain %d: %d (%s)\n",
+ con->xendev.dom, errno, strerror(errno));
+ }
+ }
+
if (buffer->max_capacity &&
buffer->size > buffer->max_capacity) {
/* Discard the middle of the data. */
@@ -176,6 +203,37 @@ static void xencons_send(struct XenConso
}
}
+static int create_domain_log(struct XenConsole *con)
+{
+ char *logfile;
+ char *path, *domname;
+ int fd;
+ const char *logdir = "/var/log/xen/console";
+
+ path = xs_get_domain_path(xenstore, con->xendev.dom);
+ domname = xenstore_read_str(path, "name");
+ free(path);
+ if (!domname)
+ return -1;
+
+ if (mkdir(logdir, 0755) && errno != EEXIST)
+ {
+ xen_be_printf(&con->xendev, 1, "Directory %s does not exist and fail to create it!", logdir);
+ return -1;
+ }
+
+ if (asprintf(&logfile, "%s/guest-%s.log", logdir, domname) < 0)
+ return -1;
+ qemu_free(domname);
+
+ fd = open(logfile, O_WRONLY|O_CREAT|O_APPEND, 0644);
+ free(logfile);
+ if (fd == -1)
+ xen_be_printf(&con->xendev, 1, "Failed to open log %s: %d (%s)", logfile, errno, strerror(errno));
+
+ return fd;
+}
+
/* -------------------------------------------------------------------- */
static int con_init(struct XenDevice *xendev)
@@ -183,6 +241,7 @@ static int con_init(struct XenDevice *xe
struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
char *type, *dom, label[32];
const char *output;
+ char *logenv = NULL;
/* setup */
dom = xs_get_domain_path(xenstore, con->xendev.dom);
@@ -209,6 +268,10 @@ static int con_init(struct XenDevice *xe
con->chr = qemu_chr_open(label, output, NULL);
xenstore_store_pv_console_info(con->xendev.dev, con->chr, output);
+ logenv = getenv("XENCONSOLED_TRACE");
+ if (logenv != NULL && strlen(logenv) == strlen("guest") && !strcmp(logenv, "guest")) {
+ log_guest = 1;
+ }
return 0;
}
@@ -246,6 +309,9 @@ static int con_initialise(struct XenDevi
con->xendev.remote_port,
con->xendev.local_port,
con->buffer.max_capacity);
+ con->log_fd = -1;
+ if (log_guest)
+ con->log_fd = create_domain_log(con);
return 0;
}
@@ -266,6 +332,12 @@ static void con_disconnect(struct XenDev
xc_gnttab_munmap(xendev->gnttabdev, con->sring, 1);
con->sring = NULL;
}
+
+ if (con->log_fd != -1) {
+ close(con->log_fd);
+ con->log_fd = -1;
+ }
+
}
static void con_event(struct XenDevice *xendev)

View File

@ -2,8 +2,10 @@ Make our PV drivers work with older hosts that do not recognize the new PV driv
Signed-off-by: K. Y. Srinivasan <ksrinivasan@novell.com>
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
Index: xen-4.4.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
--- xen-4.4.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.4.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -322,7 +322,10 @@ static int check_platform_magic(struct d
if (magic != XEN_IOPORT_MAGIC_VAL) {

View File

@ -0,0 +1,24 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -365,6 +365,19 @@ static void platform_ioport_write(void *
net_tap_shutdown_all();
fprintf(logfile, "Done.\n");
break;
+ case 8:
+ if (val ==1 ) {
+ fprintf(logfile, "Disconnect IDE hard disk...\n");
+ ide_unplug_harddisks();
+ fprintf(logfile, "Done.\n");
+ } else if (val == 2) {
+ fprintf(logfile, "Disconnect netifs...\n");
+ pci_unplug_netifs();
+ fprintf(logfile, "Shutdown taps...\n");
+ net_tap_shutdown_all();
+ fprintf(logfile, "Done.\n");
+ }
+ break;
default:
fprintf(logfile, "Write to bad port %x (base %x) on evtchn device.\n",
addr, ioport_base);

View File

@ -1,7 +1,7 @@
Index: xen-4.3.1-testing/tools/pygrub/src/pygrub
Index: xen-4.4.0-testing/tools/pygrub/src/pygrub
===================================================================
--- xen-4.3.1-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.3.1-testing/tools/pygrub/src/pygrub
--- xen-4.4.0-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.4.0-testing/tools/pygrub/src/pygrub
@@ -26,6 +26,7 @@ import fsimage
import grub.GrubConf
import grub.LiloConf

85
qemu-dm-segfault.patch Normal file
View File

@ -0,0 +1,85 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ide.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
@@ -935,8 +935,9 @@ static inline void ide_dma_submit_check(
static inline void ide_set_irq(IDEState *s)
{
- BMDMAState *bm = s->bmdma;
- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+ BMDMAState *bm;
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
+ bm = s->bmdma;
if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) {
if (bm) {
bm->status |= BM_STATUS_INT;
@@ -1224,14 +1225,14 @@ static void ide_read_dma_cb(void *opaque
int n;
int64_t sector_num;
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
+
if (ret < 0) {
dma_buf_commit(s, 1);
ide_dma_error(s);
return;
}
- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
-
n = s->io_buffer_size >> 9;
sector_num = ide_get_sector(s);
if (n > 0) {
@@ -1335,6 +1336,8 @@ static void ide_write_flush_cb(void *opa
BMDMAState *bm = opaque;
IDEState *s = bm->ide_if;
+ if (!s) return; /* yikes */
+
if (ret != 0) {
ide_dma_error(s);
return;
@@ -1366,13 +1369,13 @@ static void ide_write_dma_cb(void *opaqu
int n;
int64_t sector_num;
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
+
if (ret < 0) {
if (ide_handle_write_error(s, -ret, BM_STATUS_DMA_RETRY))
return;
}
- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
-
n = s->io_buffer_size >> 9;
sector_num = ide_get_sector(s);
if (n > 0) {
@@ -1429,7 +1432,7 @@ static void ide_flush_cb(void *opaque, i
{
IDEState *s = opaque;
- if (!s->bs) return; /* ouch! (see below) */
+ if (!s || !s->bs) return; /* ouch! (see below) */
if (ret) {
/* We are completely doomed. The IDE spec does not permit us
@@ -1686,7 +1689,7 @@ static void ide_atapi_cmd_read_dma_cb(vo
IDEState *s = bm->ide_if;
int data_offset, n;
- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
if (ret < 0) {
ide_atapi_io_error(s, ret);
@@ -2365,7 +2368,7 @@ static void cdrom_change_cb(void *opaque
IDEState *s = opaque;
uint64_t nb_sectors;
- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
bdrv_get_geometry(s->bs, &nb_sectors);
s->nb_sectors = nb_sectors;

16
qemu-ifup-set-mtu.patch Normal file
View File

@ -0,0 +1,16 @@
Index: xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
===================================================================
--- xen-4.2.3-testing.orig/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
+++ xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
@@ -20,4 +20,11 @@ then
fi
ifconfig $1 0.0.0.0 up
+
+mtu="`ip link show $bridge | awk '/mtu/ { print $5 }'`"
+if [ -n "$mtu" ] && [ "$mtu" -gt 0 ]
+then
+ ip link set $1 mtu $mtu || :
+fi
+
brctl addif $bridge $1 || true

38
qemu-security-etch1.patch Normal file
View File

@ -0,0 +1,38 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
@@ -218,7 +218,7 @@ static int ne2000_can_receive(void *opaq
NE2000State *s = opaque;
if (s->cmd & E8390_STOP)
- return 1;
+ return 0;
return !ne2000_buffer_full(s);
}
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
@@ -413,7 +413,8 @@ static void bochs_bios_write(void *opaqu
case 0x400:
case 0x401:
fprintf(stderr, "BIOS panic at rombios.c, line %d\n", val);
- exit(1);
+ /* according to documentation, these can be safely ignored */
+ break;
case 0x402:
case 0x403:
#ifdef DEBUG_BIOS
@@ -436,8 +437,9 @@ static void bochs_bios_write(void *opaqu
/* LGPL'ed VGA BIOS messages */
case 0x501:
case 0x502:
+ /* according to documentation, these can be safely ignored */
fprintf(stderr, "VGA BIOS panic, line %d\n", val);
- exit(1);
+ break;
case 0x500:
case 0x503:
#ifdef DEBUG_BIOS

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4b43f14e9cb63a52647fcde22a087606b723ba9b96b7b1a9226826f4896d7f99
size 6037766
oid sha256:a6f6a14a6fdbe159290e7da69b3761ba2f1219b5e83211f553f6d3dadc65f2cb
size 7570519

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac42d369d2b90589531a8d224ac3a65df9cbf58e5625fec98ed5297eb0610d4a
size 3213204
oid sha256:ae6c395eabff717667a5853898fe59a7b46a9cd91319c46b2928ae034689433d
size 3212965

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:455da225a5a4ef25c7f91e7aecec407a012bf5aed4f9d82c8f214f364e9db261
size 366311
oid sha256:713b82f6be5d63b52d35838158b5b5609c700a13cfa01b84b4fb1cf2798f895d
size 366292

View File

@ -16,46 +16,25 @@ This patch sets the MTU for both 'online' and 'add' in the vif-bridge script.
Signed-off-by: Charles Arnold <carnold@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Index: xen-4.3.0-testing/tools/hotplug/Linux/vif-bridge
Index: xen-4.4.0-testing/tools/hotplug/Linux/vif-bridge
===================================================================
--- xen-4.3.0-testing.orig/tools/hotplug/Linux/vif-bridge
+++ xen-4.3.0-testing/tools/hotplug/Linux/vif-bridge
@@ -89,11 +89,7 @@ fi
--- xen-4.4.0-testing.orig/tools/hotplug/Linux/vif-bridge
+++ xen-4.4.0-testing/tools/hotplug/Linux/vif-bridge
@@ -88,7 +88,7 @@ fi
case "$command" in
online)
setup_virtual_bridge_port "$dev"
- mtu="`ip link show $bridge | awk '/mtu/ { print $5 }'`"
- if [ -n "$mtu" ] && [ "$mtu" -gt 0 ]
- then
- ip link set $dev mtu $mtu || :
- fi
- set_mtu $bridge $dev
+ set_mtu "$bridge" "$dev"
add_to_bridge "$bridge" "$dev"
;;
@@ -104,6 +100,7 @@ case "$command" in
@@ -99,7 +99,7 @@ case "$command" in
add)
setup_virtual_bridge_port "$dev"
- set_mtu $bridge $dev
+ set_mtu "$bridge" "$dev"
add_to_bridge "$bridge" "$dev"
;;
esac
Index: xen-4.3.0-testing/tools/hotplug/Linux/xen-network-common.sh
===================================================================
--- xen-4.3.0-testing.orig/tools/hotplug/Linux/xen-network-common.sh
+++ xen-4.3.0-testing/tools/hotplug/Linux/xen-network-common.sh
@@ -132,3 +132,13 @@ add_to_bridge () {
ip link set ${dev} up
}
+# Usage: set_mtu bridge dev
+set_mtu () {
+ local bridge=$1
+ local dev=$2
+ mtu="`ip link show ${bridge}| awk '/mtu/ { print $5 }'`"
+ if [ -n "$mtu" ] && [ "$mtu" -gt 0 ]
+ then
+ ip link set ${dev} mtu $mtu || :
+ fi
+}

View File

@ -0,0 +1,46 @@
From 903a145f3eace5e3ae914f0335ab6c4e33635d2f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:36:23 +0100
Subject: [PATCH 4/6] tapdisk-ioemu: Write messages to a logfile
Typically, tapdisk-ioemu runs as a daemon and messages to stderr are
simply lost. Write them to a logfile instead.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
tapdisk-ioemu.c | 19 +++++++++++++------
1 files changed, 13 insertions(+), 6 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -78,15 +78,22 @@ int main(void)
struct timeval tv;
void *old_fd_start = NULL;
- logfile = stderr;
-
+ /* Daemonize */
+ if (fork() != 0)
+ exit(0);
+
bdrv_init();
init_blktap();
- /* Daemonize */
- if (fork() != 0)
- exit(0);
-
+ logfile = fopen("/var/log/xen/tapdisk-ioemu.log", "a");
+ if (logfile) {
+ setbuf(logfile, NULL);
+ fclose(stderr);
+ stderr = logfile;
+ } else {
+ logfile = stderr;
+ }
+
/*
* Main loop: Pass events to the corrsponding handlers and check for
* completed aio operations.

View File

@ -0,0 +1,89 @@
From 9062564d79cb45029403cc998b48410e42ead924 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:45:44 +0100
Subject: [PATCH 6/6] tapdisk-ioemu: Fix shutdown condition
Even when opening the only image a tapdisk-ioemu instance is
responsible for fails, it can't immediately shut down. blktapctrl
still wants to communicate with tapdisk-ioemu and close the disk.
This patch changes tapdisk-ioemu to count the connections to
blktapctrl rather than the number of opened disk images.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
hw/xen_blktap.c | 5 ++++-
tapdisk-ioemu.c | 13 ++++++++++---
2 files changed, 14 insertions(+), 4 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -65,6 +65,7 @@ int read_fd;
int write_fd;
static pid_t process;
+int connected_disks = 0;
fd_list_entry_t *fd_start = NULL;
static void handle_blktap_iomsg(void* private);
@@ -541,6 +542,7 @@ static void handle_blktap_ctrlmsg(void*
/* Allocate the disk structs */
s = state_init();
+ connected_disks++;
/*Open file*/
if (s == NULL || open_disk(s, path, msg->drivertype, msg->readonly)) {
@@ -591,7 +593,8 @@ static void handle_blktap_ctrlmsg(void*
case CTLMSG_CLOSE:
s = get_state(msg->cookie);
if (s) unmap_disk(s);
- break;
+ connected_disks--;
+ break;
case CTLMSG_PID:
memset(buf, 0x00, MSG_SIZE);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -14,6 +14,7 @@ extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);
extern void *fd_start;
+extern int connected_disks;
int domid = 0;
FILE* logfile;
@@ -76,7 +77,7 @@ int main(void)
int max_fd;
fd_set rfds;
struct timeval tv;
- void *old_fd_start = NULL;
+ int old_connected_disks = 0;
/* Daemonize */
if (fork() != 0)
@@ -128,11 +129,17 @@ int main(void)
pioh = &ioh->next;
}
+ if (old_connected_disks != connected_disks)
+ fprintf(stderr, "connected disks: %d => %d\n",
+ old_connected_disks, connected_disks);
+
/* Exit when the last image has been closed */
- if (old_fd_start != NULL && fd_start == NULL)
+ if (old_connected_disks != 0 && connected_disks == 0) {
+ fprintf(stderr, "Last image is closed, exiting.\n");
exit(0);
+ }
- old_fd_start = fd_start;
+ old_connected_disks = connected_disks;
}
return 0;
}

View File

@ -1,8 +1,8 @@
Index: xen-4.2.0-testing/tools/hotplug/Linux/vif-bridge
Index: xen-4.4.0-testing/tools/hotplug/Linux/vif-bridge
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/vif-bridge
+++ xen-4.2.0-testing/tools/hotplug/Linux/vif-bridge
@@ -101,9 +101,9 @@ case "$command" in
--- xen-4.4.0-testing.orig/tools/hotplug/Linux/vif-bridge
+++ xen-4.4.0-testing/tools/hotplug/Linux/vif-bridge
@@ -97,9 +97,9 @@ case "$command" in
;;
esac

View File

@ -1,5 +1,7 @@
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
Index: xen-4.4.0-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-4.4.0-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.4.0-testing/xen/arch/x86/platform_hypercall.c
@@ -25,7 +25,7 @@
#include <xen/irq.h>
#include <asm/current.h>
@ -51,8 +53,10 @@
default:
ret = -ENOSYS;
break;
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
Index: xen-4.4.0-testing/xen/include/public/platform.h
===================================================================
--- xen-4.4.0-testing.orig/xen/include/public/platform.h
+++ xen-4.4.0-testing/xen/include/public/platform.h
@@ -527,6 +527,16 @@ struct xenpf_core_parking {
typedef struct xenpf_core_parking xenpf_core_parking_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t);

View File

@ -1,23 +1,23 @@
Index: xen-4.3.0-testing/xen/arch/x86/x86_64/entry.S
Index: xen-4.4.0-testing/xen/arch/x86/x86_64/entry.S
===================================================================
--- xen-4.3.0-testing.orig/xen/arch/x86/x86_64/entry.S
+++ xen-4.3.0-testing/xen/arch/x86/x86_64/entry.S
@@ -433,22 +433,35 @@ UNLIKELY_END(bounce_failsafe)
jz domain_crash_synchronous
--- xen-4.4.0-testing.orig/xen/arch/x86/x86_64/entry.S
+++ xen-4.4.0-testing/xen/arch/x86/x86_64/entry.S
@@ -438,19 +438,30 @@ UNLIKELY_START(z, create_bounce_frame_ba
__UNLIKELY_END(create_bounce_frame_bad_bounce_ip)
movq %rax,UREGS_rip+8(%rsp)
ret
- _ASM_EXTABLE(.Lft2, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft3, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft4, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft5, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft6, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft7, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft8, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft9, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft10, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft11, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft12, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft13, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft2, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft3, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft4, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft5, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft6, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft7, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft8, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft9, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft10, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft11, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft12, dom_crash_sync_extable)
- _ASM_EXTABLE(.Lft13, dom_crash_sync_extable)
+ _ASM_EXTABLE(.Lft2, domain_crash_page_fault_32)
+ _ASM_EXTABLE(.Lft3, domain_crash_page_fault_24)
+ _ASM_EXTABLE(.Lft4, domain_crash_page_fault_8)
@ -31,11 +31,6 @@ Index: xen-4.3.0-testing/xen/arch/x86/x86_64/entry.S
+ _ASM_EXTABLE(.Lft12, domain_crash_page_fault_8)
+ _ASM_EXTABLE(.Lft13, domain_crash_page_fault)
+.section .rodata, "a", @progbits
domain_crash_synchronous_string:
.asciz "domain_crash_sync called from entry.S\n"
+.previous
+domain_crash_page_fault_32:
+ addq $8,%rsi
+domain_crash_page_fault_24:
@ -47,6 +42,6 @@ Index: xen-4.3.0-testing/xen/arch/x86/x86_64/entry.S
+domain_crash_page_fault:
+ movq %rsi,%rdi
+ call show_page_walk
ENTRY(domain_crash_synchronous)
ENTRY(dom_crash_sync_extable)
# Get out of the guest-save area of the stack.
GET_STACK_BASE(%rax)

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7298c50445e274cdc34a36aaf295a015fb626249b1c8158ad8b37da2f141d930
size 4359136

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:551f2a6e4d0ee6853794cca558eb2344b45b6ad1ee0cf42a5981b70cdb7c402b
size 4347631

View File

@ -1,14 +1,14 @@
Index: xen-4.3.0-testing/xen/Makefile
Index: xen-4.4.0-testing/xen/Makefile
===================================================================
--- xen-4.3.0-testing.orig/xen/Makefile
+++ xen-4.3.0-testing/xen/Makefile
--- xen-4.4.0-testing.orig/xen/Makefile
+++ xen-4.4.0-testing/xen/Makefile
@@ -1,3 +1,4 @@
+export XEN_CHANGESET = unavailable
# This is the correct place to edit the build version.
# All other places this is stored (eg. compile.h) should be autogenerated.
export XEN_VERSION = 4
@@ -116,7 +117,7 @@ delete-unfresh-files:
@rm -f $@1 $@2
@@ -122,7 +123,7 @@ delete-unfresh-files:
@mv -f $@.tmp $@
# compile.h contains dynamic build info. Rebuilt on every 'make' invocation.
-include/xen/compile.h: include/xen/compile.h.in .banner
@ -16,16 +16,15 @@ Index: xen-4.3.0-testing/xen/Makefile
@sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \
-e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \
-e 's/@@whoami@@/$(XEN_WHOAMI)/g' \
@@ -126,10 +127,9 @@ include/xen/compile.h: include/xen/compi
@@ -132,9 +133,9 @@ include/xen/compile.h: include/xen/compi
-e 's/@@version@@/$(XEN_VERSION)/g' \
-e 's/@@subversion@@/$(XEN_SUBVERSION)/g' \
-e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \
- -e 's!@@changeset@@!$(shell tools/scmversion $(XEN_ROOT) || echo "unavailable")!g' \
+ -e 's!@@changeset@@!$(XEN_CHANGESET)!g' \
< include/xen/compile.h.in > $@.new
- @grep \" .banner >> $@.new
- @grep -v \" .banner
+ tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) >> $@.new
- @cat .banner
+ @echo "Xen $(XEN_FULLVERSION)" > .banner
@$(PYTHON) tools/fig-to-oct.py < .banner >> $@.new
@mv -f $@.new $@
include/asm-$(TARGET_ARCH)/asm-offsets.h: arch/$(TARGET_ARCH)/asm-offsets.s

View File

@ -1,7 +1,7 @@
Index: xen-4.3.0-testing/tools/xenstore/Makefile
Index: xen-4.4.0-testing/tools/xenstore/Makefile
===================================================================
--- xen-4.3.0-testing.orig/tools/xenstore/Makefile
+++ xen-4.3.0-testing/tools/xenstore/Makefile
--- xen-4.4.0-testing.orig/tools/xenstore/Makefile
+++ xen-4.4.0-testing/tools/xenstore/Makefile
@@ -10,6 +10,7 @@ CFLAGS += $(CFLAGS_libxenctrl)
CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod
@ -56,17 +56,19 @@ Index: xen-4.3.0-testing/tools/xenstore/Makefile
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)
ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)/libxenstore.so.$(MAJOR)
Index: xen-4.3.0-testing/tools/hotplug/Linux/Makefile
Index: xen-4.4.0-testing/tools/hotplug/Linux/Makefile
===================================================================
--- xen-4.3.0-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.3.0-testing/tools/hotplug/Linux/Makefile
@@ -43,12 +43,12 @@ install: all install-initd install-scrip
--- xen-4.4.0-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.4.0-testing/tools/hotplug/Linux/Makefile
@@ -46,14 +46,14 @@ install: all install-initd install-scrip
.PHONY: install-initd
install-initd:
[ -d $(DESTDIR)$(INITD_DIR) ] || $(INSTALL_DIR) $(DESTDIR)$(INITD_DIR)
- [ -d $(DESTDIR)$(SYSCONFIG_DIR) ] || $(INSTALL_DIR) $(DESTDIR)$(SYSCONFIG_DIR)
+ [ -d $(DESTDIR)/var/adm/fillup-templates ] || $(INSTALL_DIR) $(DESTDIR)/var/adm/fillup-templates/
ifeq ($(CONFIG_XEND),y)
$(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)$(INITD_DIR)
endif
$(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains
+ $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/
@ -76,10 +78,10 @@ Index: xen-4.3.0-testing/tools/hotplug/Linux/Makefile
$(INSTALL_PROG) init.d/xen-watchdog $(DESTDIR)$(INITD_DIR)
.PHONY: install-scripts
Index: xen-4.3.0-testing/tools/firmware/etherboot/Makefile
Index: xen-4.4.0-testing/tools/firmware/etherboot/Makefile
===================================================================
--- xen-4.3.0-testing.orig/tools/firmware/etherboot/Makefile
+++ xen-4.3.0-testing/tools/firmware/etherboot/Makefile
--- xen-4.4.0-testing.orig/tools/firmware/etherboot/Makefile
+++ xen-4.4.0-testing/tools/firmware/etherboot/Makefile
@@ -28,12 +28,12 @@ all: $(ROMS)
$(MAKE) -C $D/src bin/$(*F).rom

View File

@ -0,0 +1,70 @@
CVE-2007-0998 - remote compromise of dom0
Rather than completely disabling QEMU's console (which would remove
the "sendkey" command, among other useful things), remove all console
commands that can read/write dom0's state.
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/monitor.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/monitor.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/monitor.c
@@ -1497,6 +1497,7 @@ static const term_cmd_t term_cmds[] = {
"device|all", "commit changes to the disk images (if -snapshot is used) or backing files" },
{ "info", "s?", do_info,
"subcommand", "show various information about the system state" },
+#ifdef CONFIG_TRUSTED_CLIENT
{ "q|quit", "", do_quit,
"", "quit the emulator" },
{ "eject", "-fB", do_eject,
@@ -1509,6 +1510,7 @@ static const term_cmd_t term_cmds[] = {
"filename", "output logs to 'filename'" },
{ "log", "s", do_log,
"item1[,...]", "activate logging of the specified items to '/tmp/qemu.log'" },
+#endif
{ "savevm", "s?", do_savevm,
"tag|id", "save a VM snapshot. If no tag or id are provided, a new snapshot is created" },
{ "loadvm", "s", do_loadvm,
@@ -1538,8 +1540,10 @@ static const term_cmd_t term_cmds[] = {
"", "reset the system" },
{ "system_powerdown", "", do_system_powerdown,
"", "send system power down event" },
+#ifdef CONFIG_TRUSTED_CLIENT
{ "sum", "ii", do_sum,
"addr size", "compute the checksum of a memory region" },
+#endif
{ "usb_add", "s", do_usb_add,
"device", "add USB device (e.g. 'host:bus.addr' or 'host:vendor_id:product_id')" },
{ "usb_del", "s", do_usb_del,
@@ -1558,6 +1562,7 @@ static const term_cmd_t term_cmds[] = {
"state", "change mouse button state (1=L, 2=M, 4=R)" },
{ "mouse_set", "i", do_mouse_set,
"index", "set which mouse device receives events" },
+#ifdef CONFIG_TRUSTED_CLIENT
#ifdef HAS_AUDIO
{ "wavcapture", "si?i?i?", do_wav_capture,
"path [frequency bits channels]",
@@ -1565,6 +1570,7 @@ static const term_cmd_t term_cmds[] = {
#endif
{ "stopcapture", "i", do_stop_capture,
"capture index", "stop capture" },
+#endif
{ "memsave", "lis", do_memory_save,
"addr size file", "save to disk virtual memory dump starting at 'addr' of size 'size'", },
{ "pmemsave", "lis", do_physical_memory_save,
@@ -1646,6 +1652,7 @@ static const term_cmd_t info_cmds[] = {
"", "show KVM information", },
{ "usb", "", usb_info,
"", "show guest USB devices", },
+#ifdef CONFIG_TRUSTED_CLIENT
{ "usbhost", "", usb_host_info,
"", "show host USB devices", },
{ "profile", "", do_info_profile,
@@ -1677,6 +1684,7 @@ static const term_cmd_t info_cmds[] = {
{ "migrate", "", do_info_migrate, "", "show migration status" },
{ "balloon", "", do_info_balloon,
"", "show balloon information" },
+#endif
{ NULL, NULL, },
};

View File

@ -0,0 +1,106 @@
Index: xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/net.h
===================================================================
--- xen-4.2.3-testing.orig/tools/qemu-xen-traditional-dir-remote/net.h
+++ xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/net.h
@@ -107,8 +107,8 @@ void net_host_device_add(const char *dev
void net_host_device_remove(int vlan_id, const char *device);
#ifndef DEFAULT_NETWORK_SCRIPT
-#define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
+#define DEFAULT_NETWORK_SCRIPT "/etc/xen/qemu-ifup"
+#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/xen/qemu-ifdown"
#endif
#ifdef __sun__
#define SMBD_COMMAND "/usr/sfw/sbin/smbd"
Index: xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/net.c
===================================================================
--- xen-4.2.3-testing.orig/tools/qemu-xen-traditional-dir-remote/net.c
+++ xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/net.c
@@ -1764,9 +1764,10 @@ int net_client_init(const char *device,
}
if (get_param_value(script_arg, sizeof(script_arg), "scriptarg", p) == 0 &&
get_param_value(script_arg, sizeof(script_arg), "bridge", p) == 0) { /* deprecated; for xend compatibility */
- pstrcpy(script_arg, sizeof(script_arg), "");
+ ret = net_tap_init(vlan, device, name, ifname, setup_script, NULL, NULL);
+ } else {
+ ret = net_tap_init(vlan, device, name, ifname, setup_script, down_script, script_arg);
}
- ret = net_tap_init(vlan, device, name, ifname, setup_script, down_script, script_arg);
}
} else
#endif
Index: xen-4.2.3-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.2.3-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.2.3-testing/tools/python/xen/xend/image.py
@@ -912,11 +912,13 @@ class HVMImageHandler(ImageHandler):
mac = devinfo.get('mac')
if mac is None:
raise VmError("MAC address not specified or generated.")
- bridge = devinfo.get('bridge', 'xenbr0')
+ bridge = devinfo.get('bridge', None)
model = devinfo.get('model', 'rtl8139')
ret.append("-net")
- ret.append("nic,vlan=%d,macaddr=%s,model=%s" %
- (nics, mac, model))
+ net = "nic,vlan=%d,macaddr=%s,model=%s" % (nics, mac, model)
+ if bridge:
+ net += ",bridge=%s" % bridge
+ ret.append(net)
vifname = "vif%d.%d-emu" % (self.vm.getDomid(), nics-1)
ret.append("-net")
if osdep.tapif_script is not None:
Index: xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
===================================================================
--- xen-4.2.3-testing.orig/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
+++ xen-4.2.3-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
@@ -1,36 +1,22 @@
#!/bin/sh
-#. /etc/rc.d/init.d/functions
-#ulimit -c unlimited
-
echo 'config qemu network with xen bridge for ' $*
+# If bridge is not specified, try device with default route.
bridge=$2
+if [ -z "$bridge" ]; then
+ bridge=$(ip route list | awk '/^default / { print $NF }')
+fi
-#
-# Old style bridge setup with netloop, used to have a bridge name
-# of xenbrX, enslaving pethX and vif0.X, and then configuring
-# eth0.
-#
-# New style bridge setup does not use netloop, so the bridge name
-# is ethX and the physical device is enslaved pethX
-#
-# So if...
-#
-# - User asks for xenbrX
-# - AND xenbrX doesn't exist
-# - AND there is a ethX device which is a bridge
-#
-# ..then we translate xenbrX to ethX
-#
-# This lets old config files work without modification
-#
-if [ ! -e "/sys/class/net/$bridge" ] && [ -z "${bridge##xenbr*}" ]
+# Exit if $bridge is not a bridge. Exit with 0 status
+# so qemu-dm process is not terminated. No networking in
+# vm is bad but not catastrophic. The vm could still run
+# cpu and disk IO workloads.
+# Include an useful error message in qemu-dm log file.
+if [ ! -e "/sys/class/net/${bridge}/bridge" ]
then
- if [ -e "/sys/class/net/eth${bridge#xenbr}/bridge" ]
- then
- bridge="eth${bridge#xenbr}"
- fi
+ echo "WARNING! ${bridge} is not a bridge. qemu-ifup exiting. VM may not have a functioning networking stack."
+ exit 0
fi
ifconfig $1 0.0.0.0 up

76
xen-qemu-iscsi-fix.patch Normal file
View File

@ -0,0 +1,76 @@
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -451,7 +451,7 @@ void xenstore_parse_domain_config(int hv
char *buf = NULL;
char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *drv = NULL;
- int i, ret, is_tap;
+ int i, j, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -535,12 +535,7 @@ void xenstore_parse_domain_config(int hv
continue;
free(danger_type);
danger_type = xs_read(xsh, XBT_NULL, danger_buf, &len);
- if (pasprintf(&buf, "%s/params", bpath) == -1)
- continue;
- free(params);
- params = xs_read(xsh, XBT_NULL, buf, &len);
- if (params == NULL)
- continue;
+
/* read the name of the device */
if (pasprintf(&buf, "%s/type", bpath) == -1)
continue;
@@ -548,6 +543,35 @@ void xenstore_parse_domain_config(int hv
drv = xs_read(xsh, XBT_NULL, buf, &len);
if (drv == NULL)
continue;
+
+ free(params);
+ if (!strcmp(drv,"iscsi") || !strcmp(drv, "npiv") ||
+ !strcmp(drv,"dmmd")) {
+ if (pasprintf(&buf, "%s/node", bpath) == -1)
+ continue;
+
+ /* wait for block-[iscsi|npiv|dmmd] script to complete and populate the
+ * node entry. try 30 times (30 secs) */
+ for (j = 0; j < 30; j++) {
+ params = xs_read(xsh, XBT_NULL, buf, &len);
+ if (params != NULL)
+ break;
+ sleep(1);
+ }
+ if (params == NULL) {
+ fprintf(stderr, "qemu: %s device not found -- timed out \n", drv);
+ continue;
+ }
+ }
+ else
+ {
+ if (pasprintf(&buf, "%s/params", bpath) == -1)
+ continue;
+ params = xs_read(xsh, XBT_NULL, buf, &len);
+ if (params == NULL)
+ continue;
+ }
+
/* Obtain blktap sub-type prefix */
if ((!strcmp(drv, "tap") || !strcmp(drv, "qdisk")) && params[0]) {
char *offset = strchr(params, ':');
@@ -665,6 +689,12 @@ void xenstore_parse_domain_config(int hv
format = &bdrv_host_device;
else
format = &bdrv_raw;
+ } else if (!strcmp(drv,"iscsi")) {
+ format = &bdrv_raw;
+ } else if (!strcmp(drv,"npiv")) {
+ format = &bdrv_raw;
+ } else if (!strcmp(drv,"dmmd")) {
+ format = &bdrv_raw;
} else {
format = bdrv_find_format(drv);
if (!format) {

View File

@ -1,12 +0,0 @@
Index: xen-4.2.0-testing/tools/examples/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/Makefile
+++ xen-4.2.0-testing/tools/examples/Makefile
@@ -18,7 +18,6 @@ XEN_CONFIGS += xmexample.hvm
XEN_CONFIGS += xmexample.hvm-stubdom
XEN_CONFIGS += xmexample.pv-grub
XEN_CONFIGS += xmexample.nbd
-XEN_CONFIGS += xmexample.vti
XEN_CONFIGS += xlexample.hvm
XEN_CONFIGS += xlexample.pvlinux
XEN_CONFIGS += xend-pci-quirks.sxp

View File

@ -301,42 +301,6 @@ Index: xen-4.3.0-testing/tools/examples/xmexample.pv-grub
vif = [ '' ]
Index: xen-4.3.0-testing/tools/examples/xmexample.vti
===================================================================
--- xen-4.3.0-testing.orig/tools/examples/xmexample.vti
+++ xen-4.3.0-testing/tools/examples/xmexample.vti
@@ -40,11 +40,26 @@ name = "ExampleVTIDomain"
# In Windows OS, smaller size shows better performance.
#vhpt = 23
-# Optionally define mac and/or bridge for the network interfaces.
-# Random MACs are assigned if not given.
-#vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0, model=ne2k_pci' ]
-# type=ioemu specify the NIC is an ioemu device not netfront
-vif = [ 'type=ioemu, bridge=xenbr0' ]
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# By default, no network interfaces are configured. You may have one created
+# with sensible defaults using an empty vif clause:
+#
+# vif = [ '' ]
+#
+# or optionally override backend, bridge, ip, mac, script, type, model,
+# or vifname.
+#
+# An emulated RealTek 8139 network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=ioemu, model=rtl8139, bridge=br0' ]
+#
+# A para-virtual network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=netfront, bridge=br0' ]
+#
+vif = [ '' ]
#----------------------------------------------------------------------------
# Define the disk devices you want the domain to have access to, and
Index: xen-4.3.0-testing/docs/man/xl.pod.1
===================================================================
--- xen-4.3.0-testing.orig/docs/man/xl.pod.1

View File

@ -1,3 +1,9 @@
-------------------------------------------------------------------
Tue Jan 2 11:52:11 MST 2014 - carnold@suse.com
- Update to Xen 4.4.0 RC1 c/s 28233
- Drop 32bit support from spec file
-------------------------------------------------------------------
Wed Jan 1 10:28:10 UTC 2014 - coolo@suse.com

View File

@ -0,0 +1,43 @@
user: Olaf Hering <olaf@aepfle.de>
date: Thu Mar 28 15:36:02 2013 +0100
files: tools/python/xen/xend/XendCheckpoint.py
description:
tools/xend: move assert to exception block
The two assert in restore trigger sometimes after hundreds of
migrations. If they trigger the destination host will not destroy the
newly created, still empty guest. After a second migration attempt to
this host there will be two guets with the same name and uuid. This
situation is poorly handled by the xm tools.
With this change the guest will be destroyed.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/python/xen/xend/XendCheckpoint.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -262,9 +262,6 @@ def restore(xd, fd, dominfo = None, paus
store_port = dominfo.getStorePort()
console_port = dominfo.getConsolePort()
- assert store_port
- assert console_port
-
# if hvm, pass mem size to calculate the store_mfn
if is_hvm:
apic = int(dominfo.info['platform'].get('apic', 0))
@@ -276,6 +273,9 @@ def restore(xd, fd, dominfo = None, paus
pae = 0
try:
+ assert store_port
+ assert console_port
+
restore_image = image.create(dominfo, dominfo.info)
memory = restore_image.getRequiredAvailableMemory(
dominfo.info['memory_dynamic_max'] / 1024)

View File

@ -0,0 +1,144 @@
user: Olaf Hering <olaf@aepfle.de>
date: Wed Mar 06 17:05:15 2013 +0100
files: tools/libxc/xenguest.h tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xm/migrate.py tools/xcutils/xc_save.c
description:
tools: add xm migrate --log_progress option
xc_domain_save does print progress messages. These verbose messages are
disabled per default to avoid flood in xend.log. Sometimes it is helpful
to see progress when migrating large and busy guests. So add a new
option to xm migrate to actually enable the printing of progress
messsages.
xl migrate is not modified with this change because it does not use the
stdio logger.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Index: xen-4.4.0-testing/tools/libxc/xenguest.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxc/xenguest.h
+++ xen-4.4.0-testing/tools/libxc/xenguest.h
@@ -29,6 +29,7 @@
#define XCFLAGS_STDVGA (1 << 3)
#define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4)
#define XCFLAGS_DOMSAVE_ABORT_IF_BUSY (1 << 5)
+#define XCFLAGS_PROGRESS (1 << 6)
#define X86_64_B_SIZE 64
#define X86_32_B_SIZE 32
Index: xen-4.4.0-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -121,16 +121,19 @@ def save(fd, dominfo, network, live, dst
max_iters = dominfo.info.get('max_iters', "0")
max_factor = dominfo.info.get('max_factor', "0")
abort_if_busy = dominfo.info.get('abort_if_busy', "0")
+ log_save_progress = dominfo.info.get('log_save_progress', "0")
if max_iters == "None":
max_iters = "0"
if max_factor == "None":
max_factor = "0"
if abort_if_busy == "None":
abort_if_busy = "0"
+ if log_save_progress == "None":
+ log_save_progress = "0"
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
str(dominfo.getDomid()),
max_iters, max_factor,
- str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) ) ]
+ str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) | (int(log_save_progress) << 6) ) ]
log.debug("[xc_save]: %s", string.join(cmd))
def saveInputHandler(line, tochild):
Index: xen-4.4.0-testing/tools/python/xen/xend/XendDomain.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendDomain.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendDomain.py
@@ -1832,17 +1832,18 @@ class XendDomain:
log.exception(ex)
raise XendError(str(ex))
- def domain_migrate_constraints_set(self, domid, max_iters, max_factor, abort_if_busy):
+ def domain_migrate_constraints_set(self, domid, max_iters, max_factor, abort_if_busy, log_save_progress):
"""Set the Migrate Constraints of this domain.
@param domid: Domain ID or Name
@param max_iters: Number of iterations before final suspend
@param max_factor: Max amount of memory to transfer before final suspend
@param abort_if_busy: Abort migration instead of doing final suspend
+ @param log_save_progress: Log progress of migrate to xend.log
"""
dominfo = self.domain_lookup_nr(domid)
if not dominfo:
raise XendInvalidDomain(str(domid))
- dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy)
+ dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy, log_save_progress)
def domain_maxmem_set(self, domid, mem):
"""Set the memory limit for a domain.
Index: xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1475,17 +1475,19 @@ class XendDomainInfo:
break
xen.xend.XendDomain.instance().managed_config_save(self)
- def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy):
+ def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy, log_save_progress):
"""Set the Migrate Constraints of this domain.
@param max_iters: Number of iterations before final suspend
@param max_factor: Max amount of memory to transfer before final suspend
@param abort_if_busy: Abort migration instead of doing final suspend
+ @param log_save_progress: Log progress of migrate to xend.log
"""
log.debug("Setting migration constraints of domain %s (%s) to '%s' '%s' '%s'.",
self.info['name_label'], str(self.domid), max_iters, max_factor, abort_if_busy)
self.info['max_iters'] = str(max_iters)
self.info['max_factor'] = str(max_factor)
self.info['abort_if_busy'] = str(abort_if_busy)
+ self.info['log_save_progress'] = str(log_save_progress)
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
Index: xen-4.4.0-testing/tools/python/xen/xm/migrate.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xm/migrate.py
+++ xen-4.4.0-testing/tools/python/xen/xm/migrate.py
@@ -67,6 +67,10 @@ gopts.opt('abort_if_busy',
fn=set_true, default=0,
use="Abort migration instead of doing final suspend.")
+gopts.opt('log_progress',
+ fn=set_true, default=0,
+ use="Log progress of migration to xend.log")
+
def help():
return str(gopts)
@@ -95,7 +99,8 @@ def main(argv):
server.xend.domain.migrate_constraints_set(dom,
opts.vals.max_iters,
opts.vals.max_factor,
- opts.vals.abort_if_busy)
+ opts.vals.abort_if_busy,
+ opts.vals.log_progress)
server.xend.domain.migrate(dom, dst, opts.vals.live,
opts.vals.port,
opts.vals.node,
Index: xen-4.4.0-testing/tools/xcutils/xc_save.c
===================================================================
--- xen-4.4.0-testing.orig/tools/xcutils/xc_save.c
+++ xen-4.4.0-testing/tools/xcutils/xc_save.c
@@ -184,7 +184,8 @@ main(int argc, char **argv)
si.suspend_evtchn = -1;
lvl = si.flags & XCFLAGS_DEBUG ? XTL_DEBUG: XTL_DETAIL;
- lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS;
+ lflags = XTL_STDIOSTREAM_SHOW_PID;
+ lflags |= si.flags & XCFLAGS_PROGRESS ? 0 : XTL_STDIOSTREAM_HIDE_PROGRESS;
l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags);
si.xch = xc_interface_open(l, 0, 0);
if (!si.xch)

View File

@ -0,0 +1,232 @@
user: Olaf Hering <olaf@aepfle.de>
date: Wed Mar 06 17:05:14 2013 +0100
files: docs/man/xl.pod.1 tools/libxc/xc_domain_save.c tools/libxc/xenguest.h tools/libxl/Makefile tools/libxl/libxl.c tools/libxl/libxl.h tools/libxl/libxl_dom.c tools/libxl/libxl_internal.h tools/libxl/libxl_save_callout.c tools/libxl/xl_cmdimpl.c tools/libxl/xl_cmdtable.c tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xm/migrate.py
description:
tools: set migration constraints from cmdline
Add new options to xm/xl migrate to control the process of migration.
The intention is to optionally abort the migration if it takes too long
to migrate a busy guest due to the high number of dirty pages. Currently
the guest is suspended to transfer the remaining dirty pages. This
transfer can take too long, which can confuse the guest if its suspended
for too long.
-M <number> Number of iterations before final suspend (default: 30)
--max_iters <number>
-m <factor> Max amount of memory to transfer before final suspend (default: 3*RAM)
--max_factor <factor>
-A Abort migration instead of doing final suspend.
--abort_if_busy
The changes to libxl change the API, handle LIBXL_API_VERSION == 0x040200.
TODO:
eventually add also --min_remaining (default value 50) in a seperate patch
v6:
- update the LIBXL_API_VERSION handling for libxl_domain_suspend
change it to an inline function if LIBXL_API_VERSION is defined to 4.2.0
- rename libxl_save_properties to libxl_domain_suspend_properties
- rename ->xlflags to ->flags within that struct
v5:
- adjust libxl_domain_suspend prototype, move flags, max_iters,
max_factor into a new, optional struct libxl_save_properties
- rename XCFLAGS_DOMSAVE_NOSUSPEND to XCFLAGS_DOMSAVE_ABORT_IF_BUSY
- rename LIBXL_SUSPEND_NO_FINAL_SUSPEND to LIBXL_SUSPEND_ABORT_IF_BUSY
- rename variables no_suspend to abort_if_busy
- rename option -N/--no_suspend to -A/--abort_if_busy
- update xl.1, extend description of -A option
v4:
- update default for no_suspend from None to 0 in XendCheckpoint.py:save
- update logoutput in setMigrateConstraints
- change xm migrate defaults from None to 0
- add new options to xl.1
- fix syntax error in XendDomain.py:domain_migrate_constraints_set
- fix xm migrate -N option name to match xl migrate
v3:
- move logic errors in libxl__domain_suspend and fixed help text in
cmd_table to separate patches
- fix syntax error in XendCheckpoint.py
- really pass max_iters and max_factor in libxl__xc_domain_save
- make libxl_domain_suspend_0x040200 declaration globally visible
- bump libxenlight.so SONAME from 2.0 to 2.1 due to changed
libxl_domain_suspend
v2:
- use LIBXL_API_VERSION and define libxl_domain_suspend_0x040200
- fix logic error in min_reached check in xc_domain_save
- add longopts
- update --help text
- correct description of migrate --help text
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/libxc/xc_domain_save.c | 13 ++++++++++++-
tools/libxc/xenguest.h | 1 +
tools/python/xen/xend/XendCheckpoint.py | 14 ++++++++++++--
tools/python/xen/xend/XendDomain.py | 12 ++++++++++++
tools/python/xen/xend/XendDomainInfo.py | 12 ++++++++++++
tools/python/xen/xm/migrate.py | 16 ++++++++++++++++
6 files changed, 65 insertions(+), 3 deletions(-)
Index: xen-4.4.0-testing/tools/libxc/xc_domain_save.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.4.0-testing/tools/libxc/xc_domain_save.c
@@ -809,6 +809,7 @@ int xc_domain_save(xc_interface *xch, in
int rc = 1, frc, i, j, last_iter = 0, iter = 0;
int live = (flags & XCFLAGS_LIVE);
int debug = (flags & XCFLAGS_DEBUG);
+ int abort_if_busy = (flags & XCFLAGS_DOMSAVE_ABORT_IF_BUSY);
int superpages = !!hvm;
int race = 0, sent_last_iter, skip_this_iter = 0;
unsigned int sent_this_iter = 0;
@@ -1532,10 +1533,20 @@ int xc_domain_save(xc_interface *xch, in
if ( live )
{
+ int min_reached = sent_this_iter + skip_this_iter < 50;
if ( (iter >= max_iters) ||
- (sent_this_iter+skip_this_iter < 50) ||
+ min_reached ||
(total_sent > dinfo->p2m_size*max_factor) )
{
+ if ( !min_reached && abort_if_busy )
+ {
+ ERROR("Live migration aborted, as requested. (guest too busy?)"
+ " total_sent %lu iter %d, max_iters %u max_factor %u",
+ total_sent, iter, max_iters, max_factor);
+ rc = 1;
+ goto out;
+ }
+
DPRINTF("Start last iteration\n");
last_iter = 1;
Index: xen-4.4.0-testing/tools/libxc/xenguest.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxc/xenguest.h
+++ xen-4.4.0-testing/tools/libxc/xenguest.h
@@ -28,6 +28,7 @@
#define XCFLAGS_HVM (1 << 2)
#define XCFLAGS_STDVGA (1 << 3)
#define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4)
+#define XCFLAGS_DOMSAVE_ABORT_IF_BUSY (1 << 5)
#define X86_64_B_SIZE 64
#define X86_32_B_SIZE 32
Index: xen-4.4.0-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -118,9 +118,19 @@ def save(fd, dominfo, network, live, dst
# enabled. Passing "0" simply uses the defaults compiled into
# libxenguest; see the comments and/or code in xc_linux_save() for
# more information.
+ max_iters = dominfo.info.get('max_iters', "0")
+ max_factor = dominfo.info.get('max_factor', "0")
+ abort_if_busy = dominfo.info.get('abort_if_busy', "0")
+ if max_iters == "None":
+ max_iters = "0"
+ if max_factor == "None":
+ max_factor = "0"
+ if abort_if_busy == "None":
+ abort_if_busy = "0"
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
- str(dominfo.getDomid()), "0", "0",
- str(int(live) | (int(hvm) << 2)) ]
+ str(dominfo.getDomid()),
+ max_iters, max_factor,
+ str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) ) ]
log.debug("[xc_save]: %s", string.join(cmd))
def saveInputHandler(line, tochild):
Index: xen-4.4.0-testing/tools/python/xen/xend/XendDomain.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendDomain.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendDomain.py
@@ -1832,6 +1832,18 @@ class XendDomain:
log.exception(ex)
raise XendError(str(ex))
+ def domain_migrate_constraints_set(self, domid, max_iters, max_factor, abort_if_busy):
+ """Set the Migrate Constraints of this domain.
+ @param domid: Domain ID or Name
+ @param max_iters: Number of iterations before final suspend
+ @param max_factor: Max amount of memory to transfer before final suspend
+ @param abort_if_busy: Abort migration instead of doing final suspend
+ """
+ dominfo = self.domain_lookup_nr(domid)
+ if not dominfo:
+ raise XendInvalidDomain(str(domid))
+ dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy)
+
def domain_maxmem_set(self, domid, mem):
"""Set the memory limit for a domain.
Index: xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1475,6 +1475,18 @@ class XendDomainInfo:
break
xen.xend.XendDomain.instance().managed_config_save(self)
+ def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy):
+ """Set the Migrate Constraints of this domain.
+ @param max_iters: Number of iterations before final suspend
+ @param max_factor: Max amount of memory to transfer before final suspend
+ @param abort_if_busy: Abort migration instead of doing final suspend
+ """
+ log.debug("Setting migration constraints of domain %s (%s) to '%s' '%s' '%s'.",
+ self.info['name_label'], str(self.domid), max_iters, max_factor, abort_if_busy)
+ self.info['max_iters'] = str(max_iters)
+ self.info['max_factor'] = str(max_factor)
+ self.info['abort_if_busy'] = str(abort_if_busy)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
Index: xen-4.4.0-testing/tools/python/xen/xm/migrate.py
===================================================================
--- xen-4.4.0-testing.orig/tools/python/xen/xm/migrate.py
+++ xen-4.4.0-testing/tools/python/xen/xm/migrate.py
@@ -55,6 +55,18 @@ gopts.opt('change_home_server', short='c
fn=set_true, default=0,
use="Change home server for managed domains.")
+gopts.opt('max_iters', val='max_iters',
+ fn=set_int, default=0,
+ use="Number of iterations before final suspend (default: 30).")
+
+gopts.opt('max_factor', val='max_factor',
+ fn=set_int, default=0,
+ use="Max amount of memory to transfer before final suspend (default: 3*RAM).")
+
+gopts.opt('abort_if_busy',
+ fn=set_true, default=0,
+ use="Abort migration instead of doing final suspend.")
+
def help():
return str(gopts)
@@ -80,6 +92,10 @@ def main(argv):
server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live),
other_config)
else:
+ server.xend.domain.migrate_constraints_set(dom,
+ opts.vals.max_iters,
+ opts.vals.max_factor,
+ opts.vals.abort_if_busy)
server.xend.domain.migrate(dom, dst, opts.vals.live,
opts.vals.port,
opts.vals.node,

Some files were not shown because too many files have changed in this diff Show More