Accepting request 480654 from home:jfehlig:branches:Virtualization
- qemu: Fix monitor timeout with large memory VMs 67dcb797-virTimeBackOffWait-sleepcap.patch 85af0b80-qemu-adaptive-montimeout.patch bsc#1013113 OBS-URL: https://build.opensuse.org/request/show/480654 OBS-URL: https://build.opensuse.org/package/show/Virtualization/libvirt?expand=0&rev=591
This commit is contained in:
parent
04a0a5949c
commit
51e9f38d3f
66
67dcb797-virTimeBackOffWait-sleepcap.patch
Normal file
66
67dcb797-virTimeBackOffWait-sleepcap.patch
Normal file
@ -0,0 +1,66 @@
|
||||
commit 67dcb797ed7f1fbb048aa47006576f424923933b
|
||||
Author: Michal Privoznik <mprivozn@redhat.com>
|
||||
Date: Mon Mar 13 11:05:08 2017 +0100
|
||||
|
||||
virTimeBackOffWait: Avoid long periods of sleep
|
||||
|
||||
While connecting to qemu monitor, the first thing we do is wait
|
||||
for it to show up. However, we are doing it with some timeout to
|
||||
avoid indefinite waits (e.g. when qemu doesn't create the monitor
|
||||
socket at all). After beaa447a29 we are using exponential back
|
||||
off timeout, meaning that after the first connection attempt we wait
|
||||
1ms, then 2ms, then 4 and so on. This allows us to bring down
|
||||
wait time for small domains where qemu initializes quickly.
|
||||
However, on the other end of this scale are some domains with
|
||||
huge amounts of guest memory. Now imagine that we've gotten up to
|
||||
wait time of 15 seconds. The next one is going to be 30 seconds,
|
||||
and the one after that a whole minute. Well, okay - with current
|
||||
code we are not going to wait longer than 30 seconds in total,
|
||||
but this is going to change in the next commit.
|
||||
|
||||
The exponential back off is usable only for first few iterations.
|
||||
Then it needs to be capped (one second was chosen as the limit)
|
||||
and switch to constant wait time.
|
||||
|
||||
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
|
||||
|
||||
Index: libvirt-3.1.0/src/util/virtime.c
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/src/util/virtime.c
|
||||
+++ libvirt-3.1.0/src/util/virtime.c
|
||||
@@ -390,6 +390,9 @@ virTimeBackOffStart(virTimeBackOffVar *v
|
||||
return 0;
|
||||
}
|
||||
|
||||
+
|
||||
+#define VIR_TIME_BACKOFF_CAP 1000
|
||||
+
|
||||
/**
|
||||
* virTimeBackOffWait
|
||||
* @var: Timeout variable (with type virTimeBackOffVar *).
|
||||
@@ -410,7 +413,9 @@ virTimeBackOffStart(virTimeBackOffVar *v
|
||||
* The while loop that runs the body of the code repeatedly, with an
|
||||
* exponential backoff. It first waits for first milliseconds, then
|
||||
* runs the body, then waits for 2*first ms, then runs the body again.
|
||||
- * Then 4*first ms, and so on.
|
||||
+ * Then 4*first ms, and so on, up until wait time would reach
|
||||
+ * VIR_TIME_BACKOFF_CAP (whole second). Then it switches to constant
|
||||
+ * waiting time of VIR_TIME_BACKOFF_CAP.
|
||||
*
|
||||
* When timeout milliseconds is reached, the while loop ends.
|
||||
*
|
||||
@@ -429,8 +434,13 @@ virTimeBackOffWait(virTimeBackOffVar *va
|
||||
if (t > var->limit_t)
|
||||
return 0; /* ends the while loop */
|
||||
|
||||
+ /* Compute next wait time. Cap at VIR_TIME_BACKOFF_CAP
|
||||
+ * to avoid long useless sleeps. */
|
||||
next = var->next;
|
||||
- var->next *= 2;
|
||||
+ if (var->next < VIR_TIME_BACKOFF_CAP)
|
||||
+ var->next *= 2;
|
||||
+ else if (var->next > VIR_TIME_BACKOFF_CAP)
|
||||
+ var->next = VIR_TIME_BACKOFF_CAP;
|
||||
|
||||
/* If sleeping would take us beyond the limit, then shorten the
|
||||
* sleep. This is so we always run the body just before the final
|
171
85af0b80-qemu-adaptive-montimeout.patch
Normal file
171
85af0b80-qemu-adaptive-montimeout.patch
Normal file
@ -0,0 +1,171 @@
|
||||
commit 85af0b803cd19a03f71bd01ab4e045552410368f
|
||||
Author: Michal Privoznik <mprivozn@redhat.com>
|
||||
Date: Sat Mar 11 07:23:42 2017 +0100
|
||||
|
||||
qemu: Adaptive timeout for connecting to monitor
|
||||
|
||||
There were a couple of reports on the list (e.g. [1]) that guests
|
||||
with huge amounts of RAM are unable to start because libvirt
|
||||
kills qemu in the initialization phase. The problem is that if
|
||||
the guest is configured to use hugepages, the kernel has to zero them all
|
||||
out before handing over to qemu process. For instance, 402GiB
|
||||
worth of 1GiB pages took around 105 seconds (~3.8GiB/s). Since we
|
||||
do not want to make the timeout for connecting to monitor
|
||||
configurable, we have to teach libvirt to account for this
|
||||
fact. This commit implements "1s per each 1GiB of RAM" approach
|
||||
as suggested here [2].
|
||||
|
||||
1: https://www.redhat.com/archives/libvir-list/2017-March/msg00373.html
|
||||
2: https://www.redhat.com/archives/libvir-list/2017-March/msg00405.html
|
||||
|
||||
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
|
||||
|
||||
Index: libvirt-3.1.0/src/qemu/qemu_capabilities.c
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/src/qemu/qemu_capabilities.c
|
||||
+++ libvirt-3.1.0/src/qemu/qemu_capabilities.c
|
||||
@@ -4571,7 +4571,7 @@ virQEMUCapsInitQMPCommandRun(virQEMUCaps
|
||||
cmd->vm->pid = cmd->pid;
|
||||
|
||||
if (!(cmd->mon = qemuMonitorOpen(cmd->vm, &cmd->config, true,
|
||||
- &callbacks, NULL)))
|
||||
+ 0, &callbacks, NULL)))
|
||||
goto ignore;
|
||||
|
||||
virObjectLock(cmd->mon);
|
||||
Index: libvirt-3.1.0/src/qemu/qemu_monitor.c
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/src/qemu/qemu_monitor.c
|
||||
+++ libvirt-3.1.0/src/qemu/qemu_monitor.c
|
||||
@@ -327,11 +327,13 @@ qemuMonitorDispose(void *obj)
|
||||
|
||||
|
||||
static int
|
||||
-qemuMonitorOpenUnix(const char *monitor, pid_t cpid)
|
||||
+qemuMonitorOpenUnix(const char *monitor,
|
||||
+ pid_t cpid,
|
||||
+ unsigned long long timeout)
|
||||
{
|
||||
struct sockaddr_un addr;
|
||||
int monfd;
|
||||
- virTimeBackOffVar timeout;
|
||||
+ virTimeBackOffVar timebackoff;
|
||||
int ret = -1;
|
||||
|
||||
if ((monfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
|
||||
@@ -348,9 +350,9 @@ qemuMonitorOpenUnix(const char *monitor,
|
||||
goto error;
|
||||
}
|
||||
|
||||
- if (virTimeBackOffStart(&timeout, 1, 30*1000 /* ms */) < 0)
|
||||
+ if (virTimeBackOffStart(&timebackoff, 1, timeout * 1000) < 0)
|
||||
goto error;
|
||||
- while (virTimeBackOffWait(&timeout)) {
|
||||
+ while (virTimeBackOffWait(&timebackoff)) {
|
||||
ret = connect(monfd, (struct sockaddr *) &addr, sizeof(addr));
|
||||
|
||||
if (ret == 0)
|
||||
@@ -871,10 +873,30 @@ qemuMonitorOpenInternal(virDomainObjPtr
|
||||
}
|
||||
|
||||
|
||||
+#define QEMU_DEFAULT_MONITOR_WAIT 30
|
||||
+
|
||||
+/**
|
||||
+ * qemuMonitorOpen:
|
||||
+ * @vm: domain object
|
||||
+ * @config: monitor configuration
|
||||
+ * @json: enable JSON on the monitor
|
||||
+ * @timeout: number of seconds to add to default timeout
|
||||
+ * @cb: monitor event handlers
|
||||
+ * @opaque: opaque data for @cb
|
||||
+ *
|
||||
+ * Opens the monitor for running qemu. It may happen that it
|
||||
+ * takes some time for qemu to create the monitor socket (e.g.
|
||||
+ * because kernel is zeroing configured hugepages), therefore we
|
||||
+ * wait up to default + timeout seconds for the monitor to show
|
||||
+ * up after which a failure is claimed.
|
||||
+ *
|
||||
+ * Returns monitor object, NULL on error.
|
||||
+ */
|
||||
qemuMonitorPtr
|
||||
qemuMonitorOpen(virDomainObjPtr vm,
|
||||
virDomainChrSourceDefPtr config,
|
||||
bool json,
|
||||
+ unsigned long long timeout,
|
||||
qemuMonitorCallbacksPtr cb,
|
||||
void *opaque)
|
||||
{
|
||||
@@ -882,10 +904,14 @@ qemuMonitorOpen(virDomainObjPtr vm,
|
||||
bool hasSendFD = false;
|
||||
qemuMonitorPtr ret;
|
||||
|
||||
+ timeout += QEMU_DEFAULT_MONITOR_WAIT;
|
||||
+
|
||||
switch (config->type) {
|
||||
case VIR_DOMAIN_CHR_TYPE_UNIX:
|
||||
hasSendFD = true;
|
||||
- if ((fd = qemuMonitorOpenUnix(config->data.nix.path, vm ? vm->pid : 0)) < 0)
|
||||
+ if ((fd = qemuMonitorOpenUnix(config->data.nix.path,
|
||||
+ vm ? vm->pid : 0,
|
||||
+ timeout)) < 0)
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
Index: libvirt-3.1.0/src/qemu/qemu_monitor.h
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/src/qemu/qemu_monitor.h
|
||||
+++ libvirt-3.1.0/src/qemu/qemu_monitor.h
|
||||
@@ -246,6 +246,7 @@ char *qemuMonitorUnescapeArg(const char
|
||||
qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
|
||||
virDomainChrSourceDefPtr config,
|
||||
bool json,
|
||||
+ unsigned long long timeout,
|
||||
qemuMonitorCallbacksPtr cb,
|
||||
void *opaque)
|
||||
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
|
||||
Index: libvirt-3.1.0/src/qemu/qemu_process.c
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/src/qemu/qemu_process.c
|
||||
+++ libvirt-3.1.0/src/qemu/qemu_process.c
|
||||
@@ -1656,6 +1656,7 @@ qemuConnectMonitor(virQEMUDriverPtr driv
|
||||
qemuDomainObjPrivatePtr priv = vm->privateData;
|
||||
int ret = -1;
|
||||
qemuMonitorPtr mon = NULL;
|
||||
+ unsigned long long timeout = 0;
|
||||
|
||||
if (virSecurityManagerSetDaemonSocketLabel(driver->securityManager,
|
||||
vm->def) < 0) {
|
||||
@@ -1664,6 +1665,12 @@ qemuConnectMonitor(virQEMUDriverPtr driv
|
||||
return -1;
|
||||
}
|
||||
|
||||
+ /* When using hugepages, kernel zeroes them out before
|
||||
+ * handing them over to qemu. This can be very time
|
||||
+ * consuming. Therefore, add a second to timeout for each
|
||||
+ * 1GiB of guest RAM. */
|
||||
+ timeout = vm->def->mem.total_memory / (1024 * 1024);
|
||||
+
|
||||
/* Hold an extra reference because we can't allow 'vm' to be
|
||||
* deleted until the monitor gets its own reference. */
|
||||
virObjectRef(vm);
|
||||
@@ -1674,6 +1681,7 @@ qemuConnectMonitor(virQEMUDriverPtr driv
|
||||
mon = qemuMonitorOpen(vm,
|
||||
priv->monConfig,
|
||||
priv->monJSON,
|
||||
+ timeout,
|
||||
&monitorCallbacks,
|
||||
driver);
|
||||
|
||||
Index: libvirt-3.1.0/tests/qemumonitortestutils.c
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/tests/qemumonitortestutils.c
|
||||
+++ libvirt-3.1.0/tests/qemumonitortestutils.c
|
||||
@@ -1175,6 +1175,7 @@ qemuMonitorTestNew(bool json,
|
||||
if (!(test->mon = qemuMonitorOpen(test->vm,
|
||||
&src,
|
||||
json,
|
||||
+ 0,
|
||||
&qemuMonitorTestCallbacks,
|
||||
driver)))
|
||||
goto error;
|
@ -1,3 +1,11 @@
|
||||
-------------------------------------------------------------------
|
||||
Thu Mar 16 14:23:16 UTC 2017 - jfehlig@suse.com
|
||||
|
||||
- qemu: Fix monitor timeout with large memory VMs
|
||||
67dcb797-virTimeBackOffWait-sleepcap.patch
|
||||
85af0b80-qemu-adaptive-montimeout.patch
|
||||
bsc#1013113
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Tue Mar 7 22:49:32 UTC 2017 - mwilck@suse.com
|
||||
|
||||
|
@ -317,6 +317,8 @@ Source4: libvirtd-relocation-server.fw
|
||||
Source99: baselibs.conf
|
||||
Source100: %{name}-rpmlintrc
|
||||
# Upstream patches
|
||||
Patch0: 67dcb797-virTimeBackOffWait-sleepcap.patch
|
||||
Patch1: 85af0b80-qemu-adaptive-montimeout.patch
|
||||
# Patches pending upstream review
|
||||
Patch100: libxl-dom-reset.patch
|
||||
Patch101: network-don-t-use-dhcp-authoritative-on-static-netwo.patch
|
||||
@ -884,6 +886,8 @@ libvirt plugin for NSS for translating domain names into IP addresses.
|
||||
|
||||
%prep
|
||||
%setup -q
|
||||
%patch0 -p1
|
||||
%patch1 -p1
|
||||
%patch100 -p1
|
||||
%patch101 -p1
|
||||
%patch150 -p1
|
||||
|
@ -17,11 +17,11 @@ Signed-off-by: Martin Wilck <mwilck@suse.com>
|
||||
tests/networkxml2confdata/dhcp6host-routed-network.conf | 1 -
|
||||
2 files changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
|
||||
index ae1589d8c..17c6f3a0f 100644
|
||||
--- a/src/network/bridge_driver.c
|
||||
+++ b/src/network/bridge_driver.c
|
||||
@@ -1355,7 +1355,14 @@ networkDnsmasqConfContents(virNetworkObjPtr network,
|
||||
Index: libvirt-3.1.0/src/network/bridge_driver.c
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/src/network/bridge_driver.c
|
||||
+++ libvirt-3.1.0/src/network/bridge_driver.c
|
||||
@@ -1398,7 +1398,14 @@ networkDnsmasqConfContents(virNetworkObj
|
||||
if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET)) {
|
||||
if (ipdef->nranges || ipdef->nhosts) {
|
||||
virBufferAddLit(&configbuf, "dhcp-no-override\n");
|
||||
@ -37,10 +37,10 @@ index ae1589d8c..17c6f3a0f 100644
|
||||
}
|
||||
|
||||
if (ipdef->tftproot) {
|
||||
diff --git a/tests/networkxml2confdata/dhcp6host-routed-network.conf b/tests/networkxml2confdata/dhcp6host-routed-network.conf
|
||||
index 87a149880..5728ee430 100644
|
||||
--- a/tests/networkxml2confdata/dhcp6host-routed-network.conf
|
||||
+++ b/tests/networkxml2confdata/dhcp6host-routed-network.conf
|
||||
Index: libvirt-3.1.0/tests/networkxml2confdata/dhcp6host-routed-network.conf
|
||||
===================================================================
|
||||
--- libvirt-3.1.0.orig/tests/networkxml2confdata/dhcp6host-routed-network.conf
|
||||
+++ libvirt-3.1.0/tests/networkxml2confdata/dhcp6host-routed-network.conf
|
||||
@@ -10,7 +10,6 @@ bind-dynamic
|
||||
interface=virbr1
|
||||
dhcp-range=192.168.122.1,static
|
||||
@ -49,6 +49,3 @@ index 87a149880..5728ee430 100644
|
||||
dhcp-range=2001:db8:ac10:fd01::1,static,64
|
||||
dhcp-hostsfile=/var/lib/libvirt/dnsmasq/local.hostsfile
|
||||
addn-hosts=/var/lib/libvirt/dnsmasq/local.addnhosts
|
||||
--
|
||||
2.11.0
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user