diff --git a/67dcb797-virTimeBackOffWait-sleepcap.patch b/67dcb797-virTimeBackOffWait-sleepcap.patch new file mode 100644 index 0000000..37a3ead --- /dev/null +++ b/67dcb797-virTimeBackOffWait-sleepcap.patch @@ -0,0 +1,66 @@ +commit 67dcb797ed7f1fbb048aa47006576f424923933b +Author: Michal Privoznik +Date: Mon Mar 13 11:05:08 2017 +0100 + + virTimeBackOffWait: Avoid long periods of sleep + + While connecting to qemu monitor, the first thing we do is wait + for it to show up. However, we are doing it with some timeout to + avoid indefinite waits (e.g. when qemu doesn't create the monitor + socket at all). After beaa447a29 we are using exponential back + off timeout meaning, after the first connection attempt we wait + 1ms, then 2ms, then 4 and so on. This allows us to bring down + wait time for small domains where qemu initializes quickly. + However, on the other end of this scale are some domains with + huge amounts of guest memory. Now imagine that we've gotten up to + wait time of 15 seconds. The next one is going to be 30 seconds, + and the one after that whole minute. Well, okay - with current + code we are not going to wait longer than 30 seconds in total, + but this is going to change in the next commit. + + The exponential back off is usable only for first few iterations. + Then it needs to be capped (one second was chosen as the limit) + and switch to constant wait time. + + Signed-off-by: Michal Privoznik + +Index: libvirt-3.1.0/src/util/virtime.c +=================================================================== +--- libvirt-3.1.0.orig/src/util/virtime.c ++++ libvirt-3.1.0/src/util/virtime.c +@@ -390,6 +390,9 @@ virTimeBackOffStart(virTimeBackOffVar *v + return 0; + } + ++ ++#define VIR_TIME_BACKOFF_CAP 1000 ++ + /** + * virTimeBackOffWait + * @var: Timeout variable (with type virTimeBackOffVar *). +@@ -410,7 +413,9 @@ virTimeBackOffStart(virTimeBackOffVar *v + * The while loop that runs the body of the code repeatedly, with an + * exponential backoff. 
It first waits for first milliseconds, then + * runs the body, then waits for 2*first ms, then runs the body again. +- * Then 4*first ms, and so on. ++ * Then 4*first ms, and so on, up until wait time would reach ++ * VIR_TIME_BACKOFF_CAP (whole second). Then it switches to constant ++ * waiting time of VIR_TIME_BACKOFF_CAP. + * + * When timeout milliseconds is reached, the while loop ends. + * +@@ -429,8 +434,13 @@ virTimeBackOffWait(virTimeBackOffVar *va + if (t > var->limit_t) + return 0; /* ends the while loop */ + ++ /* Compute next wait time. Cap at VIR_TIME_BACKOFF_CAP ++ * to avoid long useless sleeps. */ + next = var->next; +- var->next *= 2; ++ if (var->next < VIR_TIME_BACKOFF_CAP) ++ var->next *= 2; ++ else if (var->next > VIR_TIME_BACKOFF_CAP) ++ var->next = VIR_TIME_BACKOFF_CAP; + + /* If sleeping would take us beyond the limit, then shorten the + * sleep. This is so we always run the body just before the final diff --git a/85af0b80-qemu-adaptive-montimeout.patch b/85af0b80-qemu-adaptive-montimeout.patch new file mode 100644 index 0000000..c6ed37f --- /dev/null +++ b/85af0b80-qemu-adaptive-montimeout.patch @@ -0,0 +1,171 @@ +commit 85af0b803cd19a03f71bd01ab4e045552410368f +Author: Michal Privoznik +Date: Sat Mar 11 07:23:42 2017 +0100 + + qemu: Adaptive timeout for connecting to monitor + + There were couple of reports on the list (e.g. [1]) that guests + with huge amounts of RAM are unable to start because libvirt + kills qemu in the initialization phase. The problem is that if + guest is configured to use hugepages kernel has to zero them all + out before handing over to qemu process. For instance, 402GiB + worth of 1GiB pages took around 105 seconds (~3.8GiB/s). Since we + do not want to make the timeout for connecting to monitor + configurable, we have to teach libvirt to count with this + fact. This commit implements "1s per each 1GiB of RAM" approach + as suggested here [2]. 
+ + 1: https://www.redhat.com/archives/libvir-list/2017-March/msg00373.html + 2: https://www.redhat.com/archives/libvir-list/2017-March/msg00405.html + + Signed-off-by: Michal Privoznik + +Index: libvirt-3.1.0/src/qemu/qemu_capabilities.c +=================================================================== +--- libvirt-3.1.0.orig/src/qemu/qemu_capabilities.c ++++ libvirt-3.1.0/src/qemu/qemu_capabilities.c +@@ -4571,7 +4571,7 @@ virQEMUCapsInitQMPCommandRun(virQEMUCaps + cmd->vm->pid = cmd->pid; + + if (!(cmd->mon = qemuMonitorOpen(cmd->vm, &cmd->config, true, +- &callbacks, NULL))) ++ 0, &callbacks, NULL))) + goto ignore; + + virObjectLock(cmd->mon); +Index: libvirt-3.1.0/src/qemu/qemu_monitor.c +=================================================================== +--- libvirt-3.1.0.orig/src/qemu/qemu_monitor.c ++++ libvirt-3.1.0/src/qemu/qemu_monitor.c +@@ -327,11 +327,13 @@ qemuMonitorDispose(void *obj) + + + static int +-qemuMonitorOpenUnix(const char *monitor, pid_t cpid) ++qemuMonitorOpenUnix(const char *monitor, ++ pid_t cpid, ++ unsigned long long timeout) + { + struct sockaddr_un addr; + int monfd; +- virTimeBackOffVar timeout; ++ virTimeBackOffVar timebackoff; + int ret = -1; + + if ((monfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { +@@ -348,9 +350,9 @@ qemuMonitorOpenUnix(const char *monitor, + goto error; + } + +- if (virTimeBackOffStart(&timeout, 1, 30*1000 /* ms */) < 0) ++ if (virTimeBackOffStart(&timebackoff, 1, timeout * 1000) < 0) + goto error; +- while (virTimeBackOffWait(&timeout)) { ++ while (virTimeBackOffWait(&timebackoff)) { + ret = connect(monfd, (struct sockaddr *) &addr, sizeof(addr)); + + if (ret == 0) +@@ -871,10 +873,30 @@ qemuMonitorOpenInternal(virDomainObjPtr + } + + ++#define QEMU_DEFAULT_MONITOR_WAIT 30 ++ ++/** ++ * qemuMonitorOpen: ++ * @vm: domain object ++ * @config: monitor configuration ++ * @json: enable JSON on the monitor ++ * @timeout: number of seconds to add to default timeout ++ * @cb: monitor event handles ++ * @opaque: 
opaque data for @cb ++ * ++ * Opens the monitor for running qemu. It may happen that it ++ * takes some time for qemu to create the monitor socket (e.g. ++ * because kernel is zeroing configured hugepages), therefore we ++ * wait up to default + timeout seconds for the monitor to show ++ * up after which a failure is claimed. ++ * ++ * Returns monitor object, NULL on error. ++ */ + qemuMonitorPtr + qemuMonitorOpen(virDomainObjPtr vm, + virDomainChrSourceDefPtr config, + bool json, ++ unsigned long long timeout, + qemuMonitorCallbacksPtr cb, + void *opaque) + { +@@ -882,10 +904,14 @@ qemuMonitorOpen(virDomainObjPtr vm, + bool hasSendFD = false; + qemuMonitorPtr ret; + ++ timeout += QEMU_DEFAULT_MONITOR_WAIT; ++ + switch (config->type) { + case VIR_DOMAIN_CHR_TYPE_UNIX: + hasSendFD = true; +- if ((fd = qemuMonitorOpenUnix(config->data.nix.path, vm ? vm->pid : 0)) < 0) ++ if ((fd = qemuMonitorOpenUnix(config->data.nix.path, ++ vm ? vm->pid : 0, ++ timeout)) < 0) + return NULL; + break; + +Index: libvirt-3.1.0/src/qemu/qemu_monitor.h +=================================================================== +--- libvirt-3.1.0.orig/src/qemu/qemu_monitor.h ++++ libvirt-3.1.0/src/qemu/qemu_monitor.h +@@ -246,6 +246,7 @@ char *qemuMonitorUnescapeArg(const char + qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm, + virDomainChrSourceDefPtr config, + bool json, ++ unsigned long long timeout, + qemuMonitorCallbacksPtr cb, + void *opaque) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4); +Index: libvirt-3.1.0/src/qemu/qemu_process.c +=================================================================== +--- libvirt-3.1.0.orig/src/qemu/qemu_process.c ++++ libvirt-3.1.0/src/qemu/qemu_process.c +@@ -1656,6 +1656,7 @@ qemuConnectMonitor(virQEMUDriverPtr driv + qemuDomainObjPrivatePtr priv = vm->privateData; + int ret = -1; + qemuMonitorPtr mon = NULL; ++ unsigned long long timeout = 0; + + if (virSecurityManagerSetDaemonSocketLabel(driver->securityManager, + vm->def) < 0) 
{ +@@ -1664,6 +1665,12 @@ qemuConnectMonitor(virQEMUDriverPtr driv + return -1; + } + ++ /* When using hugepages, kernel zeroes them out before ++ * handing them over to qemu. This can be very time ++ * consuming. Therefore, add a second to timeout for each ++ * 1GiB of guest RAM. */ ++ timeout = vm->def->mem.total_memory / (1024 * 1024); ++ + /* Hold an extra reference because we can't allow 'vm' to be + * deleted until the monitor gets its own reference. */ + virObjectRef(vm); +@@ -1674,6 +1681,7 @@ qemuConnectMonitor(virQEMUDriverPtr driv + mon = qemuMonitorOpen(vm, + priv->monConfig, + priv->monJSON, ++ timeout, + &monitorCallbacks, + driver); + +Index: libvirt-3.1.0/tests/qemumonitortestutils.c +=================================================================== +--- libvirt-3.1.0.orig/tests/qemumonitortestutils.c ++++ libvirt-3.1.0/tests/qemumonitortestutils.c +@@ -1175,6 +1175,7 @@ qemuMonitorTestNew(bool json, + if (!(test->mon = qemuMonitorOpen(test->vm, + &src, + json, ++ 0, + &qemuMonitorTestCallbacks, + driver))) + goto error; diff --git a/libvirt.changes b/libvirt.changes index 4e6906b..c2a7f74 100644 --- a/libvirt.changes +++ b/libvirt.changes @@ -1,3 +1,18 @@ +------------------------------------------------------------------- +Thu Mar 16 14:23:16 UTC 2017 - jfehlig@suse.com + +- qemu: Fix monitor timeout with large memory VMs + 67dcb797-virTimeBackOffWait-sleepcap.patch + 85af0b80-qemu-adaptive-montimeout.patch + bsc#1013113 + +------------------------------------------------------------------- +Tue Mar 7 22:49:32 UTC 2017 - mwilck@suse.com + +- network: don't use dhcp-authoritative on static networks + bsc#1015588 + * added network-don-t-use-dhcp-authoritative-on-static-netwo.patch + ------------------------------------------------------------------- Fri Mar 3 14:11:24 UTC 2017 - jfehlig@suse.com diff --git a/libvirt.spec b/libvirt.spec index de14728..553152e 100644 --- a/libvirt.spec +++ b/libvirt.spec @@ -317,8 +317,11 @@ Source4: 
libvirtd-relocation-server.fw Source99: baselibs.conf Source100: %{name}-rpmlintrc # Upstream patches +Patch0: 67dcb797-virTimeBackOffWait-sleepcap.patch +Patch1: 85af0b80-qemu-adaptive-montimeout.patch # Patches pending upstream review Patch100: libxl-dom-reset.patch +Patch101: network-don-t-use-dhcp-authoritative-on-static-netwo.patch # Need to go upstream Patch150: xen-pv-cdrom.patch Patch151: blockcopy-check-dst-identical-device.patch @@ -883,7 +886,10 @@ libvirt plugin for NSS for translating domain names into IP addresses. %prep %setup -q +%patch0 -p1 +%patch1 -p1 %patch100 -p1 +%patch101 -p1 %patch150 -p1 %patch151 -p1 %patch152 -p1 diff --git a/network-don-t-use-dhcp-authoritative-on-static-netwo.patch b/network-don-t-use-dhcp-authoritative-on-static-netwo.patch new file mode 100644 index 0000000..2e0529c --- /dev/null +++ b/network-don-t-use-dhcp-authoritative-on-static-netwo.patch @@ -0,0 +1,51 @@ +From 15c7f9a6e7678238ef06f5d805984addb6f8bcdb Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Thu, 15 Dec 2016 10:17:05 +0100 +Subject: [PATCH] network: don't use dhcp-authoritative on static networks + +"Static" DHCP networks are those where no dynamic DHCP range is +defined, only a list of host entries is used to serve permanent +IP addresses. On such networks, we don't want dnsmasq to reply +to other requests than those statically defined. But +"dhcp-authoritative" will cause dnsmasq to do just that. +Therefore we can't use "dhcp-authoritative" for static networks. 
+ +Fixes: 4ac20b3ae "network: add dnsmasq option 'dhcp-authoritative'" +Signed-off-by: Martin Wilck +--- + src/network/bridge_driver.c | 9 ++++++++- + tests/networkxml2confdata/dhcp6host-routed-network.conf | 1 - + 2 files changed, 8 insertions(+), 2 deletions(-) + +Index: libvirt-3.1.0/src/network/bridge_driver.c +=================================================================== +--- libvirt-3.1.0.orig/src/network/bridge_driver.c ++++ libvirt-3.1.0/src/network/bridge_driver.c +@@ -1398,7 +1398,14 @@ networkDnsmasqConfContents(virNetworkObj + if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET)) { + if (ipdef->nranges || ipdef->nhosts) { + virBufferAddLit(&configbuf, "dhcp-no-override\n"); +- virBufferAddLit(&configbuf, "dhcp-authoritative\n"); ++ /* ++ * Use "dhcp-authoritative" only for dynamic DHCP. ++ * In a static-only network, it would cause dnsmasq ++ * to reply to requests from other hosts than those ++ * statically defined. ++ */ ++ if (ipdef->nranges || !ipdef->nhosts) ++ virBufferAddLit(&configbuf, "dhcp-authoritative\n"); + } + + if (ipdef->tftproot) { +Index: libvirt-3.1.0/tests/networkxml2confdata/dhcp6host-routed-network.conf +=================================================================== +--- libvirt-3.1.0.orig/tests/networkxml2confdata/dhcp6host-routed-network.conf ++++ libvirt-3.1.0/tests/networkxml2confdata/dhcp6host-routed-network.conf +@@ -10,7 +10,6 @@ bind-dynamic + interface=virbr1 + dhcp-range=192.168.122.1,static + dhcp-no-override +-dhcp-authoritative + dhcp-range=2001:db8:ac10:fd01::1,static,64 + dhcp-hostsfile=/var/lib/libvirt/dnsmasq/local.hostsfile + addn-hosts=/var/lib/libvirt/dnsmasq/local.addnhosts