diff --git a/0471637d-cgroups-vcpu-bw.patch b/0471637d-cgroups-vcpu-bw.patch new file mode 100644 index 0000000..8c4a300 --- /dev/null +++ b/0471637d-cgroups-vcpu-bw.patch @@ -0,0 +1,63 @@ +commit 0471637d5628106d058f0eb5516ffa7d5285cc6f +Author: Martin Kletzander +Date: Thu May 16 14:37:54 2013 +0200 + + qemu: Fix cgroup handling when setting VCPU BW + + Commit 632f78c introduced a regression which causes schedinfo being + unable to set some parameters. When migrating to priv->cgroup there + was missing variable left out and due to passed NULL to underlying + function, the setting failed. + + Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=963592 + +Index: libvirt-1.0.5/src/qemu/qemu_driver.c +=================================================================== +--- libvirt-1.0.5.orig/src/qemu/qemu_driver.c ++++ libvirt-1.0.5/src/qemu/qemu_driver.c +@@ -7721,7 +7721,6 @@ qemuDomainSetSchedulerParametersFlags(vi + { + virQEMUDriverPtr driver = dom->conn->privateData; + int i; +- virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + virDomainDefPtr vmdef = NULL; + unsigned long long value_ul; +@@ -7799,7 +7798,7 @@ qemuDomainSetSchedulerParametersFlags(vi + QEMU_SCHED_MIN_PERIOD, QEMU_SCHED_MAX_PERIOD); + + if (flags & VIR_DOMAIN_AFFECT_LIVE && value_ul) { +- if ((rc = qemuSetVcpusBWLive(vm, group, value_ul, 0))) ++ if ((rc = qemuSetVcpusBWLive(vm, priv->cgroup, value_ul, 0))) + goto cleanup; + + vm->def->cputune.period = value_ul; +@@ -7813,7 +7812,7 @@ qemuDomainSetSchedulerParametersFlags(vi + QEMU_SCHED_MIN_QUOTA, QEMU_SCHED_MAX_QUOTA); + + if (flags & VIR_DOMAIN_AFFECT_LIVE && value_l) { +- if ((rc = qemuSetVcpusBWLive(vm, group, 0, value_l))) ++ if ((rc = qemuSetVcpusBWLive(vm, priv->cgroup, 0, value_l))) + goto cleanup; + + vm->def->cputune.quota = value_l; +@@ -7827,7 +7826,8 @@ qemuDomainSetSchedulerParametersFlags(vi + QEMU_SCHED_MIN_PERIOD, QEMU_SCHED_MAX_PERIOD); + + if (flags & VIR_DOMAIN_AFFECT_LIVE && value_ul) { +- if ((rc = 
qemuSetEmulatorBandwidthLive(vm, group, value_ul, 0))) ++ if ((rc = qemuSetEmulatorBandwidthLive(vm, priv->cgroup, ++ value_ul, 0))) + goto cleanup; + + vm->def->cputune.emulator_period = value_ul; +@@ -7841,7 +7841,8 @@ qemuDomainSetSchedulerParametersFlags(vi + QEMU_SCHED_MIN_QUOTA, QEMU_SCHED_MAX_QUOTA); + + if (flags & VIR_DOMAIN_AFFECT_LIVE && value_l) { +- if ((rc = qemuSetEmulatorBandwidthLive(vm, group, 0, value_l))) ++ if ((rc = qemuSetEmulatorBandwidthLive(vm, priv->cgroup, ++ 0, value_l))) + goto cleanup; + + vm->def->cputune.emulator_quota = value_l; diff --git a/0ced83dc-cgroup-escape-dot.patch b/0ced83dc-cgroup-escape-dot.patch new file mode 100644 index 0000000..bd98495 --- /dev/null +++ b/0ced83dc-cgroup-escape-dot.patch @@ -0,0 +1,24 @@ +commit 0ced83dcfbb19af1201202e1af0a7073c338aabd +Author: Daniel P. Berrange +Date: Fri May 3 20:56:50 2013 +0100 + + Escaping leading '.' in cgroup names + + Escaping a leading '.' with '_' in the cgroup names + + Signed-off-by: Daniel P. Berrange + +Index: libvirt-1.0.5/src/util/vircgroup.c +=================================================================== +--- libvirt-1.0.5.orig/src/util/vircgroup.c ++++ libvirt-1.0.5/src/util/vircgroup.c +@@ -1106,7 +1106,8 @@ static int virCgroupPartitionNeedsEscapi + if (STRPREFIX(path, "cgroup.")) + return 1; + +- if (path[0] == '_') ++ if (path[0] == '_' || ++ path[0] == '.') + return 1; + + if (!(fp = fopen("/proc/cgroups", "r"))) diff --git a/486a86eb-cgroups-docs.patch b/486a86eb-cgroups-docs.patch new file mode 100644 index 0000000..655c6f8 --- /dev/null +++ b/486a86eb-cgroups-docs.patch @@ -0,0 +1,317 @@ +commit 486a86eb184c008c5957fb68c63f163289f3344b +Author: Daniel P. Berrange +Date: Fri May 3 16:58:26 2013 +0100 + + Add docs about cgroups layout and usage + + Describe the new cgroups layout, how to customize placement + of guests and what virsh commands are used to access the + parameters. + + Signed-off-by: Daniel P. 
Berrange + +Index: libvirt-1.0.5/docs/cgroups.html.in +=================================================================== +--- /dev/null ++++ libvirt-1.0.5/docs/cgroups.html.in +@@ -0,0 +1,285 @@ ++ ++ ++ ++ ++

Control Groups Resource Management

++ ++
    ++ ++

    ++ The QEMU and LXC drivers make use of the Linux "Control Groups" facility ++ for applying resource management to their virtual machines and containers. ++

    ++ ++

    Required controllers

    ++ ++

    ++ The control groups filesystem supports multiple "controllers". By default ++ the init system (such as systemd) should mount all controllers compiled ++ into the kernel at /sys/fs/cgroup/$CONTROLLER-NAME. Libvirt ++ will never attempt to mount any controllers itself, merely detect where ++ they are mounted. ++

    ++ ++

    ++ The QEMU driver is capable of using the cpuset, ++ cpu, memory, blkio and ++ devices controllers. None of them are compulsory. ++ If any controller is not mounted, the resource management APIs ++ which use it will cease to operate. It is possible to explicitly ++ turn off use of a controller, even when mounted, via the ++ /etc/libvirt/qemu.conf configuration file. ++

    ++ ++

    ++ The LXC driver is capable of using the cpuset, ++ cpu, cpuset, freezer, ++ memory, blkio and devices ++ controllers. The cpuset, devices ++ and memory controllers are compulsory. Without ++ them mounted, no containers can be started. If any of the ++ other controllers are not mounted, the resource management APIs ++ which use them will cease to operate. ++

    ++ ++

    Current cgroups layout

    ++ ++

    ++ As of libvirt 1.0.5 or later, the cgroups layout created by libvirt has been ++ simplified, in order to facilitate the setup of resource control policies by ++ administrators / management applications. The layout is based on the concepts of ++ "partitions" and "consumers". Each virtual machine or container is a consumer, ++ and has a corresponding cgroup named $VMNAME.libvirt-{qemu,lxc}. ++ Each consumer is associated with exactly one partition, which also have a ++ corresponding cgroup usually named $PARTNAME.partition. The ++ exceptions to this naming rule are the three top level default partitions, ++ named /system (for system services), /user (for ++ user login sessions) and /machine (for virtual machines and ++ containers). By default every consumer will of course be associated with ++ the /machine partition. This leads to a hierarchy that looks ++ like ++

    ++ ++
    ++$ROOT
    ++  |
    ++  +- system
    ++  |   |
    ++  |   +- libvirtd.service
    ++  |
    ++  +- machine
    ++      |
    ++      +- vm1.libvirt-qemu
    ++      |   |
    ++      |   +- emulator
    ++      |   +- vcpu0
    ++      |   +- vcpu1
    ++      |
    ++      +- vm2.libvirt-qemu
    ++      |   |
    ++      |   +- emulator
    ++      |   +- vcpu0
    ++      |   +- vcpu1
    ++      |
    ++      +- vm3.libvirt-qemu
    ++      |   |
    ++      |   +- emulator
    ++      |   +- vcpu0
    ++      |   +- vcpu1
    ++      |
    ++      +- container1.libvirt-lxc
    ++      |
    ++      +- container2.libvirt-lxc
    ++      |
    ++      +- container3.libvirt-lxc
    ++    
    ++ ++

    ++ The default cgroups layout ensures that, when there is contention for ++ CPU time, it is shared equally between system services, user sessions ++ and virtual machines / containers. This prevents virtual machines from ++ locking the administrator out of the host, or impacting execution of ++ system services. Conversely, when there is no contention from ++ system services / user sessions, it is possible for virtual machines ++ to fully utilize the host CPUs. ++

    ++ ++

    Using custom partitions

    ++ ++

    ++ If there is a need to apply resource constraints to groups of ++ virtual machines or containers, then the single default ++ partition /machine may not be sufficiently ++ flexible. The administrator may wish to sub-divide the ++ default partition, for example into "testing" and "production" ++ partitions, and then assign each guest to a specific ++ sub-partition. This is achieved via a small element addition ++ to the guest domain XML config, just below the main domain ++ element ++

    ++ ++
    ++  ...
    ++  <resource>
    ++    <partition>/machine/production</partition>
    ++  </resource>
    ++  ...
    ++    
    ++ ++

    ++ Libvirt will not auto-create the cgroups directory to back ++ this partition. In the future, libvirt / virsh will provide ++ APIs / commands to create custom partitions, but currently ++ this is left as an exercise for the administrator. For ++ example, given the XML config above, the admin would need ++ to create a cgroup named '/machine/production.partition' ++

    ++ ++
    ++# cd /sys/fs/cgroup
    ++# for i in blkio cpu,cpuacct cpuset devices freezer memory net_cls perf_event
    ++  do
    ++    mkdir $i/machine/production.partition
    ++  done
    ++# for i in cpuset.cpus  cpuset.mems
    ++  do
    ++    cat cpuset/machine/$i > cpuset/machine/production.partition/$i
    ++  done
    ++
    ++ ++

    ++ Note: the cgroups directory is created with a ".partition" ++ suffix, but the XML config does not require this suffix. ++

    ++ ++

    ++ Note: the ability to place guests in custom ++ partitions is only available with libvirt >= 1.0.5, using ++ the new cgroup layout. The legacy cgroups layout described ++ later did not support customization per guest. ++

    ++ ++

    Resource management APIs/commands

    ++ ++

    ++ Since libvirt aims to provide an API which is portable across ++ hypervisors, the concept of cgroups is not exposed directly ++ in the API or XML configuration. It is considered to be an ++ internal implementation detail. Instead libvirt provides a ++ set of APIs for applying resource controls, which are then ++ mapped to corresponding cgroup tunables ++

    ++ ++

    Scheduler tuning

    ++ ++

    ++ Parameters from the "cpu" controller are exposed via the ++ schedinfo command in virsh. ++

    ++ ++
    ++# virsh schedinfo demo
    ++Scheduler      : posix
    ++cpu_shares     : 1024
    ++vcpu_period    : 100000
    ++vcpu_quota     : -1
    ++emulator_period: 100000
    ++emulator_quota : -1
    ++ ++ ++

    Block I/O tuning

    ++ ++

    ++ Parameters from the "blkio" controller are exposed via the ++ blkiotune command in virsh. ++

    ++ ++ ++
    ++# virsh blkiotune demo
    ++weight         : 500
    ++device_weight  : 
    ++ ++

    Memory tuning

    ++ ++

    ++ Parameters from the "memory" controller are exposed via the ++ memtune command in virsh. ++

    ++ ++
    ++# virsh memtune demo
    ++hard_limit     : 580192
    ++soft_limit     : unlimited
    ++swap_hard_limit: unlimited
    ++    
    ++ ++

    Network tuning

    ++ ++

    ++ The net_cls is not currently used. Instead traffic ++ filter policies are set directly against individual virtual ++ network interfaces. ++

    ++ ++

    Legacy cgroups layout

    ++ ++

    ++ Prior to libvirt 1.0.5, the cgroups layout created by libvirt was different ++ from that described above, and did not allow for administrator customization. ++ Libvirt used a fixed, 3-level hierarchy libvirt/{qemu,lxc}/$VMNAME ++ which was rooted at the point in the hierarchy where libvirtd itself was ++ located. So if libvirtd was placed at /system/libvirtd.service ++ by systemd, the groups for each virtual machine / container would be located ++ at /system/libvirtd.service/libvirt/{qemu,lxc}/$VMNAME. In addition ++ to this, the QEMU driver created further child groups for each vCPU thread and the ++ emulator thread(s). This leads to a hierarchy that looked like ++

    ++ ++ ++
    ++$ROOT
    ++  |
    ++  +- system
    ++      |
    ++      +- libvirtd.service
    ++           |
    ++           +- libvirt
    ++               |
    ++               +- qemu
    ++               |   |
    ++               |   +- vm1
    ++               |   |   |
    ++               |   |   +- emulator
    ++               |   |   +- vcpu0
    ++               |   |   +- vcpu1
    ++               |   |
    ++               |   +- vm2
    ++               |   |   |
    ++               |   |   +- emulator
    ++               |   |   +- vcpu0
    ++               |   |   +- vcpu1
    ++               |   |
    ++               |   +- vm3
    ++               |       |
    ++               |       +- emulator
    ++               |       +- vcpu0
    ++               |       +- vcpu1
    ++               |
    ++               +- lxc
    ++                   |
    ++                   +- container1
    ++                   |
    ++                   +- container2
    ++                   |
    ++                   +- container3
    ++    
    ++ ++

    ++ Although current releases are much improved, historically the use of deep ++ hierarchies has had a significant negative impact on the kernel scalability. ++ The legacy libvirt cgroups layout highlighted these problems, to the detriment ++ of the performance of virtual machines and containers. ++

    ++ ++ +Index: libvirt-1.0.5/docs/sitemap.html.in +=================================================================== +--- libvirt-1.0.5.orig/docs/sitemap.html.in ++++ libvirt-1.0.5/docs/sitemap.html.in +@@ -87,6 +87,10 @@ + Ensuring exclusive guest access to disks + +
  • ++ CGroups ++ Control groups integration ++
  • ++
  • + Hooks + Hooks for system specific management +
  • diff --git a/a2214c52-iohelper.patch b/a2214c52-iohelper.patch new file mode 100644 index 0000000..a71458c --- /dev/null +++ b/a2214c52-iohelper.patch @@ -0,0 +1,28 @@ +commit a2214c5257d3bd7b086ce04aca1648e8ff05ee96 +Author: Daniel P. Berrange +Date: Fri May 10 14:45:05 2013 +0100 + + Fix iohelper usage with streams opened for read + + In b2878ed860ceceec3cd6481424fed0b543b687cd we added the O_NOCTTY + flag when opening files in the stream code. Unfortunately a later + piece of code was comparing the flags == O_RDONLY, without masking + out the non-access mode flags. This broke the iohelper when used + with streams for read, since it caused us to attach the stream + output pipe to the stream input FD instead of output FD :-( + + Signed-off-by: Daniel P. Berrange + +Index: libvirt-1.0.5/src/fdstream.c +=================================================================== +--- libvirt-1.0.5.orig/src/fdstream.c ++++ libvirt-1.0.5/src/fdstream.c +@@ -640,7 +640,7 @@ virFDStreamOpenFileInternal(virStreamPtr + virCommandTransferFD(cmd, fd); + virCommandAddArgFormat(cmd, "%d", fd); + +- if (oflags == O_RDONLY) { ++ if ((oflags & O_ACCMODE) == O_RDONLY) { + childfd = fds[1]; + fd = fds[0]; + virCommandSetOutputFD(cmd, &childfd); diff --git a/bbe97ae9-no-cgroups.patch b/bbe97ae9-no-cgroups.patch index e98256c..431fe1d 100644 --- a/bbe97ae9-no-cgroups.patch +++ b/bbe97ae9-no-cgroups.patch @@ -39,7 +39,7 @@ Index: libvirt-1.0.5/src/util/vircgroup.c =================================================================== --- libvirt-1.0.5.orig/src/util/vircgroup.c +++ libvirt-1.0.5/src/util/vircgroup.c -@@ -1167,14 +1167,14 @@ static int virCgroupPartitionEscape(char +@@ -1168,14 +1168,14 @@ static int virCgroupPartitionEscape(char return 0; } @@ -57,7 +57,7 @@ Index: libvirt-1.0.5/src/util/vircgroup.c for (i = 0 ; tokens[i] != NULL ; i++) { /* Whitelist the 3 top level fixed dirs -@@ -1193,20 +1193,27 @@ static char *virCgroupSetPartitionSuffix +@@ -1194,20 +1194,27 @@ static 
char *virCgroupSetPartitionSuffix !strchr(tokens[i], '.')) { if (VIR_REALLOC_N(tokens[i], strlen(tokens[i]) + strlen(".partition") + 1) < 0) { @@ -88,7 +88,7 @@ Index: libvirt-1.0.5/src/util/vircgroup.c cleanup: virStringFreeList(tokens); -@@ -1241,9 +1248,9 @@ int virCgroupNewPartition(const char *pa +@@ -1242,9 +1249,9 @@ int virCgroupNewPartition(const char *pa /* XXX convert all cgroups APIs to use error report * APIs instead of returning errno */ diff --git a/c2cf5f1c-no-cgroups-fix.patch b/c2cf5f1c-no-cgroups-fix.patch new file mode 100644 index 0000000..3417262 --- /dev/null +++ b/c2cf5f1c-no-cgroups-fix.patch @@ -0,0 +1,49 @@ +commit c2cf5f1c2abc81d607abe34bf5dc4c615a9b8b4d +Author: Daniel P. Berrange +Date: Thu May 16 18:47:07 2013 +0100 + + Fix failure to detect missing cgroup partitions + + Change bbe97ae968eba60b71e0066d49f9fc909966d9d6 caused the + QEMU driver to ignore ENOENT errors from cgroups, in order + to cope with missing /proc/cgroups. This is not good though + because many other things can cause ENOENT and should not + be ignored. The callers expect to see ENXIO when cgroups + are not present, so adjust the code to report that errno + when /proc/cgroups is missing + + Signed-off-by: Daniel P. 
Berrange + +Index: libvirt-1.0.5/src/qemu/qemu_cgroup.c +=================================================================== +--- libvirt-1.0.5.orig/src/qemu/qemu_cgroup.c ++++ libvirt-1.0.5/src/qemu/qemu_cgroup.c +@@ -415,8 +415,7 @@ int qemuInitCgroup(virQEMUDriverPtr driv + if (rc != 0) { + if (rc == -ENXIO || + rc == -EPERM || +- rc == -EACCES || +- rc == -ENOENT) { /* No cgroups mounts == success */ ++ rc == -EACCES) { /* No cgroups mounts == success */ + VIR_DEBUG("No cgroups present/configured/accessible, ignoring error"); + goto done; + } +Index: libvirt-1.0.5/src/util/vircgroup.c +=================================================================== +--- libvirt-1.0.5.orig/src/util/vircgroup.c ++++ libvirt-1.0.5/src/util/vircgroup.c +@@ -1110,8 +1110,13 @@ static int virCgroupPartitionNeedsEscapi + path[0] == '.') + return 1; + +- if (!(fp = fopen("/proc/cgroups", "r"))) ++ if (!(fp = fopen("/proc/cgroups", "r"))) { ++ /* The API contract is that we return ENXIO ++ * if cgroups are not available on a host */ ++ if (errno == ENOENT) ++ errno = ENXIO; + return -errno; ++ } + + /* + * Data looks like this: diff --git a/ca697e90-CVE-2013-1962.patch b/ca697e90-CVE-2013-1962.patch new file mode 100644 index 0000000..a90ba6b --- /dev/null +++ b/ca697e90-CVE-2013-1962.patch @@ -0,0 +1,26 @@ +commit ca697e90d5bd6a6dfb94bfb6d4438bdf9a44b739 +Author: Ján Tomko +Date: Fri Apr 12 17:30:56 2013 +0200 + + daemon: fix leak after listing all volumes + + CVE-2013-1962 + + remoteDispatchStoragePoolListAllVolumes wasn't freeing the pool. + The pool also held a reference to the connection, preventing it from + getting freed and closing the netcf interface driver, which held two + sockets open. 
+ +Index: libvirt-1.0.5/daemon/remote.c +=================================================================== +--- libvirt-1.0.5.orig/daemon/remote.c ++++ libvirt-1.0.5/daemon/remote.c +@@ -4226,6 +4226,8 @@ cleanup: + virStorageVolFree(vols[i]); + VIR_FREE(vols); + } ++ if (pool) ++ virStoragePoolFree(pool); + return rv; + } + diff --git a/f493d83f-cgroup-swap-control.patch b/f493d83f-cgroup-swap-control.patch new file mode 100644 index 0000000..9b77ee9 --- /dev/null +++ b/f493d83f-cgroup-swap-control.patch @@ -0,0 +1,34 @@ +commit f493d83fbd3257453e63f2f32ee90a216fd531c1 +Author: Daniel P. Berrange +Date: Thu May 9 13:53:39 2013 +0100 + + Cope with missing swap cgroup controls + + It is possible to build a kernel without swap cgroup controls + present. This causes a fatal error when querying memory + parameters. Treat missing swap controls as meaning "unlimited". + The fatal error remains if the user tries to actually change + the limit. + + Signed-off-by: Daniel P. Berrange + +Index: libvirt-1.0.5/src/qemu/qemu_driver.c +=================================================================== +--- libvirt-1.0.5.orig/src/qemu/qemu_driver.c ++++ libvirt-1.0.5/src/qemu/qemu_driver.c +@@ -7351,9 +7351,12 @@ qemuDomainGetMemoryParameters(virDomainP + case 2: /* fill swap hard limit here */ + rc = virCgroupGetMemSwapHardLimit(priv->cgroup, &val); + if (rc != 0) { +- virReportSystemError(-rc, "%s", +- _("unable to get swap hard limit")); +- goto cleanup; ++ if (rc != -ENOENT) { ++ virReportSystemError(-rc, "%s", ++ _("unable to get swap hard limit")); ++ goto cleanup; ++ } ++ val = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED; + } + if (virTypedParameterAssign(param, + VIR_DOMAIN_MEMORY_SWAP_HARD_LIMIT, diff --git a/libvirt.changes b/libvirt.changes index 8508803..cc8c1b3 100644 --- a/libvirt.changes +++ b/libvirt.changes @@ -1,3 +1,25 @@ +------------------------------------------------------------------- +Sat May 18 09:15:00 MDT 2013 - jfehlig@suse.com + +- fix leak after listing all 
volumes - CVE-2013-1962 + ca697e90-CVE-2013-1962.patch + bnc#820397 +- Fix iohelper usage with streams opened for read + a2214c52-iohelper.patch +- Cope with missing swap cgroup controls + f493d83f-cgroup-swap-control.patch + bnc#819976 +- Fix cgroup handling when setting VCPU BW + 0471637d-cgroups-vcpu-bw.patch + rhb#963592 +- Escape a leading '.' with '_' in the cgroup names + 0ced83dc-cgroup-escape-dot.patch +- Add missing documentation on new cgroup layout + 486a86eb-cgroups-docs.patch +- Another fix related to systems with no cgroups + c2cf5f1c-no-cgroups-fix.patch + bnc#819963 + ------------------------------------------------------------------- Thu May 16 16:25:13 MDT 2013 - jfehlig@suse.com diff --git a/libvirt.spec b/libvirt.spec index 9b66118..0c0353c 100644 --- a/libvirt.spec +++ b/libvirt.spec @@ -423,7 +423,14 @@ Source1: libvirtd.init Source2: libvirtd-relocation-server.fw Source99: baselibs.conf # Upstream patches -Patch0: bbe97ae9-no-cgroups.patch +Patch0: f493d83f-cgroup-swap-control.patch +Patch1: 486a86eb-cgroups-docs.patch +Patch2: 0ced83dc-cgroup-escape-dot.patch +Patch3: bbe97ae9-no-cgroups.patch +Patch4: 0471637d-cgroups-vcpu-bw.patch +Patch5: c2cf5f1c-no-cgroups-fix.patch +Patch6: a2214c52-iohelper.patch +Patch7: ca697e90-CVE-2013-1962.patch # Need to go upstream Patch100: xen-name-for-devid.patch Patch101: clone.patch @@ -565,6 +572,13 @@ Authors: %prep %setup -q %patch0 -p1 +%patch1 -p1 +%patch2 -p1 +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 %patch100 -p1 %patch101 %patch102 -p1