From a416fd0dcd96342cb76e59ac1fd711df447e30c17e3b587edf18906d7daa676a Mon Sep 17 00:00:00 2001 From: Mark Post Date: Tue, 16 May 2017 01:32:01 +0000 Subject: [PATCH] Accepting request 495131 from openSUSE:Factory:zSystems Bug fix from Beta testing. OBS-URL: https://build.opensuse.org/request/show/495131 OBS-URL: https://build.opensuse.org/package/show/Base:System/s390-tools?expand=0&rev=8 --- ...ce-kill-which-can-corrupt-kernel-blk.patch | 144 ++++++++++++++++++ s390-tools.changes | 6 + s390-tools.spec | 2 + 3 files changed, 152 insertions(+) create mode 100644 s390-tools-sles12sp3-ziomon-no-blktrace-kill-which-can-corrupt-kernel-blk.patch diff --git a/s390-tools-sles12sp3-ziomon-no-blktrace-kill-which-can-corrupt-kernel-blk.patch b/s390-tools-sles12sp3-ziomon-no-blktrace-kill-which-can-corrupt-kernel-blk.patch new file mode 100644 index 0000000..7782dd1 --- /dev/null +++ b/s390-tools-sles12sp3-ziomon-no-blktrace-kill-which-can-corrupt-kernel-blk.patch @@ -0,0 +1,144 @@ +Subject: [PATCH] [BZ 149058] ziomon: no blktrace kill which can corrupt kernel blktrace state +From: Steffen Maier + +Description: ziomon: no blktrace kill which can corrupt kernel blktrace state +Symptom: Ziomon terminates with the following error messages: + + $ ziomon -d -o ... + ... + ziomon: Failed to stop trace on /dev/sdX + ... + ziomon: Failed to stop trace on /dev/sdY + ziomon: blktrace has leftovers, manual cleanup required! + + Subsequent ziomon runs fail likely until the next reboot: + + $ ziomon -d -o ... + ... + Collecting data...BLKTRACESETUP(2) /dev/sdX failed: \ + 2/No such file or directory + BLKTRACESETUP(2) /dev/sdY failed: 2/No such file or directory + ... + Thread Z failed open /sys/kernel/debug/block/(null)/traceZ: \ + 2/No such file or directory + FAILED to start thread on CPU 0: 1/Operation not permitted + ... + Error: blktrace has errors, aborting + ... + ziomon: Failed to stop trace on /dev/sdX + ... + ziomon: Failed to stop trace on /dev/sdY + ziomon: blktrace has leftovers, manual cleanup required! +Problem: While we call blktrace with stopwatch option '-w $WRP_DURATION' + to have itself terminate automatically after the (mandatory) + sampling duration, there might be a race + with our own timed blktrace checking loop between "Collecting + data..." and "done" + or any of SIGHUP SIGTERM SIGINT SIGQUIT from the user might + prematurely trigger emergency_shutdown(), + so blktrace might still be running or not have completed its + cleanup when we reach the blktrace leftover checks in shutdown(). + + In that case, ziomon uses the meanwhile hidden kill option of + blktrace which can corrupt kernel state if blktrace user space + has not yet completed its regular cleanup. +Solution: The correct way to have the blktrace process terminate and + cleanup properly including any dependent kernel state, before + blktrace's stopwatch makes it terminate and cleanup + automatically, is to only send SIGINT to it. + + http://git.kernel.org/cgit/linux/kernel/git/axboe/blktrace.git + 86596c7579c6 ("blktrace: remove -k from manpage synopsis") + fb7f86674a51 ("blktrace: disable kill option - take 2") + Additional explanation for blktrace -k: + http://marc.info/?l=linux-btrace&m=131245973528087&w=2i + linux-btrace ("Re: Recent changes") + The following kernel commits do not change above termination + statement: + http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git + 39cbb602b543 ("Remove double removal of blktrace directory") + fd51d251e4cd ("blktrace: remove debugfs entries on bad path") + f48fc4d32e24 + ("block: get rid of the manual directory counting in blktrace") + + As of this writing, the (multithreaded) blktrace also wires + SIGHUP SIGTERM SIGALRM with the same signal handler + handle_sigint(), but to adhere to its documentation move from + SIGTERM to SIGINT anyway. + + We must not use the meanwhile hidden kill option of blktrace + because this corrupts kernel state if blktrace user space has not + yet completed its regular cleanup. + + In order to avoid zombies we wait for all children. After all, we + want everything to be completed before ziomon terminates and the + user could safely run another instance of ziomon. Also, we get + the possibility for users to detect and report issues with + children that don't finish timely. + + Since we do not know how to manually cleanup, drop the check for + blktrace leftovers in shutdown(). +Reproduction: Run ziomon with many devices on the command line. + Possibly (other) load on the system increases probability. +Upstream-ID: - +Problem-ID: 149058 + +Signed-off-by: Steffen Maier +--- + ziomon/ziomon | 17 ++++++----------- + 1 file changed, 6 insertions(+), 11 deletions(-) + +--- a/ziomon/ziomon ++++ b/ziomon/ziomon +@@ -5,7 +5,7 @@ + # + # Wrapper script to start all processes + # +-# Copyright IBM Corp. 2008 ++# Copyright IBM Corp. 2008, 2016 + # + # Author(s): Stefan Raspl + # +@@ -84,7 +84,7 @@ function check_for_int() { + + function print_version() { + echo "$WRP_TOOLNAME: I/O data collection utility, version %S390_TOOLS_VERSION%"; +- echo "Copyright IBM Corp. 2008"; ++ echo "Copyright IBM Corp. 2008, 2016"; + } + + +@@ -356,8 +356,6 @@ function start_trace() { + + + function shutdown() { +- local failed=0; +- + echo "Shutting down"; + # one more second to write final result + sleep 2; +@@ -365,7 +363,7 @@ function shutdown() { + [ -d /proc/$WRP_ZIOMON_UTIL_PID ] && echo "Shutting down utilization process" && kill -s SIGTERM $WRP_ZIOMON_UTIL_PID; + fi + if [ "$WRP_BLKTRACE_PID" != "" ]; then +- [ -d /proc/$WRP_BLKTRACE_PID ] && echo "Shutting down blktrace process" && kill -s SIGTERM $WRP_BLKTRACE_PID; ++ [ -d /proc/$WRP_BLKTRACE_PID ] && echo "Shutting down blktrace process" && kill -s SIGINT $WRP_BLKTRACE_PID; + fi + if [ "$WRP_BLKIOMON_PID" != "" ]; then + [ -d /proc/$WRP_BLKIOMON_PID ] && echo "Shutting down blkiomon process" && kill -s SIGTERM $WRP_BLKIOMON_PID; +@@ -381,12 +379,9 @@ function shutdown() { + kill -s SIGTERM $WRP_ZIOMON_MGR_PID; + fi + fi +- # kill running traces on devices +- for dev in ${WRP_DEVICES[@]}; do +- [ -d /sys/kernel/debug/block/${dev##*/} ] && debug "Trace on $dev exists, attempt to kill..." && blktrace -k $dev >/dev/null 2>&1; +- [ -d /sys/kernel/debug/block/${dev##*/} ] && echo "$WRP_TOOLNAME: Failed to stop trace on $dev" && (( failed++ )); +- done +- [ $failed -ne 0 ] && echo "$WRP_TOOLNAME: blktrace has leftovers, manual cleanup required!"; ++ # synchronize with all children to avoid zombies ++ # and to prepare for a clean subsequent re-run of ziomon ++ wait + + if [ -e $WRP_MSG_Q_PATH ]; then + if [ $WRP_DEBUG -gt 1 ]; then diff --git a/s390-tools.changes b/s390-tools.changes index 63c51a2..67bc80c 100644 --- a/s390-tools.changes +++ b/s390-tools.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Tue May 16 00:57:05 UTC 2017 - mpost@suse.com + +- Added s390-tools-sles12sp3-ziomon-no-blktrace-kill-which-can-corrupt-kernel-blk.patch + (bsc#1038861) + ------------------------------------------------------------------- Tue Apr 18 22:25:10 UTC 2017 - mpost@suse.com diff --git a/s390-tools.spec b/s390-tools.spec index 3ba6a3a..49bd172 100644 --- a/s390-tools.spec +++ b/s390-tools.spec @@ -132,6 +132,7 @@ Patch29: s390-tools-sles12sp3-dasdfmt-10-Add-expand-format-mode.patch Patch30: s390-tools-sles12sp3-util_proc-fix-memory-allocation-error-messages.patch Patch31: s390-tools-sles12sp3-mon_fsstatd-fix-double-free-in-error-path-and-skip-virtual-fs.patch Patch32: s390-tools-sles12sp3-dbginfo-Collect-docker-debug-data.patch +Patch33: s390-tools-sles12sp3-ziomon-no-blktrace-kill-which-can-corrupt-kernel-blk.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build ExclusiveArch: s390 s390x @@ -220,6 +221,7 @@ to list files and directories. %patch30 -p1 %patch31 -p1 %patch32 -p1 +%patch33 -p1 cp -vi %{S:22} CAUTION