From: Petr Tesarik Subject: Avoid entering failed state on CPU hotplug References: bnc#874992, bnc#809209 Patch-mainline: scheduled for v0.8.13 When more than 5 CPUs are quickly offlined or onlined, the kdump service enters and remains in a "failed state". This issue appears to be caused by systemd's default service start rate limit [more than 5 starts within 10 seconds, per the systemd.service(5) man page] being exceeded, because udev initiates a kdump service restart for every CPU offline or online operation. Avoid the problem by resetting the systemd start rate counter before each udev-initiated kdump service restart, so that the rate limit threshold is never reached. Additionally, limit the restart on CPU hotplug to IBM POWER, because other systems do not appear to be affected by bnc#389658. z/Architecture does not need a restart on memory hotplug either, so no udev rules are needed there at all. Signed-off-by: Petr Tesarik diff --git a/70-kdump.rules b/70-kdump.rules deleted file mode 100644 index c215f23..0000000 --- a/70-kdump.rules +++ /dev/null @@ -1,12 +0,0 @@ -# -# Kdump core headers needs to be regnerated if the CPUs or memory changes. -# For this, reload kdump. -# -# Novell Bug #389658 -# - -SUBSYSTEM=="cpu", ACTION=="online", PROGRAM="/etc/init.d/boot.kdump try-restart" -SUBSYSTEM=="cpu", ACTION=="offline", PROGRAM="/etc/init.d/boot.kdump try-restart" -SUBSYSTEM=="memory", ACTION=="add", PROGRAM="/etc/init.d/boot.kdump try-restart" -SUBSYSTEM=="memory", ACTION=="remove", PROGRAM="/etc/init.d/boot.kdump try-restart" - diff --git a/70-kdump.rules.in b/70-kdump.rules.in new file mode 100644 index 0000000..0ec2127 --- /dev/null +++ b/70-kdump.rules.in @@ -0,0 +1,37 @@ +@if @ARCH@ s390 s390x +# +# For s390x the ELF header is created in the kdump kernel and therefore +# no kdump udev rules are required. +# +@else +# +# Kdump core headers need to be regenerated if the CPUs or memory change. +# For this, reload kdump. 
+# +# Novell Bug #389658 +# + +TEST=="/usr/bin/systemctl", GOTO="kdump_systemd" + +@if @ARCH@ ppc ppc64 ppc64le +SUBSYSTEM=="cpu", ACTION=="online", RUN+="/etc/init.d/boot.kdump try-restart" +SUBSYSTEM=="cpu", ACTION=="offline", RUN+="/etc/init.d/boot.kdump try-restart" +@endif +SUBSYSTEM=="memory", ACTION=="add", RUN+="/etc/init.d/boot.kdump try-restart" +SUBSYSTEM=="memory", ACTION=="remove", RUN+="/etc/init.d/boot.kdump try-restart" + +GOTO="kdump_end" + +# Systemd limits service start rate, so if udev events are emitted too +# often, kdump will enter failed state, unless the counter is reset here. +LABEL="kdump_systemd" + +@if @ARCH@ ppc ppc64 ppc64le +SUBSYSTEM=="cpu", ACTION=="online", RUN+="/usr/bin/systemctl reset-failed kdump", RUN+="/usr/bin/systemctl try-restart kdump" +SUBSYSTEM=="cpu", ACTION=="offline", RUN+="/usr/bin/systemctl reset-failed kdump", RUN+="/usr/bin/systemctl try-restart kdump" +@endif +SUBSYSTEM=="memory", ACTION=="add", RUN+="/usr/bin/systemctl reset-failed kdump", RUN+="/usr/bin/systemctl try-restart kdump" +SUBSYSTEM=="memory", ACTION=="remove", RUN+="/usr/bin/systemctl reset-failed kdump", RUN+="/usr/bin/systemctl try-restart kdump" + +LABEL="kdump_end" +@endif diff --git a/CMakeLists.txt b/CMakeLists.txt index 59c4607..e10afa9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,7 @@ SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake) cmake_minimum_required(VERSION 2.6.2) set (PACKAGE_STRING "kdump") -set (PACKAGE_VERSION "0.8.12") +set (PACKAGE_VERSION "0.8.13") include_directories("${PROJECT_BINARY_DIR}") @@ -195,8 +195,28 @@ INSTALL(FILES /var/adm/fillup-templates/ ) +# Generate 70-kdump.rules from the template, keeping the sections selected for CMAKE_SYSTEM_PROCESSOR. +ADD_CUSTOM_COMMAND( + OUTPUT + 70-kdump.rules + COMMAND + ARCH=${CMAKE_SYSTEM_PROCESSOR} + awk -f ${CMAKE_CURRENT_SOURCE_DIR}/process_cond.awk + ${CMAKE_CURRENT_SOURCE_DIR}/70-kdump.rules.in + > ${CMAKE_CURRENT_BINARY_DIR}/70-kdump.rules + DEPENDS + 70-kdump.rules.in + ${CMAKE_CURRENT_SOURCE_DIR}/process_cond.awk +) +ADD_CUSTOM_TARGET( + kdump.rules + ALL + DEPENDS + 70-kdump.rules +) 
+ INSTALL(FILES - ${CMAKE_CURRENT_SOURCE_DIR}/70-kdump.rules + ${CMAKE_CURRENT_BINARY_DIR}/70-kdump.rules DESTINATION /etc/udev/rules.d ) diff --git a/process_cond.awk b/process_cond.awk index 82ffe92..d6f966b 100755 --- a/process_cond.awk +++ b/process_cond.awk @@ -5,7 +5,10 @@ ENVIRON[var[1]] \ substr($0, RSTART + RLENGTH) stack[sp++] = remove - remove = remove || ($2 != $3) + condition = 0 + for (i = 3; i <= NF; i++) + condition = condition || ($2 == $i) + remove = remove || !condition skip = 1 } /^@else\>/ {