From d5a2e95d8c8aaecbdd13260b956503bd321f327378f55588e66e37c0ae0d0a4f Mon Sep 17 00:00:00 2001
From: Egbert Eich
Date: Tue, 14 Aug 2018 13:00:16 +0000
Subject: [PATCH] Accepting request 629222 from home:eeich:branches:network:cluster

- Update to 17.11.9
  * Fix segfault in slurmctld when a job's node bitmap is NULL during a
    scheduling cycle. Primarily caused by EnforcePartLimits=ALL.
  * Remove erroneous unlock in acct_gather_energy/ipmi.
  * Enable support for hwloc version 2.0.1.
  * Fix 'srun -q' (--qos) option handling.
  * Fix socket communication issue that can lead to lost task completion
    messages, which will cause a permanently stuck srun process.
  * Handle creation of TMPDIR if environment variable is set or changed in
    a task prolog script.
  * Avoid node layout fragmentation if running with a fixed CPU count but
    without Sockets and CoresPerSocket defined.
  * burst_buffer/cray - Fix datawarp swap default pool overriding jobdw.
  * Fix incorrect job priority assignment for multi-partition job with
    different PriorityTier settings on the partitions.
  * Fix sinfo to print correct node state.
- When using a remote shared StateSaveLocation, slurmctld needs to be
  started after remote filesystems have become available. Add
  'remote-fs.target' to the 'After=' directive in slurmctld.service
  (boo#1103561).
- Update to 17.11.8
  * Fix incomplete RESPONSE_[RESOURCE|JOB_PACK]_ALLOCATION building path.
  * Do not allocate nodes that were marked down due to the node not responding
    by ResumeTimeout.
  * task/cray plugin - search for "mems" cgroup information in the file
    "cpuset.mems" then fall back to the file "mems".
  * Fix ipmi profile debug uninitialized variable.
  * PMIx: fixed the direct connect inline msg sending.

OBS-URL: https://build.opensuse.org/request/show/629222
OBS-URL: https://build.opensuse.org/package/show/network:cluster/slurm?expand=0&rev=64
---
 slurm-17.11.7.tar.bz2 | 3 -
 slurm-17.11.9.tar.bz2 | 3 +
 slurm.changes | 95 +++++++++++++++++++
 slurm.spec | 33 +++++--
 ...it-when-backup-controller-takes-over.patch | 58 +++++++++++
 5 files changed, 180 insertions(+), 12 deletions(-)
 delete mode 100644 slurm-17.11.7.tar.bz2
 create mode 100644 slurm-17.11.9.tar.bz2
 create mode 100644 slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch

diff --git a/slurm-17.11.7.tar.bz2 b/slurm-17.11.7.tar.bz2
deleted file mode 100644
index b2466b1..0000000
--- a/slurm-17.11.7.tar.bz2
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4ab10870b1c35f67a3465796960b32e4270e52acc257987b10acc4f17035a57
-size 6249399
diff --git a/slurm-17.11.9.tar.bz2 b/slurm-17.11.9.tar.bz2
new file mode 100644
index 0000000..fbc6787
--- /dev/null
+++ b/slurm-17.11.9.tar.bz2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c56ed2eab6d2d2adf2ab5aec203175a64b9e8c5a5ba2af29470358e7808bd942
+size 6258698
diff --git a/slurm.changes b/slurm.changes
index 510569e..c939652 100644
--- a/slurm.changes
+++ b/slurm.changes
@@ -1,3 +1,98 @@
+-------------------------------------------------------------------
+Tue Aug 14 10:26:43 UTC 2018 - eich@suse.com
+
+- Update to 17.11.9
+  * Fix segfault in slurmctld when a job's node bitmap is NULL during a
+    scheduling cycle. Primarily caused by EnforcePartLimits=ALL.
+  * Remove erroneous unlock in acct_gather_energy/ipmi.
+  * Enable support for hwloc version 2.0.1.
+  * Fix 'srun -q' (--qos) option handling.
+  * Fix socket communication issue that can lead to lost task completion
+    messages, which will cause a permanently stuck srun process.
+  * Handle creation of TMPDIR if environment variable is set or changed in
+    a task prolog script.
+  * Avoid node layout fragmentation if running with a fixed CPU count but
+    without Sockets and CoresPerSocket defined.
+  * burst_buffer/cray - Fix datawarp swap default pool overriding jobdw.
+  * Fix incorrect job priority assignment for multi-partition job with
+    different PriorityTier settings on the partitions.
+  * Fix sinfo to print correct node state.
+
+-------------------------------------------------------------------
+Thu Aug  2 11:35:55 UTC 2018 - eich@suse.com
+
+- When using a remote shared StateSaveLocation, slurmctld needs to
+  be started after remote filesystems have become available.
+  Add 'remote-fs.target' to the 'After=' directive in slurmctld.service
+  (boo#1103561).
+
+-------------------------------------------------------------------
+Tue Jul 31 18:29:40 UTC 2018 - eich@suse.com
+
+- Update to 17.11.8
+  * Fix incomplete RESPONSE_[RESOURCE|JOB_PACK]_ALLOCATION building path.
+  * Do not allocate nodes that were marked down due to the node not responding
+    by ResumeTimeout.
+  * task/cray plugin - search for "mems" cgroup information in the file
+    "cpuset.mems" then fall back to the file "mems".
+  * Fix ipmi profile debug uninitialized variable.
+  * PMIx: fixed the direct connect inline msg sending.
+  * MYSQL: Fix issue not handling all fields when loading an archive dump.
+  * Allow a job_submit plugin to change the admin_comment field during
+    job_submit_plugin_modify().
+  * job_submit/lua - fix access into reservation table.
+  * MySQL - Prevent deadlock caused by archive logic locking reads.
+  * Don't enforce MaxQueryTimeRange when requesting specific jobs.
+  * Modify --test-only logic to properly support jobs submitted to more than
+    one partition.
+  * Prevent slurmctld from aborting when attempting to set non-existing
+    qos as def_qos_id.
+  * Add new job dependency type of "afterburstbuffer". The pending job will be
+    delayed until the first job completes execution and its burst buffer
+    stage-out is completed.
+  * Reorder proctrack/task plugin load in the slurmstepd to match that of
+    slurmd and avoid race condition calling task before proctrack can introduce.
+  * Prevent reboot of a busy KNL node when requesting inactive features.
+  * Revert to previous behavior when requesting memory per cpu/node introduced
+    in 17.11.7.
+  * Fix to reinitialize previously adjusted job members to their original
+    value when validating the job memory in multi-partition requests.
+  * Fix _step_signal() from always returning SLURM_SUCCESS.
+  * Combine active and available node feature change logs on one line rather
+    than one line per node for performance reasons.
+  * Prevent occasionally leaking freezer cgroups.
+  * Fix potential segfault when closing the mpi/pmi2 plugin.
+  * Fix issues with --exclusive=[user|mcs] to work correctly
+    with preemption or when job requests a specific list of hosts.
+  * Make code compile with hdf5 1.10.2+
+  * mpi/pmix: Fixed the collectives canceling.
+  * SlurmDBD: improve error message handling on archive load failure.
+  * Fix incorrect locking when deleting reservations.
+  * Fix incorrect locking when setting up the power save module.
+  * Fix setting format output length for squeue when showing array jobs.
+  * Add xstrstr function.
+  * Fix printing out of --hint options in sbatch, salloc --help.
+  * Prevent possible divide by zero in _validate_time_limit().
+  * Add Delegate=yes to the slurmd.service file to prevent systemd from
+    interfering with the jobs' cgroup hierarchies.
+  * Change the backlog argument to the listen() syscall within srun to 4096
+    to match elsewhere in the code, and avoid communication problems at scale.
+
+-------------------------------------------------------------------
+Tue Jul 31 17:30:08 UTC 2018 - eich@suse.com
+
+- Fix race in the slurmctld backup controller which prevents it from
+  cleaning up allocations on nodes properly after failing over
+  (bsc#1084917).
+- Handle %license in a backward-compatible manner.
+
+-------------------------------------------------------------------
+Sat Jul 28 15:30:58 UTC 2018 - eich@suse.com
+
+- Add a 'Recommends: slurm-munge' to slurm-slurmdbd.
+
 -------------------------------------------------------------------
 Wed Jul 11 12:04:55 UTC 2018 - eich@suse.com
diff --git a/slurm.spec b/slurm.spec
index aca9dbe..aa38751 100644
--- a/slurm.spec
+++ b/slurm.spec
@@ -18,7 +18,7 @@
 # Check file META in sources: update so_version to (API_CURRENT - API_AGE)
 %define so_version 32
-%define ver 17.11.7
+%define ver 17.11.9
 # so-version is 0 and seems to be stable
 %define pmi_so 0
@@ -73,6 +73,7 @@ Patch5: slurmd-uses-xdaemon_-for-systemd.patch
 Patch6: slurmdbd-uses-xdaemon_-for-systemd.patch
 Patch7: slurmsmwd-uses-xdaemon_-for-systemd.patch
 Patch8: removed-deprecated-xdaemon.patch
+Patch9: slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
 Requires: slurm-config = %{version}
 Requires: slurm-node = %{version}
@@ -208,6 +209,7 @@ Group: Productivity/Clustering/Computing
 Requires: slurm-config = %{version}
 Requires: slurm-plugins = %{version}
 Requires: slurm-sql = %{version}
+Recommends: slurm-munge = %{version}
 %if 0%{?with_systemd}
 %{?systemd_requires}
 %else
@@ -328,6 +330,7 @@ for the slurm daemons.
 %patch6 -p1
 %patch7 -p1
 %patch8 -p1
+%patch9 -p1
 
 %build
 %configure --enable-shared \
@@ -399,14 +402,20 @@ PartitionName=normal Nodes=linux Default=YES MaxTime=24:00:00 State=UP
 EOF
 # 9/17/14 karl.w.schulz@intel.com - Add option to drop VM cache during epilog
 sed -i '/^# No other SLURM jobs,/i \\n# Drop clean caches (OpenHPC)\necho 3 > /proc/sys/vm/drop_caches\n\n#' %{buildroot}/%{_sysconfdir}/%{name}/slurm.epilog.clean
-# chnage slurmdbd.conf for our needs
-sed -i 's@LogFile=/var/log/slurm/slurmdbd.log@LogFile=/var/log/slurmdbd.log@' %{buildroot}/%{_sysconfdir}/%{name}/slurmdbd.conf
-sed -i -e "s@PidFile=.*@PidFile=%{_localstatedir}/run/slurm/slurmdbd.pid@" %{buildroot}/%{_sysconfdir}/%{name}/slurmdbd.conf
-# manage local state dir
+# change slurmdbd.conf for our needs
+sed -i 's@LogFile=/var/log/slurm/slurmdbd.log@LogFile=/var/log/slurmdbd.log@'\
+    %{buildroot}/%{_sysconfdir}/%{name}/slurmdbd.conf
+sed -i -e "s@PidFile=.*@PidFile=%{_localstatedir}/run/slurm/slurmdbd.pid@" \
+    %{buildroot}/%{_sysconfdir}/%{name}/slurmdbd.conf
+# manage local state dir and a remote states save location
 mkdir -p %{buildroot}/%_localstatedir/lib/slurm
-sed -i -e "s@PIDFile=.*@PIDFile=%{_localstatedir}/run/slurm/slurmctld.pid@" %{buildroot}/%{_unitdir}/slurmctld.service
-sed -i -e "s@PIDFile=.*@PIDFile=%{_localstatedir}/run/slurm/slurmd.pid@" %{buildroot}/%{_unitdir}/slurmd.service
-sed -i -e "s@PIDFile=.*@PIDFile=%{_localstatedir}/run/slurm/slurmdbd.pid@" %{buildroot}/%{_unitdir}/slurmdbd.service
+sed -i -e "s@PIDFile=.*@PIDFile=%{_localstatedir}/run/slurm/slurmctld.pid@" \
+    -e "s@After=.*@After=network.target munge.service remote-fs.target@" \
+    %{buildroot}/%{_unitdir}/slurmctld.service
+sed -i -e "s@PIDFile=.*@PIDFile=%{_localstatedir}/run/slurm/slurmd.pid@" \
+    %{buildroot}/%{_unitdir}/slurmd.service
+sed -i -e "s@PIDFile=.*@PIDFile=%{_localstatedir}/run/slurm/slurmdbd.pid@" \
+    %{buildroot}/%{_unitdir}/slurmdbd.service
 %endif
 
 # Delete unpackaged files:
@@ -604,10 +613,16 @@ exit 0
 %_res_update slurmdbd.service
 %_rest slurmdbd
 
+%if 0%{?sle_version} > 120200 || 0%{?suse_version} > 1320
+%define my_license %license
+%else
+%define my_license %doc
+%endif
+
 %files
 %defattr(-,root,root)
 %doc AUTHORS NEWS RELEASE_NOTES DISCLAIMER
-%license COPYING
+%my_license COPYING
 %doc doc/html
 %{_bindir}/sacct
 %{_bindir}/sacctmgr
diff --git a/slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch b/slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
new file mode 100644
index 0000000..9474643
--- /dev/null
+++ b/slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
@@ -0,0 +1,58 @@
+From: Egbert Eich
+Date: Tue Jul 31 17:31:15 2018 +0200
+Subject: slurmctld: rerun agent_init() when backup controller takes over
+Patch-mainline: Not yet
+Git-commit: 169d9522c89a10dcffbf1403c20b4e6249bac79b
+References:
+
+A slurmctld backup controller often fails to clean up jobs which have
+finished; the node appears in an 'IDLE+COMPLETING' state while squeue -l
+still shows the job in a completing state.
+This situation persists until the primary controller is restarted and
+cleans up all tasks in 'COMPLETING' state.
+This issue is caused by a race condition in the backup controller:
+When the backup controller detects that the primary controller is
+inaccessible, it will run through a restart cycle. To trigger the shutdown
+of some entities, it will set slurmctld_config.shutdown_time to a value
+!= 0. Before continuing as the controller in charge, it resets this
+variable to 0 again.
+The agent which handles the request queue - from a separate thread - +wakes up periodically (in a 2 sec interval) and checks for things to do. +If it finds slurmctld_config.shutdown_time set to a value != 0, it will +terminate. +If this wakeup occurs in the 'takeover window' between the variable +being set to !=0 and reset to 0, the agent goes away and will no longer +be available to handle queued requests as there is nothing at the end +of the 'takeover window' that would restart it. + +This fix adds a restart of the agent by calling agent_init() after +slurmctld_config.shutdown_time has been reset to 0. +Should an agent still be running (because it didn't wake up during the +'takeover window') it will be caught in agent_init(). + +Signed-off-by: Egbert Eich +--- + src/slurmctld/backup.c | 4 ++++ + 1 file changed, 4 insertions(+) +diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c +index 24ddcde..cf3bb43 100644 +--- a/src/slurmctld/backup.c ++++ b/src/slurmctld/backup.c +@@ -65,6 +65,7 @@ + #include "src/slurmctld/read_config.h" + #include "src/slurmctld/slurmctld.h" + #include "src/slurmctld/trigger_mgr.h" ++#include "src/slurmctld/agent.h" + + #define SHUTDOWN_WAIT 2 /* Time to wait for primary server shutdown */ + +@@ -225,6 +226,9 @@ void run_backup(slurm_trigger_callbacks_t *callbacks) + abort(); + } + slurmctld_config.shutdown_time = (time_t) 0; ++ /* Reinit agent in case it has been terminated - agent_init() ++ will check itself */ ++ agent_init(); + unlock_slurmctld(config_write_lock); + select_g_select_nodeinfo_set_all(); +