Accepting request 773459 from home:mslacken:branches:network:cluster

- Updated to version 20.02.0-0pre1.
  Highlights:
 * Exclusive behavior of a node includes all GRES on a node as well
   as the cpus.
 * Use python3 instead of python for internal build/test scripts.
   The slurm.spec file has been updated to depend on python3 as well.
 * Added new NodeSet configuration option to help simplify partition
   configuration sections for heterogeneous / condo-style clusters.
 * Added slurm.conf option MaxDBDMsgs to control how many messages will be
   stored in the slurmctld before throwing them away when the slurmdbd is down.
 * The checkpoint plugin interface and all associated API calls have been
   removed.
 * slurm_init_job_desc_msg() initializes mail_type as uint16_t. This allows
   mail_type to be set to NONE with scontrol.
 * Add new slurm_spank_log() function to print messages back to the user from
   within a SPANK plugin without prepending "error: " from slurm_error().
 * Enforce having partition name and nodelist=ALL when creating reservations
   with flags=PART_NODES.
 * SPANK - removed never-implemented slurm_spank_slurmd_init() interface. This
   hook has always been accessible through slurm_spank_init() in the
   S_CTX_SLURMD context instead.
 * sbcast - add new BcastAddr option to NodeName lines to allow sbcast traffic
   to flow over an alternate network path.
 * Added auth/jwt plugin, and 'scontrol token' subcommand.
 * PMIx - improve performance of proc map generation.
 * Deprecate kill_invalid_depend in SchedulerParameters and move it to a new
   option called DependencyParameters.
 * Enable job dependencies for any job on any cluster in the same federation.
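 * Allow clusters to be added automatically to db at startup of ctld.
 * Add AccountingStorageExternalHost slurm.conf parameter.
 * The "ConditionPathExists" condition in slurmd.service has been disabled by
   default to permit simpler installation of a "configless" Slurm cluster.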

OBS-URL: https://build.opensuse.org/request/show/773459
OBS-URL: https://build.opensuse.org/package/show/network:cluster/slurm?expand=0&rev=130
This commit is contained in:
Egbert Eich 2020-02-11 14:31:26 +00:00 committed by Git OBS Bridge
parent d94a66a178
commit 54640668e5
11 changed files with 79 additions and 484 deletions

View File

@ -1,47 +0,0 @@
From: Egbert Eich <eich@suse.com>
Date: Tue Nov 20 11:54:02 2018 +0100
Subject: removed deprecated xdaemon
Patch-mainline: Not yet
Git-commit: b39551df0f202203c16d4e9a9a7b640691acf882
References: bsc#1084125
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm-18.08.3/src/common/daemonize.c | 12 ------------
slurm-18.08.3/src/common/daemonize.h | 1 -
2 files changed, 13 deletions(-)
diff --git a/slurm-18.08.3/src/common/daemonize.c b/slurm-18.08.3/src/common/daemonize.c
index fee9d60..bec8202 100644
--- a/src/common/daemonize.c
+++ b/src/common/daemonize.c
@@ -138,18 +138,6 @@ void xdaemon_finish(int fd)
}
}
-/*
- * keep depercated api
- */
-
-int xdaemon(void)
-{
- int ret_val;
- ret_val= xdaemon_init();
- xdaemon_finish(ret_val);
- return ret_val;
-}
-
/*
* Read and return pid stored in pidfile.
* Returns 0 if file doesn't exist or pid cannot be read.
diff --git a/slurm-18.08.3/src/common/daemonize.h b/slurm-18.08.3/src/common/daemonize.h
index 8b60b4f..b7cb625 100644
--- a/src/common/daemonize.h
+++ b/src/common/daemonize.h
@@ -44,7 +44,6 @@
* Start fork process into background and inherit new session.
*
*/
-extern int xdaemon(void);
extern int xdaemon_init(void);
/*

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:715be211b7bef80d06df0011ec91c51ab740031bd9ff722a5e60c595feaad282
size 6232295

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9cd1e835df91b44b8bae27406e49ef79408e2a7a3e04e2c44e9c3ee816f1c338
size 6231642

View File

@ -1,3 +1,69 @@
-------------------------------------------------------------------
Tue Feb 11 10:09:43 UTC 2020 - Christian Goll <cgoll@suse.com>
- Updated to version 20.02.0-0pre1.
Highlights:
* Exclusive behavior of a node includes all GRES on a node as well
as the cpus.
* Use python3 instead of python for internal build/test scripts.
The slurm.spec file has been updated to depend on python3 as well.
* Added new NodeSet configuration option to help simplify partition
configuration sections for heterogeneous / condo-style clusters.
* Added slurm.conf option MaxDBDMsgs to control how many messages will be
stored in the slurmctld before throwing them away when the slurmdbd is down.
* The checkpoint plugin interface and all associated API calls have been
removed.
* slurm_init_job_desc_msg() initializes mail_type as uint16_t. This allows
mail_type to be set to NONE with scontrol.
* Add new slurm_spank_log() function to print messages back to the user from
within a SPANK plugin without prepending "error: " from slurm_error().
* Enforce having partition name and nodelist=ALL when creating reservations
with flags=PART_NODES.
* SPANK - removed never-implemented slurm_spank_slurmd_init() interface. This
hook has always been accessible through slurm_spank_init() in the
S_CTX_SLURMD context instead.
* sbcast - add new BcastAddr option to NodeName lines to allow sbcast traffic
to flow over an alternate network path.
* Added auth/jwt plugin, and 'scontrol token' subcommand.
* PMIx - improve performance of proc map generation.
* Deprecate kill_invalid_depend in SchedulerParameters and move it to a new
option called DependencyParameters.
* Enable job dependencies for any job on any cluster in the same federation.
* Allow clusters to be added automatically to db at startup of ctld.
* Add AccountingStorageExternalHost slurm.conf parameter.
* The "ConditionPathExists" condition in slurmd.service has been disabled by
default to permit simpler installation of a "configless" Slurm cluster.
* In SchedulerParameters remove deprecated max_job_bf and replace with
bf_max_job_test.
* Disable sbatch, salloc, srun --reboot for non-admins.
* SPANK - added support for S_JOB_GID in the job script context with
spank_get_item().
* Prolog/Epilog - add SLURM_JOB_GID environment variable.
configuration file changes:
* The mpi/openmpi plugin has been removed as it does nothing.
MpiDefault=openmpi will be translated to the functionally-equivalent
MpiDefault=none.
command changes (see man pages for details)
* Display StepId=<jobid>.batch instead of StepId=<jobid>.4294967294 in output
of "scontrol show step". (slurm_sprint_job_step_info())
* MPMD in srun will now defer PATH resolution for the commands to launch to
slurmstepd. Previously it would handle resolution client-side, but with
a non-standard approach that walked PATH in reverse.
* squeue - added "--me" option, equivalent to --user=$USER.
* The LicensesUsed line has been removed from 'scontrol show config'.
Please see the 'scontrol show licenses' command as an alternative.
* sbatch - adjusted backoff times for "--wait" option to reduce load on
slurmctld. This results in a steady-state delay of 32s between queries,
instead of the prior 10s delay.
- Removed following deprecated patches:
* removed patch slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
* removed patch split-xdaemon-in-xdaemon_init-and-xdaemon_finish-for.patch
* removed patch slurmctld-uses-xdaemon_-for-systemd.patch
* removed patch slurmd-uses-xdaemon_-for-systemd.patch
* removed patch slurmdbd-uses-xdaemon_-for-systemd.patch
* removed patch slurmsmwd-uses-xdaemon_-for-systemd.patch
* removed patch removed-deprecated-xdaemon.patch
------------------------------------------------------------------- -------------------------------------------------------------------
Wed Feb 5 15:37:05 UTC 2020 - Christian Goll <cgoll@suse.com> Wed Feb 5 15:37:05 UTC 2020 - Christian Goll <cgoll@suse.com>

View File

@ -17,10 +17,10 @@
# Check file META in sources: update so_version to (API_CURRENT - API_AGE) # Check file META in sources: update so_version to (API_CURRENT - API_AGE)
%define so_version 34 %define so_version 35
%define ver 19.05.5 %define ver 20.02.0
%define _ver _19_05 %define _ver _20_02
%define dl_ver %{ver} %define dl_ver %{ver}-0pre1
# so-version is 0 and seems to be stable # so-version is 0 and seems to be stable
%define pmi_so 0 %define pmi_so 0
%define nss_so 2 %define nss_so 2
@ -117,13 +117,6 @@ Source1: slurm-rpmlintrc
Patch0: Remove-rpath-from-build.patch Patch0: Remove-rpath-from-build.patch
Patch1: slurm-2.4.4-init.patch Patch1: slurm-2.4.4-init.patch
Patch2: pam_slurm-Initialize-arrays-and-pass-sizes.patch Patch2: pam_slurm-Initialize-arrays-and-pass-sizes.patch
Patch3: split-xdaemon-in-xdaemon_init-and-xdaemon_finish-for.patch
Patch4: slurmctld-uses-xdaemon_-for-systemd.patch
Patch5: slurmd-uses-xdaemon_-for-systemd.patch
Patch6: slurmdbd-uses-xdaemon_-for-systemd.patch
Patch7: slurmsmwd-uses-xdaemon_-for-systemd.patch
Patch8: removed-deprecated-xdaemon.patch
Patch9: slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
%{?upgrade:Provides: %{pname} = %{version}} %{?upgrade:Provides: %{pname} = %{version}}
%{?upgrade:Conflicts: %{pname}} %{?upgrade:Conflicts: %{pname}}
@ -508,14 +501,6 @@ Contains also cray specific documentation.
%patch0 -p2 %patch0 -p2
%patch1 -p1 %patch1 -p1
%patch2 -p1 %patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
# Drop this fix as it is considered to be resolved by c1a537dbbe6
##%patch9 -p1
%build %build
%define _lto_cflags %{nil} %define _lto_cflags %{nil}
@ -892,7 +877,6 @@ exit 0
%{_bindir}/sprio %{_bindir}/sprio
%{_bindir}/squeue %{_bindir}/squeue
%{_bindir}/sreport %{_bindir}/sreport
%{_bindir}/smap
%{_bindir}/sshare %{_bindir}/sshare
%{_bindir}/sstat %{_bindir}/sstat
%{_bindir}/strigger %{_bindir}/strigger
@ -916,7 +900,6 @@ exit 0
%{_mandir}/man1/sgather.1.* %{_mandir}/man1/sgather.1.*
%{_mandir}/man1/sinfo.1* %{_mandir}/man1/sinfo.1*
%{_mandir}/man1/slurm.1* %{_mandir}/man1/slurm.1*
%{_mandir}/man1/smap.1*
%{_mandir}/man1/sprio.1* %{_mandir}/man1/sprio.1*
%{_mandir}/man1/squeue.1* %{_mandir}/man1/squeue.1*
%{_mandir}/man1/sreport.1* %{_mandir}/man1/sreport.1*
@ -1032,10 +1015,11 @@ exit 0
%{_libdir}/slurm/acct_gather_profile_none.so %{_libdir}/slurm/acct_gather_profile_none.so
%{?have_json_c:%{_libdir}/slurm/burst_buffer_datawarp.so} %{?have_json_c:%{_libdir}/slurm/burst_buffer_datawarp.so}
%{_libdir}/slurm/burst_buffer_generic.so %{_libdir}/slurm/burst_buffer_generic.so
%{_libdir}/slurm/checkpoint_none.so
%{_libdir}/slurm/checkpoint_ompi.so
%{_libdir}/slurm/core_spec_none.so %{_libdir}/slurm/core_spec_none.so
%{_libdir}/slurm/cli_filter_none.so %{_libdir}/slurm/cli_filter_none.so
%{_libdir}/slurm/cli_filter_lua.so
%{_libdir}/slurm/cli_filter_syslog.so
%{_libdir}/slurm/cli_filter_user_defaults.so
%{_libdir}/slurm/cred_none.so %{_libdir}/slurm/cred_none.so
%{_libdir}/slurm/ext_sensors_none.so %{_libdir}/slurm/ext_sensors_none.so
%{_libdir}/slurm/gpu_generic.so %{_libdir}/slurm/gpu_generic.so
@ -1048,6 +1032,7 @@ exit 0
%{_libdir}/slurm/jobacct_gather_none.so %{_libdir}/slurm/jobacct_gather_none.so
%{_libdir}/slurm/jobcomp_filetxt.so %{_libdir}/slurm/jobcomp_filetxt.so
%{_libdir}/slurm/jobcomp_none.so %{_libdir}/slurm/jobcomp_none.so
%{_libdir}/slurm/jobcomp_lua.so
%{_libdir}/slurm/jobcomp_script.so %{_libdir}/slurm/jobcomp_script.so
%{_libdir}/slurm/job_container_cncu.so %{_libdir}/slurm/job_container_cncu.so
%{_libdir}/slurm/job_container_none.so %{_libdir}/slurm/job_container_none.so
@ -1066,7 +1051,6 @@ exit 0
%{_libdir}/slurm/mcs_none.so %{_libdir}/slurm/mcs_none.so
%{_libdir}/slurm/mcs_user.so %{_libdir}/slurm/mcs_user.so
%{_libdir}/slurm/mpi_none.so %{_libdir}/slurm/mpi_none.so
%{_libdir}/slurm/mpi_openmpi.so
%{_libdir}/slurm/mpi_pmi2.so %{_libdir}/slurm/mpi_pmi2.so
%if %{with pmix} %if %{with pmix}
%{_libdir}/slurm/mpi_pmix.so %{_libdir}/slurm/mpi_pmix.so
@ -1076,6 +1060,7 @@ exit 0
%{_libdir}/slurm/preempt_none.so %{_libdir}/slurm/preempt_none.so
%{_libdir}/slurm/preempt_partition_prio.so %{_libdir}/slurm/preempt_partition_prio.so
%{_libdir}/slurm/preempt_qos.so %{_libdir}/slurm/preempt_qos.so
%{_libdir}/slurm/prep_script.so
%{_libdir}/slurm/priority_basic.so %{_libdir}/slurm/priority_basic.so
%{_libdir}/slurm/priority_multifactor.so %{_libdir}/slurm/priority_multifactor.so
%{_libdir}/slurm/proctrack_cgroup.so %{_libdir}/slurm/proctrack_cgroup.so
@ -1216,10 +1201,10 @@ exit 0
%{_libdir}/slurm/acct_gather_energy_cray_aries.so %{_libdir}/slurm/acct_gather_energy_cray_aries.so
%{_libdir}/slurm/core_spec_cray_aries.so %{_libdir}/slurm/core_spec_cray_aries.so
%{_libdir}/slurm/job_submit_cray_aries.so %{_libdir}/slurm/job_submit_cray_aries.so
%{_libdir}/slurm/mpi_cray_shasta.so
%{_libdir}/slurm/select_cray_aries.so %{_libdir}/slurm/select_cray_aries.so
%{_libdir}/slurm/switch_cray_aries.so %{_libdir}/slurm/switch_cray_aries.so
%{_libdir}/slurm/task_cray_aries.so %{_libdir}/slurm/task_cray_aries.so
%{_mandir}/man5/cray.*
%if 0%{?have_json_c} %if 0%{?have_json_c}
%{_libdir}/slurm/node_features_knl_cray.so %{_libdir}/slurm/node_features_knl_cray.so
%{_libdir}/slurm/power_cray_aries.so %{_libdir}/slurm/power_cray_aries.so

View File

@ -1,58 +0,0 @@
From: Egbert Eich <eich@suse.com>
Date: Tue Nov 20 09:22:15 2018 +0100
Subject: slurmctld: rerun agent_init() when backup controller takes over
Patch-mainline: Not yet
Git-commit: 21a7abc02e4a27cc64a213ba1fc8572a20e21ba9
References: bsc#1084917
A slurmctld backup controller often fails to clean up jobs which have
finished, the node appears in an 'IDLE+COMPLETING' state while squeue -l
still shows the job in a completing state.
This situation persists until the primary controller is restarted and
cleans up all tasks in 'COMPLETING' state.
This issue is caused by a race condition in the backup controller:
When the backup controller detects that the primary controller is
inaccessible, it will run thru a restart cycle. To trigger the shutdown
of some entities, it will set slurmctld_config.shutdown_time to a value
!= 0. Before continuing as the controller in charge, it resets this
variable to 0 again.
The agent which handles the request queue - from a separate thread -
wakes up periodically (in a 2 sec interval) and checks for things to do.
If it finds slurmctld_config.shutdown_time set to a value != 0, it will
terminate.
If this wakeup occurs in the 'takeover window' between the variable
being set to !=0 and reset to 0, the agent goes away and will no longer
be available to handle queued requests as there is nothing at the end
of the 'takeover window' that would restart it.
This fix adds a restart of the agent by calling agent_init() after
slurmctld_config.shutdown_time has been reset to 0.
Should an agent still be running (because it didn't wake up during the
'takeover window') it will be caught in agent_init().
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/backup.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
index de74513..2b4c74e 100644
--- a/src/slurmctld/backup.c
+++ b/src/slurmctld/backup.c
@@ -65,6 +65,7 @@
#include "src/slurmctld/read_config.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/trigger_mgr.h"
+#include "src/slurmctld/agent.h"
#define _DEBUG 0
#define SHUTDOWN_WAIT 2 /* Time to wait for primary server shutdown */
@@ -258,6 +259,9 @@ void run_backup(slurm_trigger_callbacks_t *callbacks)
error("Unable to recover slurm state");
abort();
}
+ /* Reinit agent in case it has been terminated - agent_init()
+ will check itself */
+ agent_init();
slurmctld_config.shutdown_time = (time_t) 0;
unlock_slurmctld(config_write_lock);
select_g_select_nodeinfo_set_all();

View File

@ -1,47 +0,0 @@
From: Egbert Eich <eich@suse.com>
Date: Tue Nov 20 09:47:47 2018 +0100
Subject: slurmctld uses xdaemon_* for systemd
Patch-mainline: Not yet
Git-commit: 0f0c00a4a57d12be04d16f4646c186d3e5f03dd1
References: bsc#1084125
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm-18.08.3/src/slurmctld/controller.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/slurm-18.08.3/src/slurmctld/controller.c b/slurm-18.08.3/src/slurmctld/controller.c
index a1762de..d123db3 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -260,7 +260,7 @@ static void * _wait_primary_prog(void *arg);
/* main - slurmctld main function, start various threads and process RPCs */
int main(int argc, char **argv)
{
- int cnt, error_code, i;
+ int cnt, error_code, i, fd;
struct timeval start, now;
struct stat stat_buf;
struct rlimit rlim;
@@ -326,7 +326,11 @@ int main(int argc, char **argv)
if (daemonize) {
slurmctld_config.daemonize = 1;
- if (xdaemon())
+ /*
+ * Just start daemonizing if not in test mode
+ */
+ fd = xdaemon_init();
+ if (fd == -1)
error("daemon(): %m");
log_set_timefmt(slurmctld_conf.log_fmt);
log_alter(log_opts, LOG_DAEMON,
@@ -348,6 +352,9 @@ int main(int argc, char **argv)
_init_pidfile();
_become_slurm_user();
}
+ if (daemonize) {
+ xdaemon_finish(fd);
+ }
/*
* Create StateSaveLocation directory if necessary.

View File

@ -1,44 +0,0 @@
From: Egbert Eich <eich@suse.com>
Date: Tue Nov 20 09:52:22 2018 +0100
Subject: slurmd uses xdaemon_* for systemd
Patch-mainline: Not yet
Git-commit: 3988e62eb8c20a29a7a016f264c6d65e114cfdf4
References: bsc#1084125
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm-18.08.3/src/slurmd/slurmd/slurmd.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/slurm-18.08.3/src/slurmd/slurmd/slurmd.c b/slurm-18.08.3/src/slurmd/slurmd/slurmd.c
index aa35f8a..b2feaf9 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -215,7 +215,7 @@ static void _wait_for_all_threads(int secs);
int
main (int argc, char **argv)
{
- int i, pidfd;
+ int i, pidfd, pipefd;
int blocked_signals[] = {SIGPIPE, 0};
int cc;
char *oom_value;
@@ -300,7 +300,8 @@ main (int argc, char **argv)
* Become a daemon if desired.
*/
if (conf->daemonize) {
- if (xdaemon())
+ pipefd = xdaemon_init();
+ if (pipefd == -1)
error("Couldn't daemonize slurmd: %m");
}
test_core_limit();
@@ -356,6 +357,9 @@ main (int argc, char **argv)
conf->pid = getpid();
pidfd = create_pidfile(conf->pidfile, 0);
+ if (conf->daemonize) {
+ xdaemon_finish(pipefd);
+ }
rfc2822_timestamp(time_stamp, sizeof(time_stamp));
info("%s started on %s", slurm_prog_name, time_stamp);

View File

@ -1,72 +0,0 @@
From: Egbert Eich <eich@suse.com>
Date: Tue Nov 20 09:58:47 2018 +0100
Subject: slurmdbd uses xdaemon_* for systemd
Patch-mainline: Not yet
Git-commit: 8a286cbaf3fe7ebe009106675a4624a2272d616f
References: bsc#1084125
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm-18.08.3/src/slurmdbd/slurmdbd.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/slurm-18.08.3/src/slurmdbd/slurmdbd.c b/slurm-18.08.3/src/slurmdbd/slurmdbd.c
index 471c724..8c7ea94 100644
--- a/src/slurmdbd/slurmdbd.c
+++ b/src/slurmdbd/slurmdbd.c
@@ -103,7 +103,7 @@ static List lft_rgt_list = NULL;
static void _become_slurm_user(void);
static void _commit_handler_cancel(void);
static void *_commit_handler(void *no_data);
-static void _daemonize(void);
+static int _daemonize_start(void);
static void _default_sigaction(int sig);
static void _free_dbd_stats(void);
static void _init_config(void);
@@ -127,6 +127,7 @@ int main(int argc, char **argv)
{
char node_name_short[128];
char node_name_long[128];
+ int pipefd;
void *db_conn = NULL;
assoc_init_args_t assoc_init_arg;
@@ -139,8 +140,9 @@ int main(int argc, char **argv)
_update_nice();
_kill_old_slurmdbd();
- if (foreground == 0)
- _daemonize();
+ if (foreground == 0) {
+ pipefd = _daemonize_start();
+ }
/*
* Need to create pidfile here in case we setuid() below
@@ -149,7 +151,9 @@ int main(int argc, char **argv)
* able to write a core dump.
*/
_init_pidfile();
-
+ if (foreground == 0) {
+ xdaemon_finish(pipefd);
+ }
/*
* Do plugin init's after _init_pidfile so systemd is happy as
* slurm_acct_storage_init() could take a long time to finish if running
@@ -598,11 +602,14 @@ static void _init_pidfile(void)
/* Become a daemon (child of init) and
* "cd" to the LogFile directory (if one is configured) */
-static void _daemonize(void)
+static int _daemonize_start(void)
{
- if (xdaemon())
+ int retval;
+ retval = xdaemon_init();
+ if (retval == -1)
error("daemon(): %m");
log_alter(log_opts, LOG_DAEMON, slurmdbd_conf->log_file);
+ return retval;
}
static void _set_work_dir(void)

View File

@ -1,40 +0,0 @@
From: Egbert Eich <eich@suse.com>
Date: Tue Nov 20 10:07:35 2018 +0100
Subject: slurmsmwd uses xdaemon_* for systemd
Patch-mainline: Not yet
Git-commit: 110d76a0c56b35c8c3c9b24e136476a67a6eb413
References: bsc#1084125
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm-18.08.3/contribs/cray/slurmsmwd/main.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/slurm-18.08.3/contribs/cray/slurmsmwd/main.c b/slurm-18.08.3/contribs/cray/slurmsmwd/main.c
index a5247bf..1efb1f8 100644
--- a/contribs/cray/slurmsmwd/main.c
+++ b/contribs/cray/slurmsmwd/main.c
@@ -538,6 +538,7 @@ int main(int argc, char **argv)
{
pthread_t processing_thread, signal_handler_thread;
pthread_attr_t thread_attr;
+ int pipefd;
_parse_commandline(argc, argv);
@@ -546,11 +547,15 @@ int main(int argc, char **argv)
slurmsmwd_print_config();
if (!foreground) {
- if (xdaemon())
+ pipefd = xdaemon_init();
+ if (pipefd == -1)
error("daemon(): %m");
}
if (create_pidfile("/var/run/slurmsmwd.pid", 0) < 0)
fatal("Unable to create pidfile /var/run/slurmswmd.pid");
+ if (!foreground) {
+ xdaemon_finish(pipefd);
+ }
slurm_mutex_init(&down_node_lock);

View File

@ -1,148 +0,0 @@
From 1f12c590038c7f738ff19159629fdc38de5cba82 Mon Sep 17 00:00:00 2001
From: Christian Goll <cgoll@suse.de>
Date: Mon, 9 Apr 2018 10:05:50 +0200
Subject: [PATCH 1/6] split xdaemon in xdaemon_init and xdaemon_finish for
systemd compatibilty
---
src/common/daemonize.c | 73 ++++++++++++++++++++++++++++++++++++++++++++------
src/common/daemonize.h | 10 +++++--
2 files changed, 73 insertions(+), 10 deletions(-)
diff --git a/src/common/daemonize.c b/src/common/daemonize.c
index e22a1d0a7f..2987a40af0 100644
--- a/src/common/daemonize.c
+++ b/src/common/daemonize.c
@@ -53,31 +53,75 @@
#include "src/common/xassert.h"
/*
- * Double-fork and go into background.
+ * Start daemonization with double-fork and go into background.
* Caller is responsible for umasks
*/
-int xdaemon(void)
+int xdaemon_init(void)
{
- int devnull;
-
+ int fds [2];
+ int n;
+ signed char priority;
+ char ebuf [1024];
+ /*
+ * Create pipe in order to get signal from grand child to terminate
+ */
+ if (pipe (fds) < 0) {
+ error("Failed to create daemon pipe");
+ }
switch (fork()) {
case 0 : break; /* child */
case -1 : return -1;
- default : _exit(0); /* exit parent */
+ default : {
+ if (close (fds[1]) < 0) {
+ error("Failed to close write-pipe in parent process");
+ }
+
+ /*
+ * get signal of grandchild to exit
+ */
+ if ((n = read (fds[0], &priority, sizeof (priority))) < 0) {
+ error("Failed to read status from grandchild process");
+ }
+ if ((n > 0) && (priority >= 0)) {
+ if ((n = read (fds[0], ebuf, sizeof (ebuf))) < 0) {
+ error("Failed to read err msg from grandchild process");
+ }
+ if ((n > 0) && (ebuf[0] != '\0')) {
+ error("Error with forking and steeing up pipe: %s", ebuf);
+ }
+ return -1;
+ }
+ _exit(0);
+ }
}
if (setsid() < 0)
return -1;
-
+ if (close (fds[0]) < 0) {
+ error("Failed to close read-pipe in child process");
+ }
switch (fork()) {
case 0 : break; /* child */
case -1: return -1;
default: _exit(0); /* exit parent */
}
+ return (fds[1]);
+}
+/*
+ * finish daemonization after pidfile was written
+ */
+
+
+void xdaemon_finish(int fd)
+{
/*
- * dup stdin, stdout, and stderr onto /dev/null
+ * PID file was written, now do dup stdin, stdout,
+ * and stderr onto /dev/null and close pipe
+ * so that systemd realizes we are daemonized
*/
+ int devnull;
+
devnull = open("/dev/null", O_RDWR);
if (devnull < 0)
error("Unable to open /dev/null: %m");
@@ -89,8 +133,21 @@ int xdaemon(void)
error("Unable to dup /dev/null onto stderr: %m");
if (close(devnull) < 0)
error("Unable to close /dev/null: %m");
+ if ((fd >= 0) && (close (fd) < 0)) {
+ error( "Failed to close write-pipe in grandchild process");
+ }
+}
+
+/*
+ * keep depercated api
+ */
- return 0;
+int xdaemon(void)
+{
+ int ret_val;
+ ret_val= xdaemon_init();
+ xdaemon_finish(ret_val);
+ return ret_val;
}
/*
diff --git a/src/common/daemonize.h b/src/common/daemonize.h
index 22a31f6ccf..8b2a866b61 100644
--- a/src/common/daemonize.h
+++ b/src/common/daemonize.h
@@ -41,11 +41,17 @@
#define _HAVE_DAEMONIZE_H
/*
- * Fork process into background and inherit new session.
+ * Start fork process into background and inherit new session.
*
- * Returns -1 on error.
*/
extern int xdaemon(void);
+extern int xdaemon_init(void);
+
+/*
+ * Finish daemonization by ending grandparen
+ */
+
+extern void xdaemon_finish(int fd);
/* Write pid into file pidfile if uid is not 0 change the owner of the
* pidfile to that user.
--
2.13.7