Accepting request 773459 from home:mslacken:branches:network:cluster
- Updated to version 20.02.0-0pre1, highlights are Highlights: * Exclusive behavior of a node includes all GRES on a node as well as the cpus. * Use python3 instead of python for internal build/test scripts. The slurm.spec file has been updated to depend on python3 as well. * Added new NodeSet configuration option to help simplify partition configuration sections for heterogeneous / condo*style clusters. * Added slurm.conf option MaxDBDMsgs to control how many messages will be stored in the slurmctld before throwing them away when the slurmdbd is down. * The checkpoint plugin interface and all associated API calls have been removed. * slurm_init_job_desc_msg() initializes mail_type as uint16_t. This allows mail_type to be set to NONE with scontrol. * Add new slurm_spank_log() function to print messages back to the user from within a SPANK plugin without prepending "error: " from slurm_error(). * Enforce having partition name and nodelist=ALL when creating reservations with flags=PART_NODES. * SPANK - removed never-implemented slurm_spank_slurmd_init() interface. This hook has always been accessible through slurm_spank_init() in the S_CTX_SLURMD context instead. * sbcast - add new BcastAddr option to NodeName lines to allow sbcast traffic to flow over an alternate network path. * Added auth/jwt plugin, and 'scontrol token' subcommand. PMIx - improve * performance of proc map generation. Deprecate kill_invalid_depend in * SchedulerParameters and move it to a new option called DependencyParameters. * Enable job dependencies for any job on any cluster in the same federation. * Allow clusters to be added automatically to db at startup of ctld. Add * AccountingStorageExternalHost slurm.conf parameter. The OBS-URL: https://build.opensuse.org/request/show/773459 OBS-URL: https://build.opensuse.org/package/show/network:cluster/slurm?expand=0&rev=130
This commit is contained in:
parent
d94a66a178
commit
54640668e5
@ -1,47 +0,0 @@
|
|||||||
From: Egbert Eich <eich@suse.com>
|
|
||||||
Date: Tue Nov 20 11:54:02 2018 +0100
|
|
||||||
Subject: removed deprecated xdaemon
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-commit: b39551df0f202203c16d4e9a9a7b640691acf882
|
|
||||||
References: bsc#1084125
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
|
||||||
---
|
|
||||||
slurm-18.08.3/src/common/daemonize.c | 12 ------------
|
|
||||||
slurm-18.08.3/src/common/daemonize.h | 1 -
|
|
||||||
2 files changed, 13 deletions(-)
|
|
||||||
diff --git a/slurm-18.08.3/src/common/daemonize.c b/slurm-18.08.3/src/common/daemonize.c
|
|
||||||
index fee9d60..bec8202 100644
|
|
||||||
--- a/src/common/daemonize.c
|
|
||||||
+++ b/src/common/daemonize.c
|
|
||||||
@@ -138,18 +138,6 @@ void xdaemon_finish(int fd)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
-/*
|
|
||||||
- * keep depercated api
|
|
||||||
- */
|
|
||||||
-
|
|
||||||
-int xdaemon(void)
|
|
||||||
-{
|
|
||||||
- int ret_val;
|
|
||||||
- ret_val= xdaemon_init();
|
|
||||||
- xdaemon_finish(ret_val);
|
|
||||||
- return ret_val;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
/*
|
|
||||||
* Read and return pid stored in pidfile.
|
|
||||||
* Returns 0 if file doesn't exist or pid cannot be read.
|
|
||||||
diff --git a/slurm-18.08.3/src/common/daemonize.h b/slurm-18.08.3/src/common/daemonize.h
|
|
||||||
index 8b60b4f..b7cb625 100644
|
|
||||||
--- a/src/common/daemonize.h
|
|
||||||
+++ b/src/common/daemonize.h
|
|
||||||
@@ -44,7 +44,6 @@
|
|
||||||
* Start fork process into background and inherit new session.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
-extern int xdaemon(void);
|
|
||||||
extern int xdaemon_init(void);
|
|
||||||
|
|
||||||
/*
|
|
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:715be211b7bef80d06df0011ec91c51ab740031bd9ff722a5e60c595feaad282
|
|
||||||
size 6232295
|
|
3
slurm-20.02.0-0pre1.tar.bz2
Normal file
3
slurm-20.02.0-0pre1.tar.bz2
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9cd1e835df91b44b8bae27406e49ef79408e2a7a3e04e2c44e9c3ee816f1c338
|
||||||
|
size 6231642
|
@ -1,3 +1,69 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Feb 11 10:09:43 UTC 2020 - Christian Goll <cgoll@suse.com>
|
||||||
|
|
||||||
|
- Updated to version 20.02.0-0pre1.
|
||||||
|
Highlights:
|
||||||
|
* Exclusive behavior of a node includes all GRES on a node as well
|
||||||
|
as the cpus.
|
||||||
|
* Use python3 instead of python for internal build/test scripts.
|
||||||
|
The slurm.spec file has been updated to depend on python3 as well.
|
||||||
|
* Added new NodeSet configuration option to help simplify partition
|
||||||
|
configuration sections for heterogeneous / condo-style clusters.
|
||||||
|
* Added slurm.conf option MaxDBDMsgs to control how many messages will be
|
||||||
|
stored in the slurmctld before throwing them away when the slurmdbd is down.
|
||||||
|
* The checkpoint plugin interface and all associated API calls have been
|
||||||
|
removed.
|
||||||
|
* slurm_init_job_desc_msg() initializes mail_type as uint16_t. This allows
|
||||||
|
mail_type to be set to NONE with scontrol.
|
||||||
|
* Add new slurm_spank_log() function to print messages back to the user from
|
||||||
|
within a SPANK plugin without prepending "error: " from slurm_error().
|
||||||
|
* Enforce having partition name and nodelist=ALL when creating reservations
|
||||||
|
with flags=PART_NODES.
|
||||||
|
* SPANK - removed never-implemented slurm_spank_slurmd_init() interface. This
|
||||||
|
hook has always been accessible through slurm_spank_init() in the
|
||||||
|
S_CTX_SLURMD context instead.
|
||||||
|
* sbcast - add new BcastAddr option to NodeName lines to allow sbcast traffic
|
||||||
|
to flow over an alternate network path.
|
||||||
|
* Added auth/jwt plugin, and 'scontrol token' subcommand.
|
||||||
|
* PMIx - improve performance of proc map generation.
|
||||||
|
* Deprecate kill_invalid_depend in SchedulerParameters and move it to a new
|
||||||
|
option called DependencyParameters.
|
||||||
|
* Enable job dependencies for any job on any cluster in the same federation.
|
||||||
|
* Allow clusters to be added automatically to db at startup of ctld.
|
||||||
|
* Add AccountingStorageExternalHost slurm.conf parameter.
|
||||||
|
* The "ConditionPathExists" condition in slurmd.service has been disabled by
|
||||||
|
default to permit simpler installation of a "configless" Slurm cluster.
|
||||||
|
* In SchedulerParameters remove deprecated max_job_bf and replace with
|
||||||
|
bf_max_job_test.
|
||||||
|
* Disable sbatch, salloc, srun --reboot for non-admins.
|
||||||
|
* SPANK - added support for S_JOB_GID in the job script context with
|
||||||
|
spank_get_item().
|
||||||
|
* Prolog/Epilog - add SLURM_JOB_GID environment variable.
|
||||||
|
configuration file changes:
|
||||||
|
* The mpi/openmpi plugin has been removed as it does nothing.
|
||||||
|
MpiDefault=openmpi will be translated to the functionally-equivalent
|
||||||
|
MpiDefault=none.
|
||||||
|
command changes (see man pages for details)
|
||||||
|
* Display StepId=<jobid>.batch instead of StepId=<jobid>.4294967294 in output
|
||||||
|
of "scontrol show step". (slurm_sprint_job_step_info())
|
||||||
|
* MPMD in srun will now defer PATH resolution for the commands to launch to
|
||||||
|
slurmstepd. Previously it would handle resolution client-side, but with
|
||||||
|
a non-standard approach that walked PATH in reverse.
|
||||||
|
* squeue - added "--me" option, equivalent to --user=$USER.
|
||||||
|
* The LicensesUsed line has been removed from 'scontrol show config'.
|
||||||
|
Please see the 'scontrol show licenses' command as an alternative.
|
||||||
|
* sbatch - adjusted backoff times for "--wait" option to reduce load on
|
||||||
|
slurmctld. This results in a steady-state delay of 32s between queries,
|
||||||
|
instead of the prior 10s delay.
|
||||||
|
- Removed following deprecated patches:
|
||||||
|
* removed patch slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
|
||||||
|
* removed patch split-xdaemon-in-xdaemon_init-and-xdaemon_finish-for.patch
|
||||||
|
* removed patch slurmctld-uses-xdaemon_-for-systemd.patch
|
||||||
|
* removed patch slurmd-uses-xdaemon_-for-systemd.patch
|
||||||
|
* removed patch slurmdbd-uses-xdaemon_-for-systemd.patch
|
||||||
|
* removed patch slurmsmwd-uses-xdaemon_-for-systemd.patch
|
||||||
|
* removed patch removed-deprecated-xdaemon.patch
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Wed Feb 5 15:37:05 UTC 2020 - Christian Goll <cgoll@suse.com>
|
Wed Feb 5 15:37:05 UTC 2020 - Christian Goll <cgoll@suse.com>
|
||||||
|
|
||||||
|
35
slurm.spec
35
slurm.spec
@ -17,10 +17,10 @@
|
|||||||
|
|
||||||
|
|
||||||
# Check file META in sources: update so_version to (API_CURRENT - API_AGE)
|
# Check file META in sources: update so_version to (API_CURRENT - API_AGE)
|
||||||
%define so_version 34
|
%define so_version 35
|
||||||
%define ver 19.05.5
|
%define ver 20.02.0
|
||||||
%define _ver _19_05
|
%define _ver _20_02
|
||||||
%define dl_ver %{ver}
|
%define dl_ver %{ver}-0pre1
|
||||||
# so-version is 0 and seems to be stable
|
# so-version is 0 and seems to be stable
|
||||||
%define pmi_so 0
|
%define pmi_so 0
|
||||||
%define nss_so 2
|
%define nss_so 2
|
||||||
@ -117,13 +117,6 @@ Source1: slurm-rpmlintrc
|
|||||||
Patch0: Remove-rpath-from-build.patch
|
Patch0: Remove-rpath-from-build.patch
|
||||||
Patch1: slurm-2.4.4-init.patch
|
Patch1: slurm-2.4.4-init.patch
|
||||||
Patch2: pam_slurm-Initialize-arrays-and-pass-sizes.patch
|
Patch2: pam_slurm-Initialize-arrays-and-pass-sizes.patch
|
||||||
Patch3: split-xdaemon-in-xdaemon_init-and-xdaemon_finish-for.patch
|
|
||||||
Patch4: slurmctld-uses-xdaemon_-for-systemd.patch
|
|
||||||
Patch5: slurmd-uses-xdaemon_-for-systemd.patch
|
|
||||||
Patch6: slurmdbd-uses-xdaemon_-for-systemd.patch
|
|
||||||
Patch7: slurmsmwd-uses-xdaemon_-for-systemd.patch
|
|
||||||
Patch8: removed-deprecated-xdaemon.patch
|
|
||||||
Patch9: slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
|
|
||||||
|
|
||||||
%{?upgrade:Provides: %{pname} = %{version}}
|
%{?upgrade:Provides: %{pname} = %{version}}
|
||||||
%{?upgrade:Conflicts: %{pname}}
|
%{?upgrade:Conflicts: %{pname}}
|
||||||
@ -508,14 +501,6 @@ Contains also cray specific documentation.
|
|||||||
%patch0 -p2
|
%patch0 -p2
|
||||||
%patch1 -p1
|
%patch1 -p1
|
||||||
%patch2 -p1
|
%patch2 -p1
|
||||||
%patch3 -p1
|
|
||||||
%patch4 -p1
|
|
||||||
%patch5 -p1
|
|
||||||
%patch6 -p1
|
|
||||||
%patch7 -p1
|
|
||||||
%patch8 -p1
|
|
||||||
# Drop this fix as it is considered to be resolved by c1a537dbbe6
|
|
||||||
##%patch9 -p1
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%define _lto_cflags %{nil}
|
%define _lto_cflags %{nil}
|
||||||
@ -892,7 +877,6 @@ exit 0
|
|||||||
%{_bindir}/sprio
|
%{_bindir}/sprio
|
||||||
%{_bindir}/squeue
|
%{_bindir}/squeue
|
||||||
%{_bindir}/sreport
|
%{_bindir}/sreport
|
||||||
%{_bindir}/smap
|
|
||||||
%{_bindir}/sshare
|
%{_bindir}/sshare
|
||||||
%{_bindir}/sstat
|
%{_bindir}/sstat
|
||||||
%{_bindir}/strigger
|
%{_bindir}/strigger
|
||||||
@ -916,7 +900,6 @@ exit 0
|
|||||||
%{_mandir}/man1/sgather.1.*
|
%{_mandir}/man1/sgather.1.*
|
||||||
%{_mandir}/man1/sinfo.1*
|
%{_mandir}/man1/sinfo.1*
|
||||||
%{_mandir}/man1/slurm.1*
|
%{_mandir}/man1/slurm.1*
|
||||||
%{_mandir}/man1/smap.1*
|
|
||||||
%{_mandir}/man1/sprio.1*
|
%{_mandir}/man1/sprio.1*
|
||||||
%{_mandir}/man1/squeue.1*
|
%{_mandir}/man1/squeue.1*
|
||||||
%{_mandir}/man1/sreport.1*
|
%{_mandir}/man1/sreport.1*
|
||||||
@ -1032,10 +1015,11 @@ exit 0
|
|||||||
%{_libdir}/slurm/acct_gather_profile_none.so
|
%{_libdir}/slurm/acct_gather_profile_none.so
|
||||||
%{?have_json_c:%{_libdir}/slurm/burst_buffer_datawarp.so}
|
%{?have_json_c:%{_libdir}/slurm/burst_buffer_datawarp.so}
|
||||||
%{_libdir}/slurm/burst_buffer_generic.so
|
%{_libdir}/slurm/burst_buffer_generic.so
|
||||||
%{_libdir}/slurm/checkpoint_none.so
|
|
||||||
%{_libdir}/slurm/checkpoint_ompi.so
|
|
||||||
%{_libdir}/slurm/core_spec_none.so
|
%{_libdir}/slurm/core_spec_none.so
|
||||||
%{_libdir}/slurm/cli_filter_none.so
|
%{_libdir}/slurm/cli_filter_none.so
|
||||||
|
%{_libdir}/slurm/cli_filter_lua.so
|
||||||
|
%{_libdir}/slurm/cli_filter_syslog.so
|
||||||
|
%{_libdir}/slurm/cli_filter_user_defaults.so
|
||||||
%{_libdir}/slurm/cred_none.so
|
%{_libdir}/slurm/cred_none.so
|
||||||
%{_libdir}/slurm/ext_sensors_none.so
|
%{_libdir}/slurm/ext_sensors_none.so
|
||||||
%{_libdir}/slurm/gpu_generic.so
|
%{_libdir}/slurm/gpu_generic.so
|
||||||
@ -1048,6 +1032,7 @@ exit 0
|
|||||||
%{_libdir}/slurm/jobacct_gather_none.so
|
%{_libdir}/slurm/jobacct_gather_none.so
|
||||||
%{_libdir}/slurm/jobcomp_filetxt.so
|
%{_libdir}/slurm/jobcomp_filetxt.so
|
||||||
%{_libdir}/slurm/jobcomp_none.so
|
%{_libdir}/slurm/jobcomp_none.so
|
||||||
|
%{_libdir}/slurm/jobcomp_lua.so
|
||||||
%{_libdir}/slurm/jobcomp_script.so
|
%{_libdir}/slurm/jobcomp_script.so
|
||||||
%{_libdir}/slurm/job_container_cncu.so
|
%{_libdir}/slurm/job_container_cncu.so
|
||||||
%{_libdir}/slurm/job_container_none.so
|
%{_libdir}/slurm/job_container_none.so
|
||||||
@ -1066,7 +1051,6 @@ exit 0
|
|||||||
%{_libdir}/slurm/mcs_none.so
|
%{_libdir}/slurm/mcs_none.so
|
||||||
%{_libdir}/slurm/mcs_user.so
|
%{_libdir}/slurm/mcs_user.so
|
||||||
%{_libdir}/slurm/mpi_none.so
|
%{_libdir}/slurm/mpi_none.so
|
||||||
%{_libdir}/slurm/mpi_openmpi.so
|
|
||||||
%{_libdir}/slurm/mpi_pmi2.so
|
%{_libdir}/slurm/mpi_pmi2.so
|
||||||
%if %{with pmix}
|
%if %{with pmix}
|
||||||
%{_libdir}/slurm/mpi_pmix.so
|
%{_libdir}/slurm/mpi_pmix.so
|
||||||
@ -1076,6 +1060,7 @@ exit 0
|
|||||||
%{_libdir}/slurm/preempt_none.so
|
%{_libdir}/slurm/preempt_none.so
|
||||||
%{_libdir}/slurm/preempt_partition_prio.so
|
%{_libdir}/slurm/preempt_partition_prio.so
|
||||||
%{_libdir}/slurm/preempt_qos.so
|
%{_libdir}/slurm/preempt_qos.so
|
||||||
|
%{_libdir}/slurm/prep_script.so
|
||||||
%{_libdir}/slurm/priority_basic.so
|
%{_libdir}/slurm/priority_basic.so
|
||||||
%{_libdir}/slurm/priority_multifactor.so
|
%{_libdir}/slurm/priority_multifactor.so
|
||||||
%{_libdir}/slurm/proctrack_cgroup.so
|
%{_libdir}/slurm/proctrack_cgroup.so
|
||||||
@ -1216,10 +1201,10 @@ exit 0
|
|||||||
%{_libdir}/slurm/acct_gather_energy_cray_aries.so
|
%{_libdir}/slurm/acct_gather_energy_cray_aries.so
|
||||||
%{_libdir}/slurm/core_spec_cray_aries.so
|
%{_libdir}/slurm/core_spec_cray_aries.so
|
||||||
%{_libdir}/slurm/job_submit_cray_aries.so
|
%{_libdir}/slurm/job_submit_cray_aries.so
|
||||||
|
%{_libdir}/slurm/mpi_cray_shasta.so
|
||||||
%{_libdir}/slurm/select_cray_aries.so
|
%{_libdir}/slurm/select_cray_aries.so
|
||||||
%{_libdir}/slurm/switch_cray_aries.so
|
%{_libdir}/slurm/switch_cray_aries.so
|
||||||
%{_libdir}/slurm/task_cray_aries.so
|
%{_libdir}/slurm/task_cray_aries.so
|
||||||
%{_mandir}/man5/cray.*
|
|
||||||
%if 0%{?have_json_c}
|
%if 0%{?have_json_c}
|
||||||
%{_libdir}/slurm/node_features_knl_cray.so
|
%{_libdir}/slurm/node_features_knl_cray.so
|
||||||
%{_libdir}/slurm/power_cray_aries.so
|
%{_libdir}/slurm/power_cray_aries.so
|
||||||
|
@ -1,58 +0,0 @@
|
|||||||
From: Egbert Eich <eich@suse.com>
|
|
||||||
Date: Tue Nov 20 09:22:15 2018 +0100
|
|
||||||
Subject: slurmctld: rerun agent_init() when backup controller takes over
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-commit: 21a7abc02e4a27cc64a213ba1fc8572a20e21ba9
|
|
||||||
References: bsc#1084917
|
|
||||||
|
|
||||||
A slurmctld backup controller often fails to clean up jobs which have
|
|
||||||
finished, the node appears in an 'IDLE+COMPLETING' state while squeue -l
|
|
||||||
still shows the job in a completing state.
|
|
||||||
This situation persists until the primary controller is restarted and
|
|
||||||
cleans up all tasks in 'COMPLETING' state.
|
|
||||||
This issue is caused by a race condition in the backup controller:
|
|
||||||
When the backup controller detects that the primary controller is
|
|
||||||
inaccessible, it will run thru a restart cycle. To trigger the shutdown
|
|
||||||
of some entities, it will set slurmctld_config.shutdown_time to a value
|
|
||||||
!= 0. Before continuing as the controller in charge, it resets this
|
|
||||||
variable to 0 again.
|
|
||||||
The agent which handles the request queue - from a separate thread -
|
|
||||||
wakes up periodically (in a 2 sec interval) and checks for things to do.
|
|
||||||
If it finds slurmctld_config.shutdown_time set to a value != 0, it will
|
|
||||||
terminate.
|
|
||||||
If this wakeup occurs in the 'takeover window' between the variable
|
|
||||||
being set to !=0 and reset to 0, the agent goes away and will no longer
|
|
||||||
be available to handle queued requests as there is nothing at the end
|
|
||||||
of the 'takeover window' that would restart it.
|
|
||||||
|
|
||||||
This fix adds a restart of the agent by calling agent_init() after
|
|
||||||
slurmctld_config.shutdown_time has been reset to 0.
|
|
||||||
Should an agent still be running (because it didn't wake up during the
|
|
||||||
'takeover window') it will be caught in agent_init().
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
|
||||||
---
|
|
||||||
src/slurmctld/backup.c | 4 ++++
|
|
||||||
1 file changed, 4 insertions(+)
|
|
||||||
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
|
|
||||||
index de74513..2b4c74e 100644
|
|
||||||
--- a/src/slurmctld/backup.c
|
|
||||||
+++ b/src/slurmctld/backup.c
|
|
||||||
@@ -65,6 +65,7 @@
|
|
||||||
#include "src/slurmctld/read_config.h"
|
|
||||||
#include "src/slurmctld/slurmctld.h"
|
|
||||||
#include "src/slurmctld/trigger_mgr.h"
|
|
||||||
+#include "src/slurmctld/agent.h"
|
|
||||||
|
|
||||||
#define _DEBUG 0
|
|
||||||
#define SHUTDOWN_WAIT 2 /* Time to wait for primary server shutdown */
|
|
||||||
@@ -258,6 +259,9 @@ void run_backup(slurm_trigger_callbacks_t *callbacks)
|
|
||||||
error("Unable to recover slurm state");
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
+ /* Reinit agent in case it has been terminated - agent_init()
|
|
||||||
+ will check itself */
|
|
||||||
+ agent_init();
|
|
||||||
slurmctld_config.shutdown_time = (time_t) 0;
|
|
||||||
unlock_slurmctld(config_write_lock);
|
|
||||||
select_g_select_nodeinfo_set_all();
|
|
@ -1,47 +0,0 @@
|
|||||||
From: Egbert Eich <eich@suse.com>
|
|
||||||
Date: Tue Nov 20 09:47:47 2018 +0100
|
|
||||||
Subject: slurmctld uses xdaemon_* for systemd
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-commit: 0f0c00a4a57d12be04d16f4646c186d3e5f03dd1
|
|
||||||
References: bsc#1084125
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
|
||||||
---
|
|
||||||
slurm-18.08.3/src/slurmctld/controller.c | 11 +++++++++--
|
|
||||||
1 file changed, 9 insertions(+), 2 deletions(-)
|
|
||||||
diff --git a/slurm-18.08.3/src/slurmctld/controller.c b/slurm-18.08.3/src/slurmctld/controller.c
|
|
||||||
index a1762de..d123db3 100644
|
|
||||||
--- a/src/slurmctld/controller.c
|
|
||||||
+++ b/src/slurmctld/controller.c
|
|
||||||
@@ -260,7 +260,7 @@ static void * _wait_primary_prog(void *arg);
|
|
||||||
/* main - slurmctld main function, start various threads and process RPCs */
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
- int cnt, error_code, i;
|
|
||||||
+ int cnt, error_code, i, fd;
|
|
||||||
struct timeval start, now;
|
|
||||||
struct stat stat_buf;
|
|
||||||
struct rlimit rlim;
|
|
||||||
@@ -326,7 +326,11 @@ int main(int argc, char **argv)
|
|
||||||
|
|
||||||
if (daemonize) {
|
|
||||||
slurmctld_config.daemonize = 1;
|
|
||||||
- if (xdaemon())
|
|
||||||
+ /*
|
|
||||||
+ * Just start daemonizing if not in test mode
|
|
||||||
+ */
|
|
||||||
+ fd = xdaemon_init();
|
|
||||||
+ if (fd == -1)
|
|
||||||
error("daemon(): %m");
|
|
||||||
log_set_timefmt(slurmctld_conf.log_fmt);
|
|
||||||
log_alter(log_opts, LOG_DAEMON,
|
|
||||||
@@ -348,6 +352,9 @@ int main(int argc, char **argv)
|
|
||||||
_init_pidfile();
|
|
||||||
_become_slurm_user();
|
|
||||||
}
|
|
||||||
+ if (daemonize) {
|
|
||||||
+ xdaemon_finish(fd);
|
|
||||||
+ }
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create StateSaveLocation directory if necessary.
|
|
@ -1,44 +0,0 @@
|
|||||||
From: Egbert Eich <eich@suse.com>
|
|
||||||
Date: Tue Nov 20 09:52:22 2018 +0100
|
|
||||||
Subject: slurmd uses xdaemon_* for systemd
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-commit: 3988e62eb8c20a29a7a016f264c6d65e114cfdf4
|
|
||||||
References: bsc#1084125
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
|
||||||
---
|
|
||||||
slurm-18.08.3/src/slurmd/slurmd/slurmd.c | 8 ++++++--
|
|
||||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
|
||||||
diff --git a/slurm-18.08.3/src/slurmd/slurmd/slurmd.c b/slurm-18.08.3/src/slurmd/slurmd/slurmd.c
|
|
||||||
index aa35f8a..b2feaf9 100644
|
|
||||||
--- a/src/slurmd/slurmd/slurmd.c
|
|
||||||
+++ b/src/slurmd/slurmd/slurmd.c
|
|
||||||
@@ -215,7 +215,7 @@ static void _wait_for_all_threads(int secs);
|
|
||||||
int
|
|
||||||
main (int argc, char **argv)
|
|
||||||
{
|
|
||||||
- int i, pidfd;
|
|
||||||
+ int i, pidfd, pipefd;
|
|
||||||
int blocked_signals[] = {SIGPIPE, 0};
|
|
||||||
int cc;
|
|
||||||
char *oom_value;
|
|
||||||
@@ -300,7 +300,8 @@ main (int argc, char **argv)
|
|
||||||
* Become a daemon if desired.
|
|
||||||
*/
|
|
||||||
if (conf->daemonize) {
|
|
||||||
- if (xdaemon())
|
|
||||||
+ pipefd = xdaemon_init();
|
|
||||||
+ if (pipefd == -1)
|
|
||||||
error("Couldn't daemonize slurmd: %m");
|
|
||||||
}
|
|
||||||
test_core_limit();
|
|
||||||
@@ -356,6 +357,9 @@ main (int argc, char **argv)
|
|
||||||
|
|
||||||
conf->pid = getpid();
|
|
||||||
pidfd = create_pidfile(conf->pidfile, 0);
|
|
||||||
+ if (conf->daemonize) {
|
|
||||||
+ xdaemon_finish(pipefd);
|
|
||||||
+ }
|
|
||||||
|
|
||||||
rfc2822_timestamp(time_stamp, sizeof(time_stamp));
|
|
||||||
info("%s started on %s", slurm_prog_name, time_stamp);
|
|
@ -1,72 +0,0 @@
|
|||||||
From: Egbert Eich <eich@suse.com>
|
|
||||||
Date: Tue Nov 20 09:58:47 2018 +0100
|
|
||||||
Subject: slurmdbd uses xdaemon_* for systemd
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-commit: 8a286cbaf3fe7ebe009106675a4624a2272d616f
|
|
||||||
References: bsc#1084125
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
|
||||||
---
|
|
||||||
slurm-18.08.3/src/slurmdbd/slurmdbd.c | 19 +++++++++++++------
|
|
||||||
1 file changed, 13 insertions(+), 6 deletions(-)
|
|
||||||
diff --git a/slurm-18.08.3/src/slurmdbd/slurmdbd.c b/slurm-18.08.3/src/slurmdbd/slurmdbd.c
|
|
||||||
index 471c724..8c7ea94 100644
|
|
||||||
--- a/src/slurmdbd/slurmdbd.c
|
|
||||||
+++ b/src/slurmdbd/slurmdbd.c
|
|
||||||
@@ -103,7 +103,7 @@ static List lft_rgt_list = NULL;
|
|
||||||
static void _become_slurm_user(void);
|
|
||||||
static void _commit_handler_cancel(void);
|
|
||||||
static void *_commit_handler(void *no_data);
|
|
||||||
-static void _daemonize(void);
|
|
||||||
+static int _daemonize_start(void);
|
|
||||||
static void _default_sigaction(int sig);
|
|
||||||
static void _free_dbd_stats(void);
|
|
||||||
static void _init_config(void);
|
|
||||||
@@ -127,6 +127,7 @@ int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
char node_name_short[128];
|
|
||||||
char node_name_long[128];
|
|
||||||
+ int pipefd;
|
|
||||||
void *db_conn = NULL;
|
|
||||||
assoc_init_args_t assoc_init_arg;
|
|
||||||
|
|
||||||
@@ -139,8 +140,9 @@ int main(int argc, char **argv)
|
|
||||||
_update_nice();
|
|
||||||
|
|
||||||
_kill_old_slurmdbd();
|
|
||||||
- if (foreground == 0)
|
|
||||||
- _daemonize();
|
|
||||||
+ if (foreground == 0) {
|
|
||||||
+ pipefd = _daemonize_start();
|
|
||||||
+ }
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Need to create pidfile here in case we setuid() below
|
|
||||||
@@ -149,7 +151,9 @@ int main(int argc, char **argv)
|
|
||||||
* able to write a core dump.
|
|
||||||
*/
|
|
||||||
_init_pidfile();
|
|
||||||
-
|
|
||||||
+ if (foreground == 0) {
|
|
||||||
+ xdaemon_finish(pipefd);
|
|
||||||
+ }
|
|
||||||
/*
|
|
||||||
* Do plugin init's after _init_pidfile so systemd is happy as
|
|
||||||
* slurm_acct_storage_init() could take a long time to finish if running
|
|
||||||
@@ -598,11 +602,14 @@ static void _init_pidfile(void)
|
|
||||||
|
|
||||||
/* Become a daemon (child of init) and
|
|
||||||
* "cd" to the LogFile directory (if one is configured) */
|
|
||||||
-static void _daemonize(void)
|
|
||||||
+static int _daemonize_start(void)
|
|
||||||
{
|
|
||||||
- if (xdaemon())
|
|
||||||
+ int retval;
|
|
||||||
+ retval = xdaemon_init();
|
|
||||||
+ if (retval == -1)
|
|
||||||
error("daemon(): %m");
|
|
||||||
log_alter(log_opts, LOG_DAEMON, slurmdbd_conf->log_file);
|
|
||||||
+ return retval;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void _set_work_dir(void)
|
|
@ -1,40 +0,0 @@
|
|||||||
From: Egbert Eich <eich@suse.com>
|
|
||||||
Date: Tue Nov 20 10:07:35 2018 +0100
|
|
||||||
Subject: slurmsmwd uses xdaemon_* for systemd
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-commit: 110d76a0c56b35c8c3c9b24e136476a67a6eb413
|
|
||||||
References: bsc#1084125
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
|
||||||
---
|
|
||||||
slurm-18.08.3/contribs/cray/slurmsmwd/main.c | 7 ++++++-
|
|
||||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
|
||||||
diff --git a/slurm-18.08.3/contribs/cray/slurmsmwd/main.c b/slurm-18.08.3/contribs/cray/slurmsmwd/main.c
|
|
||||||
index a5247bf..1efb1f8 100644
|
|
||||||
--- a/contribs/cray/slurmsmwd/main.c
|
|
||||||
+++ b/contribs/cray/slurmsmwd/main.c
|
|
||||||
@@ -538,6 +538,7 @@ int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
pthread_t processing_thread, signal_handler_thread;
|
|
||||||
pthread_attr_t thread_attr;
|
|
||||||
+ int pipefd;
|
|
||||||
|
|
||||||
_parse_commandline(argc, argv);
|
|
||||||
|
|
||||||
@@ -546,11 +547,15 @@ int main(int argc, char **argv)
|
|
||||||
slurmsmwd_print_config();
|
|
||||||
|
|
||||||
if (!foreground) {
|
|
||||||
- if (xdaemon())
|
|
||||||
+ pipefd = xdaemon_init();
|
|
||||||
+ if (pipefd == -1)
|
|
||||||
error("daemon(): %m");
|
|
||||||
}
|
|
||||||
if (create_pidfile("/var/run/slurmsmwd.pid", 0) < 0)
|
|
||||||
fatal("Unable to create pidfile /var/run/slurmswmd.pid");
|
|
||||||
+ if (!foreground) {
|
|
||||||
+ xdaemon_finish(pipefd);
|
|
||||||
+ }
|
|
||||||
|
|
||||||
slurm_mutex_init(&down_node_lock);
|
|
||||||
|
|
@ -1,148 +0,0 @@
|
|||||||
From 1f12c590038c7f738ff19159629fdc38de5cba82 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Christian Goll <cgoll@suse.de>
|
|
||||||
Date: Mon, 9 Apr 2018 10:05:50 +0200
|
|
||||||
Subject: [PATCH 1/6] split xdaemon in xdaemon_init and xdaemon_finish for
|
|
||||||
systemd compatibility
|
|
||||||
|
|
||||||
---
|
|
||||||
src/common/daemonize.c | 73 ++++++++++++++++++++++++++++++++++++++++++++------
|
|
||||||
src/common/daemonize.h | 10 +++++--
|
|
||||||
2 files changed, 73 insertions(+), 10 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/src/common/daemonize.c b/src/common/daemonize.c
|
|
||||||
index e22a1d0a7f..2987a40af0 100644
|
|
||||||
--- a/src/common/daemonize.c
|
|
||||||
+++ b/src/common/daemonize.c
|
|
||||||
@@ -53,31 +53,75 @@
|
|
||||||
#include "src/common/xassert.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
- * Double-fork and go into background.
|
|
||||||
+ * Start daemonization with double-fork and go into background.
|
|
||||||
* Caller is responsible for umasks
|
|
||||||
*/
|
|
||||||
-int xdaemon(void)
|
|
||||||
+int xdaemon_init(void)
|
|
||||||
{
|
|
||||||
- int devnull;
|
|
||||||
-
|
|
||||||
+ int fds [2];
|
|
||||||
+ int n;
|
|
||||||
+ signed char priority;
|
|
||||||
+ char ebuf [1024];
|
|
||||||
+ /*
|
|
||||||
+ * Create pipe in order to get signal from grand child to terminate
|
|
||||||
+ */
|
|
||||||
+ if (pipe (fds) < 0) {
|
|
||||||
+ error("Failed to create daemon pipe");
|
|
||||||
+ }
|
|
||||||
switch (fork()) {
|
|
||||||
case 0 : break; /* child */
|
|
||||||
case -1 : return -1;
|
|
||||||
- default : _exit(0); /* exit parent */
|
|
||||||
+ default : {
|
|
||||||
+ if (close (fds[1]) < 0) {
|
|
||||||
+ error("Failed to close write-pipe in parent process");
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /*
|
|
||||||
+ * get signal of grandchild to exit
|
|
||||||
+ */
|
|
||||||
+ if ((n = read (fds[0], &priority, sizeof (priority))) < 0) {
|
|
||||||
+ error("Failed to read status from grandchild process");
|
|
||||||
+ }
|
|
||||||
+ if ((n > 0) && (priority >= 0)) {
|
|
||||||
+ if ((n = read (fds[0], ebuf, sizeof (ebuf))) < 0) {
|
|
||||||
+ error("Failed to read err msg from grandchild process");
|
|
||||||
+ }
|
|
||||||
+ if ((n > 0) && (ebuf[0] != '\0')) {
|
|
||||||
+ error("Error with forking and steeing up pipe: %s", ebuf);
|
|
||||||
+ }
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+ _exit(0);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
if (setsid() < 0)
|
|
||||||
return -1;
|
|
||||||
-
|
|
||||||
+ if (close (fds[0]) < 0) {
|
|
||||||
+ error("Failed to close read-pipe in child process");
|
|
||||||
+ }
|
|
||||||
switch (fork()) {
|
|
||||||
case 0 : break; /* child */
|
|
||||||
case -1: return -1;
|
|
||||||
default: _exit(0); /* exit parent */
|
|
||||||
}
|
|
||||||
+ return (fds[1]);
|
|
||||||
+}
|
|
||||||
|
|
||||||
+/*
|
|
||||||
+ * finish daemonization after pidfile was written
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+void xdaemon_finish(int fd)
|
|
||||||
+{
|
|
||||||
/*
|
|
||||||
- * dup stdin, stdout, and stderr onto /dev/null
|
|
||||||
+ * PID file was written, now do dup stdin, stdout,
|
|
||||||
+ * and stderr onto /dev/null and close pipe
|
|
||||||
+ * so that systemd realizes we are daemonized
|
|
||||||
*/
|
|
||||||
+ int devnull;
|
|
||||||
+
|
|
||||||
devnull = open("/dev/null", O_RDWR);
|
|
||||||
if (devnull < 0)
|
|
||||||
error("Unable to open /dev/null: %m");
|
|
||||||
@@ -89,8 +133,21 @@ int xdaemon(void)
|
|
||||||
error("Unable to dup /dev/null onto stderr: %m");
|
|
||||||
if (close(devnull) < 0)
|
|
||||||
error("Unable to close /dev/null: %m");
|
|
||||||
+ if ((fd >= 0) && (close (fd) < 0)) {
|
|
||||||
+ error( "Failed to close write-pipe in grandchild process");
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * keep depercated api
|
|
||||||
+ */
|
|
||||||
|
|
||||||
- return 0;
|
|
||||||
+int xdaemon(void)
|
|
||||||
+{
|
|
||||||
+ int ret_val;
|
|
||||||
+ ret_val= xdaemon_init();
|
|
||||||
+ xdaemon_finish(ret_val);
|
|
||||||
+ return ret_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
diff --git a/src/common/daemonize.h b/src/common/daemonize.h
|
|
||||||
index 22a31f6ccf..8b2a866b61 100644
|
|
||||||
--- a/src/common/daemonize.h
|
|
||||||
+++ b/src/common/daemonize.h
|
|
||||||
@@ -41,11 +41,17 @@
|
|
||||||
#define _HAVE_DAEMONIZE_H
|
|
||||||
|
|
||||||
/*
|
|
||||||
- * Fork process into background and inherit new session.
|
|
||||||
+ * Start fork process into background and inherit new session.
|
|
||||||
*
|
|
||||||
- * Returns -1 on error.
|
|
||||||
*/
|
|
||||||
extern int xdaemon(void);
|
|
||||||
+extern int xdaemon_init(void);
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * Finish daemonization by ending grandparent
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+extern void xdaemon_finish(int fd);
|
|
||||||
|
|
||||||
/* Write pid into file pidfile if uid is not 0 change the owner of the
|
|
||||||
* pidfile to that user.
|
|
||||||
--
|
|
||||||
2.13.7
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user