forked from pool/slurm
Accepting request 780146 from network:cluster
- Disable %arm builds as this is no longer supported. (forwarded request 780053 from kasimir) OBS-URL: https://build.opensuse.org/request/show/780146 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/slurm?expand=0&rev=40
This commit is contained in:
commit
146edf5651
@ -1,47 +0,0 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Tue Nov 20 11:54:02 2018 +0100
|
||||
Subject: removed deprecated xdaemon
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: b39551df0f202203c16d4e9a9a7b640691acf882
|
||||
References: bsc#1084125
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
---
|
||||
slurm-18.08.3/src/common/daemonize.c | 12 ------------
|
||||
slurm-18.08.3/src/common/daemonize.h | 1 -
|
||||
2 files changed, 13 deletions(-)
|
||||
diff --git a/slurm-18.08.3/src/common/daemonize.c b/slurm-18.08.3/src/common/daemonize.c
|
||||
index fee9d60..bec8202 100644
|
||||
--- a/src/common/daemonize.c
|
||||
+++ b/src/common/daemonize.c
|
||||
@@ -138,18 +138,6 @@ void xdaemon_finish(int fd)
|
||||
}
|
||||
}
|
||||
|
||||
-/*
|
||||
- * keep depercated api
|
||||
- */
|
||||
-
|
||||
-int xdaemon(void)
|
||||
-{
|
||||
- int ret_val;
|
||||
- ret_val= xdaemon_init();
|
||||
- xdaemon_finish(ret_val);
|
||||
- return ret_val;
|
||||
-}
|
||||
-
|
||||
/*
|
||||
* Read and return pid stored in pidfile.
|
||||
* Returns 0 if file doesn't exist or pid cannot be read.
|
||||
diff --git a/slurm-18.08.3/src/common/daemonize.h b/slurm-18.08.3/src/common/daemonize.h
|
||||
index 8b60b4f..b7cb625 100644
|
||||
--- a/src/common/daemonize.h
|
||||
+++ b/src/common/daemonize.h
|
||||
@@ -44,7 +44,6 @@
|
||||
* Start fork process into background and inherit new session.
|
||||
*
|
||||
*/
|
||||
-extern int xdaemon(void);
|
||||
extern int xdaemon_init(void);
|
||||
|
||||
/*
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:715be211b7bef80d06df0011ec91c51ab740031bd9ff722a5e60c595feaad282
|
||||
size 6232295
|
3
slurm-20.02.0.tar.bz2
Normal file
3
slurm-20.02.0.tar.bz2
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:45213362e3cc7f37e4ea64f3b2b1e3ce8f9c1c5c32a18d50ddb56280ec585ebb
|
||||
size 6297730
|
120
slurm.changes
120
slurm.changes
@ -1,3 +1,123 @@
|
||||
-------------------------------------------------------------------
|
||||
Thu Feb 27 20:07:19 UTC 2020 - Kasimir _ <kasimir_@outlook.de>
|
||||
|
||||
- Disable %arm builds as this is no longer supported.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Feb 26 06:13:13 UTC 2020 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 20.02.0 (jsc#SLE-8491)
|
||||
* Fix minor memory leak in slurmd on reconfig.
|
||||
* Fix invalid ptr reference when rolling up data in the database.
|
||||
* Change shtml2html.py to require python3 for RHEL8 support, and match
|
||||
man2html.py.
|
||||
* slurm.spec - override "hardening" linker flags to ensure RHEL8 builds
|
||||
in a usable manner.
|
||||
* Fix type mismatches in the perl API.
|
||||
* Prevent use of uninitialized slurmctld_diag_stats.
|
||||
* Fixed various Coverity issues.
|
||||
* Only show warning about root-less topology in daemons.
|
||||
* Fix accounting of jobs in IGNORE_JOBS reservations.
|
||||
* Fix issue with batch steps state not loading correctly when upgrading from
|
||||
19.05.
|
||||
* Deprecate max_depend_depth in SchedulerParameters and move it to
|
||||
DependencyParameters.
|
||||
* Silence erroneous error on slurmctld upgrade when loading federation state.
|
||||
* Break infinite loop in cons_tres dealing with incorrect tasks per tres
|
||||
request resulting in slurmctld hang.
|
||||
* Improve handling of --gpus-per-task to make sure appropriate number of GPUs
|
||||
is assigned to job.
|
||||
* Fix seg fault on cons_res when requesting --spread-job.
|
||||
- Move to python3 for everything but SLE-11-SP4
|
||||
* For SLE-11-SP4 add a workaround to handle a python3 script (python2.7
|
||||
compliant).
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Feb 19 21:27:00 UTC 2020 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Add explicit version dependency to libpmix as well.
|
||||
'slurm-devel' has a tight version dependency on libpmix -
|
||||
allowing multiple libpmix versions in one package repository
|
||||
is therefore essential.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Feb 13 22:34:48 UTC 2020 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 20.02.0-rc1
|
||||
* sbatch - fix segfault when no newline at the end of a burst buffer file.
|
||||
* Change scancel to only check job's base state when matching -t options.
|
||||
* Save job dependency list in state files.
|
||||
* cons_tres - allow jobs to be run on systems with root-less topologies.
|
||||
* Restore pre-20.02pre1 PrologSlurmctld synchronization behavior to avoid
|
||||
various race conditions, and ensure proper batch job launch.
|
||||
* Add new slurmrestd command/daemon which implements the Slurm REST API.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Tue Feb 11 10:09:43 UTC 2020 - Christian Goll <cgoll@suse.com>
|
||||
|
||||
- Update to version 20.02.0-0pre1
|
||||
Highlights:
|
||||
* Exclusive behavior of a node includes all GRES on a node as well
|
||||
as the cpus.
|
||||
* Use python3 instead of python for internal build/test scripts.
|
||||
The slurm.spec file has been updated to depend on python3 as well.
|
||||
* Added new NodeSet configuration option to help simplify partition
|
||||
configuration sections for heterogeneous / condo-style clusters.
|
||||
* Added slurm.conf option MaxDBDMsgs to control how many messages will be
|
||||
stored in the slurmctld before throwing them away when the slurmdbd is down.
|
||||
* The checkpoint plugin interface and all associated API calls have been
|
||||
removed.
|
||||
* slurm_init_job_desc_msg() initializes mail_type as uint16_t. This allows
|
||||
mail_type to be set to NONE with scontrol.
|
||||
* Add new slurm_spank_log() function to print messages back to the user from
|
||||
within a SPANK plugin without prepending "error: " from slurm_error().
|
||||
* Enforce having partition name and nodelist=ALL when creating reservations
|
||||
with flags=PART_NODES.
|
||||
* SPANK - removed never-implemented slurm_spank_slurmd_init() interface. This
|
||||
hook has always been accessible through slurm_spank_init() in the
|
||||
S_CTX_SLURMD context instead.
|
||||
* sbcast - add new BcastAddr option to NodeName lines to allow sbcast traffic
|
||||
to flow over an alternate network path.
|
||||
* Added auth/jwt plugin, and 'scontrol token' subcommand.
|
||||
* PMIx - improve performance of proc map generation.
|
||||
* Deprecate kill_invalid_depend in SchedulerParameters and move it to a new
|
||||
option called DependencyParameters.
|
||||
* Enable job dependencies for any job on any cluster in the same federation.
|
||||
* Allow clusters to be added automatically to db at startup of ctld.
|
||||
* Add AccountingStorageExternalHost slurm.conf parameter.
|
||||
* The "ConditionPathExists" condition in slurmd.service has been disabled by
|
||||
default to permit simpler installation of a "configless" Slurm cluster.
|
||||
* In SchedulerParameters remove deprecated max_job_bf and replace with
|
||||
bf_max_job_test.
|
||||
* Disable sbatch, salloc, srun --reboot for non-admins.
|
||||
* SPANK - added support for S_JOB_GID in the job script context with
|
||||
spank_get_item().
|
||||
* Prolog/Epilog - add SLURM_JOB_GID environment variable.
|
||||
configuration file changes:
|
||||
* The mpi/openmpi plugin has been removed as it does nothing.
|
||||
MpiDefault=openmpi will be translated to the functionally-equivalent
|
||||
MpiDefault=none.
|
||||
command changes (see man pages for details)
|
||||
* Display StepId=<jobid>.batch instead of StepId=<jobid>.4294967294 in output
|
||||
of "scontrol show step". (slurm_sprint_job_step_info())
|
||||
* MPMD in srun will now defer PATH resolution for the commands to launch to
|
||||
slurmstepd. Previously it would handle resolution client-side, but with
|
||||
a non-standard approach that walked PATH in reverse.
|
||||
* squeue - added "--me" option, equivalent to --user=$USER.
|
||||
* The LicensesUsed line has been removed from 'scontrol show config'.
|
||||
Please see the 'scontrol show licenses' command as an alternative.
|
||||
* sbatch - adjusted backoff times for "--wait" option to reduce load on
|
||||
slurmctld. This results in a steady-state delay of 32s between queries,
|
||||
instead of the prior 10s delay.
|
||||
- Removed the following deprecated patches:
|
||||
* removed patch slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
|
||||
* removed patch split-xdaemon-in-xdaemon_init-and-xdaemon_finish-for.patch
|
||||
* removed patch slurmctld-uses-xdaemon_-for-systemd.patch
|
||||
* removed patch slurmd-uses-xdaemon_-for-systemd.patch
|
||||
* removed patch slurmdbd-uses-xdaemon_-for-systemd.patch
|
||||
* removed patch slurmsmwd-uses-xdaemon_-for-systemd.patch
|
||||
* removed patch removed-deprecated-xdaemon.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Feb 5 15:37:05 UTC 2020 - Christian Goll <cgoll@suse.com>
|
||||
|
||||
|
99
slurm.spec
99
slurm.spec
@ -17,9 +17,9 @@
|
||||
|
||||
|
||||
# Check file META in sources: update so_version to (API_CURRENT - API_AGE)
|
||||
%define so_version 34
|
||||
%define ver 19.05.5
|
||||
%define _ver _19_05
|
||||
%define so_version 35
|
||||
%define ver 20.02.0
|
||||
%define _ver _20_02
|
||||
%define dl_ver %{ver}
|
||||
# so-version is 0 and seems to be stable
|
||||
%define pmi_so 0
|
||||
@ -27,7 +27,7 @@
|
||||
|
||||
%define pname slurm
|
||||
|
||||
%ifarch i586
|
||||
%ifarch i586 %arm
|
||||
ExclusiveArch: do_not_build
|
||||
%endif
|
||||
|
||||
@ -41,7 +41,7 @@ ExclusiveArch: do_not_build
|
||||
%define base_ver 1808
|
||||
%endif
|
||||
%if 0%{?sle_version} == 150200
|
||||
%define base_ver 1905
|
||||
%define base_ver 2002
|
||||
%endif
|
||||
|
||||
%if 0%{?base_ver} > 0 && 0%{?base_ver} < %(echo %{_ver} | tr -d _)
|
||||
@ -64,13 +64,19 @@ ExclusiveArch: do_not_build
|
||||
|
||||
%if 0%{?suse_version:1} && 0%{?suse_version} <= 1140
|
||||
%define comp_at %defattr(-,root,root)
|
||||
%undefine python_ver
|
||||
%else
|
||||
%define have_json_c 1
|
||||
%define python_ver 3
|
||||
%if 0%{?sle_version} >= 150000 || 0%{?is_opensuse}
|
||||
%define have_apache_rpm_macros 1
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if 0%{?sle_version} >= 150000 || 0%{?is_opensuse}
|
||||
%define have_http_parser 1
|
||||
%endif
|
||||
|
||||
%if 0
|
||||
%define have_netloc 1
|
||||
%endif
|
||||
@ -117,13 +123,6 @@ Source1: slurm-rpmlintrc
|
||||
Patch0: Remove-rpath-from-build.patch
|
||||
Patch1: slurm-2.4.4-init.patch
|
||||
Patch2: pam_slurm-Initialize-arrays-and-pass-sizes.patch
|
||||
Patch3: split-xdaemon-in-xdaemon_init-and-xdaemon_finish-for.patch
|
||||
Patch4: slurmctld-uses-xdaemon_-for-systemd.patch
|
||||
Patch5: slurmd-uses-xdaemon_-for-systemd.patch
|
||||
Patch6: slurmdbd-uses-xdaemon_-for-systemd.patch
|
||||
Patch7: slurmsmwd-uses-xdaemon_-for-systemd.patch
|
||||
Patch8: removed-deprecated-xdaemon.patch
|
||||
Patch9: slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch
|
||||
|
||||
%{?upgrade:Provides: %{pname} = %{version}}
|
||||
%{?upgrade:Conflicts: %{pname}}
|
||||
@ -146,7 +145,7 @@ BuildRequires: hdf5-devel
|
||||
%endif
|
||||
BuildRequires: libbitmask-devel
|
||||
BuildRequires: libcpuset-devel
|
||||
BuildRequires: python
|
||||
BuildRequires: python%{?python_ver}
|
||||
%if 0%{?have_libnuma}
|
||||
BuildRequires: libnuma-devel
|
||||
%endif
|
||||
@ -248,11 +247,13 @@ This package contains the library needed to run programs dynamically linked
|
||||
with SLURM.
|
||||
|
||||
|
||||
%package -n libpmi%{pmi_so}
|
||||
%package -n libpmi%{pmi_so}%{?upgrade:%{_ver}}
|
||||
Summary: SLURM PMI Library
|
||||
Group: System/Libraries
|
||||
%{?upgrade:Provides: libpmi%{pmi_so} = %{version}}
|
||||
%{?upgrade:Conflicts: libpmi%{pmi_so}}
|
||||
|
||||
%description -n libpmi%{pmi_so}
|
||||
%description -n libpmi%{pmi_so}%{?upgrade:%{_ver}}
|
||||
This package contains the library needed to run programs dynamically linked
|
||||
with SLURM.
|
||||
|
||||
@ -436,6 +437,24 @@ BuildRequires: lua-devel
|
||||
This package includes the Lua API to provide an interface to SLURM
|
||||
through Lua.
|
||||
|
||||
%package rest
|
||||
Summary: Slurm REST API Interface
|
||||
Group: Productivity/Clustering/Computing
|
||||
Requires: %{name}-config = %{version}
|
||||
%if 0%{?have_http_parser}
|
||||
BuildRequires: http-parser-devel
|
||||
%endif
|
||||
%if 0%{?have_boolean_deps}
|
||||
Recommends: (%{name}-munge = %version if munge)
|
||||
%else
|
||||
Recommends: %{name}-munge = %version
|
||||
%endif
|
||||
%{?upgrade:Provides: %{pname}-rest = %{version}}
|
||||
%{?upgrade:Conflicts: %{pname}-rest}
|
||||
|
||||
%description rest
|
||||
This package provides the interface to SLURM via REST API.
|
||||
|
||||
%package node
|
||||
Summary: Minimal slurm node
|
||||
Group: Productivity/Clustering/Computing
|
||||
@ -508,22 +527,22 @@ Contains also cray specific documentation.
|
||||
%patch0 -p2
|
||||
%patch1 -p1
|
||||
%patch2 -p1
|
||||
%patch3 -p1
|
||||
%patch4 -p1
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
%patch7 -p1
|
||||
%patch8 -p1
|
||||
# Drop this fix as it is considered to be resolved by c1a537dbbe6
|
||||
##%patch9 -p1
|
||||
%if 0%{?python_ver} < 3
|
||||
# Workaround for wrongly flagged python3 to keep SLE-11-SP4 building
|
||||
mkdir -p mybin; ln -s /usr/bin/python2 mybin/python3
|
||||
%endif
|
||||
|
||||
%build
|
||||
%define _lto_cflags %{nil}
|
||||
[ -e $(pwd)/mybin ] && PATH=$(pwd)/mybin:$PATH
|
||||
%configure --enable-shared \
|
||||
--disable-static \
|
||||
--without-rpath \
|
||||
--without-datawarp \
|
||||
--with-shared-libslurm \
|
||||
%if 0%{?have_http_parser} && 0%{?have_json_c}
|
||||
--enable-slurmrestd \
|
||||
%endif
|
||||
%{!?have_netloc:--without-netloc} \
|
||||
--sysconfdir=%{_sysconfdir}/%{pname} \
|
||||
%{!?have_hdf5:--without-hdf5} \
|
||||
@ -533,6 +552,7 @@ Contains also cray specific documentation.
|
||||
make %{?_smp_mflags}
|
||||
|
||||
%install
|
||||
[ -e $(pwd)/mybin ] && PATH=$(pwd)/mybin:$PATH
|
||||
%make_install
|
||||
make install-contrib DESTDIR=%{buildroot} PERL_MM_PARAMS="INSTALLDIRS=vendor"
|
||||
|
||||
@ -707,6 +727,10 @@ cat > %{buildroot}/%{_sysconfdir}/%{pname}/nss_slurm.conf <<EOF
|
||||
# NodeName myname
|
||||
EOF
|
||||
%fdupes -s %{buildroot}
|
||||
# Temporary - remove when build is fixed upstream.
|
||||
%if 0%{!?have_http_parser:1} || 0%{!?have_json_c:1}
|
||||
rm -f %{buildroot}/%{_mandir}/man8/slurmrestd.*
|
||||
%endif
|
||||
|
||||
%define fixperm() [ $1 -eq 1 -a -e %2 ] && /bin/chmod %1 %2
|
||||
|
||||
@ -810,8 +834,8 @@ exit 0
|
||||
%post -n %{libslurm} -p /sbin/ldconfig
|
||||
%postun -n %{libslurm} -p /sbin/ldconfig
|
||||
|
||||
%post -n libpmi%{pmi_so} -p /sbin/ldconfig
|
||||
%postun -n libpmi%{pmi_so} -p /sbin/ldconfig
|
||||
%post -n libpmi%{pmi_so}%{?upgrade:%{_ver}} -p /sbin/ldconfig
|
||||
%postun -n libpmi%{pmi_so}%{?upgrade:%{_ver}} -p /sbin/ldconfig
|
||||
|
||||
%post -n libnss_%{pname}%{nss_so} -p /sbin/ldconfig
|
||||
%postun -n libnss_%{pname}%{nss_so} -p /sbin/ldconfig
|
||||
@ -892,7 +916,6 @@ exit 0
|
||||
%{_bindir}/sprio
|
||||
%{_bindir}/squeue
|
||||
%{_bindir}/sreport
|
||||
%{_bindir}/smap
|
||||
%{_bindir}/sshare
|
||||
%{_bindir}/sstat
|
||||
%{_bindir}/strigger
|
||||
@ -916,7 +939,6 @@ exit 0
|
||||
%{_mandir}/man1/sgather.1.*
|
||||
%{_mandir}/man1/sinfo.1*
|
||||
%{_mandir}/man1/slurm.1*
|
||||
%{_mandir}/man1/smap.1*
|
||||
%{_mandir}/man1/sprio.1*
|
||||
%{_mandir}/man1/squeue.1*
|
||||
%{_mandir}/man1/sreport.1*
|
||||
@ -953,7 +975,7 @@ exit 0
|
||||
%{?comp_at}
|
||||
%{_libdir}/libslurm*.so.%{so_version}*
|
||||
|
||||
%files -n libpmi%{pmi_so}
|
||||
%files -n libpmi%{pmi_so}%{?upgrade:%{_ver}}
|
||||
%{?comp_at}
|
||||
%{_libdir}/libpmi*.so.%{pmi_so}*
|
||||
|
||||
@ -1032,10 +1054,11 @@ exit 0
|
||||
%{_libdir}/slurm/acct_gather_profile_none.so
|
||||
%{?have_json_c:%{_libdir}/slurm/burst_buffer_datawarp.so}
|
||||
%{_libdir}/slurm/burst_buffer_generic.so
|
||||
%{_libdir}/slurm/checkpoint_none.so
|
||||
%{_libdir}/slurm/checkpoint_ompi.so
|
||||
%{_libdir}/slurm/core_spec_none.so
|
||||
%{_libdir}/slurm/cli_filter_none.so
|
||||
%{_libdir}/slurm/cli_filter_lua.so
|
||||
%{_libdir}/slurm/cli_filter_syslog.so
|
||||
%{_libdir}/slurm/cli_filter_user_defaults.so
|
||||
%{_libdir}/slurm/cred_none.so
|
||||
%{_libdir}/slurm/ext_sensors_none.so
|
||||
%{_libdir}/slurm/gpu_generic.so
|
||||
@ -1048,6 +1071,7 @@ exit 0
|
||||
%{_libdir}/slurm/jobacct_gather_none.so
|
||||
%{_libdir}/slurm/jobcomp_filetxt.so
|
||||
%{_libdir}/slurm/jobcomp_none.so
|
||||
%{_libdir}/slurm/jobcomp_lua.so
|
||||
%{_libdir}/slurm/jobcomp_script.so
|
||||
%{_libdir}/slurm/job_container_cncu.so
|
||||
%{_libdir}/slurm/job_container_none.so
|
||||
@ -1066,7 +1090,6 @@ exit 0
|
||||
%{_libdir}/slurm/mcs_none.so
|
||||
%{_libdir}/slurm/mcs_user.so
|
||||
%{_libdir}/slurm/mpi_none.so
|
||||
%{_libdir}/slurm/mpi_openmpi.so
|
||||
%{_libdir}/slurm/mpi_pmi2.so
|
||||
%if %{with pmix}
|
||||
%{_libdir}/slurm/mpi_pmix.so
|
||||
@ -1076,6 +1099,7 @@ exit 0
|
||||
%{_libdir}/slurm/preempt_none.so
|
||||
%{_libdir}/slurm/preempt_partition_prio.so
|
||||
%{_libdir}/slurm/preempt_qos.so
|
||||
%{_libdir}/slurm/prep_script.so
|
||||
%{_libdir}/slurm/priority_basic.so
|
||||
%{_libdir}/slurm/priority_multifactor.so
|
||||
%{_libdir}/slurm/proctrack_cgroup.so
|
||||
@ -1121,6 +1145,7 @@ exit 0
|
||||
%{_libdir}/slurm/select_cray_aries.so
|
||||
%{_libdir}/slurm/switch_cray_aries.so
|
||||
%{_libdir}/slurm/task_cray_aries.so
|
||||
%{_libdir}/slurm/mpi_cray_shasta.so
|
||||
%if 0%{?have_json_c}
|
||||
%{_libdir}/slurm/node_features_knl_cray.so
|
||||
%{_libdir}/slurm/power_cray_aries.so
|
||||
@ -1156,6 +1181,13 @@ exit 0
|
||||
/%_lib/security/pam_slurm.so
|
||||
/%_lib/security/pam_slurm_adopt.so
|
||||
|
||||
%if 0%{?have_http_parser} && 0%{?have_json_c}
|
||||
%files rest
|
||||
%{?comp_at}
|
||||
%{_sbindir}/slurmrestd
|
||||
%{_mandir}/man8/slurmrestd.*
|
||||
%endif
|
||||
|
||||
%files node
|
||||
%{?comp_at}
|
||||
%{_sbindir}/slurmd
|
||||
@ -1199,9 +1231,6 @@ exit 0
|
||||
%{_mandir}/man5/nonstop.conf.5.*
|
||||
%{_mandir}/man5/topology.*
|
||||
%{_mandir}/man5/knl.conf.5.*
|
||||
%if 0%{?legacy_cray}
|
||||
%{_mandir}/man5/cray.*
|
||||
%endif
|
||||
|
||||
%if 0%{?have_hdf5}
|
||||
%files hdf5
|
||||
@ -1219,7 +1248,7 @@ exit 0
|
||||
%{_libdir}/slurm/select_cray_aries.so
|
||||
%{_libdir}/slurm/switch_cray_aries.so
|
||||
%{_libdir}/slurm/task_cray_aries.so
|
||||
%{_mandir}/man5/cray.*
|
||||
%{_libdir}/slurm/mpi_cray_shasta.so
|
||||
%if 0%{?have_json_c}
|
||||
%{_libdir}/slurm/node_features_knl_cray.so
|
||||
%{_libdir}/slurm/power_cray_aries.so
|
||||
|
@ -1,58 +0,0 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Tue Nov 20 09:22:15 2018 +0100
|
||||
Subject: slurmctld: rerun agent_init() when backup controller takes over
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: 21a7abc02e4a27cc64a213ba1fc8572a20e21ba9
|
||||
References: bsc#1084917
|
||||
|
||||
A slurmctld backup controller often fails to clean up jobs which have
|
||||
finished, the node appears in an 'IDLE+COMPLETING' state while squeue -l
|
||||
still shows the job in a completing state.
|
||||
This situation persists until the primary controller is restarted and
|
||||
cleans up all tasks in 'COMPLETING' state.
|
||||
This issue is caused by a race condition in the backup controller:
|
||||
When the backup controller detects that the primary controller is
|
||||
inaccessible, it will run thru a restart cycle. To trigger the shutdown
|
||||
of some entities, it will set slurmctld_config.shutdown_time to a value
|
||||
!= 0. Before continuing as the controller in charge, it resets this
|
||||
variable to 0 again.
|
||||
The agent which handles the request queue - from a separate thread -
|
||||
wakes up periodically (in a 2 sec interval) and checks for things to do.
|
||||
If it finds slurmctld_config.shutdown_time set to a value != 0, it will
|
||||
terminate.
|
||||
If this wakeup occurs in the 'takeover window' between the variable
|
||||
being set to !=0 and reset to 0, the agent goes away and will no longer
|
||||
be available to handle queued requests as there is nothing at the end
|
||||
of the 'takeover window' that would restart it.
|
||||
|
||||
This fix adds a restart of the agent by calling agent_init() after
|
||||
slurmctld_config.shutdown_time has been reset to 0.
|
||||
Should an agent still be running (because it didn't wake up during the
|
||||
'takeover window') it will be caught in agent_init().
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
---
|
||||
src/slurmctld/backup.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
|
||||
index de74513..2b4c74e 100644
|
||||
--- a/src/slurmctld/backup.c
|
||||
+++ b/src/slurmctld/backup.c
|
||||
@@ -65,6 +65,7 @@
|
||||
#include "src/slurmctld/read_config.h"
|
||||
#include "src/slurmctld/slurmctld.h"
|
||||
#include "src/slurmctld/trigger_mgr.h"
|
||||
+#include "src/slurmctld/agent.h"
|
||||
|
||||
#define _DEBUG 0
|
||||
#define SHUTDOWN_WAIT 2 /* Time to wait for primary server shutdown */
|
||||
@@ -258,6 +259,9 @@ void run_backup(slurm_trigger_callbacks_t *callbacks)
|
||||
error("Unable to recover slurm state");
|
||||
abort();
|
||||
}
|
||||
+ /* Reinit agent in case it has been terminated - agent_init()
|
||||
+ will check itself */
|
||||
+ agent_init();
|
||||
slurmctld_config.shutdown_time = (time_t) 0;
|
||||
unlock_slurmctld(config_write_lock);
|
||||
select_g_select_nodeinfo_set_all();
|
@ -1,47 +0,0 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Tue Nov 20 09:47:47 2018 +0100
|
||||
Subject: slurmctld uses xdaemon_* for systemd
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: 0f0c00a4a57d12be04d16f4646c186d3e5f03dd1
|
||||
References: bsc#1084125
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
---
|
||||
slurm-18.08.3/src/slurmctld/controller.c | 11 +++++++++--
|
||||
1 file changed, 9 insertions(+), 2 deletions(-)
|
||||
diff --git a/slurm-18.08.3/src/slurmctld/controller.c b/slurm-18.08.3/src/slurmctld/controller.c
|
||||
index a1762de..d123db3 100644
|
||||
--- a/src/slurmctld/controller.c
|
||||
+++ b/src/slurmctld/controller.c
|
||||
@@ -260,7 +260,7 @@ static void * _wait_primary_prog(void *arg);
|
||||
/* main - slurmctld main function, start various threads and process RPCs */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
- int cnt, error_code, i;
|
||||
+ int cnt, error_code, i, fd;
|
||||
struct timeval start, now;
|
||||
struct stat stat_buf;
|
||||
struct rlimit rlim;
|
||||
@@ -326,7 +326,11 @@ int main(int argc, char **argv)
|
||||
|
||||
if (daemonize) {
|
||||
slurmctld_config.daemonize = 1;
|
||||
- if (xdaemon())
|
||||
+ /*
|
||||
+ * Just start daemonizing if not in test mode
|
||||
+ */
|
||||
+ fd = xdaemon_init();
|
||||
+ if (fd == -1)
|
||||
error("daemon(): %m");
|
||||
log_set_timefmt(slurmctld_conf.log_fmt);
|
||||
log_alter(log_opts, LOG_DAEMON,
|
||||
@@ -348,6 +352,9 @@ int main(int argc, char **argv)
|
||||
_init_pidfile();
|
||||
_become_slurm_user();
|
||||
}
|
||||
+ if (daemonize) {
|
||||
+ xdaemon_finish(fd);
|
||||
+ }
|
||||
|
||||
/*
|
||||
* Create StateSaveLocation directory if necessary.
|
@ -1,44 +0,0 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Tue Nov 20 09:52:22 2018 +0100
|
||||
Subject: slurmd uses xdaemon_* for systemd
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: 3988e62eb8c20a29a7a016f264c6d65e114cfdf4
|
||||
References: bsc#1084125
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
---
|
||||
slurm-18.08.3/src/slurmd/slurmd/slurmd.c | 8 ++++++--
|
||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
||||
diff --git a/slurm-18.08.3/src/slurmd/slurmd/slurmd.c b/slurm-18.08.3/src/slurmd/slurmd/slurmd.c
|
||||
index aa35f8a..b2feaf9 100644
|
||||
--- a/src/slurmd/slurmd/slurmd.c
|
||||
+++ b/src/slurmd/slurmd/slurmd.c
|
||||
@@ -215,7 +215,7 @@ static void _wait_for_all_threads(int secs);
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
- int i, pidfd;
|
||||
+ int i, pidfd, pipefd;
|
||||
int blocked_signals[] = {SIGPIPE, 0};
|
||||
int cc;
|
||||
char *oom_value;
|
||||
@@ -300,7 +300,8 @@ main (int argc, char **argv)
|
||||
* Become a daemon if desired.
|
||||
*/
|
||||
if (conf->daemonize) {
|
||||
- if (xdaemon())
|
||||
+ pipefd = xdaemon_init();
|
||||
+ if (pipefd == -1)
|
||||
error("Couldn't daemonize slurmd: %m");
|
||||
}
|
||||
test_core_limit();
|
||||
@@ -356,6 +357,9 @@ main (int argc, char **argv)
|
||||
|
||||
conf->pid = getpid();
|
||||
pidfd = create_pidfile(conf->pidfile, 0);
|
||||
+ if (conf->daemonize) {
|
||||
+ xdaemon_finish(pipefd);
|
||||
+ }
|
||||
|
||||
rfc2822_timestamp(time_stamp, sizeof(time_stamp));
|
||||
info("%s started on %s", slurm_prog_name, time_stamp);
|
@ -1,72 +0,0 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Tue Nov 20 09:58:47 2018 +0100
|
||||
Subject: slurmdbd uses xdaemon_* for systemd
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: 8a286cbaf3fe7ebe009106675a4624a2272d616f
|
||||
References: bsc#1084125
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
---
|
||||
slurm-18.08.3/src/slurmdbd/slurmdbd.c | 19 +++++++++++++------
|
||||
1 file changed, 13 insertions(+), 6 deletions(-)
|
||||
diff --git a/slurm-18.08.3/src/slurmdbd/slurmdbd.c b/slurm-18.08.3/src/slurmdbd/slurmdbd.c
|
||||
index 471c724..8c7ea94 100644
|
||||
--- a/src/slurmdbd/slurmdbd.c
|
||||
+++ b/src/slurmdbd/slurmdbd.c
|
||||
@@ -103,7 +103,7 @@ static List lft_rgt_list = NULL;
|
||||
static void _become_slurm_user(void);
|
||||
static void _commit_handler_cancel(void);
|
||||
static void *_commit_handler(void *no_data);
|
||||
-static void _daemonize(void);
|
||||
+static int _daemonize_start(void);
|
||||
static void _default_sigaction(int sig);
|
||||
static void _free_dbd_stats(void);
|
||||
static void _init_config(void);
|
||||
@@ -127,6 +127,7 @@ int main(int argc, char **argv)
|
||||
{
|
||||
char node_name_short[128];
|
||||
char node_name_long[128];
|
||||
+ int pipefd;
|
||||
void *db_conn = NULL;
|
||||
assoc_init_args_t assoc_init_arg;
|
||||
|
||||
@@ -139,8 +140,9 @@ int main(int argc, char **argv)
|
||||
_update_nice();
|
||||
|
||||
_kill_old_slurmdbd();
|
||||
- if (foreground == 0)
|
||||
- _daemonize();
|
||||
+ if (foreground == 0) {
|
||||
+ pipefd = _daemonize_start();
|
||||
+ }
|
||||
|
||||
/*
|
||||
* Need to create pidfile here in case we setuid() below
|
||||
@@ -149,7 +151,9 @@ int main(int argc, char **argv)
|
||||
* able to write a core dump.
|
||||
*/
|
||||
_init_pidfile();
|
||||
-
|
||||
+ if (foreground == 0) {
|
||||
+ xdaemon_finish(pipefd);
|
||||
+ }
|
||||
/*
|
||||
* Do plugin init's after _init_pidfile so systemd is happy as
|
||||
* slurm_acct_storage_init() could take a long time to finish if running
|
||||
@@ -598,11 +602,14 @@ static void _init_pidfile(void)
|
||||
|
||||
/* Become a daemon (child of init) and
|
||||
* "cd" to the LogFile directory (if one is configured) */
|
||||
-static void _daemonize(void)
|
||||
+static int _daemonize_start(void)
|
||||
{
|
||||
- if (xdaemon())
|
||||
+ int retval;
|
||||
+ retval = xdaemon_init();
|
||||
+ if (retval == -1)
|
||||
error("daemon(): %m");
|
||||
log_alter(log_opts, LOG_DAEMON, slurmdbd_conf->log_file);
|
||||
+ return retval;
|
||||
}
|
||||
|
||||
static void _set_work_dir(void)
|
@ -1,40 +0,0 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Tue Nov 20 10:07:35 2018 +0100
|
||||
Subject: slurmsmwd uses xdaemon_* for systemd
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: 110d76a0c56b35c8c3c9b24e136476a67a6eb413
|
||||
References: bsc#1084125
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
---
|
||||
slurm-18.08.3/contribs/cray/slurmsmwd/main.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
diff --git a/slurm-18.08.3/contribs/cray/slurmsmwd/main.c b/slurm-18.08.3/contribs/cray/slurmsmwd/main.c
|
||||
index a5247bf..1efb1f8 100644
|
||||
--- a/contribs/cray/slurmsmwd/main.c
|
||||
+++ b/contribs/cray/slurmsmwd/main.c
|
||||
@@ -538,6 +538,7 @@ int main(int argc, char **argv)
|
||||
{
|
||||
pthread_t processing_thread, signal_handler_thread;
|
||||
pthread_attr_t thread_attr;
|
||||
+ int pipefd;
|
||||
|
||||
_parse_commandline(argc, argv);
|
||||
|
||||
@@ -546,11 +547,15 @@ int main(int argc, char **argv)
|
||||
slurmsmwd_print_config();
|
||||
|
||||
if (!foreground) {
|
||||
- if (xdaemon())
|
||||
+ pipefd = xdaemon_init();
|
||||
+ if (pipefd == -1)
|
||||
error("daemon(): %m");
|
||||
}
|
||||
if (create_pidfile("/var/run/slurmsmwd.pid", 0) < 0)
|
||||
fatal("Unable to create pidfile /var/run/slurmswmd.pid");
|
||||
+ if (!foreground) {
|
||||
+ xdaemon_finish(pipefd);
|
||||
+ }
|
||||
|
||||
slurm_mutex_init(&down_node_lock);
|
||||
|
@ -1,148 +0,0 @@
|
||||
From 1f12c590038c7f738ff19159629fdc38de5cba82 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Goll <cgoll@suse.de>
|
||||
Date: Mon, 9 Apr 2018 10:05:50 +0200
|
||||
Subject: [PATCH 1/6] split xdaemon in xdaemon_init and xdaemon_finish for
|
||||
systemd compatibilty
|
||||
|
||||
---
|
||||
src/common/daemonize.c | 73 ++++++++++++++++++++++++++++++++++++++++++++------
|
||||
src/common/daemonize.h | 10 +++++--
|
||||
2 files changed, 73 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/src/common/daemonize.c b/src/common/daemonize.c
|
||||
index e22a1d0a7f..2987a40af0 100644
|
||||
--- a/src/common/daemonize.c
|
||||
+++ b/src/common/daemonize.c
|
||||
@@ -53,31 +53,75 @@
|
||||
#include "src/common/xassert.h"
|
||||
|
||||
/*
|
||||
- * Double-fork and go into background.
|
||||
+ * Start daemonization with double-fork and go into background.
|
||||
* Caller is responsible for umasks
|
||||
*/
|
||||
-int xdaemon(void)
|
||||
+int xdaemon_init(void)
|
||||
{
|
||||
- int devnull;
|
||||
-
|
||||
+ int fds [2];
|
||||
+ int n;
|
||||
+ signed char priority;
|
||||
+ char ebuf [1024];
|
||||
+ /*
|
||||
+ * Create pipe in order to get signal from grand child to terminate
|
||||
+ */
|
||||
+ if (pipe (fds) < 0) {
|
||||
+ error("Failed to create daemon pipe");
|
||||
+ }
|
||||
switch (fork()) {
|
||||
case 0 : break; /* child */
|
||||
case -1 : return -1;
|
||||
- default : _exit(0); /* exit parent */
|
||||
+ default : {
|
||||
+ if (close (fds[1]) < 0) {
|
||||
+ error("Failed to close write-pipe in parent process");
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * get signal of grandchild to exit
|
||||
+ */
|
||||
+ if ((n = read (fds[0], &priority, sizeof (priority))) < 0) {
|
||||
+ error("Failed to read status from grandchild process");
|
||||
+ }
|
||||
+ if ((n > 0) && (priority >= 0)) {
|
||||
+ if ((n = read (fds[0], ebuf, sizeof (ebuf))) < 0) {
|
||||
+ error("Failed to read err msg from grandchild process");
|
||||
+ }
|
||||
+ if ((n > 0) && (ebuf[0] != '\0')) {
|
||||
+ error("Error with forking and steeing up pipe: %s", ebuf);
|
||||
+ }
|
||||
+ return -1;
|
||||
+ }
|
||||
+ _exit(0);
|
||||
+ }
|
||||
}
|
||||
|
||||
if (setsid() < 0)
|
||||
return -1;
|
||||
-
|
||||
+ if (close (fds[0]) < 0) {
|
||||
+ error("Failed to close read-pipe in child process");
|
||||
+ }
|
||||
switch (fork()) {
|
||||
case 0 : break; /* child */
|
||||
case -1: return -1;
|
||||
default: _exit(0); /* exit parent */
|
||||
}
|
||||
+ return (fds[1]);
|
||||
+}
|
||||
|
||||
+/*
|
||||
+ * finish daemonization after pidfile was written
|
||||
+ */
|
||||
+
|
||||
+
|
||||
+void xdaemon_finish(int fd)
|
||||
+{
|
||||
/*
|
||||
- * dup stdin, stdout, and stderr onto /dev/null
|
||||
+ * PID file was written, now do dup stdin, stdout,
|
||||
+ * and stderr onto /dev/null and close pipe
|
||||
+ * so that systemd realizes we are daemonized
|
||||
*/
|
||||
+ int devnull;
|
||||
+
|
||||
devnull = open("/dev/null", O_RDWR);
|
||||
if (devnull < 0)
|
||||
error("Unable to open /dev/null: %m");
|
||||
@@ -89,8 +133,21 @@ int xdaemon(void)
|
||||
error("Unable to dup /dev/null onto stderr: %m");
|
||||
if (close(devnull) < 0)
|
||||
error("Unable to close /dev/null: %m");
|
||||
+ if ((fd >= 0) && (close (fd) < 0)) {
|
||||
+ error( "Failed to close write-pipe in grandchild process");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * keep depercated api
|
||||
+ */
|
||||
|
||||
- return 0;
|
||||
+int xdaemon(void)
|
||||
+{
|
||||
+ int ret_val;
|
||||
+ ret_val= xdaemon_init();
|
||||
+ xdaemon_finish(ret_val);
|
||||
+ return ret_val;
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/src/common/daemonize.h b/src/common/daemonize.h
|
||||
index 22a31f6ccf..8b2a866b61 100644
|
||||
--- a/src/common/daemonize.h
|
||||
+++ b/src/common/daemonize.h
|
||||
@@ -41,11 +41,17 @@
|
||||
#define _HAVE_DAEMONIZE_H
|
||||
|
||||
/*
|
||||
- * Fork process into background and inherit new session.
|
||||
+ * Start fork process into background and inherit new session.
|
||||
*
|
||||
- * Returns -1 on error.
|
||||
*/
|
||||
extern int xdaemon(void);
|
||||
+extern int xdaemon_init(void);
|
||||
+
|
||||
+/*
|
||||
+ * Finish daemonization by ending grandparen
|
||||
+ */
|
||||
+
|
||||
+extern void xdaemon_finish(int fd);
|
||||
|
||||
/* Write pid into file pidfile if uid is not 0 change the owner of the
|
||||
* pidfile to that user.
|
||||
--
|
||||
2.13.7
|
||||
|
Loading…
Reference in New Issue
Block a user