From f21d191e3ca8ef3a8d6509498b2e5e88cf0a766e7a1006a82e974bb767ad73c9 Mon Sep 17 00:00:00 2001 From: Egbert Eich Date: Tue, 20 Nov 2018 17:07:44 +0000 Subject: [PATCH] Accepting request 650545 from home:eeich:branches:network:cluster - Added missing perl-base dependency. - Moved HTML docs to doc package. - Moved config man pages to a separate package: This way, they won't get installed on compute nodes. - Update to 18.08.3 * Add new burst buffer state of "teardown-fail" to indicate the burst buffer teardown operation is failing on specific buffers. * Multiple backup slurmctld daemons can be configured * Enable jobs with zero node count for creation and/or deletion of persistent burst buffers. * Add "scontrol show dwstat" command to display Cray burst buffer status. * Add "GetSysStatus" option to burst_buffer.conf file. * Add node and partition configuration options of "CpuBind" to control default task binding. * Add "NumaCpuBind" option to knl.conf * Add sbatch "--batch" option to identify features required on batch node. * Add "BatchFeatures" field to output of "scontrol show job". * Add support for "--bb" option to sbatch command. * Add new SystemComment field to job data structure and database. * Expand reservation "flags" field from 32 to 64 bits. * Add job state flag of "SIGNALING" to avoid race condition. * Properly handle srun --will-run option when there are jobs in COMPLETING state. * Properly report who is signaling a step. * Don't combine updated reservation records in sreport's reservation report. * node_features plugin - Add support for XOR & XAND of job constraints (node feature specifications).
OBS-URL: https://build.opensuse.org/request/show/650545 OBS-URL: https://build.opensuse.org/package/show/network:cluster/slurm?expand=0&rev=75 --- removed-deprecated-xdaemon.patch | 34 ++-- slurm-17.11.9.tar.bz2 | 3 - slurm-18.08.3.tar.bz2 | 3 + slurm.changes | 154 ++++++++++++++++++ slurm.spec | 52 ++---- ...it-when-backup-controller-takes-over.patch | 16 +- slurmctld-uses-xdaemon_-for-systemd.patch | 37 ++--- slurmd-uses-xdaemon_-for-systemd.patch | 27 ++- slurmdbd-uses-xdaemon_-for-systemd.patch | 38 ++--- slurmsmwd-uses-xdaemon_-for-systemd.patch | 25 ++- 10 files changed, 261 insertions(+), 128 deletions(-) delete mode 100644 slurm-17.11.9.tar.bz2 create mode 100644 slurm-18.08.3.tar.bz2 diff --git a/removed-deprecated-xdaemon.patch b/removed-deprecated-xdaemon.patch index c7e3d63..7c09385 100644 --- a/removed-deprecated-xdaemon.patch +++ b/removed-deprecated-xdaemon.patch @@ -1,18 +1,20 @@ -From 783f241cc56d789bf795efc7172672da1c8b2a10 Mon Sep 17 00:00:00 2001 -From: Christian Goll -Date: Mon, 9 Apr 2018 11:52:58 +0200 -Subject: [PATCH 6/6] removed deprecated xdaemon +From: Egbert Eich +Date: Tue Nov 20 11:54:02 2018 +0100 +Subject: removed deprecated xdaemon +Patch-mainline: Not yet +Git-commit: b39551df0f202203c16d4e9a9a7b640691acf882 +References: bsc#1084125 +Signed-off-by: Egbert Eich --- - src/common/daemonize.c | 11 ----------- - src/common/daemonize.h | 1 - - 2 files changed, 12 deletions(-) - -diff --git a/src/common/daemonize.c b/src/common/daemonize.c -index 2987a40af0..32dc79c577 100644 + slurm-18.08.3/src/common/daemonize.c | 12 ------------ + slurm-18.08.3/src/common/daemonize.h | 1 - + 2 files changed, 13 deletions(-) +diff --git a/slurm-18.08.3/src/common/daemonize.c b/slurm-18.08.3/src/common/daemonize.c +index fee9d60..bec8202 100644 --- a/src/common/daemonize.c +++ b/src/common/daemonize.c -@@ -138,17 +138,6 @@ void xdaemon_finish(int fd) +@@ -138,18 +138,6 @@ void xdaemon_finish(int fd) } } @@ -27,11 +29,12 @@ index 2987a40af0..32dc79c577 
100644 - xdaemon_finish(ret_val); - return ret_val; -} - +- /* * Read and return pid stored in pidfile. -diff --git a/src/common/daemonize.h b/src/common/daemonize.h -index 8b2a866b61..4ec16f22b0 100644 + * Returns 0 if file doesn't exist or pid cannot be read. +diff --git a/slurm-18.08.3/src/common/daemonize.h b/slurm-18.08.3/src/common/daemonize.h +index 8b60b4f..b7cb625 100644 --- a/src/common/daemonize.h +++ b/src/common/daemonize.h @@ -44,7 +44,6 @@ @@ -42,6 +45,3 @@ index 8b2a866b61..4ec16f22b0 100644 extern int xdaemon_init(void); /* --- -2.13.7 - diff --git a/slurm-17.11.9.tar.bz2 b/slurm-17.11.9.tar.bz2 deleted file mode 100644 index fbc6787..0000000 --- a/slurm-17.11.9.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c56ed2eab6d2d2adf2ab5aec203175a64b9e8c5a5ba2af29470358e7808bd942 -size 6258698 diff --git a/slurm-18.08.3.tar.bz2 b/slurm-18.08.3.tar.bz2 new file mode 100644 index 0000000..b5ece48 --- /dev/null +++ b/slurm-18.08.3.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959df5d07563f2f472376a57bdafe61b4c44fe183a4a2c279c83607336dff806 +size 6092020 diff --git a/slurm.changes b/slurm.changes index d350284..e506363 100644 --- a/slurm.changes +++ b/slurm.changes @@ -1,3 +1,157 @@ +------------------------------------------------------------------- +Tue Nov 20 11:21:37 UTC 2018 - eich@suse.com + +- Added missing perl-base dependency. + +------------------------------------------------------------------- +Tue Nov 20 11:21:14 UTC 2018 - eich@suse.com + +- Moved HTML docs to doc package. + +------------------------------------------------------------------- +Tue Nov 20 11:20:05 UTC 2018 - eich@suse.com + +- Moved config man pages to a separate package: This way, they won't + get installed on compute nodes. 
+ +------------------------------------------------------------------- +Tue Nov 20 11:11:15 UTC 2018 - eich@suse.com + +- Update to 18.08.3 + * Add new burst buffer state of "teardown-fail" to indicate the burst + buffer teardown operation is failing on specific buffers. + * Multiple backup slurmctld daemons can be configured + * Enable jobs with zero node count for creation and/or deletion of persistent + burst buffers. + * Add "scontrol show dwstat" command to display Cray burst buffer status. + * Add "GetSysStatus" option to burst_buffer.conf file. + * Add node and partition configuration options of "CpuBind" to control + default task binding. + * Add "NumaCpuBind" option to knl.conf + * Add sbatch "--batch" option to identify features required on batch node. + * Add "BatchFeatures" field to output of "scontrol show job". + * Add support for "--bb" option to sbatch command. + * Add new SystemComment field to job data structure and database. + * Expand reservation "flags" field from 32 to 64 bits. + * Add job state flag of "SIGNALING" to avoid race condition. + * Properly handle srun --will-run option when there are jobs in COMPLETING + state. + * Properly report who is signaling a step. + * Don't combine updated reservation records in sreport's reservation report. + * node_features plugin - Add support for XOR & XAND of job constraints (node + feature specifications). + * Improvements to how srun searches for the executable when using cwd. + * Now programs can be checked before execution if test_exec is set. + * Report NodeFeatures plugin configuration with scontrol and sview commands. + * Add acct_gather_profile/influxdb plugin. + * Add new job state of SO/STAGE_OUT + * Correct SLURM_NTASKS and SLURM_NPROCS environment variable for + heterogeneous job step. + * Expand advanced reservation feature specification to support parenthesis + and counts of nodes with specified features.
+ * Defer job signaling until prolog is completed + * Have the primary slurmctld wait until the backup has completely shutdown + before taking control. + * Fix issue where unpacking job state after TRES count changed could lead to + invalid reads. + * Heterogeneous job steps allocations supported with Open MPI. + * Remove redundant function arguments from task plugins. + * Add Slurm configuration file check logic using "slurmctld -t" command. + * Add the use of a xml file to help performance when using hwloc. + * Remove support for "ChosLoc" configuration parameter. + * Configuration parameters "ControlMachine", "ControlAddr", + "BackupController" and "BackupAddr" replaced by an ordered list of + "SlurmctldHost" records. + * Remove --immediate option from sbatch. + * Add infrastructure for per-job and per-step TRES parameters. + * Add DefCpuPerGpu and DefMemPerGpu to global and per-partition configuration + parameters. + * Add ValidateMode configuration parameter to knl_cray.conf. + * Disable local PTY output processing when using 'srun --unbuffered'. + * Change the column name for the %U (User ID) field in squeue to 'UID'. + * CRAY - Add CheckGhalQuiesce to the CommunicationParameters. + * When a process is core dumping, avoid terminating other processes in that + task group. + * CPU frequency management enhancements: If scaling_available_frequencies + file is not available, then derive values from scaling_min_freq and + scaling_max_freq values. + * Add pending jobs count to sdiag output. + * Add configuration parameters SlurmctldPrimaryOnProg and + SlurmctldPrimaryOffProg, which define programs to execute when a slurmctld + daemon changes state. + * Add configuration parameter SlurmctldAddr for use with virtual IP to + manage backup slurmctld daemons. + * Explicitly shutdown the slurmd process when instructed to reboot. + * Add ability to create/update partition with TRESBillingWeights through + scontrol. + * Calculate TRES billing values at submission.
+ * Add node_features plugin function "node_features_p_reboot_weight()". + * Add NodeRebootWeight parameter to knl.conf configuration file. + * Completely remove "gres" field from step record. Use "tres_per_node", + "tres_per_socket", etc. + * Add "Links" parameter to gres.conf configuration file. + * Force slurm_mktime() to set tm_isdst to -1. + * burst_buffer.conf - Add SetExecHost flag to enable burst buffer access + from the login node for interactive jobs. + * Append ", with requeued tasks" to job array "end" emails if any tasks in + the array were requeued. + * Add ResumeFailProgram slurm.conf option to specify a program that is called + when a node fails to respond by ResumeTimeout. + * Add new job pending reason of "ReqNodeNotAvail, reserved for maintenance". + * Remove AdminComment += syntax from 'scontrol update job'. + * sched/backfill: Reset job time limit if needed for deadline scheduling. + * For heterogeneous job component with required nodes, explicitly exclude + those nodes from all other job components. + * Add name of partition used to output of srun --test-only output. + * sdiag output now reports outgoing slurmctld message queue contents. + * Improve escaping special characters on user commands when specifying paths. + * Add salloc/sbatch/srun option of --gres-flags=disable-binding to disable + filtering of CPUs with respect to generic resource locality. + * SlurmDBD - Print warning if MySQL/MariaDB internal tuning is not at least + half of the recommended values. + * Add ability to specify a node reason when rebooting nodes with "scontrol + reboot". + * Add nextstate option to "scontrol reboot". + * Consider "resuming" (nextstate=resume) nodes as available in backfill + future scheduling. + * Add TimelimitRaw sacct output field to display timelimit numbers. + * Add support for sacct --whole-hetjob=[yes|no] option. + * Make salloc handle node requests the same as sbatch. 
+ * Add shutdown_on_reboot SlurmdParameter to control whether the Slurmd will + shut itself down or not when a reboot request is received. + * Add cancel_reboot scontrol option to cancel pending reboot of nodes. + * Make Users case insensitive in the database based on + Parameters=PreserveCaseUser in the slurmdbd.conf. + * Improve scheduling when dealing with node_features that could have a + boot delay. + * Changed the default AuthType for slurmdbd to auth/munge. + * Added 'remote-fs.target' to After directive of slurmd.service file. + * Remove drain on node when reboot nextstate used. + * Speed up pack of job's qos. + * Add sacctmgr options to prevent/manage job queue stuffing: + - GrpJobsAccrue= + - MaxJobsAccrue= + * MinPrioThreshold + Minimum priority required to reserve resources when scheduling. + * Add control_inx value to trigger_info_msg_t to permit future work in the + trigger management code to distinguish which of multiple backup controllers + has changed state. + * NOTES: + PreemptType=preempt/job_prio has been removed - use PreemptType=preempt/qos + instead. + * Bluegene support was deprecated and has now been removed + * cgroup_allowed_devices_file.conf was removed. It was never used by + default, as ConstrainDevices was not set. If needed, refer to the + cgroups.conf man page on how to create one. + * slurm.epilog.clean: Removed. User should use pam_slurm_adopt instead.
+- Refreshed: + * removed-deprecated-xdaemon.patch + * slurmctld-uses-xdaemon_-for-systemd.patch + * slurmd-uses-xdaemon_-for-systemd.patch + * slurmdbd-uses-xdaemon_-for-systemd.patch + * slurmsmwd-uses-xdaemon_-for-systemd.patch + * slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch + ------------------------------------------------------------------- Sun Sep 30 15:18:08 UTC 2018 - eich@suse.com diff --git a/slurm.spec b/slurm.spec index 898eb66..f89fbbe 100644 --- a/slurm.spec +++ b/slurm.spec @@ -17,8 +17,8 @@ # Check file META in sources: update so_version to (API_CURRENT - API_AGE) -%define so_version 32 -%define ver 17.11.9 +%define so_version 33 +%define ver 18.08.3 # so-version is 0 and seems to be stable %define pmi_so 0 @@ -77,6 +77,8 @@ Patch9: slurmctld-rerun-agent_init-when-backup-controller-takes-over.pat Requires: slurm-config = %{version} Requires: slurm-node = %{version} +Recommends: slurm-doc = %{version} +Recommends: slurm-config-man = %{version} BuildRequires: fdupes BuildRequires: gcc-c++ BuildRequires: gtk2-devel @@ -129,6 +131,7 @@ Documentation (HTML) for the SLURM cluster managment software. Summary: Perl API to SLURM Group: Development/Languages/Perl Requires: slurm = %{version} +%{libperl_requires} %if 0%{?suse_version} < 1140 Requires: perl = %{perl_version} %else @@ -327,6 +330,12 @@ Requires(pre): shadow This package contains the slurm config files necessary direcories for the slurm daemons. +%package config-man +Summary: Config files and directories for slurm services +Group: Documentation/Man + +%description config-man +Man pages for the SLURM cluster management software config files.
%prep %setup -q -n %{name}-%{ver} @@ -381,12 +390,10 @@ rm -f %{buildroot}%{_sbindir}/capmc_resume rm -f %{buildroot}%{_sbindir}/slurmconfgen.py install -D -m644 etc/cgroup.conf.example %{buildroot}/%{_sysconfdir}/%{name}/cgroup.conf -install -D -m644 etc/cgroup_allowed_devices_file.conf.example %{buildroot}/%{_sysconfdir}/%{name}/cgroup_allowed_devices_file.conf install -D -m644 etc/layouts.d.power.conf.example %{buildroot}/%{_sysconfdir}/%{name}/layouts.d/power.conf.example install -D -m644 etc/layouts.d.power_cpufreq.conf.example %{buildroot}/%{_sysconfdir}/%{name}/layouts.d/power_cpufreq.conf.example install -D -m644 etc/layouts.d.unit.conf.example %{buildroot}/%{_sysconfdir}/%{name}/layouts.d/unit.conf.example install -D -m644 etc/slurm.conf.example %{buildroot}/%{_sysconfdir}/%{name}/slurm.conf%{?OHPC_BUILD:.example} -install -D -m755 etc/slurm.epilog.clean %{buildroot}%{_sysconfdir}/%{name}/slurm.epilog.clean install -D -m644 etc/slurmdbd.conf.example %{buildroot}/%{_sysconfdir}/%{name}/slurmdbd.conf install -D -m644 etc/slurmdbd.conf.example %{buildroot}%{_sysconfdir}/%{name}/slurmdbd.conf.example install -D -m755 contribs/sjstat %{buildroot}%{_bindir}/sjstat @@ -404,13 +411,9 @@ sed -i 's#^\(SlurmdSpoolDir=\)/.*#\1%{_localstatedir}/spool/slurm#' %{buildroot} cat >>%{buildroot}/%{_sysconfdir}/%{name}/slurm.conf < /proc/sys/vm/drop_caches\n\n#' %{buildroot}/%{_sysconfdir}/%{name}/slurm.epilog.clean # change slurmdbd.conf for our needs sed -i 's@LogFile=/var/log/slurm/slurmdbd.log@LogFile=/var/log/slurmdbd.log@'\ %{buildroot}/%{_sysconfdir}/%{name}/slurmdbd.conf @@ -629,10 +632,8 @@ exit 0 %endif %files -%defattr(-,root,root) %doc AUTHORS NEWS RELEASE_NOTES DISCLAIMER %my_license COPYING -%doc doc/html %{_bindir}/sacct %{_bindir}/sacctmgr %{_bindir}/salloc @@ -656,6 +657,10 @@ exit 0 %{?have_netloc:%{_bindir}/netloc_to_topology} %{_sbindir}/slurmctld %{_sbindir}/slurmsmwd +%dir %{_libdir}/slurm/src +%if 0%{?with_systemd} +%{_unitdir}/slurmctld.service 
+%{_sbindir}/rcslurmctld %{_mandir}/man1/sacct.1* %{_mandir}/man1/sacctmgr.1* %{_mandir}/man1/salloc.1* @@ -680,39 +685,29 @@ exit 0 %{_mandir}/man1/sjstat.1.* %{_mandir}/man8/slurmctld.* %{_mandir}/man8/spank* -%dir %{_libdir}/slurm/src -%if 0%{?with_systemd} -%{_unitdir}/slurmctld.service -%{_sbindir}/rcslurmctld %endif %files openlava -%defattr(-,root,root) %{_bindir}/bjobs %{_bindir}/bkill %{_bindir}/bsub %{_bindir}/lsid %files seff -%defattr(-,root,root) %{_bindir}/seff %{_bindir}/smail %files doc -%defattr(-,root,root) %dir %{_datadir}/doc/%{name}-%{version} %{_datadir}/doc/%{name}-%{version}/* %files -n %{libslurm} -%defattr(-,root,root) %{_libdir}/libslurm*.so.%{so_version}* %files -n libpmi%{pmi_so} -%defattr(-,root,root) %{_libdir}/libpmi*.so.%{pmi_so}* %files devel -%defattr(-,root,root) %{_prefix}/include/slurm %{_libdir}/libpmi.so %{_libdir}/libpmi2.so @@ -723,21 +718,17 @@ exit 0 %{_libdir}/pkgconfig/slurm.pc %files sview -%defattr(-,root,root) %{_bindir}/sview %{_mandir}/man1/sview.1* %files auth-none -%defattr(-,root,root) %{_libdir}/slurm/auth_none.so %files munge -%defattr(-,root,root) %{_libdir}/slurm/auth_munge.so %{_libdir}/slurm/crypto_munge.so %files -n perl-slurm -%defattr(-,root,root) %{perl_vendorarch}/Slurm.pm %{perl_vendorarch}/Slurm %{perl_vendorarch}/auto/Slurm @@ -746,7 +737,6 @@ exit 0 %{_mandir}/man3/Slurm*.3pm.* %files slurmdbd -%defattr(-,root,root) %{_sbindir}/slurmdbd %{_mandir}/man5/slurmdbd.* %{_mandir}/man8/slurmdbd.* @@ -760,13 +750,11 @@ exit 0 %{_sbindir}/rcslurmdbd %files sql -%defattr(-,root,root) %dir %{_libdir}/slurm %{_libdir}/slurm/accounting_storage_mysql.so %{_libdir}/slurm/jobcomp_mysql.so %files plugins -%defattr(-,root,root) %config %{_sysconfdir}/ld.so.conf.d/slurm.conf %dir %{_libdir}/slurm %{_libdir}/slurm/libslurmfull.so @@ -818,7 +806,6 @@ exit 0 %{_libdir}/slurm/mpi_openmpi.so %{_libdir}/slurm/mpi_pmi2.so %{_libdir}/slurm/power_none.so -%{_libdir}/slurm/preempt_job_prio.so %{_libdir}/slurm/preempt_none.so 
%{_libdir}/slurm/preempt_partition_prio.so %{_libdir}/slurm/preempt_qos.so @@ -833,7 +820,6 @@ exit 0 %{_libdir}/slurm/sched_builtin.so %{_libdir}/slurm/sched_hold.so %{_libdir}/slurm/select_alps.so -%{_libdir}/slurm/select_bluegene.so %{_libdir}/slurm/select_cons_res.so %{_libdir}/slurm/select_cray.so %{_libdir}/slurm/select_linear.so @@ -862,12 +848,10 @@ exit 0 %{_libdir}/slurm/node_features_knl_generic.so %files lua -%defattr(-,root,root) %{_libdir}/slurm/job_submit_lua.so %{_libdir}/slurm/proctrack_lua.so %files torque -%defattr(-,root,root) %{_bindir}/pbsnodes %{_bindir}/qalter %{_bindir}/qdel @@ -882,11 +866,9 @@ exit 0 %{_libdir}/slurm/spank_pbs.so %files sjstat -%defattr(-,root,root) %{_bindir}/sjstat %files pam_slurm -%defattr(-,root,root) %doc ../README.pam_slurm ../README.pam_slurm_adopt /%_lib/security/pam_slurm.so /%_lib/security/pam_slurm_adopt.so @@ -910,14 +892,14 @@ exit 0 %config(noreplace) %{_sysconfdir}/%{name}/slurm.conf %{?OHPC_BUILD:%config %{_sysconfdir}/%{name}/slurm.conf.example} %config(noreplace) %{_sysconfdir}/%{name}/cgroup.conf -%config(noreplace) %{_sysconfdir}/%{name}/cgroup_allowed_devices_file.conf -%config(noreplace) %{_sysconfdir}/%{name}/slurm.epilog.clean %config(noreplace) %{_sysconfdir}/%{name}/layouts.d/power.conf.example %config(noreplace) %{_sysconfdir}/%{name}/layouts.d/power_cpufreq.conf.example %config(noreplace) %{_sysconfdir}/%{name}/layouts.d/unit.conf.example %{?OHPC_BUILD:%attr(0755, %slurm_u, %slurm_g) %_localstatedir/lib/slurm} %{?with_systemd:%{_tmpfilesdir}/%{name}.conf} %dir %{_var}/spool/slurm + +%files config-man %{_mandir}/man5/acct_gather.conf.* %{_mandir}/man5/burst_buffer.conf.* %{_mandir}/man5/ext_sensors.conf.* diff --git a/slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch b/slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch index 6d875dd..8caa2d7 100644 --- a/slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch +++ 
b/slurmctld-rerun-agent_init-when-backup-controller-takes-over.patch @@ -1,8 +1,8 @@ From: Egbert Eich -Date: Tue Jul 31 17:31:15 2018 +0200 +Date: Tue Nov 20 09:22:15 2018 +0100 Subject: slurmctld: rerun agent_init() when backup controller takes over Patch-mainline: Not yet -Git-commit: 169d9522c89a10dcffbf1403c20b4e6249bac79b +Git-commit: 21a7abc02e4a27cc64a213ba1fc8572a20e21ba9 References: bsc#1084917 A slurmctld backup controller often fails to clean up jobs which have @@ -35,7 +35,7 @@ Signed-off-by: Egbert Eich src/slurmctld/backup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c -index 24ddcde..cf3bb43 100644 +index de74513..2b4c74e 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -65,6 +65,7 @@ @@ -44,15 +44,15 @@ index 24ddcde..cf3bb43 100644 #include "src/slurmctld/trigger_mgr.h" +#include "src/slurmctld/agent.h" - #define SHUTDOWN_WAIT 2 /* Time to wait for primary server shutdown */ - -@@ -225,6 +226,9 @@ void run_backup(slurm_trigger_callbacks_t *callbacks) + #define _DEBUG 0 + #define SHUTDOWN_WAIT 2 /* Time to wait for primary server shutdown */ +@@ -258,6 +259,9 @@ void run_backup(slurm_trigger_callbacks_t *callbacks) + error("Unable to recover slurm state"); abort(); } - slurmctld_config.shutdown_time = (time_t) 0; + /* Reinit agent in case it has been terminated - agent_init() + will check itself */ + agent_init(); + slurmctld_config.shutdown_time = (time_t) 0; unlock_slurmctld(config_write_lock); select_g_select_nodeinfo_set_all(); - diff --git a/slurmctld-uses-xdaemon_-for-systemd.patch b/slurmctld-uses-xdaemon_-for-systemd.patch index c09c5a3..333a91b 100644 --- a/slurmctld-uses-xdaemon_-for-systemd.patch +++ b/slurmctld-uses-xdaemon_-for-systemd.patch @@ -1,48 +1,47 @@ -From f0650e14983c9551fd644697285d84b35dad16aa Mon Sep 17 00:00:00 2001 -From: Christian Goll -Date: Mon, 9 Apr 2018 10:23:01 +0200 -Subject: [PATCH 2/6] slurmctld uses xdaemon_* for systemd +From: Egbert 
Eich +Date: Tue Nov 20 09:47:47 2018 +0100 +Subject: slurmctld uses xdaemon_* for systemd +Patch-mainline: Not yet +Git-commit: 0f0c00a4a57d12be04d16f4646c186d3e5f03dd1 +References: bsc#1084125 +Signed-off-by: Egbert Eich --- - src/slurmctld/controller.c | 11 +++++++++-- + slurm-18.08.3/src/slurmctld/controller.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c -index 7867e1d479..bd1c12600e 100644 +diff --git a/slurm-18.08.3/src/slurmctld/controller.c b/slurm-18.08.3/src/slurmctld/controller.c +index a1762de..d123db3 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c -@@ -250,7 +250,7 @@ static bool _wait_for_server_thread(void); +@@ -260,7 +260,7 @@ static void * _wait_primary_prog(void *arg); /* main - slurmctld main function, start various threads and process RPCs */ int main(int argc, char **argv) { - int cnt, error_code, i; + int cnt, error_code, i, fd; + struct timeval start, now; struct stat stat_buf; struct rlimit rlim; - /* Locks: Write configuration, job, node, and partition */ -@@ -298,7 +298,11 @@ int main(int argc, char **argv) +@@ -326,7 +326,11 @@ int main(int argc, char **argv) if (daemonize) { slurmctld_config.daemonize = 1; - if (xdaemon()) + /* -+ * Just start daemonizing if not in test mode ++ * Just start daemonizing if not in test mode + */ + fd = xdaemon_init(); + if (fd == -1) error("daemon(): %m"); log_set_timefmt(slurmctld_conf.log_fmt); log_alter(log_opts, LOG_DAEMON, -@@ -318,6 +322,9 @@ int main(int argc, char **argv) - */ - _init_pidfile(); - _become_slurm_user(); +@@ -348,6 +352,9 @@ int main(int argc, char **argv) + _init_pidfile(); + _become_slurm_user(); + } + if (daemonize) { + xdaemon_finish(fd); + } /* * Create StateSaveLocation directory if necessary. 
--- -2.13.7 - diff --git a/slurmd-uses-xdaemon_-for-systemd.patch b/slurmd-uses-xdaemon_-for-systemd.patch index 09bea62..712c7c8 100644 --- a/slurmd-uses-xdaemon_-for-systemd.patch +++ b/slurmd-uses-xdaemon_-for-systemd.patch @@ -1,17 +1,19 @@ -From 712caf6306c5b08b12e5a481d60bb91adc6c625e Mon Sep 17 00:00:00 2001 -From: Christian Goll -Date: Mon, 9 Apr 2018 10:59:57 +0200 -Subject: [PATCH 3/6] slurmd uses xdaemon_* for systemd +From: Egbert Eich +Date: Tue Nov 20 09:52:22 2018 +0100 +Subject: slurmd uses xdaemon_* for systemd +Patch-mainline: Not yet +Git-commit: 3988e62eb8c20a29a7a016f264c6d65e114cfdf4 +References: bsc#1084125 +Signed-off-by: Egbert Eich --- - src/slurmd/slurmd/slurmd.c | 8 ++++++-- + slurm-18.08.3/src/slurmd/slurmd/slurmd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c -index 140fd70adc..92d1faf0bc 100644 +diff --git a/slurm-18.08.3/src/slurmd/slurmd/slurmd.c b/slurm-18.08.3/src/slurmd/slurmd/slurmd.c +index aa35f8a..b2feaf9 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c -@@ -214,7 +214,7 @@ static void _wait_for_all_threads(int secs); +@@ -215,7 +215,7 @@ static void _wait_for_all_threads(int secs); int main (int argc, char **argv) { @@ -20,7 +22,7 @@ index 140fd70adc..92d1faf0bc 100644 int blocked_signals[] = {SIGPIPE, 0}; int cc; char *oom_value; -@@ -299,7 +299,8 @@ main (int argc, char **argv) +@@ -300,7 +300,8 @@ main (int argc, char **argv) * Become a daemon if desired. 
*/ if (conf->daemonize) { @@ -30,7 +32,7 @@ index 140fd70adc..92d1faf0bc 100644 error("Couldn't daemonize slurmd: %m"); } test_core_limit(); -@@ -355,6 +356,9 @@ main (int argc, char **argv) +@@ -356,6 +357,9 @@ main (int argc, char **argv) conf->pid = getpid(); pidfd = create_pidfile(conf->pidfile, 0); @@ -40,6 +42,3 @@ index 140fd70adc..92d1faf0bc 100644 rfc2822_timestamp(time_stamp, sizeof(time_stamp)); info("%s started on %s", slurm_prog_name, time_stamp); --- -2.13.7 - diff --git a/slurmdbd-uses-xdaemon_-for-systemd.patch b/slurmdbd-uses-xdaemon_-for-systemd.patch index 44b6000..8995ecb 100644 --- a/slurmdbd-uses-xdaemon_-for-systemd.patch +++ b/slurmdbd-uses-xdaemon_-for-systemd.patch @@ -1,14 +1,16 @@ -From 9533827148d1214b8fe9a9ba47a9dd20287085d7 Mon Sep 17 00:00:00 2001 -From: Christian Goll -Date: Mon, 9 Apr 2018 11:13:54 +0200 -Subject: [PATCH 4/6] slurmdbd uses xdaemon_* for systemd +From: Egbert Eich +Date: Tue Nov 20 09:58:47 2018 +0100 +Subject: slurmdbd uses xdaemon_* for systemd +Patch-mainline: Not yet +Git-commit: 8a286cbaf3fe7ebe009106675a4624a2272d616f +References: bsc#1084125 +Signed-off-by: Egbert Eich --- - src/slurmdbd/slurmdbd.c | 18 +++++++++++++----- - 1 file changed, 13 insertions(+), 5 deletions(-) - -diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c -index ae2f27d617..7b336b824f 100644 + slurm-18.08.3/src/slurmdbd/slurmdbd.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) +diff --git a/slurm-18.08.3/src/slurmdbd/slurmdbd.c b/slurm-18.08.3/src/slurmdbd/slurmdbd.c +index 471c724..8c7ea94 100644 --- a/src/slurmdbd/slurmdbd.c +++ b/src/slurmdbd/slurmdbd.c @@ -103,7 +103,7 @@ static List lft_rgt_list = NULL; @@ -28,8 +30,8 @@ index ae2f27d617..7b336b824f 100644 void *db_conn = NULL; assoc_init_args_t assoc_init_arg; -@@ -150,8 +151,9 @@ int main(int argc, char **argv) - slurmdbd_defs_init(slurmdbd_conf->auth_info); +@@ -139,8 +140,9 @@ int main(int argc, char **argv) + _update_nice(); 
_kill_old_slurmdbd(); - if (foreground == 0) @@ -40,17 +42,18 @@ index ae2f27d617..7b336b824f 100644 /* * Need to create pidfile here in case we setuid() below -@@ -160,6 +162,9 @@ int main(int argc, char **argv) +@@ -149,7 +151,9 @@ int main(int argc, char **argv) * able to write a core dump. */ _init_pidfile(); +- + if (foreground == 0) { + xdaemon_finish(pipefd); + } - _become_slurm_user(); - if (foreground == 0) - _set_work_dir(); -@@ -595,11 +600,14 @@ static void _init_pidfile(void) + /* + * Do plugin init's after _init_pidfile so systemd is happy as + * slurm_acct_storage_init() could take a long time to finish if running +@@ -598,11 +602,14 @@ static void _init_pidfile(void) /* Become a daemon (child of init) and * "cd" to the LogFile directory (if one is configured) */ @@ -67,6 +70,3 @@ index ae2f27d617..7b336b824f 100644 } static void _set_work_dir(void) --- -2.13.7 - diff --git a/slurmsmwd-uses-xdaemon_-for-systemd.patch b/slurmsmwd-uses-xdaemon_-for-systemd.patch index 8690576..2e87e22 100644 --- a/slurmsmwd-uses-xdaemon_-for-systemd.patch +++ b/slurmsmwd-uses-xdaemon_-for-systemd.patch @@ -1,17 +1,19 @@ -From b01f2ce29ce362b0724ea8104aadbab45122e9a4 Mon Sep 17 00:00:00 2001 -From: Christian Goll -Date: Mon, 4 Jun 2018 14:44:31 +0200 -Subject: [PATCH 5/6] slurmsmwd uses xdaemon_* for systemd +From: Egbert Eich +Date: Tue Nov 20 10:07:35 2018 +0100 +Subject: slurmsmwd uses xdaemon_* for systemd +Patch-mainline: Not yet +Git-commit: 110d76a0c56b35c8c3c9b24e136476a67a6eb413 +References: bsc#1084125 +Signed-off-by: Egbert Eich --- - contribs/cray/slurmsmwd/main.c | 7 ++++++- + slurm-18.08.3/contribs/cray/slurmsmwd/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/contribs/cray/slurmsmwd/main.c b/contribs/cray/slurmsmwd/main.c -index 8d405b2916..c1d3fce2d4 100644 +diff --git a/slurm-18.08.3/contribs/cray/slurmsmwd/main.c b/slurm-18.08.3/contribs/cray/slurmsmwd/main.c +index a5247bf..1efb1f8 100644 --- 
a/contribs/cray/slurmsmwd/main.c +++ b/contribs/cray/slurmsmwd/main.c -@@ -536,6 +536,7 @@ int main(int argc, char **argv) +@@ -538,6 +538,7 @@ int main(int argc, char **argv) { pthread_t processing_thread, signal_handler_thread; pthread_attr_t thread_attr; @@ -19,7 +21,7 @@ index 8d405b2916..c1d3fce2d4 100644 _parse_commandline(argc, argv); -@@ -544,11 +545,15 @@ int main(int argc, char **argv) +@@ -546,11 +547,15 @@ int main(int argc, char **argv) slurmsmwd_print_config(); if (!foreground) { @@ -36,6 +38,3 @@ index 8d405b2916..c1d3fce2d4 100644 slurm_mutex_init(&down_node_lock); --- -2.13.7 -