Accepting request 1068320 from network:cluster
- updated to 23.02.0 * Highlights + slurmctld - Add new RPC rate limiting feature. This is enabled through SlurmctldParameters=rl_enable, otherwise disabled by default. + Make scontrol reconfigure and sending a SIGHUP to the slurmctld behave the same. If you were using SIGHUP as a 'lighter' scontrol reconfigure to rotate logs please update your scripts to use SIGUSR2 instead. + Change cloud nodes to show by default. PrivateData=cloud is no longer needed. + sreport - Count planned (FKA reserved) time for jobs running in IGNORE_JOBS reservations. Previously was lumped into IDLE time. + job_container/tmpfs - Support running with an arbitrary list of private mount points (/tmp and /dev/shm are the default, but not required). + job_container/tmpfs - Set more environment variables in InitScript. + Make all cgroup directories created by Slurm owned by root. This was the behavior in cgroup/v2 but not in cgroup/v1 where by default the step directories ownership were set to the user and group of the job. + accounting_storage/mysql - change purge/archive to calculate record ages based on end time, rather than start or submission times. + job_submit/lua - add support for log_user() from slurm_job_modify(). + Run the following scripts in slurmscriptd instead of slurmctld: ResumeProgram, ResumeFailProgram, SuspendProgram, ResvProlog, ResvEpilog, and RebootProgram (only with SlurmctldParameters=reboot_from_controller). + Only permit changing log levels with 'srun --slurmd-debug' by root or SlurmUser. + slurmctld will fatal() when reconfiguring the job_submit plugin fails. + Add PowerDownOnIdle partition option to power down nodes after nodes become idle. + Add "[jobid.stepid]" prefix from slurmstepd and "slurmscriptd" prefix from slurmcriptd to Syslog logging. Previously was only happening when OBS-URL: https://build.opensuse.org/request/show/1068320 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/slurm?expand=0&rev=83
This commit is contained in:
commit
8a8f7dcb78
@ -1,9 +1,8 @@
|
|||||||
From: Egbert Eich <eich@suse.com>
|
From: Egbert Eich <eich@suse.com>
|
||||||
Date: Wed Jun 15 08:41:45 2022 +0200
|
Date: Mon Feb 20 21:35:37 2023 +0100
|
||||||
Subject: Fix test 38.11
|
Subject: Fix test 38.11
|
||||||
Patch-mainline: Not yet
|
Patch-mainline: Not yet
|
||||||
Git-repo: https://github.com/SchedMD/slurm
|
Git-commit: 80c7c8b897fde0c025f695b8301f4cbbc605581d
|
||||||
Git-commit: 235768790cb2e9cf011e6d08116a468ebec71582
|
|
||||||
References:
|
References:
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||||
@ -12,19 +11,19 @@ Signed-off-by: Egbert Eich <eich@suse.de>
|
|||||||
testsuite/expect/test38.11 | 6 +++---
|
testsuite/expect/test38.11 | 6 +++---
|
||||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
diff --git a/testsuite/expect/test38.11 b/testsuite/expect/test38.11
|
diff --git a/testsuite/expect/test38.11 b/testsuite/expect/test38.11
|
||||||
index d2c07d60c7..bf5d10ecc9 100755
|
index 3d2954f..89e5333 100755
|
||||||
--- a/testsuite/expect/test38.11
|
--- a/testsuite/expect/test38.11
|
||||||
+++ b/testsuite/expect/test38.11
|
+++ b/testsuite/expect/test38.11
|
||||||
@@ -99,9 +99,9 @@ make_bash_script $script "
|
@@ -83,9 +83,9 @@ if {$het_job_id == 0} {
|
||||||
$srun -N1 -n1 --het-group=0 mkdir -p $tmp_dir/$node1
|
run_command -fail "$srun -N1 -n1 --het-group=0 mkdir -p $tmp_dir"
|
||||||
$srun -N1 -n1 --het-group=1 mkdir -p $tmp_dir/$node2
|
run_command -fail "$srun -N1 -n1 --het-group=1 mkdir -p $tmp_dir"
|
||||||
|
|
||||||
-$sbcast -f -j$het_job_id $srun $tmp_dir/file
|
-run_command -fail "$sbcast -f --jobid=$het_job_id $srun $tmp_dir/file"
|
||||||
-$sbcast -f -j${het_job_id}+0 $srun $tmp_dir/$node1/file_comp0
|
-run_command -fail "$sbcast -f --jobid=${het_job_id}+0 $srun $tmp_dir/file_comp0"
|
||||||
-$sbcast -f -j${het_job_id}+1 $srun $tmp_dir/$node2/file_comp1
|
-run_command -fail "$sbcast -f --jobid=${het_job_id}+1 $srun $tmp_dir/file_comp1"
|
||||||
+$sbcast -f -j\$SLURM_JOBID $srun $tmp_dir/file
|
+run_command -fail "$sbcast -f --jobid=\$SLURM_JOBID $srun $tmp_dir/file"
|
||||||
+$sbcast -f -j\$SLURM_JOBID $srun $tmp_dir/$node1/file_comp0
|
+run_command -fail "$sbcast -f --jobid=\$SLURM_JOBID $srun $tmp_dir/file_comp0"
|
||||||
+$sbcast -f -j\$((SLURM_JOBID+1)) $srun $tmp_dir/$node2/file_comp1
|
+run_command -fail "$sbcast -f --jobid=\$((SLURM_JOBID+1)) $srun $tmp_dir/file_comp1"
|
||||||
|
|
||||||
echo -n \"\nChecking node 1: \"
|
subtest {![run_command_status "$srun -N1 -n1 --het-group=0 ls $tmp_dir/file"]} "Verify main file is in node of component 0"
|
||||||
$srun -Q -N1 -n1 --het-group=1 ls $tmp_dir/file
|
subtest {![run_command_status "$srun -N1 -n1 --het-group=1 ls $tmp_dir/file"]} "Verify main file is in node of component 1"
|
||||||
|
@ -1,22 +1,25 @@
|
|||||||
From: Sebastian Krahmer <krahmer@suse.com>
|
From: Egbert Eich <eich@suse.com>
|
||||||
Date: Thu Feb 2 09:49:38 2017 +0100
|
Date: Mon Feb 20 21:29:27 2023 +0100
|
||||||
Subject: [PATCH]pam_slurm: Initialize arrays and pass sizes
|
Subject: pam_slurm: Initialize arrays and pass sizes
|
||||||
Git-repo: https://github.com/SchedMD/slurm
|
Patch-mainline: Not yet
|
||||||
Git-commit: fbfbb90f6a2e7f134220991ed3263894ba365411
|
Git-commit: 5feca5c29d4e820dafd8d34c0343944b28890902
|
||||||
References: bsc#1007053
|
References: bsc#1007053
|
||||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
|
||||||
|
|
||||||
PAM is security critical:
|
PAM is security critical:
|
||||||
- clear arrays
|
- clear arrays
|
||||||
- ensure strings are NULL-terminated.
|
- ensure strings are NULL-terminated.
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||||
|
Originally-from: Sebastian Krahmer <krahmer@suse.com>
|
||||||
|
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||||
---
|
---
|
||||||
|
contribs/pam/pam_slurm.c | 20 +++++++++++---------
|
||||||
diff -Nrua a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
|
1 file changed, 11 insertions(+), 9 deletions(-)
|
||||||
|
diff --git a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
|
||||||
|
index 20d21a9..363b6ae 100644
|
||||||
--- a/contribs/pam/pam_slurm.c
|
--- a/contribs/pam/pam_slurm.c
|
||||||
+++ b/contribs/pam/pam_slurm.c
|
+++ b/contribs/pam/pam_slurm.c
|
||||||
@@ -266,9 +266,9 @@
|
@@ -266,9 +266,9 @@ static int
|
||||||
_gethostname_short (char *name, size_t len)
|
_gethostname_short (char *name, size_t len)
|
||||||
{
|
{
|
||||||
int error_code, name_len;
|
int error_code, name_len;
|
||||||
@ -28,7 +31,7 @@ diff -Nrua a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
|
|||||||
if (error_code)
|
if (error_code)
|
||||||
return error_code;
|
return error_code;
|
||||||
|
|
||||||
@@ -296,13 +296,13 @@
|
@@ -296,13 +296,13 @@ static int
|
||||||
_slurm_match_allocation(uid_t uid)
|
_slurm_match_allocation(uid_t uid)
|
||||||
{
|
{
|
||||||
int authorized = 0, i;
|
int authorized = 0, i;
|
||||||
@ -37,14 +40,14 @@ diff -Nrua a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
|
|||||||
char *nodename = NULL;
|
char *nodename = NULL;
|
||||||
job_info_msg_t * msg;
|
job_info_msg_t * msg;
|
||||||
|
|
||||||
slurm_conf_init(NULL);
|
slurm_init(NULL);
|
||||||
|
|
||||||
- if (_gethostname_short(hostname, sizeof(hostname)) < 0) {
|
- if (_gethostname_short(hostname, sizeof(hostname)) < 0) {
|
||||||
+ if (_gethostname_short(hostname, sizeof(hostname) - 1) < 0) {
|
+ if (_gethostname_short(hostname, sizeof(hostname) - 1) < 0) {
|
||||||
_log_msg(LOG_ERR, "gethostname: %m");
|
_log_msg(LOG_ERR, "gethostname: %m");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -425,7 +425,7 @@
|
@@ -425,7 +425,7 @@ _send_denial_msg(pam_handle_t *pamh, struct _options *opts,
|
||||||
*/
|
*/
|
||||||
extern void libpam_slurm_init (void)
|
extern void libpam_slurm_init (void)
|
||||||
{
|
{
|
||||||
@ -53,7 +56,7 @@ diff -Nrua a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
|
|||||||
|
|
||||||
if (slurm_h)
|
if (slurm_h)
|
||||||
return;
|
return;
|
||||||
@@ -433,10 +433,10 @@
|
@@ -433,10 +433,10 @@ extern void libpam_slurm_init (void)
|
||||||
/* First try to use the same libslurm version ("libslurm.so.24.0.0"),
|
/* First try to use the same libslurm version ("libslurm.so.24.0.0"),
|
||||||
* Second try to match the major version number ("libslurm.so.24"),
|
* Second try to match the major version number ("libslurm.so.24"),
|
||||||
* Otherwise use "libslurm.so" */
|
* Otherwise use "libslurm.so" */
|
||||||
@ -66,7 +69,7 @@ diff -Nrua a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
|
|||||||
_log_msg (LOG_ERR, "Unable to write libslurmname\n");
|
_log_msg (LOG_ERR, "Unable to write libslurmname\n");
|
||||||
} else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) {
|
} else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) {
|
||||||
return;
|
return;
|
||||||
@@ -445,8 +445,10 @@
|
@@ -445,8 +445,10 @@ extern void libpam_slurm_init (void)
|
||||||
libslurmname, dlerror ());
|
libslurmname, dlerror ());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:f687c98c4f7c0b7409f865771bbb05986daa3e207616667a9aa7390ba5a50fce
|
|
||||||
size 7098772
|
|
3
slurm-23.02.0.tar.bz2
Normal file
3
slurm-23.02.0.tar.bz2
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:21b902d24871a57f9074c58be194678b4410e9fccda3553305a008aa2954cea0
|
||||||
|
size 7258420
|
243
slurm.changes
243
slurm.changes
@ -1,7 +1,248 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Feb 20 20:45:59 UTC 2023 - Egbert Eich <eich@suse.com>
|
||||||
|
|
||||||
|
- updated to 23.02.0
|
||||||
|
* Highlights
|
||||||
|
+ slurmctld - Add new RPC rate limiting feature. This is enabled through
|
||||||
|
SlurmctldParameters=rl_enable, otherwise disabled by default.
|
||||||
|
+ Make scontrol reconfigure and sending a SIGHUP to the slurmctld behave
|
||||||
|
the same. If you were using SIGHUP as a 'lighter' scontrol reconfigure
|
||||||
|
to rotate logs please update your scripts to use SIGUSR2 instead.
|
||||||
|
+ Change cloud nodes to show by default. PrivateData=cloud is no longer
|
||||||
|
needed.
|
||||||
|
+ sreport - Count planned (FKA reserved) time for jobs running in
|
||||||
|
IGNORE_JOBS reservations. Previously was lumped into IDLE time.
|
||||||
|
+ job_container/tmpfs - Support running with an arbitrary list of private
|
||||||
|
mount points (/tmp and /dev/shm are the default, but not required).
|
||||||
|
+ job_container/tmpfs - Set more environment variables in InitScript.
|
||||||
|
+ Make all cgroup directories created by Slurm owned by root. This was the
|
||||||
|
behavior in cgroup/v2 but not in cgroup/v1 where by default the step
|
||||||
|
directories ownership were set to the user and group of the job.
|
||||||
|
+ accounting_storage/mysql - change purge/archive to calculate record ages
|
||||||
|
based on end time, rather than start or submission times.
|
||||||
|
+ job_submit/lua - add support for log_user() from slurm_job_modify().
|
||||||
|
+ Run the following scripts in slurmscriptd instead of slurmctld:
|
||||||
|
ResumeProgram, ResumeFailProgram, SuspendProgram, ResvProlog, ResvEpilog,
|
||||||
|
and RebootProgram (only with SlurmctldParameters=reboot_from_controller).
|
||||||
|
+ Only permit changing log levels with 'srun --slurmd-debug' by root
|
||||||
|
or SlurmUser.
|
||||||
|
+ slurmctld will fatal() when reconfiguring the job_submit plugin fails.
|
||||||
|
+ Add PowerDownOnIdle partition option to power down nodes after nodes
|
||||||
|
become idle.
|
||||||
|
+ Add "[jobid.stepid]" prefix from slurmstepd and "slurmscriptd" prefix
|
||||||
|
from slurmscriptd to Syslog logging. Previously was only happening when
|
||||||
|
logging to a file.
|
||||||
|
+ Add purge and archive functionality for job environment and job batch
|
||||||
|
script records.
|
||||||
|
+ Extend support for Include files to all "configless" client commands.
|
||||||
|
+ Make node weight usable for powered down and rebooting nodes.
|
||||||
|
+ Removed 'launch' plugin.
|
||||||
|
+ Add "Extra" field to job to store extra information other than a comment.
|
||||||
|
+ Add usage gathering for AMD (requires ROCM 5.5+) and NVIDIA gpus.
|
||||||
|
+ Add job's allocated nodes, features, oversubscribe, partition, and
|
||||||
|
reservation to SLURM_RESUME_FILE output for power saving.
|
||||||
|
+ Automatically create directories for stdout/stderr output files. Paths
|
||||||
|
may use %j and related substitution characters as well.
|
||||||
|
+ Add --tres-per-task to salloc/sbatch/srun.
|
||||||
|
+ Allow nodefeatures plugin features to work with cloud nodes.
|
||||||
|
e.g. - Powered down nodes have no active changeable features.
|
||||||
|
- Nodes can't be changed to other active features until powered down.
|
||||||
|
- Active changeable features are reset/cleared on power down.
|
||||||
|
+ Make slurmstepd cgroups constrained by total configured memory from
|
||||||
|
slurm.conf (NodeName=<> RealMemory=#) instead of total physical memory.
|
||||||
|
+ node_features/helpers - add support for the OR and parentheses operators
|
||||||
|
in a --constraint expression.
|
||||||
|
+ slurmctld will fatal() when [Prolog|Epilog]Slurmctld are defined but
|
||||||
|
are not executable.
|
||||||
|
+ Validate node registered active features are a super set of node's
|
||||||
|
currently active changeable features.
|
||||||
|
+ On clusters without any PrologFlags options, batch jobs with failed
|
||||||
|
prologs no longer generate an output file.
|
||||||
|
+ Add SLURM_JOB_START_TIME and SLURM_JOB_END_TIME environment variables.
|
||||||
|
+ Add SuspendExcStates option to slurm.conf to avoid suspending/powering
|
||||||
|
down specific node states.
|
||||||
|
+ Add support for DCMI power readings in IPMI plugin.
|
||||||
|
+ slurmrestd served /slurm/v0.0.39 and /slurmdb/v0.0.39 endpoints had major
|
||||||
|
changes from prior versions. Almost all schemas have been renamed and
|
||||||
|
modified. Sites using OpenAPI Generator clients are highly suggested to
|
||||||
|
upgrade to using at least version 6.x due to limitations with prior
|
||||||
|
versions.
|
||||||
|
+ Allow for --nodelist to contain more nodes than required by --nodes.
|
||||||
|
+ Rename "nodes" to "nodes_resume" in SLURM_RESUME_FILE job output.
|
||||||
|
+ Rename "all_nodes" to "all_nodes_resume" in SLURM_RESUME_FILE output.
|
||||||
|
+ Add jobcomp/kafka plugin.
|
||||||
|
+ Add new PreemptParameters=reclaim_licenses option which will allow higher
|
||||||
|
priority jobs to preempt jobs to free up used licenses. (This is only
|
||||||
|
enabled with PreemptModes of CANCEL and REQUEUE, as Slurm cannot
|
||||||
|
guarantee suspended jobs will release licenses correctly.)
|
||||||
|
+ hpe/slingshot - add support for the instant-on feature.
|
||||||
|
+ Add ability to update SuspendExc* parameters with scontrol.
|
||||||
|
+ Add ability to restore SuspendExc* parameters on restart with slurmctld
|
||||||
|
-R option.
|
||||||
|
+ Add ability to clear a GRES specification by setting it to "0" via
|
||||||
|
'scontrol update job'.
|
||||||
|
+ Add SLURM_JOB_OVERSUBSCRIBE environment variable for Epilog, Prolog,
|
||||||
|
EpilogSlurmctld, PrologSlurmctld, and mail output.
|
||||||
|
+ System node down reasons are appended to existing reasons, separated
|
||||||
|
by ':'.
|
||||||
|
+ New command scrun has been added. scrun acts as an Open Container
|
||||||
|
Initiative (OCI) runtime proxy to run containers seamlessly via Slurm.
|
||||||
|
+ Fixed GpuFreqDef option. When set in slurm.conf, it will be used if
|
||||||
|
--gpu-freq was not explicitly set by the job step.
|
||||||
|
* Configuration Changes
|
||||||
|
+ job_container.conf - Added "Dirs" option to list desired private mount
|
||||||
|
points.
|
||||||
|
+ node_features plugins - invalid users specified for AllowUserBoot will
|
||||||
|
now result in fatal() rather than just an error.
|
||||||
|
+ Deprecate AllowedKmemSpace, ConstrainKmemSpace, MaxKmemPercent, and
|
||||||
|
MinKmemSpace.
|
||||||
|
+ Allow jobs to queue even if the user is not in AllowGroups when
|
||||||
|
EnforcePartLimits=no is set. This ensures consistency for all the
|
||||||
|
Partition access controls, and matches the documented behavior for
|
||||||
|
EnforcePartLimits.
|
||||||
|
+ Add InfluxDBTimeout parameter to acct_gather.conf.
|
||||||
|
+ job_container/tmpfs - add support for expanding %h and %n in BasePath.
|
||||||
|
+ slurm.conf - Removed SlurmctldPlugstack option.
|
||||||
|
+ Add new SlurmctldParameters=validate_nodeaddr_threads=<number> option to
|
||||||
|
allow concurrent hostname resolution at slurmctld startup.
|
||||||
|
+ Add new AccountingStoreFlags=job_extra option to store a job's extra field
|
||||||
|
in the database.
|
||||||
|
+ Add new "defer_batch" option to SchedulerParameters to only defer
|
||||||
|
scheduling for batch jobs.
|
||||||
|
+ Add new DebugFlags option 'JobComp' to replace 'Elasticsearch'.
|
||||||
|
+ Add configurable job requeue limit parameter - MaxBatchRequeue - in
|
||||||
|
slurm.conf to permit changes from the old hard-coded value of 5.
|
||||||
|
+ helpers.conf - Allow specification of node specific features.
|
||||||
|
+ helpers.conf - Allow many features to one helper script.
|
||||||
|
+ job_container/tmpfs - Add "Shared" option to support shared namespaces.
|
||||||
|
This allows autofs to work with the job_container/tmpfs plugin when
|
||||||
|
enabled.
|
||||||
|
+ acct_gather.conf - Added EnergyIPMIPowerSensors=Node=DCMI and
|
||||||
|
Node=DCMI_ENHANCED.
|
||||||
|
+ Add new "getnameinfo_cache_timeout=<number>" option to
|
||||||
|
CommunicationParameters to adjust or disable caching the results of
|
||||||
|
getnameinfo().
|
||||||
|
+ Add new PrologFlags=ForceRequeueOnFail option to automatically requeue
|
||||||
|
batch jobs on Prolog failures regardless of the job --requeue setting.
|
||||||
|
+ Add HealthCheckNodeState=NONDRAINED_IDLE option.
|
||||||
|
+ Add 'explicit' to Flags in gres.conf. This makes it so the gres is not
|
||||||
|
automatically added to a job's allocation when --exclusive is used. Note
|
||||||
|
that this is a per-node flag.
|
||||||
|
+ Moved the "preempt_" options from SchedulerParameters to
|
||||||
|
PreemptParameters, and dropped the prefix from the option names.
|
||||||
|
(The old options will still be parsed for backwards compatibility,
|
||||||
|
but are now undocumented.)
|
||||||
|
+ Add LaunchParameters=ulimit_pam_adopt, which enables setting RLIMIT_RSS
|
||||||
|
in adopted processes.
|
||||||
|
+ Update SwitchParameters=job_vni to enable/disable creating job VNIs
|
||||||
|
for all jobs, or when a user requests them.
|
||||||
|
+ Update SwitchParameters=single_node_vni to enable/disable creating
|
||||||
|
single node vnis for all jobs, or when a user requests them.
|
||||||
|
+ Add ability to preserve SuspendExc* parameters on reconfig with
|
||||||
|
ReconfigFlags=KeepPowerSaveSettings.
|
||||||
|
+ slurmdbd.conf - Add new AllResourcesAbsolute to force all new resources
|
||||||
|
to be created with the Absolute flag.
|
||||||
|
+ topology/tree - Add new TopologyParam=SwitchAsNodeRank option to reorder
|
||||||
|
nodes based on switch layout. This can be useful if the naming convention
|
||||||
|
for the nodes does not naturally map to the network topology.
|
||||||
|
+ Removed the default setting for GpuFreqDef. If unset, no attempt to change
|
||||||
|
the GPU frequency will be made if --gpu-freq is not set for the step.
|
||||||
|
* Command Changes
|
||||||
|
+ sacctmgr - no longer force updates to the AdminComment, Comment, or
|
||||||
|
SystemComment to lower-case.
|
||||||
|
+ sinfo - Add -F/--future option to sinfo to display future nodes.
|
||||||
|
+ sacct - Rename 'Reserved' field to 'Planned' to match sreport and the
|
||||||
|
nomenclature of the 'Planned' node.
|
||||||
|
+ scontrol - advanced reservation flag MAINT will no longer replace nodes,
|
||||||
|
similar to STATIC_ALLOC
|
||||||
|
+ sbatch - add parsing for #PBS -d and #PBS -w.
|
||||||
|
+ scontrol show assoc_mgr will show username(uid) instead of uid in
|
||||||
|
QoS section.
|
||||||
|
+ Add strigger --draining and -R/--resume options.
|
||||||
|
+ Change --oversubscribe and --exclusive to be mutually exclusive for
|
||||||
|
job submission. Job submission commands will now fatal if both are set.
|
||||||
|
Previously, these options would override each other, with the last one
|
||||||
|
in the job submission command taking effect.
|
||||||
|
+ scontrol - Requested TRES and allocated TRES will now always be printed
|
||||||
|
when showing jobs, instead of one TRES output that was either the
|
||||||
|
requested or allocated.
|
||||||
|
+ srun --ntasks-per-core now applies to job and step allocations. Now,
|
||||||
|
use of --ntasks-per-core=1 implies --cpu-bind=cores and
|
||||||
|
--ntasks-per-core>1 implies --cpu-bind=threads.
|
||||||
|
+ salloc/sbatch/srun - Check and abort if ntasks-per-core >
|
||||||
|
threads-per-core.
|
||||||
|
+ scontrol - Add ResumeAfter=<secs> option to "scontrol update nodename=".
|
||||||
|
+ Add a new "nodes=" argument to scontrol setdebug to allow the debug
|
||||||
|
level on the slurmd processes to be temporarily altered.
|
||||||
|
+ Add a new "nodes=" argument to "scontrol setdebugflags" as well.
|
||||||
|
+ Make it so scrontab prints client-side the job_submit() err_msg (which
|
||||||
|
can be set e.g. by using the log_user() function for the lua plugin).
|
||||||
|
+ scontrol - Reservations will not be allowed to have STATIC_ALLOC or
|
||||||
|
MAINT flags and REPLACE[_DOWN] flags simultaneously.
|
||||||
|
+ scontrol - Reservations will only accept one reoccurring flag when
|
||||||
|
being created or updated.
|
||||||
|
+ scontrol - A reservation cannot be updated to be reoccurring if it is
|
||||||
|
already a floating reservation.
|
||||||
|
+ squeue - removed unused '%s' and 'SelectJobInfo' formats.
|
||||||
|
+ squeue - align print format for exit and derived codes with that of
|
||||||
|
other components (<exit_status>:<signal_number>).
|
||||||
|
+ sacct - Add --array option to expand job arrays and display array
|
||||||
|
tasks on separate lines.
|
||||||
|
+ Partial support for '--json' and '--yaml' formatted outputs have been
|
||||||
|
implemented for sacctmgr, sdiag, sinfo, squeue, and scontrol. The
|
||||||
|
resultant data output will be filtered by normal command arguments.
|
||||||
|
Formatting arguments will continue to be ignored.
|
||||||
|
+ salloc/sbatch/srun - extended the --nodes syntax to allow for a list
|
||||||
|
of valid node counts to be allocated to the job. This also supports
|
||||||
|
a "step count" value (e.g., --nodes=20-100:20 is equivalent to
|
||||||
|
--nodes=20,40,60,80,100) which can simplify the syntax when the job
|
||||||
|
needs to scale by a certain "chunk" size.
|
||||||
|
+ srun - add user requestable vnis with '--network=job_vni' option.
|
||||||
|
+ srun - add user requestable single node vnis with the
|
||||||
|
'--network=single_node_vni' option.
|
||||||
|
* API Changes
|
||||||
|
+ job_container plugins - container_p_stepd_create() function signature
|
||||||
|
replaced uint32_t uid with stepd_step_rec_t* step.
|
||||||
|
+ gres plugins - gres_g_get_devices() function signature replaced pid_t
|
||||||
|
pid with stepd_step_rec_t* step.
|
||||||
|
+ cgroup plugins - task_cgroup_devices_constrain() function signature
|
||||||
|
removed pid_t pid.
|
||||||
|
+ task plugins - replace task_p_pre_set_affinity(), task_p_set_affinity(),
|
||||||
|
and task_p_post_set_affinity() with task_p_pre_launch_priv() like it
|
||||||
|
was back in slurm 20.11.
|
||||||
|
+ Allow for concurrent processing of job_submit_g_submit() and
|
||||||
|
job_submit_g_modify() calls. If your plugin is not capable of concurrent
|
||||||
|
operation you must add additional locking within your plugin.
|
||||||
|
+ Removed return value from slurm_list_append().
|
||||||
|
+ The List and ListIterator types have been removed in favor of list_t
|
||||||
|
and list_itr_t respectively.
|
||||||
|
+ burst buffer plugins - add bb_g_build_het_job_script().
|
||||||
|
bb_g_get_status() - added authenticated UID and GID.
|
||||||
|
bb_g_run_script() - added job_info argument.
|
||||||
|
+ burst_buffer.lua - Pass UID and GID to most hooks. Pass job_info
|
||||||
|
(detailed job information) to many hooks. See
|
||||||
|
etc/burst_buffer.lua.example for a complete list of changes.
|
||||||
|
WARNING: Backwards compatibility is broken for
|
||||||
|
slurm_bb_get_status: UID and GID are passed before the variadic
|
||||||
|
arguments. If UID and GID are not explicitly listed as arguments to
|
||||||
|
slurm_bb_get_status(), then they will be included in the variadic
|
||||||
|
arguments.
|
||||||
|
Backwards compatibility is maintained for all other hooks because
|
||||||
|
the new arguments are passed after the existing arguments.
|
||||||
|
+ node_features plugins - node_features_p_reboot_weight() function
|
||||||
|
removed.
|
||||||
|
node_features_p_job_valid() - added parameter feature_list.
|
||||||
|
node_features_p_job_xlate() - added parameters feature_list and
|
||||||
|
job_node_bitmap.
|
||||||
|
+ New data_parser interface with v0.0.39 plugin.
|
||||||
|
* Added: Fix-test-1.99.patch
|
||||||
|
* Reworked: Fix-test-38.11.patch
|
||||||
|
pam_slurm-Initialize-arrays-and-pass-sizes.patch
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Thu Feb 9 07:54:01 UTC 2023 - Egbert Eich <eich@suse.com>
|
Thu Feb 9 07:54:01 UTC 2023 - Egbert Eich <eich@suse.com>
|
||||||
|
|
||||||
- testsuite: on laster SUSE versions claim ownership of directory
|
- testsuite: on later SUSE versions claim ownership of directory
|
||||||
/etc/security/limits.d.
|
/etc/security/limits.d.
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
|
32
slurm.spec
32
slurm.spec
@ -17,10 +17,12 @@
|
|||||||
|
|
||||||
|
|
||||||
# Check file META in sources: update so_version to (API_CURRENT - API_AGE)
|
# Check file META in sources: update so_version to (API_CURRENT - API_AGE)
|
||||||
%define so_version 38
|
%define so_version 39
|
||||||
%define ver 22.05.5
|
%define ver 23.02.0
|
||||||
%define _ver _22_05
|
%define _ver _23_02
|
||||||
|
#%%define rc_v 0rc1
|
||||||
%define dl_ver %{ver}
|
%define dl_ver %{ver}
|
||||||
|
#%%define dl_ver 23-02-0%{?rc_v:-%rc_v}
|
||||||
# so-version is 0 and seems to be stable
|
# so-version is 0 and seems to be stable
|
||||||
%define pmi_so 0
|
%define pmi_so 0
|
||||||
%define nss_so 2
|
%define nss_so 2
|
||||||
@ -54,6 +56,9 @@ ExclusiveArch: do_not_build
|
|||||||
%if 0%{?sle_version} == 150300 || 0%{?sle_version} == 150400
|
%if 0%{?sle_version} == 150300 || 0%{?sle_version} == 150400
|
||||||
%define base_ver 2011
|
%define base_ver 2011
|
||||||
%endif
|
%endif
|
||||||
|
%if 0%{?sle_version} == 150500
|
||||||
|
%define base_ver 2302
|
||||||
|
%endif
|
||||||
|
|
||||||
%if 0%{?suse_version} >= 1500
|
%if 0%{?suse_version} >= 1500
|
||||||
%define have_sysuser 1
|
%define have_sysuser 1
|
||||||
@ -148,6 +153,7 @@ License: SUSE-GPL-2.0-with-openssl-exception
|
|||||||
Group: Productivity/Clustering/Computing
|
Group: Productivity/Clustering/Computing
|
||||||
URL: https://www.schedmd.com
|
URL: https://www.schedmd.com
|
||||||
Source: https://download.schedmd.com/slurm/%{pname}-%{dl_ver}.tar.bz2
|
Source: https://download.schedmd.com/slurm/%{pname}-%{dl_ver}.tar.bz2
|
||||||
|
#Source: https://github.com/SchedMD/slurm/archive/refs/tags/%{pname}-%{dl_ver}.tar.gz
|
||||||
Source1: slurm-rpmlintrc
|
Source1: slurm-rpmlintrc
|
||||||
Source10: https://raw.githubusercontent.com/openSUSE/hpc/10c105e/files/slurm/slurmd.xml
|
Source10: https://raw.githubusercontent.com/openSUSE/hpc/10c105e/files/slurm/slurmd.xml
|
||||||
Source11: https://raw.githubusercontent.com/openSUSE/hpc/10c105e/files/slurm/slurmctld.xml
|
Source11: https://raw.githubusercontent.com/openSUSE/hpc/10c105e/files/slurm/slurmctld.xml
|
||||||
@ -226,6 +232,7 @@ BuildRequires: rrdtool-devel
|
|||||||
%{?have_sysuser:BuildRequires: sysuser-tools}
|
%{?have_sysuser:BuildRequires: sysuser-tools}
|
||||||
%{?systemd_ordering}
|
%{?systemd_ordering}
|
||||||
BuildRequires: dejagnu
|
BuildRequires: dejagnu
|
||||||
|
BuildRequires: zlib-devel
|
||||||
BuildRequires: pkgconfig(dbus-1)
|
BuildRequires: pkgconfig(dbus-1)
|
||||||
BuildRequires: pkgconfig(systemd)
|
BuildRequires: pkgconfig(systemd)
|
||||||
%else
|
%else
|
||||||
@ -638,9 +645,7 @@ Do not run test suite and file bug reports for each failed test!
|
|||||||
%prep
|
%prep
|
||||||
%setup -q -n %{pname}-%{dl_ver}
|
%setup -q -n %{pname}-%{dl_ver}
|
||||||
%patch0 -p1
|
%patch0 -p1
|
||||||
#%%patch1 -p1
|
|
||||||
%patch2 -p1
|
%patch2 -p1
|
||||||
#%%patch3 -p1
|
|
||||||
%patch10 -p1
|
%patch10 -p1
|
||||||
%patch11 -p1
|
%patch11 -p1
|
||||||
%patch12 -p1
|
%patch12 -p1
|
||||||
@ -1209,8 +1214,8 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
|
|
||||||
%files doc
|
%files doc
|
||||||
%{?comp_at}
|
%{?comp_at}
|
||||||
%dir %{_datadir}/doc/%{pname}-%{dl_ver}
|
%dir %{_datadir}/doc/%{pname}-%{version}%{?rc_v:-%rc_v}
|
||||||
%{_datadir}/doc/%{pname}-%{dl_ver}/*
|
%{_datadir}/doc/%{pname}-%{version}%{?rc_v:-%rc_v}/*
|
||||||
|
|
||||||
%files webdoc
|
%files webdoc
|
||||||
%{?comp_at}
|
%{?comp_at}
|
||||||
@ -1302,6 +1307,7 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
%{_libdir}/slurm/acct_gather_profile_none.so
|
%{_libdir}/slurm/acct_gather_profile_none.so
|
||||||
%{_libdir}/slurm/burst_buffer_lua.so
|
%{_libdir}/slurm/burst_buffer_lua.so
|
||||||
%{?have_json_c:%{_libdir}/slurm/burst_buffer_datawarp.so}
|
%{?have_json_c:%{_libdir}/slurm/burst_buffer_datawarp.so}
|
||||||
|
%{_libdir}/slurm/data_parser_v0_0_39.so
|
||||||
%{_libdir}/slurm/cgroup_v1.so
|
%{_libdir}/slurm/cgroup_v1.so
|
||||||
%if 0%{?suse_version} >= 1500
|
%if 0%{?suse_version} >= 1500
|
||||||
%{_libdir}/slurm/cgroup_v2.so
|
%{_libdir}/slurm/cgroup_v2.so
|
||||||
@ -1335,7 +1341,6 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
%{_libdir}/slurm/job_submit_partition.so
|
%{_libdir}/slurm/job_submit_partition.so
|
||||||
%{_libdir}/slurm/job_submit_require_timelimit.so
|
%{_libdir}/slurm/job_submit_require_timelimit.so
|
||||||
%{_libdir}/slurm/job_submit_throttle.so
|
%{_libdir}/slurm/job_submit_throttle.so
|
||||||
%{_libdir}/slurm/launch_slurm.so
|
|
||||||
%{_libdir}/slurm/libslurm_pmi.so
|
%{_libdir}/slurm/libslurm_pmi.so
|
||||||
%{_libdir}/slurm/mcs_account.so
|
%{_libdir}/slurm/mcs_account.so
|
||||||
%{_libdir}/slurm/mcs_group.so
|
%{_libdir}/slurm/mcs_group.so
|
||||||
@ -1369,7 +1374,6 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
%{_libdir}/slurm/serializer_url_encoded.so
|
%{_libdir}/slurm/serializer_url_encoded.so
|
||||||
%{_libdir}/slurm/serializer_yaml.so
|
%{_libdir}/slurm/serializer_yaml.so
|
||||||
%{_libdir}/slurm/site_factor_none.so
|
%{_libdir}/slurm/site_factor_none.so
|
||||||
%{_libdir}/slurm/slurmctld_nonstop.so
|
|
||||||
%{_libdir}/slurm/switch_none.so
|
%{_libdir}/slurm/switch_none.so
|
||||||
%{_libdir}/slurm/task_affinity.so
|
%{_libdir}/slurm/task_affinity.so
|
||||||
%{_libdir}/slurm/task_cgroup.so
|
%{_libdir}/slurm/task_cgroup.so
|
||||||
@ -1428,13 +1432,12 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
%{?comp_at}
|
%{?comp_at}
|
||||||
%{_sbindir}/slurmrestd
|
%{_sbindir}/slurmrestd
|
||||||
%{_mandir}/man8/slurmrestd.*
|
%{_mandir}/man8/slurmrestd.*
|
||||||
|
%{_libdir}/slurm/openapi_dbv0_0_39.so
|
||||||
|
%{_libdir}/slurm/openapi_v0_0_39.so
|
||||||
%{_libdir}/slurm/openapi_dbv0_0_38.so
|
%{_libdir}/slurm/openapi_dbv0_0_38.so
|
||||||
%{_libdir}/slurm/openapi_v0_0_38.so
|
%{_libdir}/slurm/openapi_v0_0_38.so
|
||||||
%{_libdir}/slurm/openapi_dbv0_0_37.so
|
%{_libdir}/slurm/openapi_dbv0_0_37.so
|
||||||
%{_libdir}/slurm/openapi_v0_0_37.so
|
%{_libdir}/slurm/openapi_v0_0_37.so
|
||||||
%{_libdir}/slurm/openapi_dbv0_0_36.so
|
|
||||||
#%{_libdir}/slurm/openapi_v0_0_35.so
|
|
||||||
%{_libdir}/slurm/openapi_v0_0_36.so
|
|
||||||
#%{_libdir}/slurm/rest_auth_jwt.so
|
#%{_libdir}/slurm/rest_auth_jwt.so
|
||||||
%{_libdir}/slurm/rest_auth_local.so
|
%{_libdir}/slurm/rest_auth_local.so
|
||||||
%endif
|
%endif
|
||||||
@ -1445,7 +1448,9 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
%{_sbindir}/slurmstepd
|
%{_sbindir}/slurmstepd
|
||||||
# bsc#1153095
|
# bsc#1153095
|
||||||
%{_bindir}/srun
|
%{_bindir}/srun
|
||||||
|
%{_bindir}/scrun
|
||||||
%{_mandir}/man1/srun.1*
|
%{_mandir}/man1/srun.1*
|
||||||
|
%{_mandir}/man1/scrun.1*
|
||||||
%{_mandir}/man8/slurmd.*
|
%{_mandir}/man8/slurmd.*
|
||||||
%{_mandir}/man8/slurmstepd*
|
%{_mandir}/man8/slurmstepd*
|
||||||
%if 0%{?with_systemd}
|
%if 0%{?with_systemd}
|
||||||
@ -1506,6 +1511,7 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
%{_libdir}/slurm/select_cray_aries.so
|
%{_libdir}/slurm/select_cray_aries.so
|
||||||
%{_libdir}/slurm/switch_cray_aries.so
|
%{_libdir}/slurm/switch_cray_aries.so
|
||||||
%{_libdir}/slurm/task_cray_aries.so
|
%{_libdir}/slurm/task_cray_aries.so
|
||||||
|
%{_libdir}/slurm/proctrack_cray_aries.so
|
||||||
%{_libdir}/slurm/mpi_cray_shasta.so
|
%{_libdir}/slurm/mpi_cray_shasta.so
|
||||||
%if 0%{?have_json_c}
|
%if 0%{?have_json_c}
|
||||||
%{_libdir}/slurm/node_features_knl_cray.so
|
%{_libdir}/slurm/node_features_knl_cray.so
|
||||||
@ -1518,7 +1524,7 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
|
|||||||
%dir %attr(-, %slurm_u, %slurm_u) /srv/slurm-testsuite
|
%dir %attr(-, %slurm_u, %slurm_u) /srv/slurm-testsuite
|
||||||
%attr(-, root, root) %{_datadir}/%{name}
|
%attr(-, root, root) %{_datadir}/%{name}
|
||||||
%if 0%{?sle_version} == 120200 || 0%{?suse_version} >= 1550
|
%if 0%{?sle_version} == 120200 || 0%{?suse_version} >= 1550
|
||||||
%dir %{_pam_secconfdir}/limits.d
|
%dir %attr(-, root, root) %{_pam_secconfdir}/limits.d
|
||||||
%endif
|
%endif
|
||||||
%doc testsuite/expect/README
|
%doc testsuite/expect/README
|
||||||
%doc %{basename: %{S:21}}
|
%doc %{basename: %{S:21}}
|
||||||
|
Loading…
Reference in New Issue
Block a user