SHA256
1
0
forked from pool/slurm

Accepting request 1141442 from network:cluster

- Update to 23.11.1 with following major improvements and fixing
  CVE-2023-49933, CVE-2023-49934, CVE-2023-49935, CVE-2023-49936
  and CVE-2023-49937
  * Substantially overhauled the SlurmDBD association management
    code. For clusters updated to 23.11, account and user
    additions or removals are significantly faster than in prior
    releases.
  * Overhauled `scontrol reconfigure` to prevent configuration
    mistakes from disabling slurmctld and slurmd. Instead, an
    error will be returned, and the running configuration will
    persist. This does require updates to the systemd service
    files to use the `--systemd` option to `slurmctld` and `slurmd`.
  * Added a new internal `auth/cred` plugin - `auth/slurm`. This
    builds off the prior `auth/jwt` model, and permits operation
    of the `slurmdbd` and `slurmctld` without access to full
    directory information with a suitable configuration.
  * Added a new `--external-launcher` option to `srun`, which is
    automatically set by common MPI launcher implementations and
    ensures processes using those non-srun launchers have full
    access to all resources allocated on each node.
  * Reworked the dynamic/cloud modes of operation to allow for
    "fanout" - where Slurm communication can be automatically
    offloaded to compute nodes for increased cluster scalability.
  * Overhauled and extended the Reservation subsystem to allow
    for most of the same resource requirements as are placed on
    the job. Notably, this permits reservations to now reserve
    GRES directly.
- Details of changes:
  * Fix `scontrol update job=... TimeLimit+=/-=` when used with a
    raw JobId of job array element.

OBS-URL: https://build.opensuse.org/request/show/1141442
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/slurm?expand=0&rev=103
This commit is contained in:
Ana Guerrero 2024-01-25 17:41:05 +00:00 committed by Git OBS Bridge
commit 6a021ebb80
6 changed files with 379 additions and 67 deletions

View File

@ -1,56 +1,58 @@
From 46bea350d06e9c8e1f93938ce2b2bd04a1c3bf3f Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed Jan 8 20:56:25 2020 +0100
Subject: Remove rpath from build
Patch-mainline: never
Git-commit: f79af97c35f38775a7a90cd8e4b98814729d9a9c
References:
Date: Wed, 8 Jan 2020 20:56:25 +0100
Subject: [PATCH] Remove rpath from build
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm-19.05.5/contribs/perlapi/libslurm/perl/Makefile.PL.in | 4 ++--
slurm-19.05.5/contribs/perlapi/libslurmdb/perl/Makefile.PL.in | 4 ++--
contribs/perlapi/libslurm/perl/Makefile.PL.in | 4 ++--
contribs/perlapi/libslurmdb/perl/Makefile.PL.in | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/contribs/perlapi/libslurm/perl/Makefile.PL.in b/contribs/perlapi/libslurm/perl/Makefile.PL.in
index fcae437..797c943 100644
index e8f8aff54d..b51f53f412 100644
--- a/contribs/perlapi/libslurm/perl/Makefile.PL.in
+++ b/contribs/perlapi/libslurm/perl/Makefile.PL.in
@@ -77,7 +77,7 @@ DESTDIR_BUG
@@ -68,7 +68,7 @@ DESTDIR_BUG
# AIX has problems with not always having the correct
# flags so we have to add some :)
my $os = lc(`uname`);
-my $other_ld_flags = "-Wl,-rpath,@top_builddir@/src/api/.libs -Wl,-rpath,@prefix@/lib";
-my $other_ld_flags = '-Wl,-rpath,@top_builddir@/src/api/.libs -Wl,-rpath,@libdir@';
+my $other_ld_flags = "-L@top_builddir@/src/api/.libs -lslurm";
$other_ld_flags = " -brtl -G -bnoentry -bgcbypass:1000 -bexpfull"
if $os =~ "aix";
@@ -88,7 +88,7 @@ WriteMakefile(
@@ -79,7 +79,7 @@ WriteMakefile(
($] >= 5.005 ? ## Add these new keywords supported since 5.005
(ABSTRACT_FROM => 'lib/Slurm.pm', # retrieve abstract from module
AUTHOR => 'Hongjia Cao <hjcao@nudt.edu.cn>') : ()),
- LIBS => ["-L@top_builddir@/src/api/.libs -L@prefix@/lib -lslurm"], # e.g., '-lm'
- LIBS => ['-L@top_builddir@/src/api/.libs -L@libdir@ -lslurm'], # e.g., '-lm'
+ LIBS => ["-L@prefix@/lib -lslurm"], # e.g., '-lm'
DEFINE => '', # e.g., '-DHAVE_SOMETHING'
INC => "-I. -I@top_srcdir@ -I@top_srcdir@/contribs/perlapi/common -I@top_builddir@",
# Un-comment this if you add C files to link with later:
diff --git a/contribs/perlapi/libslurmdb/perl/Makefile.PL.in b/contribs/perlapi/libslurmdb/perl/Makefile.PL.in
index 2db028c..83eb4e2 100644
index 4fb38b9725..148efa6e82 100644
--- a/contribs/perlapi/libslurmdb/perl/Makefile.PL.in
+++ b/contribs/perlapi/libslurmdb/perl/Makefile.PL.in
@@ -76,7 +76,7 @@ DESTDIR_BUG
@@ -68,7 +68,7 @@ DESTDIR_BUG
# AIX has problems with not always having the correct
# flags so we have to add some :)
my $os = lc(`uname`);
-my $other_ld_flags = "-Wl,-rpath,@top_builddir@/src/db_api/.libs -Wl,-rpath,@prefix@/lib";
-my $other_ld_flags = '-Wl,-rpath,@top_builddir@/src/db_api/.libs -Wl,-rpath,@libdir@';
+my $other_ld_flags = "-L@top_builddir@/src/api/.libs -lslurm";
$other_ld_flags = " -brtl -G -bnoentry -bgcbypass:1000 -bexpfull"
if $os =~ "aix";
@@ -87,7 +87,7 @@ WriteMakefile(
@@ -79,7 +79,7 @@ WriteMakefile(
($] >= 5.005 ? ## Add these new keywords supported since 5.005
(ABSTRACT_FROM => 'Slurmdb.pm', # retrieve abstract from module
AUTHOR => 'Don Lipari <lipari@llnl.gov>') : ()),
- LIBS => ["-L@top_builddir@/src/api/.libs -L@prefix@/lib -lslurm"], # e.g., '-lm'
+ LIBS => ["-L@prefix@/lib -lslurmdb"], # e.g., '-lm'
- LIBS => ['-L@top_builddir@/src/api/.libs -L@libdir@ -lslurm'], # e.g., '-lm'
+ LIBS => ["-L@prefix@/lib -lslurm"], # e.g., '-lm'
DEFINE => '', # e.g., '-DHAVE_SOMETHING'
INC => "-I. -I@top_srcdir@ -I@top_srcdir@/contribs/perlapi/common -I@top_builddir@",
# Un-comment this if you add C files to link with later:
--
2.42.1

View File

@ -1,9 +1,7 @@
From d51d3e1db8b2ed650a042352eff041ae77e467f9 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon Feb 20 21:29:27 2023 +0100
Subject: pam_slurm: Initialize arrays and pass sizes
Patch-mainline: Not yet
Git-commit: 5feca5c29d4e820dafd8d34c0343944b28890902
References: bsc#1007053
Date: Mon, 20 Feb 2023 21:29:27 +0100
Subject: [PATCH] pam_slurm: Initialize arrays and pass sizes
PAM is security critical:
- clear arrays
@ -15,11 +13,12 @@ Signed-off-by: Egbert Eich <eich@suse.de>
---
contribs/pam/pam_slurm.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c
index 20d21a9..363b6ae 100644
index a27e651548..eac9879c07 100644
--- a/contribs/pam/pam_slurm.c
+++ b/contribs/pam/pam_slurm.c
@@ -266,9 +266,9 @@ static int
@@ -279,9 +279,9 @@ static int
_gethostname_short (char *name, size_t len)
{
int error_code, name_len;
@ -31,12 +30,12 @@ index 20d21a9..363b6ae 100644
if (error_code)
return error_code;
@@ -296,13 +296,13 @@ static int
@@ -309,13 +309,13 @@ static int
_slurm_match_allocation(uid_t uid)
{
int authorized = 0, i;
- char hostname[MAXHOSTNAMELEN];
+ char hostname[MAXHOSTNAMELEN] = {0};
- char hostname[HOST_NAME_MAX];
+ char hostname[HOST_NAME_MAX] = {0};
char *nodename = NULL;
job_info_msg_t * msg;
@ -47,7 +46,7 @@ index 20d21a9..363b6ae 100644
_log_msg(LOG_ERR, "gethostname: %m");
return 0;
}
@@ -425,7 +425,7 @@ _send_denial_msg(pam_handle_t *pamh, struct _options *opts,
@@ -438,7 +438,7 @@ _send_denial_msg(pam_handle_t *pamh, struct _options *opts,
*/
extern void libpam_slurm_init (void)
{
@ -56,7 +55,7 @@ index 20d21a9..363b6ae 100644
if (slurm_h)
return;
@@ -433,10 +433,10 @@ extern void libpam_slurm_init (void)
@@ -446,10 +446,10 @@ extern void libpam_slurm_init (void)
/* First try to use the same libslurm version ("libslurm.so.24.0.0"),
* Second try to match the major version number ("libslurm.so.24"),
* Otherwise use "libslurm.so" */
@ -69,7 +68,7 @@ index 20d21a9..363b6ae 100644
_log_msg (LOG_ERR, "Unable to write libslurmname\n");
} else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) {
return;
@@ -445,8 +445,10 @@ extern void libpam_slurm_init (void)
@@ -458,8 +458,10 @@ extern void libpam_slurm_init (void)
libslurmname, dlerror ());
}
@ -82,3 +81,6 @@ index 20d21a9..363b6ae 100644
_log_msg (LOG_ERR, "Unable to write libslurmname\n");
} else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) {
return;
--
2.42.1

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eba6db8990abf40402d8e30d8706a7ddd0560e0e307c567f0fb72f1c8a522078
size 7447239

3
slurm-23.11.1.tar.bz2 Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2f3f4ad4c92596c405d465f5a991bc50d85508b8b127fb2cc008a0980b7bdbd8
size 7536436

View File

@ -1,3 +1,331 @@
-------------------------------------------------------------------
Fri Jan 12 11:08:01 UTC 2024 - Christian Goll <cgoll@suse.com>
- Update to 23.11.1 with following major improvements and fixing
CVE-2023-49933, CVE-2023-49934, CVE-2023-49935, CVE-2023-49936
and CVE-2023-49937
* Substantially overhauled the SlurmDBD association management
code. For clusters updated to 23.11, account and user
additions or removals are significantly faster than in prior
releases.
* Overhauled `scontrol reconfigure` to prevent configuration
mistakes from disabling slurmctld and slurmd. Instead, an
error will be returned, and the running configuration will
persist. This does require updates to the systemd service
files to use the `--systemd` option to `slurmctld` and `slurmd`.
* Added a new internal `auth/cred` plugin - `auth/slurm`. This
builds off the prior `auth/jwt` model, and permits operation
of the `slurmdbd` and `slurmctld` without access to full
directory information with a suitable configuration.
* Added a new `--external-launcher` option to `srun`, which is
automatically set by common MPI launcher implementations and
ensures processes using those non-srun launchers have full
access to all resources allocated on each node.
* Reworked the dynamic/cloud modes of operation to allow for
"fanout" - where Slurm communication can be automatically
offloaded to compute nodes for increased cluster scalability.
* Overhauled and extended the Reservation subsystem to allow
for most of the same resource requirements as are placed on
the job. Notably, this permits reservations to now reserve
GRES directly.
- Details of changes:
* Fix `scontrol update job=... TimeLimit+=/-=` when used with a
raw JobId of job array element.
* Reject `TimeLimit` increment/decrement when called on job with
`TimeLimit=UNLIMITED`.
* Fix issue with requesting a job with `*licenses` as well as
`*tres-per-task=license`.
* `slurmctld` - Prevent segfault in `getopt_long()` with an
invalid long option.
* slurmrestd - Added `/meta/slurm/cluster` field to responses.
* Adjust systemd service files to start daemons after
`remote-fs.target`.
* Fix `task/cgroup` indexing tasks in cgroup plugins, which
caused `jobacct/gather` to match the gathered stats with the
wrong task id.
* `select/linear` - Fix regression in 23.11 in which jobs that
requested `*cpus-per-task` were rejected.
* `data_parser/v0.0.40` - Fix the parsing for
`/slurmdb/v0.0.40/jobs` exit_code query parameter.
* If a job requests more shards which would allocate more than
one sharing GRES (gpu) per node refuse it unless
`SelectTypeParameters` has `MULTIPLE_SHARING_GRES_PJ`.
* Trigger fatal exit when Slurm API function is called before
`slurm_init()` is called.
* `slurmd` - Fix issue with `scontrol reconfigure` when started
with `-c`.
* `slurmrestd` - Job submissions that result in the following
error codes will be considered as successfully submitted (with
a warning), instead of returning an HTTP 500 error back:
`ESLURM_NODES_BUSY`, `ESLURM_RESERVATION_BUSY`, `ESLURM_JOB_HELD`,
`ESLURM_NODE_NOT_AVAIL`, `ESLURM_QOS_THRES`,
`ESLURM_ACCOUNTING_POLICY`, `ESLURM_RESERVATION_NOT_USABLE`,
`ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE`,
`ESLURM_BURST_BUFFER_WAIT`, `ESLURM_PARTITION_DOWN`,
`ESLURM_LICENSES_UNAVAILABLE`.
* Fix a `slurmctld` fatal error when upgrading to 23.11 and
changing from `select/cons_res` to `select/cons_tres` at the
same time.
* `slurmctld` - Reject arbitrary distribution jobs that have a
minimum node count that differs from the number of unique
nodes in the hostlist.
* Prevent `slurmdbd` errors when updating reservations with names
containing apostrophes.
* Prevent message extension attacks that could bypass the
message hash. CVE-2023-49933.
* Prevent SQL injection attacks in `slurmdbd`. CVE-2023-49934.
* Prevent message hash bypass in slurmd which can allow an
attacker to reuse root-level MUNGE tokens and escalate
permissions. CVE-2023-49935.
* Prevent NULL pointer dereference on size_valp overflow.
CVE-2023-49936.
* Prevent double-xfree() on error in `_unpack_node_reg_resp()`.
CVE-2023-49937.
* For jobs that request `*cpus-per-gpu`, ensure that the
`*cpus-per-gpu` request is honored on every node in the job and
not just for the job as a whole.
* Fix listing available `data_parser` plugins for json and yaml
when giving no commands to `scontrol` or `sacctmgr`.
* `slurmctld` - Rework `scontrol reconfigure` to avoid race
conditions that can result in stray jobs.
* `slurmctld` - Shave ~1 second off average reconfigure time by
terminating internal processing threads faster.
* Skip running `slurmdbd -R` if the connected cluster is 23.11
or newer. This operation is no longer relevant for 23.11.
* Ensure `slurmscriptd` shuts down before `slurmctld` is stopped
or reconfigured.
* Improve error handling and error messages in `slurmctld` to
`slurmscriptd` communications. This includes avoiding
potential deadlock in `slurmctld` if `slurmscriptd` dies
unexpectedly.
* Do not hold batch jobs whose extra constraints cannot be
immediately satisfied, and set the state reason to
`Constraints` instead of `BadConstraints`.
* Fix verbose log message printing a hex number instead of a job
id.
* Upgrade rate limit parameters message from debug to info.
* For `SchedulerParameters=extra_constraints`, prevent `slurmctld`
segfault when starting a `slurmd` with `*extra` for a node
that did not previously set this.
This also ensures the extra constraints model works off the
current node state, not the prior state.
* Fix `*tres-per-task` assertion.
* Fix a few issues when creating reservations.
* Add `SchedulerParameters=time_min_as_soft_limit` option.
* Remove `SLURM_WORKING_CLUSTER` env from batch and srun
environments.
* `cli_filter/lua` - return nil for unset time options rather
than the string `2982616-04:14:00` (which is the internal
macro `NO_VAL` represented as time string).
* Remove 'none' plugins for all but auth and cred. scontrol show
config will report (null) now.
* Removed `select/cons_res`. Please update your configuration to
`select/cons_tres`.
* `mpi/pmix` - When aborted with status 0, avoid marking
job/step as failed.
* Fixed typo on `initialized` for the description of
`ESLURM_PLUGIN_NOT_LOADED`.
* `cgroup.conf` - Removed deprecated parameters `AllowedKmemSpace`,
`ConstrainKmemSpace`, `MaxKmemPercent`, and `MinKmemSpace`.
* `proctrack/cgroup` - Add `SignalChildrenProcesses=<yes|no>`
option to `cgroup.conf`. This allows signals for cancelling,
suspending, resuming, etc. to be sent to children processes in
a `step/job` rather than just the parent.
* Add `PreemptParameters=suspend_grace_time` parameter to
control amount of time between `SIGTSTP` and `SIGSTOP` signals
when suspending jobs.
* `job_submit/throttle` - improve reset of submitted job counts
per user in order to better honor
`SchedulerParameters=jobs_per_user_per_hour=#`.
* Load the user environment into a private pid namespace to
avoid user scripts leaving background processes on a node.
* `scontrol` show `assoc_mgr` will display Lineage instead of Lft
for associations.
* Add `SlurmctldParameters=no_quick_restart` to avoid a new
`slurmctld` taking over the old `slurmctld` by accident.
* Fix `--cpus-per-gpu` for step allocations, which was
previously ignored for job steps. `*cpus-per-gpu` implies
`--exact`.
* Fix mutual exclusivity of `--cpus-per-gpu` and
`--cpus-per-task`: fatal if both options are requested in the
commandline or both are requested in the environment. If one
option is requested in the command line, it will override the
other option in the environment.
* `slurmrestd` - `openapi/dbv0.0.37` and `openapi/v0.0.37`
plugins have been removed.
* `slurmrestd` - `openapi/dbv0.0.38` and `openapi/v0.0.38`
plugins have been tagged as deprecated.
* `slurmrestd` - added auto population of `info/version` field.
* `sdiag` - add `--yaml` and `--json` arg support to specify
data_parser plugin.
* `sacct` - add `--yaml` and `--json` arg support to specify
`data_parser` plugin.
* `scontrol` - add `--yaml` and `--json` arg support to specify
`data_parser` plugin.
* `sinfo` - add `--yaml` and `--json` arg support to specify
`data_parser` plugin.
* `squeue` - add `--yaml` and `--json` arg support to specify
`data_parser` plugin.
* Changed the default `SelectType` to `select/cons_tres` (from
`select/linear`).
* Allow `SlurmUser`/`root` to use reservations without specific
permissions.
* Fix sending step signals to nodes not allocated by the step.
* Remove `CgroupAutomount=` option from `cgroup.conf`.
* Add `TopologyRoute=RoutePart` to route communications based
on partition node lists.
* Added ability for configless to push Prolog and Epilog
scripts to `slurmd`s.
* Prolog and Epilog do not have to be fully qualified pathnames.
* Changed default value of `PriorityType` from `priority/basic`
to `priority/multifactor`.
* `torque/mpiexec` - Propagate exit code from `launched` process.
* `slurmrestd` - Add new rlimits fields for job submission.
* Define SPANK options environment variables when
`--export=[NIL|NONE]` is specified.
* `slurmrestd` - Numeric input fields provided with a null
formatted value will now convert to zero (0) where it can be
a valid value. This is expected to only be notable with job
submission against v0.0.38 versioned endpoints with job
requests with fields provided with null values. These fields
were already rejected by v0.0.39+ endpoints, unless `+complex`
parser value is provided to v0.0.40+ endpoints.
* `slurmrestd` - Improve parsing of integers and floating point
numbers when handling incoming user provided numeric fields.
Fields that would have not rejected a number for a numeric
field followed by other non-numeric characters will now get
rejected. This is expected to only be notable with job
submission against v0.0.38 versioned endpoints with malformed
job requests.
* Reject reservation update if it will result in previously
submitted jobs losing access to the reservation.
* `data_parser/v0.0.40` - output partition state when dumping
partitions.
* Allow for a shared suffix to be used with the hostlist format.
E.g., `node[0001-0010]-int`.
* Replace `SRUN_CPUS_PER_TASK` with `SLURM_CPUS_PER_TASK` and
get back the previous behavior before Slurm 22.05 since now we
have the new external launcher step.
* `job_container/tmpfs` - Add `BasePath=none` option to disable
plugin on node subsets when there is a global setting.
* Add QOS flag `Relative`. If set the QOS limits will be treated
as percentages of a cluster/partition instead of absolutes.
* Remove `FIRST_CORES` flag from reservations.
* Add cloud instance id and instance type to node records.
Can be viewed/updated with `scontrol`.
* `slurmd` - add `instance-id`, `instance-type`, and `extra`
options to allow them to be set on startup.
* Add cloud instance accounting to database that can be viewed
with `sacctmgr show instance`.
* `select/linear` - fix task launch failure that sometimes
occurred when requesting `*threads-per-core` or
`--hint=nomultithread`. This also fixes memory calculation
with one of these options and `*mem-per-cpu`:
Previously, memory = mem-per-cpu * all cpus including unusable
threads.
Now, memory = mem-per-cpu * only usable threads. This
behavior matches the documentation and select/cons_tres.
* `gpu/nvml` - Reduce chances of `NVML_ERROR_INSUFFICIENT_SIZE`
error when getting gpu memory information.
* `slurmrestd` - Convert to generating `OperationIDs` based on
path for all v0.0.40 tagged paths.
* `slurmrestd` - Reduce memory used while dumping a job's stdio
paths.
* `slurmrestd` - Jobs queried from `data_parser/v0.0.40` from
`slurmdb` will have `step/id` field given as a string to match
CLI formatting instead of an object.
* `sacct` - Output in JSON or YAML output will have the
`step/id` field given as a string instead of an object.
* `scontrol`/`squeue` - Step output in JSON or YAML output will
have the `id` field given as a string instead of an
object.
* `slurmrestd` - For `GET /slurmdb/v0.0.40/jobs` mimic default
behavior for handling of job start and end times as `sacct`
when one or both fields are not provided as a query parameter.
* `openapi/slurmctld` - Add `GET /slurm/v0.0.40/shares` endpoint
to dump same output as `sshare`.
* `sshare` - add JSON/YAML support.
* `data_parser/v0.0.40` - Remove `required/memory` output in
json. It is replaced by `required/memory_per_cpu` and
`required/memory_per_node`.
* `slurmrestd` - Add numeric id to all association identifiers
to allow unique identification where association has been
deleted but is still referenced by accounting record.
* `slurmrestd` - Add accounting, id, and comment fields to
association dumps.
* Use `memory.current` in cgroup/v2 instead of manually
calculating RSS. This makes accounting consistent with
OOM Killer.
* `sreport` - cluster Utilization `PlannedDown` field now
includes the time that all nodes were in the `POWERED_DOWN`
state instead of just cloud nodes.
* `scontrol` update partition now allows `Nodes+=<node-list>` and
`Nodes-=<node-list>` to add/delete nodes from the existing
partition node list. `Nodes=+host1,-host2` is also allowed.
* `sacctmgr` - add `--yaml` and `--json` arg support to specify
`data_parser` plugin.
* `sacctmgr` can now modify QOS's RawUsage to zero or a positive
value.
* `sdiag` - Added statistics on why the main and backfill
schedulers have stopped evaluation on each scheduling cycle.
* Add `rl_log_freq` option to `SlurmctldParameters` to control
the number of `RPC limit exceeded...` messages that are logged.
* Rename `sbcast --fanout` to `--treewidth`.
* Remove `SLURM_NODE_ALIASES` env variable.
* Enable fanout for dynamic and unaddressable cloud nodes.
* Fix how steps are dealloced in an allocation if the last step
of an srun never completes due to a node failure.
* Remove redundant database indexes.
* Add database index to suspend table to speed up archive/purges.
* When requesting `--tres-per-task` alter incorrect request for
TRES, it should be `TRESType/TRESName` not `TRESType:TRESName`.
* Make it so reservations can reserve GRES.
* `sbcast` - use the specified `--fanout` value on all hops in
message forwarding; previously the specified fanout was only
used on the first hop, and additional hops used `TreeWidth` in
`slurm.conf`.
* `slurmrestd`- remove logger prefix from `-s/-a list` options
outputs.
* `switch/hpe_slingshot` - Add support for collectives.
* Nodes with suspended jobs can now be displayed as `MIXED`.
* Fix inconsistent handling of using cli and/or environment
options for `tres_per_task=cpu:#` and `cpus_per_gpu`.
* Requesting `--cpus-per-task` will now set
`SLURM_TRES_PER_TASK=cpu:#` in the environment.
* For some tres related environment variables such as
`SLURM_TRES_PER_TASK`, when `srun` requests a different value
for that option, set these environment variables to the value
requested by `srun`. Previously these environment variables
were unchanged from the job allocation. This bug only affected
the output environment variables, not the actual step resource
allocation.
* `RoutePlugin=route/topology` has been replaced with
`TopologyParam=RouteTree`.
* If `ThreadsPerCore` in `slurm.conf` is configured with less
than the number of hardware threads, fix a bug where the task
plugins used fewer cores instead of using fewer threads per core.
* Fix arbitrary distribution allowing it to be used with `salloc`
and `sbatch` and fix how cpus are allocated to nodes.
* Allow nodes to reboot while node is drained or in a
maintenance state.
* Allow `scontrol` reboot to use nodesets to filter nodes to reboot.
* Fix how the topology of typed gres gets updated.
* Changes to the Type option in gres.conf now can be applied with
`scontrol` reconfig.
* Allow for jobs that request a newly configured gres type to be
queued even when the needed `slurmd`s have not yet registered.
* Kill recovered jobs that require unconfigured gres types.
* If keepalives are configured, enable them on all persistent
connections.
* Configless - Also send Includes from configuration files not
parsed by the controller (i.e. from `plugstack.conf`).
* Add `gpu/nrt` plugin for nodes using Trainium/Inferentia
devices.
* `data_parser/v0.0.40` - Add `START_RECEIVED` to job flags in
dumped output.
* SPANK - Failures from most spank functions (not epilog or
exit) will now cause the step to be marked as failed and the
command (`srun`, `salloc`, `sbatch *wait`) to return 1.
-------------------------------------------------------------------
Wed Jan 3 10:45:48 UTC 2024 - Egbert Eich <eich@suse.com>

View File

@ -17,13 +17,11 @@
# Check file META in sources: update so_version to (API_CURRENT - API_AGE)
%define so_version 39
%define so_version 40
# Make sure to update `upgrades` as well!
%define ver 23.02.7
%define _ver _23_02
#%%define rc_v 0rc1
%define ver 23.11.1
%define _ver _23_11
%define dl_ver %{ver}
#%%define dl_ver 23-02-0%{?rc_v:-%rc_v}
# so-version is 0 and seems to be stable
%define pmi_so 0
%define nss_so 2
@ -58,7 +56,7 @@ ExclusiveArch: do_not_build
%if 0%{?sle_version} == 150300 || 0%{?sle_version} == 150400
%define base_ver 2011
%endif
%if 0%{?sle_version} == 150500
%if 0%{?sle_version} == 150500 || 0%{?sle_version} == 150600
%define base_ver 2302
%endif
@ -122,7 +120,7 @@ Conflicts: %{*} }
%endif
%if 0%{?suse_version} >= 1500
%define have_hdf5 1
%undefine have_hdf5
%define have_boolean_deps 1
%define have_lz4 1
%define have_firewalld 1
@ -162,7 +160,6 @@ License: SUSE-GPL-2.0-with-openssl-exception
Group: Productivity/Clustering/Computing
URL: https://www.schedmd.com
Source: https://download.schedmd.com/slurm/%{pname}-%{dl_ver}.tar.bz2
#Source: https://github.com/SchedMD/slurm/archive/refs/tags/%{pname}-%{dl_ver}.tar.gz
Source1: %upgrade_versions
Source2: slurm-rpmlintrc
Source10: slurmd.xml
@ -1088,6 +1085,7 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
%{_bindir}/sstat
%{_bindir}/strigger
%{?have_netloc:%{_bindir}/netloc_to_topology}
%{_sbindir}/sackd
%{_sbindir}/slurmctld
%{_sbindir}/slurmsmwd
%dir %{_libdir}/slurm/src
@ -1190,33 +1188,27 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
%dir %{_sysconfdir}/%{pname}/plugstack.conf.d
%dir %{_libdir}/slurm
%{_libdir}/slurm/libslurmfull.so
%{_libdir}/slurm/accounting_storage_none.so
%{_libdir}/slurm/accounting_storage_slurmdbd.so
%{_libdir}/slurm/acct_gather_energy_pm_counters.so
%{_libdir}/slurm/acct_gather_energy_gpu.so
%{_libdir}/slurm/acct_gather_energy_ibmaem.so
%{_libdir}/slurm/acct_gather_energy_none.so
%{_libdir}/slurm/acct_gather_energy_rapl.so
%{_libdir}/slurm/acct_gather_interconnect_sysfs.so
%{_libdir}/slurm/acct_gather_filesystem_lustre.so
%{_libdir}/slurm/acct_gather_filesystem_none.so
%{_libdir}/slurm/acct_gather_interconnect_none.so
%{_libdir}/slurm/acct_gather_profile_none.so
%{_libdir}/slurm/burst_buffer_lua.so
%{_libdir}/slurm/burst_buffer_datawarp.so
%{_libdir}/slurm/data_parser_v0_0_40.so
%{_libdir}/slurm/data_parser_v0_0_39.so
%{_libdir}/slurm/cgroup_v1.so
%if 0%{?suse_version} >= 1500
%{_libdir}/slurm/cgroup_v2.so
%endif
%{_libdir}/slurm/core_spec_none.so
%{_libdir}/slurm/cli_filter_none.so
%{_libdir}/slurm/cli_filter_lua.so
%{_libdir}/slurm/cli_filter_syslog.so
%{_libdir}/slurm/cli_filter_user_defaults.so
%{_libdir}/slurm/cred_none.so
%{_libdir}/slurm/ext_sensors_none.so
%{_libdir}/slurm/gpu_generic.so
%{_libdir}/slurm/gpu_nrt.so
%{_libdir}/slurm/gres_gpu.so
%{_libdir}/slurm/gres_mps.so
%{_libdir}/slurm/gres_nic.so
@ -1224,13 +1216,10 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
%{_libdir}/slurm/hash_k12.so
%{_libdir}/slurm/jobacct_gather_cgroup.so
%{_libdir}/slurm/jobacct_gather_linux.so
%{_libdir}/slurm/jobacct_gather_none.so
%{_libdir}/slurm/jobcomp_filetxt.so
%{_libdir}/slurm/jobcomp_none.so
%{_libdir}/slurm/jobcomp_lua.so
%{_libdir}/slurm/jobcomp_script.so
%{_libdir}/slurm/job_container_cncu.so
%{_libdir}/slurm/job_container_none.so
%{_libdir}/slurm/job_container_tmpfs.so
%{_libdir}/slurm/job_submit_all_partitions.so
%{_libdir}/slurm/job_submit_defaults.so
@ -1241,17 +1230,13 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
%{_libdir}/slurm/libslurm_pmi.so
%{_libdir}/slurm/mcs_account.so
%{_libdir}/slurm/mcs_group.so
%{_libdir}/slurm/mcs_none.so
%{_libdir}/slurm/mcs_user.so
%{_libdir}/slurm/mpi_none.so
%{_libdir}/slurm/mpi_pmi2.so
%if %{with pmix}
%{_libdir}/slurm/mpi_pmix.so
%{_libdir}/slurm/mpi_pmix_v3.so
%endif
%{_libdir}/slurm/node_features_helpers.so
%{_libdir}/slurm/power_none.so
%{_libdir}/slurm/preempt_none.so
%{_libdir}/slurm/preempt_partition_prio.so
%{_libdir}/slurm/preempt_qos.so
%{_libdir}/slurm/prep_script.so
@ -1260,24 +1245,19 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
%{_libdir}/slurm/proctrack_cgroup.so
%{_libdir}/slurm/proctrack_linuxproc.so
%{_libdir}/slurm/proctrack_pgid.so
%{_libdir}/slurm/route_default.so
%{_libdir}/slurm/route_topology.so
%{_libdir}/slurm/sched_backfill.so
%{_libdir}/slurm/sched_builtin.so
%{_libdir}/slurm/select_cons_res.so
%{_libdir}/slurm/select_cons_tres.so
%{_libdir}/slurm/select_linear.so
%{_libdir}/slurm/serializer_json.so
%{_libdir}/slurm/serializer_url_encoded.so
%{_libdir}/slurm/serializer_yaml.so
%{_libdir}/slurm/site_factor_none.so
%{_libdir}/slurm/switch_none.so
%{_libdir}/slurm/site_factor_example.so
%{_libdir}/slurm/task_affinity.so
%{_libdir}/slurm/task_cgroup.so
%{_libdir}/slurm/task_none.so
%{_libdir}/slurm/topology_3d_torus.so
%{_libdir}/slurm/topology_hypercube.so
%{_libdir}/slurm/topology_none.so
%{_libdir}/slurm/topology_block.so
%{_libdir}/slurm/topology_default.so
%{_libdir}/slurm/topology_tree.so
%if 0%{?suse_version} > 1310
%{_libdir}/slurm/acct_gather_interconnect_ofed.so
@ -1326,12 +1306,12 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \
%{_sbindir}/rcslurmrestd
%{_unitdir}/slurmrestd.service
%{_mandir}/man8/slurmrestd.*
%{_libdir}/slurm/openapi_slurmctld.so
%{_libdir}/slurm/openapi_slurmdbd.so
%{_libdir}/slurm/openapi_dbv0_0_39.so
%{_libdir}/slurm/openapi_v0_0_39.so
%{_libdir}/slurm/openapi_dbv0_0_38.so
%{_libdir}/slurm/openapi_v0_0_38.so
%{_libdir}/slurm/openapi_dbv0_0_37.so
%{_libdir}/slurm/openapi_v0_0_37.so
%{_libdir}/slurm/rest_auth_local.so
%endif