diff --git a/Remove-rpath-from-build.patch b/Remove-rpath-from-build.patch index fd85c24..828a59d 100644 --- a/Remove-rpath-from-build.patch +++ b/Remove-rpath-from-build.patch @@ -1,56 +1,58 @@ +From 46bea350d06e9c8e1f93938ce2b2bd04a1c3bf3f Mon Sep 17 00:00:00 2001 From: Egbert Eich -Date: Wed Jan 8 20:56:25 2020 +0100 -Subject: Remove rpath from build -Patch-mainline: never -Git-commit: f79af97c35f38775a7a90cd8e4b98814729d9a9c -References: +Date: Wed, 8 Jan 2020 20:56:25 +0100 +Subject: [PATCH] Remove rpath from build Signed-off-by: Egbert Eich --- - slurm-19.05.5/contribs/perlapi/libslurm/perl/Makefile.PL.in | 4 ++-- - slurm-19.05.5/contribs/perlapi/libslurmdb/perl/Makefile.PL.in | 4 ++-- + contribs/perlapi/libslurm/perl/Makefile.PL.in | 4 ++-- + contribs/perlapi/libslurmdb/perl/Makefile.PL.in | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) + diff --git a/contribs/perlapi/libslurm/perl/Makefile.PL.in b/contribs/perlapi/libslurm/perl/Makefile.PL.in -index fcae437..797c943 100644 +index e8f8aff54d..b51f53f412 100644 --- a/contribs/perlapi/libslurm/perl/Makefile.PL.in +++ b/contribs/perlapi/libslurm/perl/Makefile.PL.in -@@ -77,7 +77,7 @@ DESTDIR_BUG +@@ -68,7 +68,7 @@ DESTDIR_BUG # AIX has problems with not always having the correct # flags so we have to add some :) my $os = lc(`uname`); --my $other_ld_flags = "-Wl,-rpath,@top_builddir@/src/api/.libs -Wl,-rpath,@prefix@/lib"; +-my $other_ld_flags = '-Wl,-rpath,@top_builddir@/src/api/.libs -Wl,-rpath,@libdir@'; +my $other_ld_flags = "-L@top_builddir@/src/api/.libs -lslurm"; $other_ld_flags = " -brtl -G -bnoentry -bgcbypass:1000 -bexpfull" if $os =~ "aix"; -@@ -88,7 +88,7 @@ WriteMakefile( +@@ -79,7 +79,7 @@ WriteMakefile( ($] >= 5.005 ? 
## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'lib/Slurm.pm', # retrieve abstract from module AUTHOR => 'Hongjia Cao ') : ()), -- LIBS => ["-L@top_builddir@/src/api/.libs -L@prefix@/lib -lslurm"], # e.g., '-lm' +- LIBS => ['-L@top_builddir@/src/api/.libs -L@libdir@ -lslurm'], # e.g., '-lm' + LIBS => ["-L@prefix@/lib -lslurm"], # e.g., '-lm' DEFINE => '', # e.g., '-DHAVE_SOMETHING' INC => "-I. -I@top_srcdir@ -I@top_srcdir@/contribs/perlapi/common -I@top_builddir@", # Un-comment this if you add C files to link with later: diff --git a/contribs/perlapi/libslurmdb/perl/Makefile.PL.in b/contribs/perlapi/libslurmdb/perl/Makefile.PL.in -index 2db028c..83eb4e2 100644 +index 4fb38b9725..148efa6e82 100644 --- a/contribs/perlapi/libslurmdb/perl/Makefile.PL.in +++ b/contribs/perlapi/libslurmdb/perl/Makefile.PL.in -@@ -76,7 +76,7 @@ DESTDIR_BUG +@@ -68,7 +68,7 @@ DESTDIR_BUG # AIX has problems with not always having the correct # flags so we have to add some :) my $os = lc(`uname`); --my $other_ld_flags = "-Wl,-rpath,@top_builddir@/src/db_api/.libs -Wl,-rpath,@prefix@/lib"; +-my $other_ld_flags = '-Wl,-rpath,@top_builddir@/src/db_api/.libs -Wl,-rpath,@libdir@'; +my $other_ld_flags = "-L@top_builddir@/src/api/.libs -lslurm"; $other_ld_flags = " -brtl -G -bnoentry -bgcbypass:1000 -bexpfull" if $os =~ "aix"; -@@ -87,7 +87,7 @@ WriteMakefile( +@@ -79,7 +79,7 @@ WriteMakefile( ($] >= 5.005 ? ## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'Slurmdb.pm', # retrieve abstract from module AUTHOR => 'Don Lipari ') : ()), -- LIBS => ["-L@top_builddir@/src/api/.libs -L@prefix@/lib -lslurm"], # e.g., '-lm' -+ LIBS => ["-L@prefix@/lib -lslurmdb"], # e.g., '-lm' +- LIBS => ['-L@top_builddir@/src/api/.libs -L@libdir@ -lslurm'], # e.g., '-lm' ++ LIBS => ["-L@prefix@/lib -lslurm"], # e.g., '-lm' DEFINE => '', # e.g., '-DHAVE_SOMETHING' INC => "-I. 
-I@top_srcdir@ -I@top_srcdir@/contribs/perlapi/common -I@top_builddir@", # Un-comment this if you add C files to link with later: +-- +2.42.1 + diff --git a/pam_slurm-Initialize-arrays-and-pass-sizes.patch b/pam_slurm-Initialize-arrays-and-pass-sizes.patch index 6bf3183..ed3649c 100644 --- a/pam_slurm-Initialize-arrays-and-pass-sizes.patch +++ b/pam_slurm-Initialize-arrays-and-pass-sizes.patch @@ -1,9 +1,7 @@ +From d51d3e1db8b2ed650a042352eff041ae77e467f9 Mon Sep 17 00:00:00 2001 From: Egbert Eich -Date: Mon Feb 20 21:29:27 2023 +0100 -Subject: pam_slurm: Initialize arrays and pass sizes -Patch-mainline: Not yet -Git-commit: 5feca5c29d4e820dafd8d34c0343944b28890902 -References: bsc#1007053 +Date: Mon, 20 Feb 2023 21:29:27 +0100 +Subject: [PATCH] pam_slurm: Initialize arrays and pass sizes PAM is security critical: - clear arrays @@ -15,11 +13,12 @@ Signed-off-by: Egbert Eich --- contribs/pam/pam_slurm.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) + diff --git a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c -index 20d21a9..363b6ae 100644 +index a27e651548..eac9879c07 100644 --- a/contribs/pam/pam_slurm.c +++ b/contribs/pam/pam_slurm.c -@@ -266,9 +266,9 @@ static int +@@ -279,9 +279,9 @@ static int _gethostname_short (char *name, size_t len) { int error_code, name_len; @@ -31,12 +30,12 @@ index 20d21a9..363b6ae 100644 if (error_code) return error_code; -@@ -296,13 +296,13 @@ static int +@@ -309,13 +309,13 @@ static int _slurm_match_allocation(uid_t uid) { int authorized = 0, i; -- char hostname[MAXHOSTNAMELEN]; -+ char hostname[MAXHOSTNAMELEN] = {0}; +- char hostname[HOST_NAME_MAX]; ++ char hostname[HOST_NAME_MAX] = {0}; char *nodename = NULL; job_info_msg_t * msg; @@ -47,7 +46,7 @@ index 20d21a9..363b6ae 100644 _log_msg(LOG_ERR, "gethostname: %m"); return 0; } -@@ -425,7 +425,7 @@ _send_denial_msg(pam_handle_t *pamh, struct _options *opts, +@@ -438,7 +438,7 @@ _send_denial_msg(pam_handle_t *pamh, struct _options *opts, */ extern 
void libpam_slurm_init (void) { @@ -56,7 +55,7 @@ index 20d21a9..363b6ae 100644 if (slurm_h) return; -@@ -433,10 +433,10 @@ extern void libpam_slurm_init (void) +@@ -446,10 +446,10 @@ extern void libpam_slurm_init (void) /* First try to use the same libslurm version ("libslurm.so.24.0.0"), * Second try to match the major version number ("libslurm.so.24"), * Otherwise use "libslurm.so" */ @@ -69,7 +68,7 @@ index 20d21a9..363b6ae 100644 _log_msg (LOG_ERR, "Unable to write libslurmname\n"); } else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) { return; -@@ -445,8 +445,10 @@ extern void libpam_slurm_init (void) +@@ -458,8 +458,10 @@ extern void libpam_slurm_init (void) libslurmname, dlerror ()); } @@ -82,3 +81,6 @@ index 20d21a9..363b6ae 100644 _log_msg (LOG_ERR, "Unable to write libslurmname\n"); } else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) { return; +-- +2.42.1 + diff --git a/slurm-23.02.7.tar.bz2 b/slurm-23.02.7.tar.bz2 deleted file mode 100644 index 91f8f1d..0000000 --- a/slurm-23.02.7.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eba6db8990abf40402d8e30d8706a7ddd0560e0e307c567f0fb72f1c8a522078 -size 7447239 diff --git a/slurm-23.11.1.tar.bz2 b/slurm-23.11.1.tar.bz2 new file mode 100644 index 0000000..60fe4c1 --- /dev/null +++ b/slurm-23.11.1.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f3f4ad4c92596c405d465f5a991bc50d85508b8b127fb2cc008a0980b7bdbd8 +size 7536436 diff --git a/slurm.changes b/slurm.changes index 296f0ba..3d89146 100644 --- a/slurm.changes +++ b/slurm.changes @@ -1,3 +1,331 @@ +------------------------------------------------------------------- +Fri Jan 12 11:08:01 UTC 2024 - Christian Goll + +- Update to 23.11.1 with following major improvements and fixing + CVE-2023-49933, CVE-2023-49934, CVE-2023-49935, CVE-2023-49936 + and CVE-2023-49937 + * Substantially overhauled the SlurmDBD association management + code. 
For clusters updated to 23.11, account and user + additions or removals are significantly faster than in prior + releases. + * Overhauled `scontrol reconfigure` to prevent configuration + mistakes from disabling slurmctld and slurmd. Instead, an + error will be returned, and the running configuration will + persist. This does require updates to the systemd service + files to use the `--systemd` option to `slurmctld` and `slurmd`. + * Added a new internal `auth/cred` plugin - `auth/slurm`. This + builds off the prior `auth/jwt` model, and permits operation + of the `slurmdbd` and `slurmctld` without access to full + directory information with a suitable configuration. + * Added a new `--external-launcher` option to `srun`, which is + automatically set by common MPI launcher implementations and + ensures processes using those non-srun launchers have full + access to all resources allocated on each node. + * Reworked the dynamic/cloud modes of operation to allow for + "fanout" - where Slurm communication can be automatically + offloaded to compute nodes for increased cluster scalability. + * Overhauled and extended the Reservation subsystem to allow + for most of the same resource requirements as are placed on + the job. Notably, this permits reservations to now reserve + GRES directly. +- Details of changes: + * Fix `scontrol update job=... TimeLimit+=/-=` when used with a + raw JobId of job array element. + * Reject `TimeLimit` increment/decrement when called on job with + `TimeLimit=UNLIMITED`. + * Fix issue with requesting a job with `*licenses` as well as + `*tres-per-task=license`. + * `slurmctld` - Prevent segfault in `getopt_long()` with an + invalid long option. + * slurmrestd - Added `/meta/slurm/cluster` field to responses. + * Adjust systemd service files to start daemons after + `remote-fs.target`. + * Fix `task/cgroup` indexing tasks in cgroup plugins, which + caused `jobacct/gather` to match the gathered stats with the + wrong task id. 
+ * `select/linear` - Fix regression in 23.11 in which jobs that + requested `*cpus-per-task` were rejected. + * `data_parser/v0.0.40` - Fix the parsing for + `/slurmdb/v0.0.40/jobs` exit_code query parameter. + * If a job requests more shards which would allocate more than + one sharing GRES (gpu) per node refuse it unless + `SelectTypeParameters` has `MULTIPLE_SHARING_GRES_PJ`. + * Trigger fatal exit when Slurm API function is called before + `slurm_init()` is called. + * `slurmd` - Fix issue with `scontrol reconfigure` when started + with `-c`. + * `slurmrestd` - Job submissions that result in the following + error codes will be considered as successfully submitted (with + a warning), instead of returning an HTTP 500 error back: + `ESLURM_NODES_BUSY`, `ESLURM_RESERVATION_BUSY`, `ESLURM_JOB_HELD`, + `ESLURM_NODE_NOT_AVAIL`, `ESLURM_QOS_THRES`, + `ESLURM_ACCOUNTING_POLICY`, `ESLURM_RESERVATION_NOT_USABLE`, + `ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE`, + `ESLURM_BURST_BUFFER_WAIT`, `ESLURM_PARTITION_DOWN`, + `ESLURM_LICENSES_UNAVAILABLE`. + * Fix a `slurmctld` fatal error when upgrading to 23.11 and + changing from `select/cons_res` to `select/cons_tres` at the + same time. + * `slurmctld` - Reject arbitrary distribution jobs that have a + minimum node count that differs from the number of unique + nodes in the hostlist. + * Prevent `slurmdbd` errors when updating reservations with names + containing apostrophes. + * Prevent message extension attacks that could bypass the + message hash. CVE-2023-49933. + * Prevent SQL injection attacks in `slurmdbd`. CVE-2023-49934. + * Prevent message hash bypass in slurmd which can allow an + attacker to reuse root-level MUNGE tokens and escalate + permissions. CVE-2023-49935. + * Prevent NULL pointer dereference on size_valp overflow. + CVE-2023-49936. + * Prevent double-xfree() on error in `_unpack_node_reg_resp()`. + CVE-2023-49937. 
+ * For jobs that request `*cpus-per-gpu`, ensure that the + `*cpus-per-gpu` request is honored on every node in the job and + not just for the job as a whole. + * Fix listing available `data_parser` plugins for json and yaml + when giving no commands to `scontrol` or `sacctmgr`. + * `slurmctld` - Rework `scontrol reconfigure` to avoid race + conditions that can result in stray jobs. + * `slurmctld` - Shave ~1 second off average reconfigure time by + terminating internal processing threads faster. + * Skip running `slurmdbd -R` if the connected cluster is 23.11 + or newer. This operation is no longer relevant for 23.11. + * Ensure `slurmscriptd` shuts down before `slurmctld` is stopped + or reconfigured. + * Improve error handling and error messages in `slurmctld` to + `slurmscriptd` communications. This includes avoiding + potential deadlock in `slurmctld` if `slurmscriptd` dies + unexpectedly. + * Do not hold batch jobs whose extra constraints cannot be + immediately satisfied, and set the state reason to + `Constraints` instead of `BadConstraints`. + * Fix verbose log message printing a hex number instead of a job + id. + * Upgrade rate limit parameters message from debug to info. + * For `SchedulerParameters=extra_constraints`, prevent `slurmctld` + segfault when starting a `slurmd` with `*extra` for a node + that did not previously set this. + This also ensures the extra constraints model works off the + current node state, not the prior state. + * Fix `*tres-per-task` assertion. + * Fix a few issues when creating reservations. + * Add `SchedulerParameters=time_min_as_soft_limit` option. + * Remove `SLURM_WORKING_CLUSTER` env from batch and srun + environments. + * `cli_filter/lua` - return nil for unset time options rather + than the string `2982616-04:14:00` (which is the internal + macro `NO_VAL` represented as a time string). + * Remove 'none' plugins for all but auth and cred. scontrol show + config will report (null) now. + * Removed `select/cons_res`. 
Please update your configuration to + `select/cons_tres`. + * `mpi/pmix` - When aborted with status 0, avoid marking + job/step as failed. + * Fixed typo on `initialized` for the description of + `ESLURM_PLUGIN_NOT_LOADED`. + * `cgroup.conf` - Removed deprecated parameters `AllowedKmemSpace`, + `ConstrainKmemSpace`, `MaxKmemPercent`, and `MinKmemSpace`. + * `proctrack/cgroup` - Add `SignalChildrenProcesses=` + option to `cgroup.conf`. This allows signals for cancelling, + suspending, resuming, etc. to be sent to children processes in + a `step/job` rather than just the parent. + * Add `PreemptParameters=suspend_grace_time` parameter to + control amount of time between `SIGTSTP` and `SIGSTOP` signals + when suspending jobs. + * `job_submit/throttle` - improve reset of submitted job counts + per user in order to better honor + `SchedulerParameters=jobs_per_user_per_hour=#`. + * Load the user environment into a private pid namespace to + avoid user scripts leaving background processes on a node. + * `scontrol` show `assoc_mgr` will display Lineage instead of Lft + for associations. + * Add `SlurmctldParameters=no_quick_restart` to avoid a new + `slurmctld` taking over the old `slurmctld` by accident. + * Fix `--cpus-per-gpu` for step allocations, which was + previously ignored for job steps. `*cpus-per-gpu` implies + `--exact`. + * Fix mutual exclusivity of `--cpus-per-gpu` and + `--cpus-per-task`: fatal if both options are requested in the + commandline or both are requested in the environment. If one + option is requested in the command line, it will override the + other option in the environment. + * `slurmrestd` - `openapi/dbv0.0.37` and `openapi/v0.0.37` + plugins have been removed. + * `slurmrestd` - `openapi/dbv0.0.38` and `openapi/v0.0.38` + plugins have been tagged as deprecated. + * `slurmrestd` - added auto population of `info/version` field. + * `sdiag` - add `--yaml` and `--json` arg support to specify + data_parser plugin. 
+ * `sacct` - add `--yaml` and `--json` arg support to specify + `data_parser` plugin. + * `scontrol` - add `--yaml` and `--json` arg support to specify + `data_parser` plugin. + * `sinfo` - add `--yaml` and `--json` arg support to specify + `data_parser` plugin. + * `squeue` - add `--yaml` and `--json` arg support to specify + `data_parser` plugin. + * Changed the default `SelectType` to `select/cons_tres` (from + `select/linear`). + * Allow `SlurmUser`/`root` to use reservations without specific + permissions. + * Fix sending step signals to nodes not allocated by the step. + * Remove `CgroupAutomount=` option from `cgroup.conf`. + * Add `TopologyParam=RoutePart` to route communications based + on partition node lists. + * Added ability for configless to push Prolog and Epilog + scripts to `slurmd`s. + * Prolog and Epilog do not have to be fully qualified pathnames. + * Changed default value of `PriorityType` from `priority/basic` + to `priority/multifactor`. + * `torque/mpiexec` - Propagate exit code from `launched` process. + * `slurmrestd` - Add new rlimits fields for job submission. + * Define SPANK options environment variables when + `--export=[NIL|NONE]` is specified. + * `slurmrestd` - Numeric input fields provided with a null + formatted value will now convert to zero (0) where it can be + a valid value. This is expected to only be notable with job + submission against v0.0.38 versioned endpoints with job + requests with fields provided with null values. These fields + were already rejected by v0.0.39+ endpoints, unless `+complex` + parser value is provided to v0.0.40+ endpoints. + * `slurmrestd` - Improve parsing of integers and floating point + numbers when handling incoming user provided numeric fields. + Fields that would have not rejected a number for a numeric + field followed by other non-numeric characters will now get + rejected. 
This is expected to only be notable with job + submission against v0.0.38 versioned endpoints with malformed + job requests. + * Reject reservation update if it will result in previously + submitted jobs losing access to the reservation. + * `data_parser/v0.0.40` - output partition state when dumping + partitions. + * Allow for a shared suffix to be used with the hostlist format. + E.g., `node[0001-0010]-int`. + * Replace `SRUN_CPUS_PER_TASK` with `SLURM_CPUS_PER_TASK` and + get back the previous behavior before Slurm 22.05 since now we + have the new external launcher step. + * `job_container/tmpfs` - Add `BasePath=none` option to disable + plugin on node subsets when there is a global setting. + * Add QOS flag `Relative`. If set the QOS limits will be treated + as percentages of a cluster/partition instead of absolutes. + * Remove `FIRST_CORES` flag from reservations. + * Add cloud instance id and instance type to node records. + Can be viewed/updated with `scontrol`. + * `slurmd` - add `instance-id`, `instance-type`, and `extra` + options to allow them to be set on startup. + * Add cloud instance accounting to database that can be viewed + with `sacctmgr show instance`. + * `select/linear` - fix task launch failure that sometimes + occurred when requesting `*threads-per-core` or + `--hint=nomultithread`. This also fixes memory calculation + with one of these options and `*mem-per-cpu`: + Previously, memory = mem-per-cpu * all cpus including unusable + threads. + Now, memory = mem-per-cpu * only usable threads. This + behavior matches the documentation and select/cons_tres. + * `gpu/nvml` - Reduce chances of `NVML_ERROR_INSUFFICIENT_SIZE` + error when getting gpu memory information. + * `slurmrestd` - Convert to generating `OperationIDs` based on + path for all v0.0.40 tagged paths. + * `slurmrestd` - Reduce memory used while dumping a job's stdio + paths. 
+ * `slurmrestd` - Jobs queried from `data_parser/v0.0.40` from + `slurmdb` will have `step/id` field given as a string to match + CLI formatting instead of an object. + * `sacct` - Output in JSON or YAML output will have the + `step/id` field given as a string instead of an object. + * `scontrol`/`squeue` - Step output in JSON or YAML output will + have the `id` field given as a string instead of an + object. + * `slurmrestd` - For `GET /slurmdb/v0.0.40/jobs` mimic default + behavior for handling of job start and end times as `sacct` + when one or both fields are not provided as a query parameter. + * `openapi/slurmctld` - Add `GET /slurm/v0.0.40/shares` endpoint + to dump same output as `sshare`. + * `sshare` - add JSON/YAML support. + * `data_parser/v0.0.40` - Remove `required/memory` output in + json. It is replaced by `required/memory_per_cpu` and + `required/memory_per_node`. + * `slurmrestd` - Add numeric id to all association identifiers + to allow unique identification where association has been + deleted but is still referenced by accounting record. + * `slurmrestd` - Add accounting, id, and comment fields to + association dumps. + * Use `memory.current` in cgroup/v2 instead of manually + calculating RSS. This makes accounting consistent with + OOM Killer. + * `sreport` - cluster Utilization `PlannedDown` field now + includes the time that all nodes were in the `POWERED_DOWN` + state instead of just cloud nodes. + * `scontrol` update partition now allows `Nodes+=` and + `Nodes-=` to add/delete nodes from the existing + partition node list. `Nodes=+host1,-host2` is also allowed. + * `sacctmgr` - add `--yaml` and `--json` arg support to specify + `data_parser` plugin. + * `sacctmgr` can now modify QOS's RawUsage to zero or a positive + value. + * `sdiag` - Added statistics on why the main and backfill + schedulers have stopped evaluation on each scheduling cycle. + the number of `RPC limit exceeded...` messages that are logged. 
+ * Rename `sbcast --fanout` to `--treewidth`. + * Remove `SLURM_NODE_ALIASES` env variable. + * Enable fanout for dynamic and unaddressable cloud nodes. + * Fix how steps are deallocated in an allocation if the last step + of an srun never completes due to a node failure. + * Remove redundant database indexes. + * Add database index to suspend table to speed up archive/purges. + * When requesting `--tres-per-task` alter incorrect request for + TRES, it should be `TRESType/TRESName` not `TRESType:TRESName`. + * Make it so reservations can reserve GRES. + * `sbcast` - use the specified `--fanout` value on all hops in + message forwarding; previously the specified fanout was only + used on the first hop, and additional hops used `TreeWidth` in + `slurm.conf`. + * `slurmrestd` - remove logger prefix from `-s/-a list` options + outputs. + * `switch/hpe_slingshot` - Add support for collectives. + * Nodes with suspended jobs can now be displayed as `MIXED`. + * Fix inconsistent handling of using cli and/or environment + options for `tres_per_task=cpu:#` and `cpus_per_gpu`. + * Requesting `--cpus-per-task` will now set + `SLURM_TRES_PER_TASK=cpu:#` in the environment. + * For some tres related environment variables such as + `SLURM_TRES_PER_TASK`, when `srun` requests a different value + for that option, set these environment variables to the value + requested by `srun`. Previously these environment variables + were unchanged from the job allocation. This bug only affected + the output environment variables, not the actual step resource + allocation. + * `RoutePlugin=route/topology` has been replaced with + `TopologyParam=RouteTree`. + * If `ThreadsPerCore` in `slurm.conf` is configured with less + than the number of hardware threads, fix a bug where the task + plugins used fewer cores instead of using fewer threads per core. + * Fix arbitrary distribution allowing it to be used with `salloc` + and `sbatch` and fix how cpus are allocated to nodes. 
+ * Allow nodes to reboot while node is drained or in a + maintenance state. + * Allow `scontrol` reboot to use nodesets to filter nodes to reboot. + * Fix how the topology of typed gres gets updated. + * Changes to the Type option in gres.conf now can be applied with + `scontrol` reconfig. + * Allow for jobs that request a newly configured gres type to be + queued even when the needed `slurmd`s have not yet registered. + * Kill recovered jobs that require unconfigured gres types. + * If keepalives are configured, enable them on all persistent + connections. + * Configless - Also send Includes from configuration files not + parsed by the controller (i.e. from `plugstack.conf`). + * Add `gpu/nrt` plugin for nodes using Trainium/Inferentia + devices. + * `data_parser/v0.0.40` - Add `START_RECEIVED` to job flags in + dumped output. + * SPANK - Failures from most spank functions (not epilog or + exit) will now cause the step to be marked as failed and the + command (`srun`, `salloc`, `sbatch *wait`) to return 1. + ------------------------------------------------------------------- Wed Jan 3 10:45:48 UTC 2024 - Egbert Eich diff --git a/slurm.spec b/slurm.spec index 9ca78f0..0304d54 100644 --- a/slurm.spec +++ b/slurm.spec @@ -17,13 +17,11 @@ # Check file META in sources: update so_version to (API_CURRENT - API_AGE) -%define so_version 39 +%define so_version 40 # Make sure to update `upgrades` as well! 
-%define ver 23.02.7 -%define _ver _23_02 -#%%define rc_v 0rc1 +%define ver 23.11.1 +%define _ver _23_11 %define dl_ver %{ver} -#%%define dl_ver 23-02-0%{?rc_v:-%rc_v} # so-version is 0 and seems to be stable %define pmi_so 0 %define nss_so 2 @@ -58,7 +56,7 @@ ExclusiveArch: do_not_build %if 0%{?sle_version} == 150300 || 0%{?sle_version} == 150400 %define base_ver 2011 %endif -%if 0%{?sle_version} == 150500 +%if 0%{?sle_version} == 150500 || 0%{?sle_version} == 150600 %define base_ver 2302 %endif @@ -122,7 +120,7 @@ Conflicts: %{*} } %endif %if 0%{?suse_version} >= 1500 -%define have_hdf5 1 +%undefine have_hdf5 %define have_boolean_deps 1 %define have_lz4 1 %define have_firewalld 1 @@ -162,7 +160,6 @@ License: SUSE-GPL-2.0-with-openssl-exception Group: Productivity/Clustering/Computing URL: https://www.schedmd.com Source: https://download.schedmd.com/slurm/%{pname}-%{dl_ver}.tar.bz2 -#Source: https://github.com/SchedMD/slurm/archive/refs/tags/%{pname}-%{dl_ver}.tar.gz Source1: %upgrade_versions Source2: slurm-rpmlintrc Source10: slurmd.xml @@ -1088,6 +1085,7 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \ %{_bindir}/sstat %{_bindir}/strigger %{?have_netloc:%{_bindir}/netloc_to_topology} +%{_sbindir}/sackd %{_sbindir}/slurmctld %{_sbindir}/slurmsmwd %dir %{_libdir}/slurm/src @@ -1190,33 +1188,27 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \ %dir %{_sysconfdir}/%{pname}/plugstack.conf.d %dir %{_libdir}/slurm %{_libdir}/slurm/libslurmfull.so -%{_libdir}/slurm/accounting_storage_none.so %{_libdir}/slurm/accounting_storage_slurmdbd.so %{_libdir}/slurm/acct_gather_energy_pm_counters.so %{_libdir}/slurm/acct_gather_energy_gpu.so %{_libdir}/slurm/acct_gather_energy_ibmaem.so -%{_libdir}/slurm/acct_gather_energy_none.so %{_libdir}/slurm/acct_gather_energy_rapl.so %{_libdir}/slurm/acct_gather_interconnect_sysfs.so %{_libdir}/slurm/acct_gather_filesystem_lustre.so -%{_libdir}/slurm/acct_gather_filesystem_none.so 
-%{_libdir}/slurm/acct_gather_interconnect_none.so -%{_libdir}/slurm/acct_gather_profile_none.so %{_libdir}/slurm/burst_buffer_lua.so %{_libdir}/slurm/burst_buffer_datawarp.so +%{_libdir}/slurm/data_parser_v0_0_40.so %{_libdir}/slurm/data_parser_v0_0_39.so %{_libdir}/slurm/cgroup_v1.so %if 0%{?suse_version} >= 1500 %{_libdir}/slurm/cgroup_v2.so %endif -%{_libdir}/slurm/core_spec_none.so -%{_libdir}/slurm/cli_filter_none.so %{_libdir}/slurm/cli_filter_lua.so %{_libdir}/slurm/cli_filter_syslog.so %{_libdir}/slurm/cli_filter_user_defaults.so %{_libdir}/slurm/cred_none.so -%{_libdir}/slurm/ext_sensors_none.so %{_libdir}/slurm/gpu_generic.so +%{_libdir}/slurm/gpu_nrt.so %{_libdir}/slurm/gres_gpu.so %{_libdir}/slurm/gres_mps.so %{_libdir}/slurm/gres_nic.so @@ -1224,13 +1216,10 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \ %{_libdir}/slurm/hash_k12.so %{_libdir}/slurm/jobacct_gather_cgroup.so %{_libdir}/slurm/jobacct_gather_linux.so -%{_libdir}/slurm/jobacct_gather_none.so %{_libdir}/slurm/jobcomp_filetxt.so -%{_libdir}/slurm/jobcomp_none.so %{_libdir}/slurm/jobcomp_lua.so %{_libdir}/slurm/jobcomp_script.so %{_libdir}/slurm/job_container_cncu.so -%{_libdir}/slurm/job_container_none.so %{_libdir}/slurm/job_container_tmpfs.so %{_libdir}/slurm/job_submit_all_partitions.so %{_libdir}/slurm/job_submit_defaults.so @@ -1241,17 +1230,13 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \ %{_libdir}/slurm/libslurm_pmi.so %{_libdir}/slurm/mcs_account.so %{_libdir}/slurm/mcs_group.so -%{_libdir}/slurm/mcs_none.so %{_libdir}/slurm/mcs_user.so -%{_libdir}/slurm/mpi_none.so %{_libdir}/slurm/mpi_pmi2.so %if %{with pmix} %{_libdir}/slurm/mpi_pmix.so %{_libdir}/slurm/mpi_pmix_v3.so %endif %{_libdir}/slurm/node_features_helpers.so -%{_libdir}/slurm/power_none.so -%{_libdir}/slurm/preempt_none.so %{_libdir}/slurm/preempt_partition_prio.so %{_libdir}/slurm/preempt_qos.so %{_libdir}/slurm/prep_script.so @@ -1260,24 +1245,19 @@ rm -rf 
/srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \ %{_libdir}/slurm/proctrack_cgroup.so %{_libdir}/slurm/proctrack_linuxproc.so %{_libdir}/slurm/proctrack_pgid.so -%{_libdir}/slurm/route_default.so -%{_libdir}/slurm/route_topology.so %{_libdir}/slurm/sched_backfill.so %{_libdir}/slurm/sched_builtin.so -%{_libdir}/slurm/select_cons_res.so %{_libdir}/slurm/select_cons_tres.so %{_libdir}/slurm/select_linear.so %{_libdir}/slurm/serializer_json.so %{_libdir}/slurm/serializer_url_encoded.so %{_libdir}/slurm/serializer_yaml.so -%{_libdir}/slurm/site_factor_none.so -%{_libdir}/slurm/switch_none.so +%{_libdir}/slurm/site_factor_example.so %{_libdir}/slurm/task_affinity.so %{_libdir}/slurm/task_cgroup.so -%{_libdir}/slurm/task_none.so %{_libdir}/slurm/topology_3d_torus.so -%{_libdir}/slurm/topology_hypercube.so -%{_libdir}/slurm/topology_none.so +%{_libdir}/slurm/topology_block.so +%{_libdir}/slurm/topology_default.so %{_libdir}/slurm/topology_tree.so %if 0%{?suse_version} > 1310 %{_libdir}/slurm/acct_gather_interconnect_ofed.so @@ -1326,12 +1306,12 @@ rm -rf /srv/slurm-testsuite/src /srv/slurm-testsuite/testsuite \ %{_sbindir}/rcslurmrestd %{_unitdir}/slurmrestd.service %{_mandir}/man8/slurmrestd.* +%{_libdir}/slurm/openapi_slurmctld.so +%{_libdir}/slurm/openapi_slurmdbd.so %{_libdir}/slurm/openapi_dbv0_0_39.so %{_libdir}/slurm/openapi_v0_0_39.so %{_libdir}/slurm/openapi_dbv0_0_38.so %{_libdir}/slurm/openapi_v0_0_38.so -%{_libdir}/slurm/openapi_dbv0_0_37.so -%{_libdir}/slurm/openapi_v0_0_37.so %{_libdir}/slurm/rest_auth_local.so %endif