From 2028708d3a1e5d5cac7ea23f25d8a68e83de2974bab8c95eaf923963491d4408 Mon Sep 17 00:00:00 2001 From: Corot Sebastien Date: Sun, 16 Oct 2016 19:51:20 +0000 Subject: [PATCH] Accepting request 435622 from home:eeich:branches:network:cluster - version 15.08.7.1 * Remove the 1024-character limit on lines in batch scripts. * task/affinity: Disable core-level task binding if more CPUs required than available cores. * Preemption/gang scheduling: If a job is suspended at slurmctld restart or reconfiguration time, then leave it suspended rather than resume+suspend. * Don't use lower weight nodes for job allocation when topology/tree used. * Don't allow user specified reservation names to disrupt the normal reservation sequence numbering scheme. * Avoid hard-link/copy of script/environment files for job arrays. Use the master job record file for all tasks of the job array. NOTE: Job arrays submitted to Slurm version 15.08.6 or later will fail if the slurmctld daemon is downgraded to an earlier version of Slurm. * In slurmctld log file, log duplicate job ID found by slurmd. Previously was being logged as prolog/epilog failure. * If a job is requeued while in the process of being launched, remove its job ID from slurmd's record of active jobs in order to avoid generating a duplicate job ID error when launched for the second time (which would drain the node). * Cleanup messages when handling job script and environment variables in older directory structure formats. * Prevent triggering gang scheduling within a partition if configured with PreemptType=partition_prio and PreemptMode=suspend,gang. * Decrease parallelism in job cancel request to prevent denial of service when cancelling huge numbers of jobs. * If all ephemeral ports are in use, try using other port numbers. * Prevent "scontrol update job" from updating jobs that have already finished. * Show requested TRES in "squeue -O tres" when job is pending. 
* Backfill scheduler: Test association and QOS node limits before reserving resources for pending job. * Many bug fixes. - Use source services to download package. - Fix code for new API of hwloc-2.0. - Package netloc_to_topology where available. - Package documentation. OBS-URL: https://build.opensuse.org/request/show/435622 OBS-URL: https://build.opensuse.org/package/show/network:cluster/slurm?expand=0&rev=10 --- _service | 5 +++ ...vice:download_files:slurm-15-08-7-1.tar.gz | 3 ++ slurm-15.08.2.tar.bz2 | 3 -- slurm.changes | 39 +++++++++++++++++ slurm.spec | 35 ++++++++++++--- slurmd-Fix-for-newer-API-versions.patch | 43 +++++++++++++++++++ 6 files changed, 119 insertions(+), 9 deletions(-) create mode 100644 _service create mode 100644 _service:download_files:slurm-15-08-7-1.tar.gz delete mode 100644 slurm-15.08.2.tar.bz2 create mode 100644 slurmd-Fix-for-newer-API-versions.patch diff --git a/_service b/_service new file mode 100644 index 0000000..06cd20b --- /dev/null +++ b/_service @@ -0,0 +1,5 @@ + + + yes + + diff --git a/_service:download_files:slurm-15-08-7-1.tar.gz b/_service:download_files:slurm-15-08-7-1.tar.gz new file mode 100644 index 0000000..8ac840f --- /dev/null +++ b/_service:download_files:slurm-15-08-7-1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710a6d60c31b1627e7d102cf1aba0fd6aca3d16688c54d7203e0d5486819b1e6 +size 9077914 diff --git a/slurm-15.08.2.tar.bz2 b/slurm-15.08.2.tar.bz2 deleted file mode 100644 index b59dd74..0000000 --- a/slurm-15.08.2.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7d312157e537fd24e9c8e1056ae8bd33882237155282f26b3831a087ee8e4cb -size 6745582 diff --git a/slurm.changes b/slurm.changes index 31d5969..9e51f25 100644 --- a/slurm.changes +++ b/slurm.changes @@ -1,3 +1,42 @@ +------------------------------------------------------------------- +Sat Oct 15 18:11:39 UTC 2016 - eich@suse.com + +- version 15.08.7.1 + * Remove the 
1024-character limit on lines in batch scripts. + * task/affinity: Disable core-level task binding if more CPUs required than + available cores. + * Preemption/gang scheduling: If a job is suspended at slurmctld restart or + reconfiguration time, then leave it suspended rather than resume+suspend. + * Don't use lower weight nodes for job allocation when topology/tree used. + * Don't allow user specified reservation names to disrupt the normal + reservation sequence numbering scheme. + * Avoid hard-link/copy of script/environment files for job arrays. Use the + master job record file for all tasks of the job array. + NOTE: Job arrays submitted to Slurm version 15.08.6 or later will fail if + the slurmctld daemon is downgraded to an earlier version of Slurm. + * In slurmctld log file, log duplicate job ID found by slurmd. Previously was + being logged as prolog/epilog failure. + * If a job is requeued while in the process of being launched, remove its + job ID from slurmd's record of active jobs in order to avoid generating a + duplicate job ID error when launched for the second time (which would + drain the node). + * Cleanup messages when handling job script and environment variables in + older directory structure formats. + * Prevent triggering gang scheduling within a partition if configured with + PreemptType=partition_prio and PreemptMode=suspend,gang. + * Decrease parallelism in job cancel request to prevent denial of service + when cancelling huge numbers of jobs. + * If all ephemeral ports are in use, try using other port numbers. + * Prevent "scontrol update job" from updating jobs that have already finished. + * Show requested TRES in "squeue -O tres" when job is pending. + * Backfill scheduler: Test association and QOS node limits before reserving + resources for pending job. + * Many bug fixes. +- Use source services to download package. +- Fix code for new API of hwloc-2.0. +- Package netloc_to_topology where available. +- Package documentation. 
+ ------------------------------------------------------------------- Sun Nov 1 13:45:52 UTC 2015 - scorot@free.fr diff --git a/slurm.spec b/slurm.spec index 8d72739..25bbff6 100644 --- a/slurm.spec +++ b/slurm.spec @@ -15,26 +15,36 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # +%define trans() ( echo %{1} | sed -e "s#-#\\.#g" ) +%define trunc() ( echo %{1} | sed -e "s#\\([^.]\\+\\.[^.]\\+\\.[^.]\\+\\).*#\\1#" ) +%define vers_f() %(%trans) +%define vers_t() %(%trunc) + %if 0%{?suse_version} >= 1220 %define with_systemd 1 %else %define with_systemd 0 %endif +%if 0%{suse_version} >= 1310 +%define have_netloc 1 +%endif %define libslurm libslurm29 +%define ver_exp 15-08-7-1 Name: slurm -Version: 15.08.2 +Version: %{vers_f %ver_exp} Release: 0 Summary: Simple Linux Utility for Resource Management License: GPL-3.0 Group: Productivity/Clustering/Computing Url: https://computing.llnl.gov/linux/slurm/ -Source: slurm-%{version}.tar.bz2 +Source: https://github.com/SchedMD/slurm/archive/%{name}-%{ver_exp}.tar.gz Source1: slurm.service Source2: slurmdbd.service Patch0: slurm-2.4.4-rpath.patch Patch1: slurm-2.4.4-init.patch +Patch2: slurmd-Fix-for-newer-API-versions.patch Requires: slurm-plugins = %{version} BuildRequires: fdupes BuildRequires: gcc-c++ @@ -67,6 +77,13 @@ containing up to 65,536 nodes. Components include machine status, partition management, job management, scheduling and accounting modules. +%package doc +Summary: Documentation for SLURM +Group: Documentation/Clustering/Computing + +%description doc +Documentation (html) for the SLURM cluster managment software + %package -n perl-slurm Summary: Perl API to SLURM Group: Development/Languages/Perl @@ -120,7 +137,7 @@ Obsoletes: slurm-auth-munge < %{version} Provides: slurm-auth-munge = %{version} %description munge -This package contains the SLURM authentication module for Chris Dunlap's Munge. +This package contains the SLURM authentication module for Chris Dunlap''s Munge. 
%package sview Summary: SLURM graphical interface @@ -212,9 +229,10 @@ or any user who has allocated resources on the node according to the SLURM %prep -%setup -q +%setup -q -n %{name}-%{name}-%{ver_exp} %patch0 -p1 %patch1 -p1 +%patch2 -p1 chmod 0644 doc/html/*.{gif,jpg} %build @@ -252,7 +270,6 @@ install -D -m755 contribs/sjstat $RPM_BUILD_ROOT%{_bindir}/sjstat rm -rf $RPM_BUILD_ROOT/%{_libdir}/slurm/*.{a,la} \ $RPM_BUILD_ROOT/%{_libdir}/*.la \ $RPM_BUILD_ROOT/%_lib/security/*.la \ - $RPM_BUILD_ROOT/%{_datadir}/doc/slurm-%{version}/ \ $RPM_BUILD_ROOT/%{_mandir}/man5/bluegene* rm -f $RPM_BUILD_ROOT%{_mandir}/man1/srun_cr* \ @@ -354,6 +371,7 @@ sed -i 's/\r$//' $RPM_BUILD_ROOT%{_bindir}/qalter %{_bindir}/sshare %{_bindir}/sstat %{_bindir}/strigger +%{?have_netloc: %{_bindir}/netloc_to_topology} %{_sbindir}/slurmctld %{_sbindir}/slurmd %{_sbindir}/slurmstepd @@ -400,12 +418,17 @@ sed -i 's/\r$//' $RPM_BUILD_ROOT%{_bindir}/qalter %dir %{_sysconfdir}/%{name}/cgroup %config(noreplace) %{_sysconfdir}/%{name}/cgroup/release_common %if %{with_systemd} -%config %{_unitdir}/slurm.service +%{_unitdir}/slurm.service %else %{_initrddir}/slurm %endif %{_sbindir}/rcslurm +%files doc +%defattr(-,root,root) +%dir %{_datadir}/doc/%{name}-%{vers_t %{version}} +%{_datadir}/doc/%{name}-%{vers_t %{version}}/* + %files -n %{libslurm} %defattr(-,root,root) %{_libdir}/*.so.* diff --git a/slurmd-Fix-for-newer-API-versions.patch b/slurmd-Fix-for-newer-API-versions.patch new file mode 100644 index 0000000..afb8b48 --- /dev/null +++ b/slurmd-Fix-for-newer-API-versions.patch @@ -0,0 +1,43 @@ +From: Egbert Eich +Date: Fri Oct 14 17:49:13 2016 +0200 +Subject: [PATCH] slurmd: Fix for newer API versions +Git-commit: 9f263fa4cd8e9e8090eda2f533294e10ae984190 +References: +Signed-off-by: Egbert Eich + +Replace hwloc_topology_ignore_type() by hwloc_topology_set_type_filter() +for API versions >= 0x00020000 + +Signed-off-by: Egbert Eich +--- + src/slurmd/common/xcpuinfo.c | 15 +++++++++++++++ + 1 file 
changed, 15 insertions(+) + +diff --git a/src/slurmd/common/xcpuinfo.c b/src/slurmd/common/xcpuinfo.c +index ee213d3..ae9112f 100644 +--- a/src/slurmd/common/xcpuinfo.c ++++ b/src/slurmd/common/xcpuinfo.c +@@ -203,8 +203,23 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, + hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); + + /* ignores cache, misc */ ++#if HWLOC_API_VERSION < 0x00020000 + hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE); + hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC); ++#else ++ hwloc_topology_set_type_filter(topology,HWLOC_OBJ_L1CACHE, ++ HWLOC_TYPE_FILTER_KEEP_NONE); ++ hwloc_topology_set_type_filter(topology,HWLOC_OBJ_L2CACHE, ++ HWLOC_TYPE_FILTER_KEEP_NONE); ++ hwloc_topology_set_type_filter(topology,HWLOC_OBJ_L3CACHE, ++ HWLOC_TYPE_FILTER_KEEP_NONE); ++ hwloc_topology_set_type_filter(topology,HWLOC_OBJ_L4CACHE, ++ HWLOC_TYPE_FILTER_KEEP_NONE); ++ hwloc_topology_set_type_filter(topology,HWLOC_OBJ_L5CACHE, ++ HWLOC_TYPE_FILTER_KEEP_NONE); ++ hwloc_topology_set_type_filter(topology,HWLOC_OBJ_MISC ++ ,HWLOC_TYPE_FILTER_KEEP_NONE); ++#endif + + /* load topology */ + debug2("hwloc_topology_load");