From b3c1bfbb9b40b0cb912bfa82db21a270d3f83764c7928f68689dbc53f9d8f83d Mon Sep 17 00:00:00 2001 From: Nicolas Morey-Chaisemartin Date: Thu, 29 Apr 2021 05:57:54 +0000 Subject: [PATCH 1/3] Accepting request 889006 from home:NMoreyChaisemartin:branches:science:HPC - Update to version 4.1.1 - Fix a number of datatype issues, including an issue with improper handling of partial datatypes that could lead to an unexpected application failure. - Change UCX PML to not warn about MPI_Request leaks during MPI_FINALIZE by default. The old behavior can be restored with the mca_pml_ucx_request_leak_check MCA parameter. - Reverted temporary solution that worked around launch issues in SLURM v20.11.{0,1,2}. SchedMD encourages users to avoid these versions and to upgrade to v20.11.3 or newer. - Updated PMIx to v3.2.2. - Disabled gcc built-in atomics by default on aarch64 platforms. - Disabled UCX PML when UCX v1.8.0 is detected. UCX version 1.8.0 has a bug that may cause data corruption when its TCP transport is used in conjunction with the shared memory transport. UCX versions prior to v1.8.0 are not affected by this issue. Thanks to @ksiazekm for reporting the issue. - Fixed detection of available UCX transports/devices to better inform PML prioritization. - Fixed SLURM support to mark ORTE daemons as non-MPI tasks. - Improved AVX detection to more accurately detect supported platforms. Also improved the generated AVX code, and switched to using word-based MCA params for the op/avx component (vs. numeric big flags). - Improved OFI compatibility support and fixed memory leaks in error handling paths. - Improved HAN collectives with support for Barrier and Scatter. Thanks to @EmmanuelBRELLE for these changes and the relevant bug fixes. - Fixed MPI debugger support (i.e., the MPIR_Breakpoint() symbol). Thanks to @louisespellacy-arm for reporting the issue. - Fixed ORTE bug that prevented debuggers from reading MPIR_Proctable. 
OBS-URL: https://build.opensuse.org/request/show/889006 OBS-URL: https://build.opensuse.org/package/show/science:HPC/openmpi4?expand=0&rev=15 --- _service | 2 +- openmpi-4.1.0.0.9ac5471035.tar.bz2 | 3 -- openmpi-4.1.1.0.a8dd8708d8b6.tar.bz2 | 3 ++ openmpi4.changes | 65 ++++++++++++++++++++++++++++ openmpi4.spec | 6 +-- 5 files changed, 72 insertions(+), 7 deletions(-) delete mode 100644 openmpi-4.1.0.0.9ac5471035.tar.bz2 create mode 100644 openmpi-4.1.1.0.a8dd8708d8b6.tar.bz2 diff --git a/_service b/_service index e34b008..68a54bb 100644 --- a/_service +++ b/_service @@ -8,7 +8,7 @@ @PARENT_TAG@.@TAG_OFFSET@.%h v(.*) \1 - 9ac5471035b9066462506bc4d92be7a340a8058e + a8dd8708d8b6d1346328d7f4612d63b307c25653 openmpi*.tar diff --git a/openmpi-4.1.0.0.9ac5471035.tar.bz2 b/openmpi-4.1.0.0.9ac5471035.tar.bz2 deleted file mode 100644 index 3051b22..0000000 --- a/openmpi-4.1.0.0.9ac5471035.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c6b9013b8fc5788371477d68b8a3273781c36b57600b4957af705aa70dc0688e -size 6418922 diff --git a/openmpi-4.1.1.0.a8dd8708d8b6.tar.bz2 b/openmpi-4.1.1.0.a8dd8708d8b6.tar.bz2 new file mode 100644 index 0000000..32b28fb --- /dev/null +++ b/openmpi-4.1.1.0.a8dd8708d8b6.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fb519d3b71c62223f33b1434d245b724b4c3b59c9824b983df26cebaf4413b +size 6461180 diff --git a/openmpi4.changes b/openmpi4.changes index b591b69..928be07 100644 --- a/openmpi4.changes +++ b/openmpi4.changes @@ -1,3 +1,68 @@ +------------------------------------------------------------------- +Wed Apr 28 09:24:33 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to version 4.1.1 + - Fix a number of datatype issues, including an issue with + improper handling of partial datatypes that could lead to + an unexpected application failure. + - Change UCX PML to not warn about MPI_Request leaks during + MPI_FINALIZE by default. 
The old behavior can be restored with + the mca_pml_ucx_request_leak_check MCA parameter. + - Reverted temporary solution that worked around launch issues in + SLURM v20.11.{0,1,2}. SchedMD encourages users to avoid these + versions and to upgrade to v20.11.3 or newer. + - Updated PMIx to v3.2.2. + - Disabled gcc built-in atomics by default on aarch64 platforms. + - Disabled UCX PML when UCX v1.8.0 is detected. UCX version 1.8.0 has a bug that + may cause data corruption when its TCP transport is used in conjunction with + the shared memory transport. UCX versions prior to v1.8.0 are not affected by + this issue. Thanks to @ksiazekm for reporting the issue. + - Fixed detection of available UCX transports/devices to better inform PML + prioritization. + - Fixed SLURM support to mark ORTE daemons as non-MPI tasks. + - Improved AVX detection to more accurately detect supported + platforms. Also improved the generated AVX code, and switched to + using word-based MCA params for the op/avx component (vs. numeric + big flags). + - Improved OFI compatibility support and fixed memory leaks in error + handling paths. + - Improved HAN collectives with support for Barrier and Scatter. Thanks + to @EmmanuelBRELLE for these changes and the relevant bug fixes. + - Fixed MPI debugger support (i.e., the MPIR_Breakpoint() symbol). + Thanks to @louisespellacy-arm for reporting the issue. + - Fixed ORTE bug that prevented debuggers from reading MPIR_Proctable. + - Removed PML uniformity check from the UCX PML to address performance + regression. + - Fixed MPI_Init_thread(3) statement about C++ binding and update + references about MPI_THREAD_MULTIPLE. Thanks to Andreas Lösel for + bringing the outdated docs to our attention. + - Added fence_nb to Flux PMIx support to address segmentation faults. + - Ensured progress of AIO requests in the POSIX FBTL component to + prevent exceeding maximum number of pending requests on MacOS. 
+ - Used OPAL's multi-thread support in the orted to leverage atomic + operations for object refcounting. + - Fixed segv when launching with static TCP ports. + - Fixed --debug-daemons mpirun CLI option. + - Fixed bug where mpirun did not honor --host in a managed job + allocation. + - Made a managed allocation filter a hostfile/hostlist. + - Fixed bug to mark a generalized request as pending once initiated. + - Fixed external PMIx v4.x check. + - Fixed OSHMEM build with `--enable-mem-debug`. + - Fixed a performance regression observed with older versions of GCC when + __ATOMIC_SEQ_CST is used. Thanks to @BiplabRaut for reporting the issue. + - Fixed buffer allocation bug in the binomial tree scatter algorithm when + non-contiguous datatypes are used. Thanks to @sadcat11 for reporting the issue. + - Fixed bugs related to the accumulate and atomics functionality in the + osc/rdma component. + - Fixed race condition in MPI group operations observed with + MPI_THREAD_MULTIPLE threading level. + - Fixed a deadlock in the TCP BTL's connection matching logic. + - Fixed pml/ob1 compilation error when CUDA support is enabled. + - Fixed a build issue with Lustre caused by unnecessary header includes. + - Fixed a build issue with IMB LSF workload manager. + - Fixed linker error with UCX SPML. 
+ ------------------------------------------------------------------- Wed Mar 24 08:01:35 UTC 2021 - Egbert Eich diff --git a/openmpi4.spec b/openmpi4.spec index 3f2517b..0f424d1 100644 --- a/openmpi4.spec +++ b/openmpi4.spec @@ -42,8 +42,8 @@ # % define build_static_devel 1 %define pname openmpi -%define vers 4.1.0 -%define _vers 4_1_0 +%define vers 4.1.1 +%define _vers 4_1_1 %define m_f_ver 4 %bcond_with ringdisabled @@ -115,7 +115,7 @@ ExclusiveArch: do_not_build %global hpc_openmpi_pack_version %{hpc_openmpi_dep_version} %endif -%define git_ver .0.9ac5471035 +%define git_ver .0.a8dd8708d8b6 ############################################################################# # From dbbdfa574280a5d777ebde5afd3f6f40c4891c6174ccd7c102952755fa0b2bbb Mon Sep 17 00:00:00 2001 From: Nicolas Morey-Chaisemartin Date: Thu, 29 Apr 2021 12:44:14 +0000 Subject: [PATCH 2/3] Accepting request 889294 from home:NMoreyChaisemartin:branches:science:HPC - openmpi4 is now the default openmpi for releases > 15.3 OBS-URL: https://build.opensuse.org/request/show/889294 OBS-URL: https://build.opensuse.org/package/show/science:HPC/openmpi4?expand=0&rev=16 --- openmpi4.changes | 5 +++++ openmpi4.spec | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/openmpi4.changes b/openmpi4.changes index 928be07..4c55645 100644 --- a/openmpi4.changes +++ b/openmpi4.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Wed Apr 28 11:08:54 UTC 2021 - Nicolas Morey-Chaisemartin + +- openmpi4 is now the default openmpi for releases > 15.3 + ------------------------------------------------------------------- Wed Apr 28 09:24:33 UTC 2021 - Nicolas Morey-Chaisemartin diff --git a/openmpi4.spec b/openmpi4.spec index 0f424d1..724bb48 100644 --- a/openmpi4.spec +++ b/openmpi4.spec @@ -104,7 +104,11 @@ ExclusiveArch: do_not_build %endif # Detect whether we are the default openMPI implemantation or not +%if "%{flavor}" == "standard" && (%{suse_version} > 1500 || 
0%{?sle_version} > 150300) +%define default_openmpi 1 +%else %define default_openmpi 0 +%endif %if %{with hpc} %{!?compiler_family:%global compiler_family gnu} From 1986d94aaf09df385d524b567ecc3221c056ee726d030dac2c346e81a7cdc477 Mon Sep 17 00:00:00 2001 From: Nicolas Morey-Chaisemartin Date: Fri, 30 Apr 2021 15:10:32 +0000 Subject: [PATCH 3/3] - Add orted-mpir-add-version-to-shared-library.patch to fix unversioned library - Change RPM macros install path to %{_rpmmacrodir} OBS-URL: https://build.opensuse.org/package/show/science:HPC/openmpi4?expand=0&rev=17 --- openmpi4.changes | 2 ++ openmpi4.spec | 16 ++++++++------ ...d-mpir-add-version-to-shared-library.patch | 22 +++++++++++++++++++ 3 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 orted-mpir-add-version-to-shared-library.patch diff --git a/openmpi4.changes b/openmpi4.changes index 4c55645..1e9ddbb 100644 --- a/openmpi4.changes +++ b/openmpi4.changes @@ -2,6 +2,8 @@ Wed Apr 28 11:08:54 UTC 2021 - Nicolas Morey-Chaisemartin - openmpi4 is now the default openmpi for releases > 15.3 +- Add orted-mpir-add-version-to-shared-library.patch to fix unversioned library +- Change RPM macros install path to %{_rpmmacrodir} ------------------------------------------------------------------- Wed Apr 28 09:24:33 UTC 2021 - Nicolas Morey-Chaisemartin diff --git a/openmpi4.spec b/openmpi4.spec index 724bb48..f201e0c 100644 --- a/openmpi4.spec +++ b/openmpi4.spec @@ -139,6 +139,7 @@ Source2: openmpi4-rpmlintrc Source3: macros.hpc-openmpi Source4: mpivars.sh Source5: mpivars.csh +Patch1: orted-mpir-add-version-to-shared-library.patch Provides: mpi BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf @@ -397,6 +398,7 @@ echo with HPC echo without HPC %endif %setup -q -n openmpi-%{version}%{git_ver} +%patch1 %if %{without hpc} cat > %{_sourcedir}/baselibs.conf <%{buildroot}%{_sysconfdir}/rpm/macros.openmpi +mkdir -p %{buildroot}%{_rpmmacrodir} +cat 
<%{buildroot}%{_rpmmacrodir}/macros.openmpi # # openmpi # @@ -584,9 +586,9 @@ EOF sed -e "s/export/setenv/" -e "s/=/ /" \ %{buildroot}/%{mpi_bindir}/mpivars.sh > \ %{buildroot}/%{mpi_bindir}/mpivars.csh -mkdir -p %{buildroot}%{_sysconfdir}/rpm -mkdir -p %{buildroot}%{_sysconfdir}/rpm -cp %{S:3} %{buildroot}%{_sysconfdir}/rpm +mkdir -p %{buildroot}%{_rpmmacrodir} +mkdir -p %{buildroot}%{_rpmmacrodir} +cp %{S:3} %{buildroot}%{_rpmmacrodir} # Drop the files that should go into %{pname}-config as we only package them # in the non HPC build @@ -734,9 +736,9 @@ fi %files macros-devel %defattr(-,root,root,-) %if %{with hpc} -%config %{_sysconfdir}/rpm/macros.hpc-openmpi +%{_rpmmacrodir}/macros.hpc-openmpi %else -%config %{_sysconfdir}/rpm/macros.openmpi +%{_rpmmacrodir}/macros.openmpi %endif %if 0%{?build_static_devel} diff --git a/orted-mpir-add-version-to-shared-library.patch b/orted-mpir-add-version-to-shared-library.patch new file mode 100644 index 0000000..52ba5cb --- /dev/null +++ b/orted-mpir-add-version-to-shared-library.patch @@ -0,0 +1,22 @@ +commit bd2ceac4315c772e02cbb070e443d1acf2512da1 +Author: Andrew J. Hesford +Date: Thu Apr 29 15:13:23 2021 -0400 + + orted-mpir: add version to shared library + + Because orted-mpir is installed in the public library directory, it + should have a version. Assign the library libopen_rte_so_version to be + consistent with the rest of the RTE. + + Signed-off-by: Andrew J. Hesford + +diff --git orte/orted/orted-mpir/Makefile.am orte/orted/orted-mpir/Makefile.am +index 5c0dd335644a..8105dd4a6b83 100644 +--- orte/orted/orted-mpir/Makefile.am ++++ orte/orted/orted-mpir/Makefile.am +@@ -19,4 +19,4 @@ lib_LTLIBRARIES = lib@ORTE_LIB_PREFIX@open-orted-mpir.la + lib@ORTE_LIB_PREFIX@open_orted_mpir_la_SOURCES = \ + orted_mpir_breakpoint.c \ + orted_mpir.h +-lib@ORTE_LIB_PREFIX@open_orted_mpir_la_LDFLAGS = -avoid-version ++lib@ORTE_LIB_PREFIX@open_orted_mpir_la_LDFLAGS = -version-info $(libopen_rte_so_version)