forked from pool/openucx
Accepting request 840386 from home:NMoreyChaisemartin:branches:science:HPC
- Update to v1.9.0 (jsc#SLE-15163) - Features: - Added a new class of communication APIs '*_nbx' that enable API extendability while preserving ABI backward compatibility - Added asynchronous event support to UCT/IB/DEVX - Added support for latest CUDA library version - Added NAK-based reliability protocol for UCT/IB/UD to optimize resends - Added new tests for ROCm - Added new configuration parameters for protocol selection - Added performance optimization for Fujitsu A64FX with InfiniBand - Added performance optimization for clear cache code aarch64 - Added support for relaxed-order PCIe access in IB RDMA transports - Added new TCP connection manager - Added support for UCT/IB PKey with partial membership in IB transports - Added support for RoCE LAG - Added support for ROCm 3.7 and above - Added flow control for RDMA read operations - Improved endpoint flush implementation for UCT/IB - Improved UD timer to avoid interrupting the main thread when not in use - Improved latency estimation for network path with CUDA - Improved error reporting messages - Improved performance in active message flow (removed malloc call) - Improved performance in ptr_array flow - Improved performance in UCT/SM progress engine flow - Improved I/O demo code - Improved rendezvous protocol for CUDA - Updated examples code - Bugfixes: - Fixes for most recent versions of GCC, CLANG, ARMCLANG, PGI - Fixes in UCT/IB for strict order keys OBS-URL: https://build.opensuse.org/request/show/840386 OBS-URL: https://build.opensuse.org/package/show/science:HPC/openucx?expand=0&rev=42
This commit is contained in:
parent
b4e3d46395
commit
f10927b874
@ -1,4 +1,4 @@
|
|||||||
commit 02e9e9daa1adf26573bfb0d35c26cca46fb0e777
|
commit a8b24a8cd4124e3ffb7b682b94b3a1192e48c9c7
|
||||||
Author: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
Author: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
||||||
Date: Thu Aug 9 07:41:24 2018 +0200
|
Date: Thu Aug 9 07:41:24 2018 +0200
|
||||||
|
|
||||||
@ -33,19 +33,17 @@ index 9c7c820d9fff..8297fc7e6ec2 100644
|
|||||||
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
|
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
|
||||||
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
|
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
|
||||||
diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c
|
diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c
|
||||||
index 435e5a6feb8b..9b3beed68ea4 100644
|
index 88d317670462..04d2c223b970 100644
|
||||||
--- src/tools/info/sys_info.c
|
--- src/tools/info/sys_info.c
|
||||||
+++ src/tools/info/sys_info.c
|
+++ src/tools/info/sys_info.c
|
||||||
@@ -37,7 +37,8 @@ static const char* cpu_vendor_names[] = {
|
@@ -38,6 +38,7 @@ static const char* cpu_vendor_names[] = {
|
||||||
[UCS_CPU_VENDOR_INTEL] = "Intel",
|
|
||||||
[UCS_CPU_VENDOR_AMD] = "AMD",
|
[UCS_CPU_VENDOR_AMD] = "AMD",
|
||||||
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
|
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
|
||||||
- [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC"
|
[UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
|
||||||
+ [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
|
+ [UCS_CPU_VENDOR_GENERIC_IBM] = "Generic IBM",
|
||||||
+ [UCS_CPU_VENDOR_GENERIC_IBM] = "Generic IBM"
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM"
|
||||||
};
|
};
|
||||||
|
|
||||||
static double measure_memcpy_bandwidth(size_t size)
|
|
||||||
diff --git src/ucm/Makefile.am src/ucm/Makefile.am
|
diff --git src/ucm/Makefile.am src/ucm/Makefile.am
|
||||||
index e53a30a46916..21bce848045d 100644
|
index e53a30a46916..21bce848045d 100644
|
||||||
--- src/ucm/Makefile.am
|
--- src/ucm/Makefile.am
|
||||||
@ -93,10 +91,10 @@ index 000000000000..334c0474e8b6
|
|||||||
+
|
+
|
||||||
+#endif
|
+#endif
|
||||||
diff --git src/ucm/mmap/install.c src/ucm/mmap/install.c
|
diff --git src/ucm/mmap/install.c src/ucm/mmap/install.c
|
||||||
index 6824a6247bef..81d0375a4132 100644
|
index 6b46baaeecfa..522fd6eaa89f 100644
|
||||||
--- src/ucm/mmap/install.c
|
--- src/ucm/mmap/install.c
|
||||||
+++ src/ucm/mmap/install.c
|
+++ src/ucm/mmap/install.c
|
||||||
@@ -258,7 +258,11 @@ static ucs_status_t ucs_mmap_install_reloc(int events)
|
@@ -331,7 +331,11 @@ static ucs_status_t ucs_mmap_install_reloc(int events)
|
||||||
status = ucm_reloc_modify(&entry->patch);
|
status = ucm_reloc_modify(&entry->patch);
|
||||||
} else {
|
} else {
|
||||||
ucs_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO);
|
ucs_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO);
|
||||||
@ -109,10 +107,10 @@ index 6824a6247bef..81d0375a4132 100644
|
|||||||
if (status != UCS_OK) {
|
if (status != UCS_OK) {
|
||||||
ucm_warn("failed to install %s hook for '%s'",
|
ucm_warn("failed to install %s hook for '%s'",
|
||||||
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
||||||
index 2bd0a8d50c4b..4a60658f34d8 100644
|
index b612ddeb54ff..6d01521b697b 100644
|
||||||
--- src/ucs/Makefile.am
|
--- src/ucs/Makefile.am
|
||||||
+++ src/ucs/Makefile.am
|
+++ src/ucs/Makefile.am
|
||||||
@@ -58,6 +58,7 @@ nobase_dist_libucs_la_HEADERS = \
|
@@ -64,6 +64,7 @@ nobase_dist_libucs_la_HEADERS = \
|
||||||
arch/x86_64/global_opts.h \
|
arch/x86_64/global_opts.h \
|
||||||
arch/aarch64/global_opts.h \
|
arch/aarch64/global_opts.h \
|
||||||
arch/ppc64/global_opts.h \
|
arch/ppc64/global_opts.h \
|
||||||
@ -120,16 +118,16 @@ index 2bd0a8d50c4b..4a60658f34d8 100644
|
|||||||
arch/global_opts.h
|
arch/global_opts.h
|
||||||
|
|
||||||
noinst_HEADERS = \
|
noinst_HEADERS = \
|
||||||
@@ -67,6 +68,8 @@ noinst_HEADERS = \
|
@@ -71,6 +72,8 @@ noinst_HEADERS = \
|
||||||
|
arch/generic/atomic.h \
|
||||||
arch/generic/cpu.h \
|
arch/generic/cpu.h \
|
||||||
arch/ppc64/bitops.h \
|
|
||||||
arch/ppc64/cpu.h \
|
arch/ppc64/cpu.h \
|
||||||
+ arch/s390x/bitops.h \
|
+ arch/s390x/bitops.h \
|
||||||
+ arch/s390x/cpu.h \
|
+ arch/s390x/cpu.h \
|
||||||
arch/x86_64/atomic.h \
|
arch/x86_64/atomic.h \
|
||||||
arch/x86_64/bitops.h \
|
|
||||||
arch/x86_64/cpu.h \
|
arch/x86_64/cpu.h \
|
||||||
@@ -108,6 +111,7 @@ libucs_la_SOURCES = \
|
arch/atomic.h \
|
||||||
|
@@ -112,6 +115,7 @@ libucs_la_SOURCES = \
|
||||||
algorithm/qsort_r.c \
|
algorithm/qsort_r.c \
|
||||||
arch/aarch64/cpu.c \
|
arch/aarch64/cpu.c \
|
||||||
arch/aarch64/global_opts.c \
|
arch/aarch64/global_opts.c \
|
||||||
@ -151,10 +149,10 @@ index 99e53ca5d0f7..dfa83b757bb0 100644
|
|||||||
# error "Unsupported architecture"
|
# error "Unsupported architecture"
|
||||||
#endif
|
#endif
|
||||||
diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h
|
diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h
|
||||||
index 91d4573c1faf..8b092b023639 100644
|
index 10a86b53ca42..80084eea6260 100644
|
||||||
--- src/ucs/arch/bitops.h
|
--- src/ucs/arch/bitops.h
|
||||||
+++ src/ucs/arch/bitops.h
|
+++ src/ucs/arch/bitops.h
|
||||||
@@ -14,6 +14,8 @@
|
@@ -18,6 +18,8 @@ BEGIN_C_DECLS
|
||||||
# include "ppc64/bitops.h"
|
# include "ppc64/bitops.h"
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
# include "aarch64/bitops.h"
|
# include "aarch64/bitops.h"
|
||||||
@ -164,34 +162,30 @@ index 91d4573c1faf..8b092b023639 100644
|
|||||||
# error "Unsupported architecture"
|
# error "Unsupported architecture"
|
||||||
#endif
|
#endif
|
||||||
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
|
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
|
||||||
index e81f540a899d..40b89907664a 100644
|
index 6d9ebbafeaed..5cda2179efca 100644
|
||||||
--- src/ucs/arch/cpu.c
|
--- src/ucs/arch/cpu.c
|
||||||
+++ src/ucs/arch/cpu.c
|
+++ src/ucs/arch/cpu.c
|
||||||
@@ -57,7 +57,11 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
@@ -60,6 +60,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
||||||
[UCS_CPU_VENDOR_GENERIC_PPC] = {
|
|
||||||
.min = UCS_MEMUNITS_INF,
|
.min = UCS_MEMUNITS_INF,
|
||||||
.max = UCS_MEMUNITS_INF
|
.max = UCS_MEMUNITS_INF
|
||||||
- }
|
},
|
||||||
+ },
|
|
||||||
+ [UCS_CPU_VENDOR_GENERIC_IBM] = {
|
+ [UCS_CPU_VENDOR_GENERIC_IBM] = {
|
||||||
+ .min = UCS_MEMUNITS_INF,
|
+ .min = UCS_MEMUNITS_INF,
|
||||||
+ .max = UCS_MEMUNITS_INF
|
+ .max = UCS_MEMUNITS_INF
|
||||||
+ },
|
+ },
|
||||||
};
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = {
|
||||||
|
.min = UCS_MEMUNITS_INF,
|
||||||
const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
.max = UCS_MEMUNITS_INF
|
||||||
@@ -65,7 +69,8 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
@@ -72,6 +76,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
||||||
[UCS_CPU_VENDOR_INTEL] = 5800 * UCS_MBYTE,
|
|
||||||
[UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE,
|
[UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE,
|
||||||
[UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE,
|
[UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE,
|
||||||
- [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE
|
[UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE,
|
||||||
+ [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE,
|
|
||||||
+ [UCS_CPU_VENDOR_GENERIC_IBM] = 5800 * UCS_MBYTE,
|
+ [UCS_CPU_VENDOR_GENERIC_IBM] = 5800 * UCS_MBYTE,
|
||||||
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = 5800 * UCS_MBYTE
|
||||||
};
|
};
|
||||||
|
|
||||||
static void ucs_sysfs_get_cache_size()
|
|
||||||
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
|
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
|
||||||
index c1f1cc826b6b..e9d5c5b32ca3 100644
|
index cb317a8db3a4..5174eeb7e1db 100644
|
||||||
--- src/ucs/arch/cpu.h
|
--- src/ucs/arch/cpu.h
|
||||||
+++ src/ucs/arch/cpu.h
|
+++ src/ucs/arch/cpu.h
|
||||||
@@ -58,6 +58,7 @@ typedef enum ucs_cpu_vendor {
|
@@ -58,6 +58,7 @@ typedef enum ucs_cpu_vendor {
|
||||||
@ -199,10 +193,10 @@ index c1f1cc826b6b..e9d5c5b32ca3 100644
|
|||||||
UCS_CPU_VENDOR_GENERIC_ARM,
|
UCS_CPU_VENDOR_GENERIC_ARM,
|
||||||
UCS_CPU_VENDOR_GENERIC_PPC,
|
UCS_CPU_VENDOR_GENERIC_PPC,
|
||||||
+ UCS_CPU_VENDOR_GENERIC_IBM,
|
+ UCS_CPU_VENDOR_GENERIC_IBM,
|
||||||
|
UCS_CPU_VENDOR_FUJITSU_ARM,
|
||||||
UCS_CPU_VENDOR_LAST
|
UCS_CPU_VENDOR_LAST
|
||||||
} ucs_cpu_vendor_t;
|
} ucs_cpu_vendor_t;
|
||||||
|
@@ -92,6 +93,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
||||||
@@ -91,6 +92,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
|
||||||
# include "ppc64/cpu.h"
|
# include "ppc64/cpu.h"
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
# include "aarch64/cpu.h"
|
# include "aarch64/cpu.h"
|
||||||
|
@ -1,3 +1,59 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Oct 5 13:21:34 UTC 2020 - Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
||||||
|
|
||||||
|
- Update to v1.9.0 (jsc#SLE-15163)
|
||||||
|
- Features:
|
||||||
|
- Added a new class of communication APIs '*_nbx' that enable API extendability while
|
||||||
|
- preserving ABI backward compatibility
|
||||||
|
- Added asynchronous event support to UCT/IB/DEVX
|
||||||
|
- Added support for latest CUDA library version
|
||||||
|
- Added NAK-based reliability protocol for UCT/IB/UD to optimize resends
|
||||||
|
- Added new tests for ROCm
|
||||||
|
- Added new configuration parameters for protocol selection
|
||||||
|
- Added performance optimization for Fujitsu A64FX with InfiniBand
|
||||||
|
- Added performance optimization for clear cache code aarch64
|
||||||
|
- Added support for relaxed-order PCIe access in IB RDMA transports
|
||||||
|
- Added new TCP connection manager
|
||||||
|
- Added support for UCT/IB PKey with partial membership in IB transports
|
||||||
|
- Added support for RoCE LAG
|
||||||
|
- Added support for ROCm 3.7 and above
|
||||||
|
- Added flow control for RDMA read operations
|
||||||
|
- Improved endpoint flush implementation for UCT/IB
|
||||||
|
- Improved UD timer to avoid interrupting the main thread when not in use
|
||||||
|
- Improved latency estimation for network path with CUDA
|
||||||
|
- Improved error reporting messages
|
||||||
|
- Improved performance in active message flow (removed malloc call)
|
||||||
|
- Improved performance in ptr_array flow
|
||||||
|
- Improved performance in UCT/SM progress engine flow
|
||||||
|
- Improved I/O demo code
|
||||||
|
- Improved rendezvous protocol for CUDA
|
||||||
|
- Updated examples code
|
||||||
|
- Bugfixes:
|
||||||
|
- Fixes for most recent versions of GCC, CLANG, ARMCLANG, PGI
|
||||||
|
- Fixes in UCT/IB for strict order keys
|
||||||
|
- Fixes in memory barrier code for aarch64
|
||||||
|
- Fixes in UCT/IB/DEVX for fork system call
|
||||||
|
- Fixes in UCT/IB for rand() call in rdma-core
|
||||||
|
- Fixes in group rescheduling for UCT/IB/DC
|
||||||
|
- Fixes in UCT/CUDA bandwidth reporting
|
||||||
|
- Fixes in rkey_ptr protocol
|
||||||
|
- Fixes in lane selection for rendezvous protocol based on get-zero-copy flow
|
||||||
|
- Fixes for ROCm build
|
||||||
|
- Fixes for XPMEM transport
|
||||||
|
- Fixes in closing endpoint code
|
||||||
|
- Fixes in RDMACM code
|
||||||
|
- Fixes in memcpy selection for AMD
|
||||||
|
- Fixes in UCT/UD endpoint flush functionality
|
||||||
|
- Fixes in XPMEM detection
|
||||||
|
- Fixes in rendezvous staging protocol
|
||||||
|
- Fixes in ROCEv1 mlx5 UDP source port configuration
|
||||||
|
- Multiple fixes in RPM spec file
|
||||||
|
- Multiple fixes in UCP documentation
|
||||||
|
- Multiple fixes in socket connection manager
|
||||||
|
- Multiple fixes in gtest
|
||||||
|
- Multiple fixes in JAVA API implementation
|
||||||
|
- Refresh openucx-s390x-support.patch against new version
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Mon Jul 13 08:19:45 UTC 2020 - Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
Mon Jul 13 08:19:45 UTC 2020 - Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
|
|
||||||
Name: openucx
|
Name: openucx
|
||||||
Version: 1.8.1
|
Version: 1.9.0
|
||||||
Release: 0
|
Release: 0
|
||||||
Summary: Communication layer for Message Passing (MPI)
|
Summary: Communication layer for Message Passing (MPI)
|
||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:a48820cb8d0761b5ccf3e7ba03a7c8c1dde6276017657178829e07ffc35b556a
|
|
||||||
size 2316448
|
|
3
ucx-1.9.0.tar.gz
Normal file
3
ucx-1.9.0.tar.gz
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:a7a2c8841dc0d5444088a4373dc9b9cc68dbffcd917c1eba92ca8ed8e5e635fb
|
||||||
|
size 2467338
|
Loading…
x
Reference in New Issue
Block a user