Accepting request 1115979 from home:NMorey:branches:science:HPC
- Update to 1.15.0 - UCP - Added 2-stage pipeline protocol in the new protocol infrastructure - Added reset and abort functionality of rendezvous protocols in the new infrastructure - Added zero-copy rendezvous data send protocol in the new infrastructure - Added support for user memory handle in the new protocol infrastructure - Added option to force ODP registration for certain memory types - Enabled lock free memory region deregistration - Updated allow/deny transport list feature to control auxiliary transport selection - Multiple performance improvements of the new protocol infrastructure - Multiple improvements in error and debug messages - Fixed assertion when sending from non-contiguous GPU buffer to managed buffer - Fixed the race condition on endpoint configurations - Fixed endpoint reconfiguration issues due to asymmetrical selection - Fixed endpoint reconfiguration error due to wrong locality detection - Fixed crash during connection manager cleanup - Fixed rkey index calculation for rendezvous protocol - Fixed rcache dump function - Removed logging from rkey unpack in release mode - Fixed double free of rkey in rendezvous protocol - Fixed rendezvous pipeline protocol error flow - Fixed error handling in rendezvous get zcopy protocol - Replay pending requests of wireup EP CM during connection establishment to prevent potential ordering issues and wrong configuration - Pass user-provided memory type to the function that checks whether the buffer can be sent inline or not - Avoid memory registration during UCP context initialization - Fixed CPU/device atomics selection in the new protocol infrastructure - Multiple fixes in the new protocol infrastructure information output OBS-URL: https://build.opensuse.org/request/show/1115979 OBS-URL: https://build.opensuse.org/package/show/science:HPC/openucx?expand=0&rev=63
This commit is contained in:
parent
7d6841ca26
commit
2a1a111b03
@ -1,4 +1,4 @@
|
|||||||
commit 909e453ce7ee166c98274096720a6c9345fdc3ae
|
commit 328a69d07b618e0aa83fe2351e8d7ca4fc1b2f00
|
||||||
Author: Nicolas Morey <nmorey@suse.com>
|
Author: Nicolas Morey <nmorey@suse.com>
|
||||||
Date: Mon Feb 13 17:04:14 2023 +0100
|
Date: Mon Feb 13 17:04:14 2023 +0100
|
||||||
|
|
||||||
@ -7,7 +7,7 @@ Date: Mon Feb 13 17:04:14 2023 +0100
|
|||||||
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
||||||
|
|
||||||
diff --git config/m4/ucm.m4 config/m4/ucm.m4
|
diff --git config/m4/ucm.m4 config/m4/ucm.m4
|
||||||
index abf02c06f430..84aa3e214d5a 100644
|
index e5e66266d695..ef7e4ede93ce 100644
|
||||||
--- config/m4/ucm.m4
|
--- config/m4/ucm.m4
|
||||||
+++ config/m4/ucm.m4
|
+++ config/m4/ucm.m4
|
||||||
@@ -80,9 +80,20 @@ AC_CHECK_DECLS([SYS_ipc],
|
@@ -80,9 +80,20 @@ AC_CHECK_DECLS([SYS_ipc],
|
||||||
@ -33,10 +33,10 @@ index abf02c06f430..84aa3e214d5a 100644
|
|||||||
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
|
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
|
||||||
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
|
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
|
||||||
diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c
|
diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c
|
||||||
index f41155ca3d9d..edeaaf5df529 100644
|
index e5aff871d491..2b7c54319f53 100644
|
||||||
--- src/tools/info/sys_info.c
|
--- src/tools/info/sys_info.c
|
||||||
+++ src/tools/info/sys_info.c
|
+++ src/tools/info/sys_info.c
|
||||||
@@ -46,7 +46,8 @@ static const char* cpu_vendor_names[] = {
|
@@ -47,7 +47,8 @@ static const char* cpu_vendor_names[] = {
|
||||||
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
|
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
|
||||||
[UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
|
[UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
|
||||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM",
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM",
|
||||||
@ -98,7 +98,7 @@ index 000000000000..c0f427f4984a
|
|||||||
+
|
+
|
||||||
+#endif
|
+#endif
|
||||||
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
||||||
index 9e0bd2ffb1cc..97021c205d5c 100644
|
index c7696d56f25d..c63b32bad844 100644
|
||||||
--- src/ucs/Makefile.am
|
--- src/ucs/Makefile.am
|
||||||
+++ src/ucs/Makefile.am
|
+++ src/ucs/Makefile.am
|
||||||
@@ -22,6 +22,7 @@ libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la $(BFD_LIBS)
|
@@ -22,6 +22,7 @@ libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la $(BFD_LIBS)
|
||||||
@ -159,10 +159,10 @@ index e89a37d0b673..dd2b9d5b6bcb 100644
|
|||||||
# error "Unsupported architecture"
|
# error "Unsupported architecture"
|
||||||
#endif
|
#endif
|
||||||
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
|
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
|
||||||
index ac8e268a1502..16c308bae94c 100644
|
index ece8f7fb82dd..b35b10ad090a 100644
|
||||||
--- src/ucs/arch/cpu.c
|
--- src/ucs/arch/cpu.c
|
||||||
+++ src/ucs/arch/cpu.c
|
+++ src/ucs/arch/cpu.c
|
||||||
@@ -61,6 +61,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
@@ -63,6 +63,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
||||||
.min = UCS_MEMUNITS_INF,
|
.min = UCS_MEMUNITS_INF,
|
||||||
.max = UCS_MEMUNITS_INF
|
.max = UCS_MEMUNITS_INF
|
||||||
},
|
},
|
||||||
@ -173,19 +173,19 @@ index ac8e268a1502..16c308bae94c 100644
|
|||||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = {
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = {
|
||||||
.min = UCS_MEMUNITS_INF,
|
.min = UCS_MEMUNITS_INF,
|
||||||
.max = UCS_MEMUNITS_INF
|
.max = UCS_MEMUNITS_INF
|
||||||
@@ -77,6 +81,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
@@ -78,6 +82,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
||||||
[UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE,
|
[UCS_CPU_VENDOR_INTEL] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||||
[UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE,
|
[UCS_CPU_VENDOR_AMD] = UCS_CPU_EST_BCOPY_BW_AMD,
|
||||||
[UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE,
|
[UCS_CPU_VENDOR_GENERIC_ARM] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||||
+ [UCS_CPU_VENDOR_GENERIC_IBM] = 5800 * UCS_MBYTE,
|
+ [UCS_CPU_VENDOR_GENERIC_IBM] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = 12000 * UCS_MBYTE,
|
[UCS_CPU_VENDOR_GENERIC_PPC] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||||
[UCS_CPU_VENDOR_ZHAOXIN] = 5800 * UCS_MBYTE
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = UCS_CPU_EST_BCOPY_BW_FUJITSU_ARM,
|
||||||
};
|
[UCS_CPU_VENDOR_ZHAOXIN] = UCS_CPU_EST_BCOPY_BW_DEFAULT
|
||||||
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
|
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
|
||||||
index a59561c041fa..dc95f3f56231 100644
|
index eb557d385670..cfd297e24558 100644
|
||||||
--- src/ucs/arch/cpu.h
|
--- src/ucs/arch/cpu.h
|
||||||
+++ src/ucs/arch/cpu.h
|
+++ src/ucs/arch/cpu.h
|
||||||
@@ -63,6 +63,7 @@ typedef enum ucs_cpu_vendor {
|
@@ -64,6 +64,7 @@ typedef enum ucs_cpu_vendor {
|
||||||
UCS_CPU_VENDOR_AMD,
|
UCS_CPU_VENDOR_AMD,
|
||||||
UCS_CPU_VENDOR_GENERIC_ARM,
|
UCS_CPU_VENDOR_GENERIC_ARM,
|
||||||
UCS_CPU_VENDOR_GENERIC_PPC,
|
UCS_CPU_VENDOR_GENERIC_PPC,
|
||||||
@ -193,7 +193,7 @@ index a59561c041fa..dc95f3f56231 100644
|
|||||||
UCS_CPU_VENDOR_FUJITSU_ARM,
|
UCS_CPU_VENDOR_FUJITSU_ARM,
|
||||||
UCS_CPU_VENDOR_ZHAOXIN,
|
UCS_CPU_VENDOR_ZHAOXIN,
|
||||||
UCS_CPU_VENDOR_LAST
|
UCS_CPU_VENDOR_LAST
|
||||||
@@ -98,6 +99,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
@@ -99,6 +100,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
||||||
# include "ppc64/cpu.h"
|
# include "ppc64/cpu.h"
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
# include "aarch64/cpu.h"
|
# include "aarch64/cpu.h"
|
||||||
@ -410,10 +410,10 @@ index 000000000000..225e4e5e896a
|
|||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c
|
diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c
|
||||||
index 57160c061986..be6b20812c39 100644
|
index 58e67835c4d0..308f03606d5b 100644
|
||||||
--- src/ucs/sys/sys.c
|
--- src/ucs/sys/sys.c
|
||||||
+++ src/ucs/sys/sys.c
|
+++ src/ucs/sys/sys.c
|
||||||
@@ -1256,8 +1256,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
@@ -1258,8 +1258,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
||||||
if (old_ptr == NULL) {
|
if (old_ptr == NULL) {
|
||||||
/* Note: Must pass the 0 offset as "long", otherwise it will be
|
/* Note: Must pass the 0 offset as "long", otherwise it will be
|
||||||
* partially undefined when converted to syscall arguments */
|
* partially undefined when converted to syscall arguments */
|
||||||
|
@ -1,3 +1,96 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Oct 2 08:00:54 UTC 2023 - Nicolas Morey <nicolas.morey@suse.com>
|
||||||
|
|
||||||
|
- Update to 1.15.0
|
||||||
|
- UCP
|
||||||
|
- Added 2-stage pipeline protocol in the new protocol infrastructure
|
||||||
|
- Added reset and abort functionality of rendezvous protocols in the
|
||||||
|
new infrastructure
|
||||||
|
- Added zero-copy rendezvous data send protocol in the new infrastructure
|
||||||
|
- Added support for user memory handle in the new protocol infrastructure
|
||||||
|
- Added option to force ODP registration for certain memory types
|
||||||
|
- Enabled lock free memory region deregistration
|
||||||
|
- Updated allow/deny transport list feature to control auxiliary transport selection
|
||||||
|
- Multiple performance improvements of the new protocol infrastructure
|
||||||
|
- Multiple improvements in error and debug messages
|
||||||
|
- Fixed assertion when sending from non-contiguous GPU buffer to managed buffer
|
||||||
|
- Fixed the race condition on endpoint configurations
|
||||||
|
- Fixed endpoint reconfiguration issues due to asymmetrical selection
|
||||||
|
- Fixed endpoint reconfiguration error due to wrong locality detection
|
||||||
|
- Fixed crash during connection manager cleanup
|
||||||
|
- Fixed rkey index calculation for rendezvous protocol
|
||||||
|
- Fixed rcache dump function
|
||||||
|
- Removed logging from rkey unpack in release mode
|
||||||
|
- Fixed double free of rkey in rendezvous protocol
|
||||||
|
- Fixed rendezvous pipeline protocol error flow
|
||||||
|
- Fixed error handling in rendezvous get zcopy protocol
|
||||||
|
- Replay pending requests of wireup EP CM during connection establishment
|
||||||
|
to prevent potential ordering issues and wrong configuration
|
||||||
|
- Pass user-provided memory type to the function that checks whether the buffer
|
||||||
|
can be sent inline or not
|
||||||
|
- Avoid memory registration during UCP context initialization
|
||||||
|
- Fixed CPU/device atomics selection in the new protocol infrastructure
|
||||||
|
- Multiple fixes in the new protocol infrastructure information output
|
||||||
|
- UCT
|
||||||
|
- Split UCT_MD_MKEY_PACK_FLAG_INVALIDATE into two flags for RMA and AMO
|
||||||
|
- Added put_zcopy and get_zcopy scheme support for self transport
|
||||||
|
- Added base implementation of is_reachable_v2 API using intra/inter flag
|
||||||
|
- Introduced MD capability for non-blocking registration memory types
|
||||||
|
- Added check for dmabuf kernel support in ROCm memory domain
|
||||||
|
- Fixed exported memh packing
|
||||||
|
- Fixed an error in checking return status of multi-threaded memory
|
||||||
|
registration function
|
||||||
|
- RDMA CORE (IB, ROCE, etc.)
|
||||||
|
- Added implementation of is_reachable_v2 routine to IB interface
|
||||||
|
- Added option to control CQE zipping per CQ RX/TX direction
|
||||||
|
- Added option to specify how DCI selects port under RoCE LAG
|
||||||
|
- Added hw_dcs to the list of policies to select DCI by an endpoint
|
||||||
|
- Removed implicit on-demand paging
|
||||||
|
- Added option to set RoCE lag dct port for response under queue affinity mode
|
||||||
|
- Improved IB memlock limit logging
|
||||||
|
- Fixed dma-buf based memory region registration
|
||||||
|
- Fixed memory handle data corruption when PCIe relaxed ordering is enabled
|
||||||
|
- Fixed performance degradation when indirect atomic key is not supported
|
||||||
|
by the hardware
|
||||||
|
- Fixed remote access error to strict-order keys because of wrong offset
|
||||||
|
- Added check for UAR support to memory domain opening
|
||||||
|
- Fixed updating port counters for devx qp
|
||||||
|
- Fixed ibv_create_cq error message on node without Infiniband
|
||||||
|
- Fixed performance degradation due to using 2 paths on NDR400 by default
|
||||||
|
- Removed unnecessary async lock which otherwise would block UD progress
|
||||||
|
- UCS
|
||||||
|
- Added ucs_string_buffer_rbrk() to split token
|
||||||
|
- Fixed lane selection and added bandwidth estimation for Sapphire Rapids family
|
||||||
|
- Fixed displaying wrong environment variable suggestions
|
||||||
|
- Fixed VFS warning output
|
||||||
|
- Fixed SEGV in ucs_debug_backtrace_next(), upon previous SEGV handling,
|
||||||
|
due to ENOMEM situation
|
||||||
|
- Fixed memory corruption when using UCX_MPOOL_FIFO=y
|
||||||
|
- UCM
|
||||||
|
- Fixed conditional jump patching
|
||||||
|
- Fixed mremap() override
|
||||||
|
- Tests
|
||||||
|
- Added a rocm docker container for testing
|
||||||
|
- Added option to send client_id in iodemo test
|
||||||
|
- Added support for multiple connections to the same server in iodemo test
|
||||||
|
- Added synchronization before exit to hello world examples
|
||||||
|
- Fixed wrong usage of ep_close in examples
|
||||||
|
- Tools
|
||||||
|
- Added user-side memcpy option for AM benchmarks in ucx_perftest
|
||||||
|
- Added wireshark LUA dissectors for some UCX protocols
|
||||||
|
- Fixed memory access flags in perftest
|
||||||
|
- Removed support for librte from perf
|
||||||
|
- Fixed worker flush deadlock when using multiple workers in ucx_perftest
|
||||||
|
- Build
|
||||||
|
- Added support for binutils 2.40
|
||||||
|
- Added versioned dependency to switch between packages with the same names
|
||||||
|
- Added a separate xpmem deb subpackage
|
||||||
|
- Added aarch64 support to the binary distribution pipeline
|
||||||
|
- Removed dependency on libnuma
|
||||||
|
- Documentation
|
||||||
|
- Updated ucp_worker_release_address description
|
||||||
|
- Refresh openucx-s390x-support.patch against latest sources
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Tue Jul 25 13:08:44 UTC 2023 - Nicolas Morey <nicolas.morey@suse.com>
|
Tue Jul 25 13:08:44 UTC 2023 - Nicolas Morey <nicolas.morey@suse.com>
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
%define version_suf %{nil}
|
%define version_suf %{nil}
|
||||||
|
|
||||||
Name: openucx
|
Name: openucx
|
||||||
Version: 1.14.1
|
Version: 1.15.0
|
||||||
Release: 0
|
Release: 0
|
||||||
Summary: Communication layer for Message Passing (MPI)
|
Summary: Communication layer for Message Passing (MPI)
|
||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:baa0634cafb269a3112f626eb226bcd2ca8c9fcf0fec3b8e2a3553baad5f77aa
|
|
||||||
size 3030698
|
|
BIN
ucx-1.15.0.tar.gz
(Stored with Git LFS)
Normal file
BIN
ucx-1.15.0.tar.gz
(Stored with Git LFS)
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user