Accepting request 1116008 from science:HPC
OBS-URL: https://build.opensuse.org/request/show/1116008
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openucx?expand=0&rev=28
This commit is contained in:
commit
0a42199aad
@ -1,4 +1,4 @@
|
||||
commit 909e453ce7ee166c98274096720a6c9345fdc3ae
|
||||
commit 328a69d07b618e0aa83fe2351e8d7ca4fc1b2f00
|
||||
Author: Nicolas Morey <nmorey@suse.com>
|
||||
Date: Mon Feb 13 17:04:14 2023 +0100
|
||||
|
||||
@ -7,7 +7,7 @@ Date: Mon Feb 13 17:04:14 2023 +0100
|
||||
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
|
||||
|
||||
diff --git config/m4/ucm.m4 config/m4/ucm.m4
|
||||
index abf02c06f430..84aa3e214d5a 100644
|
||||
index e5e66266d695..ef7e4ede93ce 100644
|
||||
--- config/m4/ucm.m4
|
||||
+++ config/m4/ucm.m4
|
||||
@@ -80,9 +80,20 @@ AC_CHECK_DECLS([SYS_ipc],
|
||||
@ -33,10 +33,10 @@ index abf02c06f430..84aa3e214d5a 100644
|
||||
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
|
||||
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
|
||||
diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c
|
||||
index f41155ca3d9d..edeaaf5df529 100644
|
||||
index e5aff871d491..2b7c54319f53 100644
|
||||
--- src/tools/info/sys_info.c
|
||||
+++ src/tools/info/sys_info.c
|
||||
@@ -46,7 +46,8 @@ static const char* cpu_vendor_names[] = {
|
||||
@@ -47,7 +47,8 @@ static const char* cpu_vendor_names[] = {
|
||||
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
|
||||
[UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
|
||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM",
|
||||
@ -98,7 +98,7 @@ index 000000000000..c0f427f4984a
|
||||
+
|
||||
+#endif
|
||||
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
||||
index 9e0bd2ffb1cc..97021c205d5c 100644
|
||||
index c7696d56f25d..c63b32bad844 100644
|
||||
--- src/ucs/Makefile.am
|
||||
+++ src/ucs/Makefile.am
|
||||
@@ -22,6 +22,7 @@ libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la $(BFD_LIBS)
|
||||
@ -159,10 +159,10 @@ index e89a37d0b673..dd2b9d5b6bcb 100644
|
||||
# error "Unsupported architecture"
|
||||
#endif
|
||||
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
|
||||
index ac8e268a1502..16c308bae94c 100644
|
||||
index ece8f7fb82dd..b35b10ad090a 100644
|
||||
--- src/ucs/arch/cpu.c
|
||||
+++ src/ucs/arch/cpu.c
|
||||
@@ -61,6 +61,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
||||
@@ -63,6 +63,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
||||
.min = UCS_MEMUNITS_INF,
|
||||
.max = UCS_MEMUNITS_INF
|
||||
},
|
||||
@ -173,19 +173,19 @@ index ac8e268a1502..16c308bae94c 100644
|
||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = {
|
||||
.min = UCS_MEMUNITS_INF,
|
||||
.max = UCS_MEMUNITS_INF
|
||||
@@ -77,6 +81,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
||||
[UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE,
|
||||
[UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE,
|
||||
[UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE,
|
||||
+ [UCS_CPU_VENDOR_GENERIC_IBM] = 5800 * UCS_MBYTE,
|
||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = 12000 * UCS_MBYTE,
|
||||
[UCS_CPU_VENDOR_ZHAOXIN] = 5800 * UCS_MBYTE
|
||||
};
|
||||
@@ -78,6 +82,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
||||
[UCS_CPU_VENDOR_INTEL] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||
[UCS_CPU_VENDOR_AMD] = UCS_CPU_EST_BCOPY_BW_AMD,
|
||||
[UCS_CPU_VENDOR_GENERIC_ARM] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||
+ [UCS_CPU_VENDOR_GENERIC_IBM] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||
[UCS_CPU_VENDOR_GENERIC_PPC] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = UCS_CPU_EST_BCOPY_BW_FUJITSU_ARM,
|
||||
[UCS_CPU_VENDOR_ZHAOXIN] = UCS_CPU_EST_BCOPY_BW_DEFAULT
|
||||
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
|
||||
index a59561c041fa..dc95f3f56231 100644
|
||||
index eb557d385670..cfd297e24558 100644
|
||||
--- src/ucs/arch/cpu.h
|
||||
+++ src/ucs/arch/cpu.h
|
||||
@@ -63,6 +63,7 @@ typedef enum ucs_cpu_vendor {
|
||||
@@ -64,6 +64,7 @@ typedef enum ucs_cpu_vendor {
|
||||
UCS_CPU_VENDOR_AMD,
|
||||
UCS_CPU_VENDOR_GENERIC_ARM,
|
||||
UCS_CPU_VENDOR_GENERIC_PPC,
|
||||
@ -193,7 +193,7 @@ index a59561c041fa..dc95f3f56231 100644
|
||||
UCS_CPU_VENDOR_FUJITSU_ARM,
|
||||
UCS_CPU_VENDOR_ZHAOXIN,
|
||||
UCS_CPU_VENDOR_LAST
|
||||
@@ -98,6 +99,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
||||
@@ -99,6 +100,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
||||
# include "ppc64/cpu.h"
|
||||
#elif defined(__aarch64__)
|
||||
# include "aarch64/cpu.h"
|
||||
@ -410,10 +410,10 @@ index 000000000000..225e4e5e896a
|
||||
+#endif
|
||||
+
|
||||
diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c
|
||||
index 57160c061986..be6b20812c39 100644
|
||||
index 58e67835c4d0..308f03606d5b 100644
|
||||
--- src/ucs/sys/sys.c
|
||||
+++ src/ucs/sys/sys.c
|
||||
@@ -1256,8 +1256,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
||||
@@ -1258,8 +1258,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
||||
if (old_ptr == NULL) {
|
||||
/* Note: Must pass the 0 offset as "long", otherwise it will be
|
||||
* partially undefined when converted to syscall arguments */
|
||||
|
@ -1,3 +1,96 @@
|
||||
-------------------------------------------------------------------
|
||||
Mon Oct 2 08:00:54 UTC 2023 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Update to 1.15.0
|
||||
- UCP
|
||||
- Added 2-stage pipeline protocol in the new protocol infrastructure
|
||||
- Added reset and abort functionality of rendezvous protocols in the
|
||||
new infrastructure
|
||||
- Added zero-copy rendezvous data send protocol in the new infrastructure
|
||||
- Added support for user memory handle in the new protocol infrastructure
|
||||
- Added option to force ODP registration for certain memory types
|
||||
- Enabled lock free memory region deregistration
|
||||
- Updated allow/deny transport list feature to control auxiliary transport selection
|
||||
- Multiple performance improvements of the new protocol infrastructure
|
||||
- Multiple improvements in error and debug messages
|
||||
- Fixed assertion when sending from non-contiguous GPU buffer to managed buffer
|
||||
- Fixed the race condition on endpoint configurations
|
||||
- Fixed endpoint reconfiguration issues due to asymmetrical selection
|
||||
- Fixed endpoint reconfiguration error due to wrong locality detection
|
||||
- Fixed crash during connection manager cleanup
|
||||
- Fixed rkey index calculation for rendezvous protocol
|
||||
- Fixed rcache dump function
|
||||
- Removed logging from rkey unpack in release mode
|
||||
- Fixed double free of rkey in rendezvous protocol
|
||||
- Fixed rendezvous pipeline protocol error flow
|
||||
- Fixed error handling in rendezvous get zcopy protocol
|
||||
- Replay pending requests of wireup EP CM during connection establishment
|
||||
to prevent potential ordering issues and wrong configuration
|
||||
- Pass user-provided memory type to the function that checks whether the buffer
|
||||
can be sent inline or not
|
||||
- Avoid memory registration during UCP context initialization
|
||||
- Fixed CPU/device atomics selection in the new protocol infrastructure
|
||||
- Multiple fixes in the new protocol infrastructure information output
|
||||
- UCT
|
||||
- Split UCT_MD_MKEY_PACK_FLAG_INVALIDATE into two flags for RMA and AMO
|
||||
- Added put_zcopy and get_zcopy scheme support for self transport
|
||||
- Added base implementation of is_reachable_v2 API using intra/inter flag
|
||||
- Introduced MD capability for non-blocking registration memory types
|
||||
- Added check for dmabuf kernel support in ROCm memory domain
|
||||
- Fixed exported memh packing
|
||||
- Fixed an error in checking return status of multi-threaded memory
|
||||
registration function
|
||||
- RDMA CORE (IB, ROCE, etc.)
|
||||
- Added implementation of is_reachable_v2 routine to IB interface
|
||||
- Added option to control CQE zipping per CQ RX/TX direction
|
||||
- Added option to specify how DCI selects port under RoCE LAG
|
||||
- Added hw_dcs to the list of policies to select DCI by an endpoint
|
||||
- Removed implicit on-demand paging
|
||||
- Added option to set RoCE LAG DCT port for response under queue affinity mode
|
||||
- Improved IB memlock limit logging
|
||||
- Fixed dma-buf based memory region registration
|
||||
- Fixed memory handle data corruption when PCIe relaxed ordering is enabled
|
||||
- Fixed performance degradation when indirect atomic key is not supported
|
||||
by the hardware
|
||||
- Fixed remote access error to strict-order keys because of wrong offset
|
||||
- Added check for UAR support to memory domain opening
|
||||
- Fixed updating port counters for devx qp
|
||||
- Fixed ibv_create_cq error message on node without InfiniBand
|
||||
- Fixed performance degradation due to using 2 paths on NDR400 by default
|
||||
- Removed unnecessary async lock which otherwise would block UD progress
|
||||
- UCS
|
||||
- Added ucs_string_buffer_rbrk() to split token
|
||||
- Fixed lane selection and added bandwidth estimation for Sapphire Rapids family
|
||||
- Fixed displaying wrong environment variable suggestions
|
||||
- Fixed VFS warning output
|
||||
- Fixed SEGV in ucs_debug_backtrace_next(), upon previous SEGV handling,
|
||||
due to ENOMEM situation
|
||||
- Fixed memory corruption when using UCX_MPOOL_FIFO=y
|
||||
- UCM
|
||||
- Fixed conditional jump patching
|
||||
- Fixed mremap() override
|
||||
- Tests
|
||||
- Added a rocm docker container for testing
|
||||
- Added option to send client_id in iodemo test
|
||||
- Added support for multiple connections to the same server in iodemo test
|
||||
- Added synchronization before exit to hello world examples
|
||||
- Fixed wrong usage of ep_close in examples
|
||||
- Tools
|
||||
- Added user-side memcpy option for AM benchmarks in ucx_perftest
|
||||
- Added wireshark LUA dissectors for some UCX protocols
|
||||
- Fixed memory access flags in perftest
|
||||
- Removed support for librte from perf
|
||||
- Fixed worker flush deadlock when using multiple workers in ucx_perftest
|
||||
- Build
|
||||
- Added support for binutils 2.40
|
||||
- Added versioned dependency to switch between packages with the same names
|
||||
- Added a separate xpmem deb subpackage
|
||||
- Added aarch64 support to the binary distribution pipeline
|
||||
- Removed dependency on libnuma
|
||||
- Documentation
|
||||
- Updated ucp_worker_release_address description
|
||||
- Refresh openucx-s390x-support.patch against latest sources
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Tue Jul 25 13:08:44 UTC 2023 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
|
@ -20,7 +20,7 @@
|
||||
%define version_suf %{nil}
|
||||
|
||||
Name: openucx
|
||||
Version: 1.14.1
|
||||
Version: 1.15.0
|
||||
Release: 0
|
||||
Summary: Communication layer for Message Passing (MPI)
|
||||
License: BSD-3-Clause
|
||||
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:baa0634cafb269a3112f626eb226bcd2ca8c9fcf0fec3b8e2a3553baad5f77aa
|
||||
size 3030698
|
BIN
ucx-1.15.0.tar.gz
(Stored with Git LFS)
Normal file
BIN
ucx-1.15.0.tar.gz
(Stored with Git LFS)
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user