diff --git a/openucx-s390x-support.patch b/openucx-s390x-support.patch index 66ba44f..abbdd93 100644 --- a/openucx-s390x-support.patch +++ b/openucx-s390x-support.patch @@ -1,4 +1,4 @@ -commit 909e453ce7ee166c98274096720a6c9345fdc3ae +commit 328a69d07b618e0aa83fe2351e8d7ca4fc1b2f00 Author: Nicolas Morey Date: Mon Feb 13 17:04:14 2023 +0100 @@ -7,7 +7,7 @@ Date: Mon Feb 13 17:04:14 2023 +0100 Signed-off-by: Nicolas Morey-Chaisemartin diff --git config/m4/ucm.m4 config/m4/ucm.m4 -index abf02c06f430..84aa3e214d5a 100644 +index e5e66266d695..ef7e4ede93ce 100644 --- config/m4/ucm.m4 +++ config/m4/ucm.m4 @@ -80,9 +80,20 @@ AC_CHECK_DECLS([SYS_ipc], @@ -33,10 +33,10 @@ index abf02c06f430..84aa3e214d5a 100644 AS_IF([test "x$bistro_hooks_happy" = "xyes"], [AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])], diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c -index f41155ca3d9d..edeaaf5df529 100644 +index e5aff871d491..2b7c54319f53 100644 --- src/tools/info/sys_info.c +++ src/tools/info/sys_info.c -@@ -46,7 +46,8 @@ static const char* cpu_vendor_names[] = { +@@ -47,7 +47,8 @@ static const char* cpu_vendor_names[] = { [UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM", [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC", [UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM", @@ -98,7 +98,7 @@ index 000000000000..c0f427f4984a + +#endif diff --git src/ucs/Makefile.am src/ucs/Makefile.am -index 9e0bd2ffb1cc..97021c205d5c 100644 +index c7696d56f25d..c63b32bad844 100644 --- src/ucs/Makefile.am +++ src/ucs/Makefile.am @@ -22,6 +22,7 @@ libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la $(BFD_LIBS) @@ -159,10 +159,10 @@ index e89a37d0b673..dd2b9d5b6bcb 100644 # error "Unsupported architecture" #endif diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c -index ac8e268a1502..16c308bae94c 100644 +index ece8f7fb82dd..b35b10ad090a 100644 --- src/ucs/arch/cpu.c +++ src/ucs/arch/cpu.c -@@ -61,6 +61,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = { +@@ 
-63,6 +63,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = { .min = UCS_MEMUNITS_INF, .max = UCS_MEMUNITS_INF }, @@ -173,19 +173,19 @@ index ac8e268a1502..16c308bae94c 100644 [UCS_CPU_VENDOR_FUJITSU_ARM] = { .min = UCS_MEMUNITS_INF, .max = UCS_MEMUNITS_INF -@@ -77,6 +81,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = { - [UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE, - [UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE, - [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE, -+ [UCS_CPU_VENDOR_GENERIC_IBM] = 5800 * UCS_MBYTE, - [UCS_CPU_VENDOR_FUJITSU_ARM] = 12000 * UCS_MBYTE, - [UCS_CPU_VENDOR_ZHAOXIN] = 5800 * UCS_MBYTE - }; +@@ -78,6 +82,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = { + [UCS_CPU_VENDOR_INTEL] = UCS_CPU_EST_BCOPY_BW_DEFAULT, + [UCS_CPU_VENDOR_AMD] = UCS_CPU_EST_BCOPY_BW_AMD, + [UCS_CPU_VENDOR_GENERIC_ARM] = UCS_CPU_EST_BCOPY_BW_DEFAULT, ++ [UCS_CPU_VENDOR_GENERIC_IBM] = UCS_CPU_EST_BCOPY_BW_DEFAULT, + [UCS_CPU_VENDOR_GENERIC_PPC] = UCS_CPU_EST_BCOPY_BW_DEFAULT, + [UCS_CPU_VENDOR_FUJITSU_ARM] = UCS_CPU_EST_BCOPY_BW_FUJITSU_ARM, + [UCS_CPU_VENDOR_ZHAOXIN] = UCS_CPU_EST_BCOPY_BW_DEFAULT diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h -index a59561c041fa..dc95f3f56231 100644 +index eb557d385670..cfd297e24558 100644 --- src/ucs/arch/cpu.h +++ src/ucs/arch/cpu.h -@@ -63,6 +63,7 @@ typedef enum ucs_cpu_vendor { +@@ -64,6 +64,7 @@ typedef enum ucs_cpu_vendor { UCS_CPU_VENDOR_AMD, UCS_CPU_VENDOR_GENERIC_ARM, UCS_CPU_VENDOR_GENERIC_PPC, @@ -193,7 +193,7 @@ index a59561c041fa..dc95f3f56231 100644 UCS_CPU_VENDOR_FUJITSU_ARM, UCS_CPU_VENDOR_ZHAOXIN, UCS_CPU_VENDOR_LAST -@@ -98,6 +99,8 @@ typedef struct ucs_cpu_builtin_memcpy { +@@ -99,6 +100,8 @@ typedef struct ucs_cpu_builtin_memcpy { # include "ppc64/cpu.h" #elif defined(__aarch64__) # include "aarch64/cpu.h" @@ -410,10 +410,10 @@ index 000000000000..225e4e5e896a +#endif + diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c -index 57160c061986..be6b20812c39 
100644 +index 58e67835c4d0..308f03606d5b 100644 --- src/ucs/sys/sys.c +++ src/ucs/sys/sys.c -@@ -1256,8 +1256,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length) +@@ -1258,8 +1258,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length) if (old_ptr == NULL) { /* Note: Must pass the 0 offset as "long", otherwise it will be * partially undefined when converted to syscall arguments */ diff --git a/openucx.changes b/openucx.changes index 13ed5d0..6c472be 100644 --- a/openucx.changes +++ b/openucx.changes @@ -1,3 +1,96 @@ +------------------------------------------------------------------- +Mon Oct 2 08:00:54 UTC 2023 - Nicolas Morey + +- Update to 1.15.0 + - UCP + - Added 2-stage pipeline protocol in the new protocol infrastructure + - Added reset and abort functionality of rendezvous protocols in the + new infrastructure + - Added zero-copy rendezvous data send protocol in the new infrastructure + - Added support for user memory handle in the new protocol infrastructure + - Added option to force ODP registration for certain memory types + - Enabled lock free memory region deregistration + - Updated allow/deny transport list feature to control auxiliary transport selection + - Multiple performance improvements of the new protocol infrastructure + - Multiple improvements in error and debug messages + - Fixed assertion when sending from non-contiguous GPU buffer to managed buffer + - Fixed the race condition on endpoint configurations + - Fixed endpoint reconfiguration issues due to asymmetrical selection + - Fixed endpoint reconfiguration error due to wrong locality detection + - Fixed crash during connection manager cleanup + - Fixed rkey index calculation for rendezvous protocol + - Fixed rcache dump function + - Removed logging from rkey unpack in release mode + - Fixed double free of rkey in rendezvous protocol + - Fixed rendezvous pipeline protocol error flow + - Fixed error handling in rendezvous get zcopy
protocol + - Replay pending requests of wireup EP CM during connection establishment + to prevent potential ordering issues and wrong configuration + - Pass user-provided memory type to the function that checks whether the buffer + can be sent inline or not + - Avoid memory registration during UCP context initialization + - Fixed CPU/device atomics selection in the new protocol infrastructure + - Multiple fixes in the new protocol infrastructure information output + - UCT + - Split UCT_MD_MKEY_PACK_FLAG_INVALIDATE into two flags for RMA and AMO + - Added put_zcopy and get_zcopy scheme support for self transport + - Added base implementation of is_reachable_v2 API using intra/inter flag + - Introduced MD capability for non-blocking registration memory types + - Added check for dmabuf kernel support in ROCm memory domain + - Fixed exported memh packing + - Fixed an error in checking return status of multi-threaded memory + registration function + - RDMA CORE (IB, ROCE, etc.) + - Added implementation of is_reachable_v2 routine to IB interface + - Added option to control CQE zipping per CQ RX/TX direction + - Added option to specify how DCI selects port under RoCE LAG + - Added hw_dcs to the list of policies to select DCI by an endpoint + - Removed implicit on-demand paging + - Added option to set RoCE lag dct port for response under queue affinity mode + - Improved IB memlock limit logging + - Fixed dma-buf based memory region registration + - Fixed memory handle data corruption when PCIe relaxed ordering is enabled + - Fixed performance degradation when indirect atomic key is not supported + by the hardware + - Fixed remote access error to strict-order keys because of wrong offset + - Added check for UAR support to memory domain opening + - Fixed updating port counters for devx qp + - Fixed ibv_create_cq error message on node without Infiniband + - Fixed performance degradation due to using 2 paths on NDR400 by default + - Removed unnecessary async lock which 
otherwise would block UD progress + - UCS + - Added ucs_string_buffer_rbrk() to split token + - Fixed lane selection and added bandwidth estimation for Sapphire Rapids family + - Fixed displaying wrong environment variable suggestions + - Fixed VFS warning output + - Fixed SEGV in ucs_debug_backtrace_next(), upon previous SEGV handling, + due to ENOMEM situation + - Fixed memory corruption when using UCX_MPOOL_FIFO=y + - UCM + - Fixed conditional jump patching + - Fixed mremap() override + - Tests + - Added a rocm docker container for testing + - Added option to send client_id in iodemo test + - Added support for multiple connections to the same server in iodemo test + - Added synchronization before exit to hello world examples + - Fixed wrong usage of ep_close in examples + - Tools + - Added user-side memcpy option for AM benchmarks in ucx_perftest + - Added wireshark LUA dissectors for some UCX protocols + - Fixed memory access flags in perftest + - Removed support for librte from perf + - Fixed worker flush deadlock when using multiple workers in ucx_perftest + - Build + - Added support for binutils 2.40 + - Added versioned dependency to switch between packages with the same names + - Added a separate xpmem deb subpackage + - Added aarch64 support to the binary distribution pipeline + - Removed dependency on libnuma + - Documentation + - Updated ucp_worker_release_address description +- Refresh openucx-s390x-support.patch against latest sources + ------------------------------------------------------------------- Tue Jul 25 13:08:44 UTC 2023 - Nicolas Morey diff --git a/openucx.spec b/openucx.spec index c149250..99ef109 100644 --- a/openucx.spec +++ b/openucx.spec @@ -20,7 +20,7 @@ %define version_suf %{nil} Name: openucx -Version: 1.14.1 +Version: 1.15.0 Release: 0 Summary: Communication layer for Message Passing (MPI) License: BSD-3-Clause diff --git a/ucx-1.14.1.tar.gz b/ucx-1.14.1.tar.gz deleted file mode 100644 index 0203bb6..0000000 --- a/ucx-1.14.1.tar.gz
+++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:baa0634cafb269a3112f626eb226bcd2ca8c9fcf0fec3b8e2a3553baad5f77aa -size 3030698 diff --git a/ucx-1.15.0.tar.gz b/ucx-1.15.0.tar.gz new file mode 100644 index 0000000..feeb6be --- /dev/null +++ b/ucx-1.15.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b202087076bc1c98f9249144f0c277a8ea88ad4ca6f404f94baa9cb3aebda6d +size 3090730