From 7ed9b3ac39e117aacabc84fe2cad3bcc85425e62663084c4ba8e15a5d5f42456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Fri, 3 May 2024 17:33:35 +0200 Subject: [PATCH] Sync from SUSE:SLFO:Main openucx revision 0805f6108977eab3a5fdc89b96d84beb --- .gitattributes | 23 + openucx-s390x-support.patch | 435 +++++++++++++++++ openucx.changes | 744 +++++++++++++++++++++++++++++ openucx.spec | 245 ++++++++++ ucm-fix-UCX_MEM_MALLOC_RELOC.patch | 23 + ucx-1.15.0.tar.gz | 3 + 6 files changed, 1473 insertions(+) create mode 100644 .gitattributes create mode 100644 openucx-s390x-support.patch create mode 100644 openucx.changes create mode 100644 openucx.spec create mode 100644 ucm-fix-UCX_MEM_MALLOC_RELOC.patch create mode 100644 ucx-1.15.0.tar.gz diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/openucx-s390x-support.patch b/openucx-s390x-support.patch new file mode 100644 index 0000000..abbdd93 --- /dev/null 
+++ b/openucx-s390x-support.patch @@ -0,0 +1,435 @@ +commit 328a69d07b618e0aa83fe2351e8d7ca4fc1b2f00 +Author: Nicolas Morey +Date: Mon Feb 13 17:04:14 2023 +0100 + + openucx s390x support + + Signed-off-by: Nicolas Morey-Chaisemartin + +diff --git config/m4/ucm.m4 config/m4/ucm.m4 +index e5e66266d695..ef7e4ede93ce 100644 +--- config/m4/ucm.m4 ++++ config/m4/ucm.m4 +@@ -80,9 +80,20 @@ AC_CHECK_DECLS([SYS_ipc], + [ipc_hooks_happy=no], + [#include ]) + ++ ++SAVE_CFLAGS=$CFLAGS ++CFLAGS="$CLAGS -Isrc/" ++bistro_arch_happy=yes ++AC_CHECK_DECLS([ucm_bistro_patch], ++ [], ++ [bistro_arch_happy=no], ++ [#include ]) ++CFLAGS=$SAVE_CFLAGS ++ + AS_IF([test "x$mmap_hooks_happy" = "xyes"], + AS_IF([test "x$ipc_hooks_happy" = "xyes" -o "x$shm_hooks_happy" = "xyes"], +- [bistro_hooks_happy=yes])) ++ AS_IF([test "x$bistro_arch_happy" == "xyes"], ++ [bistro_hooks_happy=yes]))) + + AS_IF([test "x$bistro_hooks_happy" = "xyes"], + [AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])], +diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c +index e5aff871d491..2b7c54319f53 100644 +--- src/tools/info/sys_info.c ++++ src/tools/info/sys_info.c +@@ -47,7 +47,8 @@ static const char* cpu_vendor_names[] = { + [UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM", + [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC", + [UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM", +- [UCS_CPU_VENDOR_ZHAOXIN] = "Zhaoxin" ++ [UCS_CPU_VENDOR_ZHAOXIN] = "Zhaoxin", ++ [UCS_CPU_VENDOR_GENERIC_IBM] = "Generic IBM" + }; + + static double measure_memcpy_bandwidth(size_t size) +diff --git src/ucm/Makefile.am src/ucm/Makefile.am +index 48b82bf89cbe..582f83d1ea82 100644 +--- src/ucm/Makefile.am ++++ src/ucm/Makefile.am +@@ -31,7 +31,8 @@ noinst_HEADERS = \ + bistro/bistro.h \ + bistro/bistro_x86_64.h \ + bistro/bistro_aarch64.h \ +- bistro/bistro_ppc64.h ++ bistro/bistro_ppc64.h \ ++ bistro/bistro_s390x.h + + libucm_la_SOURCES = \ + event/event.c \ +diff --git src/ucm/bistro/bistro.h src/ucm/bistro/bistro.h +index 
b622e3c14fbb..4acd9e9cdb83 100644 +--- src/ucm/bistro/bistro.h ++++ src/ucm/bistro/bistro.h +@@ -20,6 +20,8 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t; + # include "bistro_aarch64.h" + #elif defined(__x86_64__) + # include "bistro_x86_64.h" ++#elif defined(__s390x__) ++# include "bistro_s390x.h" + #else + # error "Unsupported architecture" + #endif +diff --git src/ucm/bistro/bistro_s390x.h src/ucm/bistro/bistro_s390x.h +new file mode 100644 +index 000000000000..c0f427f4984a +--- /dev/null ++++ src/ucm/bistro/bistro_s390x.h +@@ -0,0 +1,18 @@ ++#ifndef UCM_BISTRO_BISTRO_S390X_H_ ++#define UCM_BISTRO_BISTRO_S390X_H_ ++ ++#include ++ ++#include ++#include ++ ++#define UCM_BISTRO_PROLOGUE ++#define UCM_BISTRO_EPILOGUE ++ ++static inline ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol, ++ void **orig_func_p, ++ ucm_bistro_restore_point_t **rp){ ++ return UCS_ERR_UNSUPPORTED; ++} ++ ++#endif +diff --git src/ucs/Makefile.am src/ucs/Makefile.am +index c7696d56f25d..c63b32bad844 100644 +--- src/ucs/Makefile.am ++++ src/ucs/Makefile.am +@@ -22,6 +22,7 @@ libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la $(BFD_LIBS) + nobase_dist_libucs_la_HEADERS = \ + arch/aarch64/bitops.h \ + arch/ppc64/bitops.h \ ++ arch/s390x/bitops.h \ + arch/x86_64/bitops.h \ + arch/bitops.h \ + algorithm/crc.h \ +@@ -82,12 +83,14 @@ nobase_dist_libucs_la_HEADERS = \ + arch/aarch64/global_opts.h \ + arch/generic/atomic.h \ + arch/ppc64/global_opts.h \ ++ arch/s390x/global_opts.h \ + arch/global_opts.h + + noinst_HEADERS = \ + arch/aarch64/cpu.h \ + arch/generic/cpu.h \ + arch/ppc64/cpu.h \ ++ arch/s390x/cpu.h \ + arch/x86_64/cpu.h \ + arch/cpu.h \ + config/ucm_opts.h \ +@@ -138,6 +141,7 @@ libucs_la_SOURCES = \ + algorithm/string_distance.c \ + arch/aarch64/cpu.c \ + arch/aarch64/global_opts.c \ ++ arch/s390x/global_opts.c \ + arch/ppc64/timebase.c \ + arch/ppc64/global_opts.c \ + arch/x86_64/cpu.c \ +diff --git src/ucs/arch/atomic.h 
src/ucs/arch/atomic.h +index 52be711c1d0a..8f1d62a28dc9 100644 +--- src/ucs/arch/atomic.h ++++ src/ucs/arch/atomic.h +@@ -15,6 +15,8 @@ + # include "generic/atomic.h" + #elif defined(__aarch64__) + # include "generic/atomic.h" ++#elif defined(__s390x__) ++# include "generic/atomic.h" + #else + # error "Unsupported architecture" + #endif +diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h +index e89a37d0b673..dd2b9d5b6bcb 100644 +--- src/ucs/arch/bitops.h ++++ src/ucs/arch/bitops.h +@@ -20,6 +20,8 @@ BEGIN_C_DECLS + # include "ppc64/bitops.h" + #elif defined(__aarch64__) + # include "aarch64/bitops.h" ++#elif defined(__s390x__) ++# include "s390x/bitops.h" + #else + # error "Unsupported architecture" + #endif +diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c +index ece8f7fb82dd..b35b10ad090a 100644 +--- src/ucs/arch/cpu.c ++++ src/ucs/arch/cpu.c +@@ -63,6 +63,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = { + .min = UCS_MEMUNITS_INF, + .max = UCS_MEMUNITS_INF + }, ++ [UCS_CPU_VENDOR_GENERIC_IBM] = { ++ .min = UCS_MEMUNITS_INF, ++ .max = UCS_MEMUNITS_INF ++ }, + [UCS_CPU_VENDOR_FUJITSU_ARM] = { + .min = UCS_MEMUNITS_INF, + .max = UCS_MEMUNITS_INF +@@ -78,6 +82,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = { + [UCS_CPU_VENDOR_INTEL] = UCS_CPU_EST_BCOPY_BW_DEFAULT, + [UCS_CPU_VENDOR_AMD] = UCS_CPU_EST_BCOPY_BW_AMD, + [UCS_CPU_VENDOR_GENERIC_ARM] = UCS_CPU_EST_BCOPY_BW_DEFAULT, ++ [UCS_CPU_VENDOR_GENERIC_IBM] = UCS_CPU_EST_BCOPY_BW_DEFAULT, + [UCS_CPU_VENDOR_GENERIC_PPC] = UCS_CPU_EST_BCOPY_BW_DEFAULT, + [UCS_CPU_VENDOR_FUJITSU_ARM] = UCS_CPU_EST_BCOPY_BW_FUJITSU_ARM, + [UCS_CPU_VENDOR_ZHAOXIN] = UCS_CPU_EST_BCOPY_BW_DEFAULT +diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h +index eb557d385670..cfd297e24558 100644 +--- src/ucs/arch/cpu.h ++++ src/ucs/arch/cpu.h +@@ -64,6 +64,7 @@ typedef enum ucs_cpu_vendor { + UCS_CPU_VENDOR_AMD, + UCS_CPU_VENDOR_GENERIC_ARM, + UCS_CPU_VENDOR_GENERIC_PPC, ++ 
UCS_CPU_VENDOR_GENERIC_IBM, + UCS_CPU_VENDOR_FUJITSU_ARM, + UCS_CPU_VENDOR_ZHAOXIN, + UCS_CPU_VENDOR_LAST +@@ -99,6 +100,8 @@ typedef struct ucs_cpu_builtin_memcpy { + # include "ppc64/cpu.h" + #elif defined(__aarch64__) + # include "aarch64/cpu.h" ++#elif defined(__s390x__) ++# include "s390x/cpu.h" + #else + # error "Unsupported architecture" + #endif +diff --git src/ucs/arch/global_opts.h src/ucs/arch/global_opts.h +index 75d086177a7f..96c670cb60d3 100644 +--- src/ucs/arch/global_opts.h ++++ src/ucs/arch/global_opts.h +@@ -15,6 +15,8 @@ + # include "ppc64/global_opts.h" + #elif defined(__aarch64__) + # include "aarch64/global_opts.h" ++#elif defined(__s390x__) ++# include "s390x/global_opts.h" + #else + # error "Unsupported architecture" + #endif +diff --git src/ucs/arch/s390x/bitops.h src/ucs/arch/s390x/bitops.h +new file mode 100644 +index 000000000000..ce48ff1ff451 +--- /dev/null ++++ src/ucs/arch/s390x/bitops.h +@@ -0,0 +1,37 @@ ++/** ++* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. ++* ++* See file LICENSE for terms. ++*/ ++ ++#ifndef UCS_S390X_BITOPS_H_ ++#define UCS_S390X_BITOPS_H_ ++ ++#include ++ ++ ++static inline unsigned __ucs_ilog2_u32(uint32_t n) ++{ ++ if (!n) ++ return 0; ++ return 31 - __builtin_clz(n); ++} ++ ++static inline unsigned __ucs_ilog2_u64(uint64_t n) ++{ ++ if (!n) ++ return 0; ++ return 63 - __builtin_clz(n); ++} ++ ++static UCS_F_ALWAYS_INLINE unsigned ucs_ffs32(uint32_t n) ++{ ++ return __ucs_ilog2_u32(n & -n); ++} ++ ++static inline unsigned ucs_ffs64(uint64_t n) ++{ ++ return __ucs_ilog2_u64(n & -n); ++} ++ ++#endif +diff --git src/ucs/arch/s390x/cpu.h src/ucs/arch/s390x/cpu.h +new file mode 100644 +index 000000000000..4f0a87006118 +--- /dev/null ++++ src/ucs/arch/s390x/cpu.h +@@ -0,0 +1,84 @@ ++/** ++* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. ++* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. ++* ++* See file LICENSE for terms. 
++*/ ++ ++ ++#ifndef UCS_S390X_CPU_H_ ++#define UCS_S390X_CPU_H_ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++#define UCS_ARCH_CACHE_LINE_SIZE 256 ++ ++BEGIN_C_DECLS ++ ++/* Assume the worst - weak memory ordering */ ++#define ucs_memory_bus_fence() asm volatile (""::: "memory") ++#define ucs_memory_bus_store_fence() ucs_memory_bus_fence() ++#define ucs_memory_bus_load_fence() ucs_memory_bus_fence() ++#define ucs_memory_bus_wc_flush() ucs_memory_bus_fence() ++#define ucs_memory_cpu_fence() ucs_memory_bus_fence() ++#define ucs_memory_cpu_store_fence() ucs_memory_bus_fence() ++#define ucs_memory_cpu_load_fence() ucs_memory_bus_fence() ++#define ucs_memory_cpu_wc_fence() ucs_memory_bus_fence() ++ ++ ++static inline uint64_t ucs_arch_read_hres_clock() ++{ ++ unsigned long clk; ++ asm volatile("stck %0" : "=Q" (clk) : : "cc"); ++ return clk >> 2; ++} ++#define ucs_arch_get_clocks_per_sec ucs_arch_generic_get_clocks_per_sec ++ ++ ++static inline ucs_cpu_model_t ucs_arch_get_cpu_model() ++{ ++ return UCS_CPU_MODEL_UNKNOWN; ++} ++ ++static inline ucs_cpu_vendor_t ucs_arch_get_cpu_vendor() ++{ ++ return UCS_CPU_VENDOR_GENERIC_IBM; ++} ++ ++static inline int ucs_arch_get_cpu_flag() ++{ ++ return UCS_CPU_FLAG_UNKNOWN; ++} ++ ++double ucs_arch_get_clocks_per_sec(); ++ ++#define ucs_arch_wait_mem ucs_arch_generic_wait_mem ++ ++static inline void ucs_cpu_init() ++{ ++} ++ ++static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len) ++{ ++ return memcpy(dst, src, len); ++} ++ ++static UCS_F_ALWAYS_INLINE void ++ucs_memcpy_nontemporal(void *dst, const void *src, size_t len) ++{ ++ memcpy(dst, src, len); ++} ++ ++static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes) ++{ ++ return UCS_ERR_UNSUPPORTED; ++} ++ ++END_C_DECLS ++ ++#endif +diff --git src/ucs/arch/s390x/global_opts.c src/ucs/arch/s390x/global_opts.c +new file mode 100644 +index 000000000000..4fa0c74034a7 +--- /dev/null ++++ src/ucs/arch/s390x/global_opts.c +@@ 
-0,0 +1,24 @@ ++/** ++* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. ++* ++* See file LICENSE for terms. ++*/ ++ ++#if defined(__s390x__) ++ ++#ifdef HAVE_CONFIG_H ++# include "config.h" ++#endif ++ ++#include ++#include ++ ++ucs_config_field_t ucs_arch_global_opts_table[] = { ++ {NULL} ++}; ++ ++void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config) ++{ ++} ++ ++#endif +diff --git src/ucs/arch/s390x/global_opts.h src/ucs/arch/s390x/global_opts.h +new file mode 100644 +index 000000000000..225e4e5e896a +--- /dev/null ++++ src/ucs/arch/s390x/global_opts.h +@@ -0,0 +1,25 @@ ++/** ++* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. ++* ++* See file LICENSE for terms. ++*/ ++ ++ ++#ifndef UCS_PPC64_GLOBAL_OPTS_H_ ++#define UCS_PPC64_GLOBAL_OPTS_H_ ++ ++#include ++ ++BEGIN_C_DECLS ++ ++#define UCS_ARCH_GLOBAL_OPTS_INITALIZER {} ++ ++/* built-in memcpy config */ ++typedef struct ucs_arch_global_opts { ++ char dummy; ++} ucs_arch_global_opts_t; ++ ++END_C_DECLS ++ ++#endif ++ +diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c +index 58e67835c4d0..308f03606d5b 100644 +--- src/ucs/sys/sys.c ++++ src/ucs/sys/sys.c +@@ -1258,8 +1258,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length) + if (old_ptr == NULL) { + /* Note: Must pass the 0 offset as "long", otherwise it will be + * partially undefined when converted to syscall arguments */ ++#if defined(__s390x__) ++ long int _args[6] = { ++ (long int) NULL, ++ (long int) new_length, ++ (long int) PROT_READ|PROT_WRITE, ++ (long int) MAP_PRIVATE|MAP_ANONYMOUS, ++ (long int) -1, ++ (long int) 0ul}; ++ ptr = (void*)syscall(__NR_mmap, _args); ++#else + ptr = (void*)syscall(__NR_mmap, NULL, new_length, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0ul); ++#endif + if (ptr == MAP_FAILED) { + ucs_log_fatal_error("mmap(NULL, %zu, READ|WRITE, PRIVATE|ANON) failed: %m", + new_length); diff --git a/openucx.changes b/openucx.changes new file mode 100644 
index 0000000..6c472be --- /dev/null +++ b/openucx.changes @@ -0,0 +1,744 @@ +------------------------------------------------------------------- +Mon Oct 2 08:00:54 UTC 2023 - Nicolas Morey + +- Update to 1.15.0 + - UCP + - Added 2-stage pipeline protocol in the new protocol infrastructure + - Added reset and abort functionality of rendezvous protocols in the + new infrastructure + - Added zero-copy rendezvous data send protocol in the new infrastructure + - Added support for user memory handle in the new protocol infrastructure + - Added option to force ODP registration for certain memory types + - Enabled lock free memory region deregistration + - Updated allow/deny transport list feature to control auxiliary transport selection + - Multiple performance improvements of the new protocol infrastructure + - Multiple improvements in error and debug messages + - Fixed assertion when sending from non-contiguous GPU buffer to managed buffer + - Fixed the race condition on endpoint configurations + - Fixed endpoint reconfiguration issues due to asymmetrical selection + - Fixed endpoint reconfiguration error due to wrong locality detection + - Fixed crash during connection manager cleanup + - Fixed rkey index calculation for rendezvous protocol + - Fixed rcache dump function + - Removed logging from rkey unpack in release mode + - Fixed double free of rkey in rendezvous protocol + - Fixed rendezvous pipeline protocol error flow + - Fixed error handling in rendezvous get zcopy protocol + - Replay pending requests of wireup EP CM during connection establishment + to prevent potential ordering issues and wrong configuration + - Pass user-provided memory type to the function that checks whether the buffer + can be sent inline or not + - Avoid memory registration during UCP context initialization + - Fixed CPU/device atomics selection in the new protocol infrastructure + - Multiple fixes in the new protocol infrastructure information output + - UCT + - Split
UCT_MD_MKEY_PACK_FLAG_INVALIDATE into two flags for RMA and AMO + - Added put_zcopy and get_zcopy scheme support for self transport + - Added base implementation of is_reachable_v2 API using intra/inter flag + - Introduced MD capability for non-blocking registration memory types + - Added check for dmabuf kernel support in ROCm memory domain + - Fixed exported memh packing + - Fixed an error in checking return status of multi-threaded memory + registration function + - RDMA CORE (IB, ROCE, etc.) + - Added implementation of is_reachable_v2 routine to IB interface + - Added option to control CQE zipping per CQ RX/TX direction + - Added option to specify how DCI selects port under RoCE LAG + - Added hw_dcs to the list of policies to select DCI by an endpoint + - Removed implicit on-demand paging + - Added option to set RoCE lag dct port for response under queue affinity mode + - Improved IB memlock limit logging + - Fixed dma-buf based memory region registration + - Fixed memory handle data corruption when PCIe relaxed ordering is enabled + - Fixed performance degradation when indirect atomic key is not supported + by the hardware + - Fixed remote access error to strict-order keys because of wrong offset + - Added check for UAR support to memory domain opening + - Fixed updating port counters for devx qp + - Fixed ibv_create_cq error message on node without Infiniband + - Fixed performance degradation due to using 2 paths on NDR400 by default + - Removed unnecessary async lock which otherwise would block UD progress + - UCS + - Added ucs_string_buffer_rbrk() to split token + - Fixed lane selection and added bandwidth estimation for Sapphire Rapids family + - Fixed displaying wrong environment variable suggestions + - Fixed VFS warning output + - Fixed SEGV in ucs_debug_backtrace_next(), upon previous SEGV handling, + due to ENOMEM situation + - Fixed memory corruption when using UCX_MPOOL_FIFO=y + - UCM + - Fixed conditional jump patching + - Fixed mremap() override + 
- Tests + - Added a rocm docker container for testing + - Added option to send client_id in iodemo test + - Added support for multiple connections to the same server in iodemo test + - Added synchronization before exit to hello world examples + - Fixed wrong usage of ep_close in examples + - Tools + - Added user-side memcpy option for AM benchmarks in ucx_perftest + - Added wireshark LUA dissectors for some UCX protocols + - Fixed memory access flags in perftest + - Removed support for librte from perf + - Fixed worker flush deadlock when using multiple workers in ucx_perftest + - Build + - Added support for binutils 2.40 + - Added versioned dependency to switch between packages with the same names + - Added a separate xpmem deb subpackage + - Added aarch64 support to the binary distribution pipeline + - Removed dependency on libnuma + - Documentation + - Updated ucp_worker_release_address description +- Refresh openucx-s390x-support.patch against latest sources + +------------------------------------------------------------------- +Tue Jul 25 13:08:44 UTC 2023 - Nicolas Morey + +- Update to v1.14.1 + - Fixed ROCm to prevent the locking of host pinned memory + - Added CUDA 12 based UCX builds to the release flow + - Increased the maximal number of endpoint configurations + - Fixed filter for a slow-lanes in selection logic + - Fixed TCP transport bandwidth calculation + - Fixed device detection for ROCM + - Fixed compatibility with CUDA 12 + - Fixed rendezvous threshold for multi-path configurations + - Fixed error message in case of static link + - Fixed BlueField-3 detection + - Multiple fixes for Azure CI pipeline + +------------------------------------------------------------------- +Mon Mar 20 09:00:27 UTC 2023 - Nicolas Morey + +- Update to v1.14.0 + - UCP + - Added API for querying transport and device names on endpoint + - Added API for querying datatype object + - Added API for exporting and importing memory keys (no implementation yet) + - Added support
for non-persistent active message header + - Added infrastructure to print protocols v2 performance + - Multiple performance improvements for protocols v2 + - Added support for non-contiguous datatypes for rendezvous protocols v2 + - Added support for reset and abort request in protocols v2 + - Added support for user memory handles in RMA API + - Added multi-rail support for RMA API in protocols v2 + - Added support for up to 16 different lanes per endpoint + - Added support for dmabuf memory registration in protocols v2 + - Added strong fence mode for ucp_worker_fence() API + - UCT + - Added new uct_md_mem_attach() API to support exported memory handles + - Added remote completion mode for endpoint flush (via new flag) + - Added support for dmabuf registration + - Added new uct_ep_connect_to_ep_v2() API + - Added new uct_mem_reg_v2() API + - Added new uct_md_query_v2() API + - Added support for IPv6 loopback address in TCP transport + - RDMA CORE (IB, ROCE, etc.) + - Added ECE (enhanced connection establishment) support for RC and DC transports + - Added support for hardware DCS in DC transport + - Added UD interface and endpoint resource information to VFS + - Added CQ creation via DEVX API + - Removed support for accelerated IB transports over legacy experimental verbs + - UCS + - Added support for auto-correction of user environment variables + - UCM + - Implemented CUDA bistro hooks for aarch64 (to enable memory cache on this platform) + - Added support for CUDA virtual/stream-ordered memory with cudaMallocAsync + - Documentation + - Added FAQ for using pkg-config tool to build applications with UCX + - Tools + - Added runtime library version to the 'ucx_info -v' output + - Added support for memory types in ucx_info + - Many bugfixes. See NEWS. 
+- Drop patch merged upstream: + - UCS-DEBUG-replace-PTR-with-void.patch + - gcc13-fix.patch +- Refresh openucx-s390x-support.patch + +------------------------------------------------------------------- +Mon Mar 6 12:18:52 UTC 2023 - Martin Liška + +- Add upstream gcc13-fix.patch fix. + +------------------------------------------------------------------- +Mon Jan 16 09:45:05 UTC 2023 - Andreas Schwab + +- openucx-s390x-support.patch: fix use of clz builtin for 64-bit value + +------------------------------------------------------------------- +Tue Oct 4 16:39:30 UTC 2022 - Nicolas Morey-Chaisemartin + +- Update openucx-s390x-support.patch to add missing ucs_ffs32 on s390x +- Drop baselibs.conf as openucx only works on 64b systems + +------------------------------------------------------------------- +Tue Sep 27 15:55:19 UTC 2022 - Nicolas Morey-Chaisemartin + +- Update to v1.13.1 (jsc#PED-912) + - Core + - Added new objects to VFS: local and remote address of endpoint, + statistics of ucp_ep_create success/failure, failed/destroyed endpoints + - Added support for UCX static libraries + - Added profiling for rkey management routines + - PCIe relaxed order enabled by default for AMD CPUs + - Fixed not deallocating memory from ucp_mem_unmap if no rcache + - Fixed versioning infrastructure + - Multiple code improvements: refactoring, debug prints and assertions, etc. + - Multiple improvements in build, test and docs infrastructure + - Added new objects to VFS (md, component, log_level, etc.) 
+ - Added configuration variable to specify which loadable modules are allowed + - Added build-time configuration to disable sigaction overriding + - UCP + - Added API to pass pre-registered memory handle to UCP operations + - Added implementation of AM rendezvous protocol + - Added 2-stage pipeline rendezvous protocol for GPU + - Added support for fragment mem_type for v1 pipeline proto, disabled by default + - Added active message support for proto v2 + - Added UCP memory registration cache + - Improved adaptive progress - deactivate iface when all p2p lanes are destroyed + - Added support for user memh in proto_v1 + - Added support for selecting local address when creating a client endpoint + - Added option to limit GPUDirectRDMA size in rendezvous protocol, UCX_RNDV_MEMTYPE_DIRECT_SIZE + - Deprecated UCX_SOCKADDR_AUX_TLS configuration parameter + - Resolving remote EP ID when creating local EP disabled by default + - Added client_id to ucp_worker_create() and ucp_conn_request_query() APIs + - Added ucp_worker_address_query() API + - Updated ucp_ep_query() API for getting local and remote addresses + - Added address versioning to correctly preserve wire compatibility starting from version 1.11.0 + - Added new client/server connection establishment packet header format + - Enabled rendezvous and tag sync protocols when error handling is enabled on the endpoint + - Added iov zcopy support to RMA operations + - Reduced memory usage of unexpected messages by fitting receive buffer size to packet size + - Added support for modifying UCT and UCS configs by ucp_config_modify() API + - Optimized unpacked rkeys memory consumption + - Added request flag to influence latency vs. 
bandwidth protocol + - Reduced memory management overhead with new protocols + - Improved performance calculations for new protocols + - Added AMO support with GPU memory target using new protocols + - Added put_zcopy, get_zcopy and pipeline based rendezvous in new protocols + - Added support for user-defined alignment in Active Messages + - Added support for offload tag sync in new protocols + - Updated ucp_atomic_post() to use NBX flow + - UCT + - Introduced API uct_md_mkey_pack_v2 + - Introduced UCT iface features API + - Introduced max_inflight_eps parameter in perf_attr API + - Introduced UCT_SEND_FLAG_PEER_CHECK flag that forces checking connectivity to a peer + - Introduced UCX_RCACHE_PURGE_ON_FORK to enable/disable cleaning regions when application is forking + - Disabled PEER_FAILURE capability for XPMEM + - Added API - uct_iface_is_reachable_v2() + - Added IPv6 address support in TCP + - Added latency estimation to uct_iface_estimate_perf() + - Adjusted knem and cma overhead cost + - Increased built-in TCP keep-alive interval to 2 seconds + - RDMA CORE (IB, ROCE, etc.) 
+ - Introduced NDR autorecognition + - Introduced CQE zipping support + - Set the default MAX_RD_ATOMIC to maximum value supported by the hardware + - Disabled mlx5 ifaces on verbs MD + - Added detection of IB NDR devices + - Added check for CQ overrun in assert mode + - Added bitmap usage for releasing detached DCIs + - Added configuration for requests ack frequency with DevX + - Added remote QP info to tx error CQE traces + - ROCM + - Increased maximum number of HSA agents + - UCS + - Added topo module infrastructure + - Added memtrack and rcache information to VFS + - Added API for a per-process aggregate-sum statistics report + - Added memory pool set data structure + - Added new ptr_array API for bulk allocation + - Added ucs_string_buffer_append_flags() for string buffer + - Added ucs_ffs32() + - Added ucs_vsnprintf_safe() which always adds '\0' + - Added thread-safe put to ptr_map + - Improved accuracy of the topology distance estimation + - Added prints of leaked callbacks from the callback queue + - Removed a diagnostic message when fuse thread is stopped + - Added configurable limit for the memory consumed by rcache + - Added configuration for VFS(FUSE) thread affinity + - Added memory limit support to memtrack + - Packaging + - Added cmake config files for better integration with external cmake based projects + - Tools + - Added loop-back transport support in ucx_perftest + - Split ucx_perftest into separate modules + - Added process placement option for ucx_info + - Extended parameters correctness check in ucx_perftest +- Backported UCS-DEBUG-replace-PTR-with-void.patch + from upstream to fix compilation + + +------------------------------------------------------------------- +Thu Jan 13 08:42:05 UTC 2022 - Nicolas Morey-Chaisemartin + +- Fix UCM bistro support on non s390x archs +- Add ucm-fix-UCX_MEM_MALLOC_RELOC.patch to disable malloc relocations by default (bsc#1194369) + +------------------------------------------------------------------- +Thu Sep 
23 07:35:57 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update to v1.11.1 (jsc#SLE-19260) + +------------------------------------------------------------------- +Wed Feb 24 16:34:54 UTC 2021 - Nicolas Morey-Chaisemartin + +- Update openucx-s390x-support.patch to fix mmap syscall on s390x (bsc#1182691) + - Core: + - Added support for UCX monitoring using virtual file system (VFS)/FUSE + - Added support for applications with static CUDA runtime linking + - Added support for a configuration file + - Updated clang format configuration + - UCP + - Added rendezvous API for active messages + - Added user-defined name to context, worker, and endpoint objects + - Added flag to silence request leak check + - Added API for endpoint performance evaluation + - Added API - ucp_request_query + - Added API - ucp_lib_query + - Added bandwidth optimizations for new protocols multi-lane + - Added support for multi-rail over lanes with BW ratio >= 1/4 + - Added support for tracking outstanding requests and aborting those in case of connection failure + - Refactored keep-alive protocol + - Added device id to wireup protocol + - Added support up to 128 transport layer resources in UCP context + - Added support CUDA memory allocations with ucp_mem_map + - Increased UCP_WORKER_MAX_EP_CONFIG to 64 + - Adjusted memory type zcopy threshold when UCX_ZCOPY_THRESH set + - Refactored wireup protocols, rendezvous, get, zcopy protocols + - Added put zcopy multi-rail + - Improved logging for new protocols + - Added system topology information + - Added new protocols for eager offload protocols + - UCT + - Extended connection establishment API + - Added active message AM alignment in iface params + - Added active message short IOV API. + - Added support for interface query by operation and memory type + - Added API to get allocation base address and length + - Added md_dereg_v2 API + - UCS + - Added log filter by source file name. 
+ - Added checking for last element in fraglist queue + - Added a method to get IP address from sockaddr. + - Added memory usage limits to registration cache + - RDMA CORE (IB, ROCE, etc.) + - Added report of QP info in case of completion with error + - Refactored of FC send operations + - Added support for DevX unique QPN allocation + - Optimized endpoint lookup for DCI + - Added support for RDMA sub-function (SF) + - Added support for DCI via DEVX + - Added DCI pool per LAG port + - Added support for RoCE IP reachability check using a subnet mask + - Added active message short IOV for UD/DC/RC mlx, UD/RC verbs + - Added endpoint keep alive check for UD + - Suppressed warning if device can't be opened + - Added support for multiple flush cancel without completion + - Added ignore for devices with invalid GID + - Added support for SRQ linked list reordering + - Added flush by flow control on old devices + - Added support for configurable rdma_resolve_addr/route timeout + - Shared memory + - Added active message short IOV support for posix, sysv, and self transports + - TCP + - Added support for peer failure in case of CONNECT_TO_EP + - Added support for active message short IOV + - See NEWS for a complete changelog and bug fixes +- Refresh openucx-s390x-support against latest sources + +------------------------------------------------------------------- +Mon Oct 5 13:21:34 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to v1.9.0 (jsc#SLE-15163) + - Features: + - Added a new class of communication APIs '*_nbx' that enable API extendability while + - preserving ABI backward compatibility + - Added asynchronous event support to UCT/IB/DEVX + - Added support for latest CUDA library version + - Added NAK-based reliability protocol for UCT/IB/UD to optimize resends + - Added new tests for ROCm + - Added new configuration parameters for protocol selection + - Added performance optimization for Fujitsu A64FX with InfiniBand + - Added performance optimization for clear 
cache code aarch64 + - Added support for relaxed-order PCIe access in IB RDMA transports + - Added new TCP connection manager + - Added support for UCT/IB PKey with partial membership in IB transports + - Added support for RoCE LAG + - Added support for ROCm 3.7 and above + - Added flow control for RDMA read operations + - Improved endpoint flush implementation for UCT/IB + - Improved UD timer to avoid interrupting the main thread when not in use + - Improved latency estimation for network path with CUDA + - Improved error reporting messages + - Improved performance in active message flow (removed malloc call) + - Improved performance in ptr_array flow + - Improved performance in UCT/SM progress engine flow + - Improved I/O demo code + - Improved rendezvous protocol for CUDA + - Updated examples code + - Bugfixes: + - Fixes for most recent versions of GCC, CLANG, ARMCLANG, PGI + - Fixes in UCT/IB for strict order keys + - Fixes in memory barrier code for aarch64 + - Fixes in UCT/IB/DEVX for fork system call + - Fixes in UCT/IB for rand() call in rdma-core + - Fixes in group rescheduling for UCT/IB/DC + - Fixes in UCT/CUDA bandwidth reporting + - Fixes in rkey_ptr protocol + - Fixes in lane selection for rendezvous protocol based on get-zero-copy flow + - Fixes for ROCm build + - Fixes for XPMEM transport + - Fixes in closing endpoint code + - Fixes in RDMACM code + - Fixes in memcpy selection for AMD + - Fixes in UCT/UD endpoint flush functionality + - Fixes in XPMEM detection + - Fixes in rendezvous staging protocol + - Fixes in ROCEv1 mlx5 UDP source port configuration + - Multiple fixes in RPM spec file + - Multiple fixes in UCP documentation + - Multiple fixes in socket connection manager + - Multiple fixes in gtest + - Multiple fixes in JAVA API implementation +- Refresh openucx-s390x-support.patch against new version + +------------------------------------------------------------------- +Mon Jul 13 08:19:45 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to
v1.8.1 + - Features: + - Added binary release pipeline in Azure CI + - Bugfixes: + - Multiple fixes in testing environment + - Fixes in InfiniBand DEVX transport + - Fixes in memory management for CUDA IPC transport + - Fixes for binutils 2.34+ + - Fixes for AMD ROCM build environment + +------------------------------------------------------------------- +Fri Jun 5 09:38:40 UTC 2020 - Jan Engelhardt + +- Trim bias and filler wording from descriptions. + +------------------------------------------------------------------- +Thu Jun 4 08:18:26 UTC 2020 - Nicolas Morey-Chaisemartin + +- Update to v1.8.0 + - Features: + - Improved detection for DEVX support + - Improved TCP scalability + - Added support for ROCM to perftest + - Added support for different source and target memory types to perftest + - Added optimized memcpy for ROCM devices + - Added hardware tag-matching for CUDA buffers + - Added support for CUDA and ROCM managed memories + - Added support for client/server disconnect protocol over rdma connection manager + - Added support for striding receive queue for hardware tag-matching + - Added XPMEM-based rendezvous protocol for shared memory + - Added support shared memory communication between containers on same machine + - Added support for multi-threaded RDMA memory registration for large regions + - Added new test cases to Azure CI + - Added support for multiple listening transports + - Added UCT socket-based connection manager transport + - Updated API for UCT component management + - Added API to retrieve the listening port + - Added UCP active message API + - Removed deprecated API for querying UCT memory domains + - Refactored server/client examples + - Added support for dlopen interception in UCM + - Added support for PCIe atomics + - Updated Java API: added support for most of UCP layer operations + - Updated support for Mellanox DevX API + - Added multiple UCT/TCP transport performance optimizations + - Optimized memcpy() for Intel platforms + - 
Added protection from non-UCX socket based app connections + - Improved search time for PKEY object + - Enabled gtest over IPv6 interfaces + - Updated Mellanox and Bull device IDs + - Added support for CUDA_VISIBLE_DEVICES + - Increased limits for CUDA IPC registration + - Bugfixes: + - Multiple fixes in JUCX + - Fixes in UCP thread safety + - Fixes for most recent versions GCC, PGI, and ICC + - Fixes for CPU affinity on Azure instances + - Fixes in XPMEM support on PPC64 + - Performance fixes in CUDA IPC + - Fixes in RDMA CM flows + - Multiple fixes in TCP transport + - Multiple fixes in documentation + - Fixes in transport lane selection logic + - Fixes in Java jar build + - Fixes in socket connection manager for Nvidia DGX-2 platform + - Multiple fixes in UCP, UCT, UCM libraries + - Multiple fixes for BSD and Mac OS systems + - Fixes for Clang compiler + - Fix CPU optimization configuration options + - Fix JUCX build on GPU nodes + - Fix in Azure release pipeline flow + - Fix in CUDA memory hooks management + - Fix in GPU memory peer direct gtest + - Fix in TCP connection establishment flow + - Fix in GPU IPC check + - Fix in CUDA Jenkins test flow + - Multiple fixes in CUDA IPC flow + - Fix adding missing header files + - Fix to prevent failures in presence of VPN enabled Ethernet interfaces +- Refresh openucx-s390x-support.patch against new version + +------------------------------------------------------------------- +Fri Oct 4 08:11:49 UTC 2019 - Jan Engelhardt + +- Ensure /usr/lib/ucx is owned at all times. 
+ +------------------------------------------------------------------- +Wed Sep 18 10:16:05 UTC 2019 - Nicolas Morey-Chaisemartin + +- Update to v1.6.0 + - Features: + - Modular architecture for UCT transports + - ROCm transport re-design: support for managed memory, direct copy, ROCm GDR + - Random scheduling policy for DC transport + - Optimized out-of-box settings for multi-rail + - Added support for OmniPath (using Verbs) + - Support for PCI atomics with IB transports + - Reduced UCP address size for homogeneous environments + - Bugfixes: + - Multiple stability and performance improvements in TCP transport + - Multiple stability fixes in Verbs and MLX5 transports + - Multiple stability fixes in UCM memory hooks + - Multiple stability fixes in UGNI transport + - RPM Spec file cleanup + - Fixing compilation issues with most recent clang and gcc compilers + - Fixing the wrong name of aliases + - Fix data race in UCP wireup + - Fix segfault when libuct.so is reloaded - issue #3558 + - Include Java sources in distribution + - Handle EADDRNOTAVAIL in rdma_cm connection manager + - Disable ibcm on RHEL7+ by default + - Fix data race in UCP proxy endpoint + - Static checker fixes + - Fallback to ibv_create_cq() if ibv_create_cq_ex() returns ENOSYS + - Fix malloc hooks test + - Fix checking return status in ucp_client_server example + - Fix gdrcopy libdir config value + - Fix printing atomic capabilities in ucx_info + - Fix perftest warmup iterations to be non-zero + - Fixing default values for configure logic + - Fix race condition updating fired_events from multiple threads + - Fix madvise() hook +- Refresh openucx-s390x-support.patch against new version + +------------------------------------------------------------------- +Wed May 15 05:52:55 UTC 2019 - Nicolas Morey-Chaisemartin + +- Disable Werror to handle boo#1121267 + +------------------------------------------------------------------- +Mon Feb 25 07:56:39 UTC 2019 - nmorey + +- Update 
openucx-s390x-support.patch to fix support of 1.5.0 on s390x (bsc#1121267) +- Add baselibs.conf for ppc + +------------------------------------------------------------------- +Fri Feb 22 12:11:57 UTC 2019 - Martin Liška + +- Update to v1.5.0 (bsc#1121267) + * Features: + + * New emulation mode enabling full UCX functionality (Atomic, Put, Get) + * over TCP and RDMA-CORE interconnects which don't implement full RDMA semantics + * Non-blocking API for all one-sided operations. All blocking communication APIs marked + * as deprecated + * New client/server connection establishment API, which allows connected handover between workers + * Support for rdma-core direct-verbs (DEVX) and DC with mlx5 transports + * GPU - Support for stream API and receive side pipelining + * Malloc hooks using binary instrumentation instead of symbol override + * Statistics for UCT tag API + * GPU-to-Infiniband HCA affinity support based on locality/distance (PCIe) + * Bugfixes: + + * Fix overflow in RC/DC flush operations + * Update description in SPEC file and README + * Fix RoCE source port for dc_mlx5 flow control + * Improve ucx_info help message + * Fix segfault in UCP, due to int truncation in count_one_bits() + * Multiple other bugfixes (full list on github) + * Tested configurations: + + * InfiniBand: MLNX_OFED 4.4-4.5, distribution inbox drivers, rdma-core + * CUDA: gdrcopy 1.2, cuda 9.1.85 + * XPMEM: 2.6.2 + * KNEM: 1.1.2 + +------------------------------------------------------------------- +Tue Nov 6 07:18:34 UTC 2018 - nmoreychaisemartin@suse.com + +- Update to v1.4.0 (bsc#1103494) + * Features: + * Improved support for installation with latest ROCm + * Improved support for latest rdma-core + * Added support for CUDA IPC for intra-node GPU, CUDA memory + allocation cache for mem-type detection, latest Mellanox + devices, Nvidia GPU managed memory, multiple connections + between the same pair of workers, large worker address for + client/server connection establishment and 
INADDR_ANY, and + for bitwise atomics operations. + * Bugfixes: + * Performance fixes for rendezvous protocol + * Memory hook fixes + * Clang support fixes + * Self tl multi-rail fix + * Thread safety fixes in IB/RDMA transport + * Compilation fixes with upstream rdma-core + * Multiple minor bugfixes (full list on github) + * Segfault fix for a code generated by armclang compiler + * UCP memory-domain index fix for zero-copy active messages + +------------------------------------------------------------------- +Mon Oct 15 07:51:12 UTC 2018 - nmoreychaisemartin@suse.com + +- Update to v1.3.1 (fate#325996) + - Prevent potential out-of-order sending in shared memory active messages + - CUDA: Include cudamem.h in source tarball, pass cudaFree memory size + - Registration cache: fix large range lookup, handle shmat(REMAP)/mmap(FIXED) + - Limit IB CQE size for specific ARM boards + +------------------------------------------------------------------- +Thu Aug 9 05:57:24 UTC 2018 - nmoreychaisemartin@suse.com + +- Update to v1.3.0 (bsc#1104159) + - Added stream-based communication API to UCP + - Added support for GPU platforms: Nvidia CUDA and AMD ROCM software stacks + - Added API for client/server based connection establishment + - Added support for TCP transport + - Support for InfiniBand tag-matching offload for DC and accelerated transports + - Multi-rail support for eager and rendezvous protocols + - Added support for tag-matching communications with CUDA buffers + - Added ucp_rkey_ptr() to obtain pointer for shared memory region + - Avoid progress overhead on unused transports + - Improved scalability of software tag-matching by using a hash table + - Added transparent huge-pages allocator + - Added non-blocking flush and disconnect for UCP + - Support fixed-address memory allocation via ucp_mem_map() + - Added ucp_tag_send_nbr() API to avoid send request allocation + - Support global addressing in all IB transports + - Add support for external epoll fd and 
edge-triggered events + - Added registration cache for knem + - Initial support for Java bindings + - Multiple bugfixes (full list on github) +- Drop UCT-UD-fixed-compilation-by-gcc8.patch as it was fixed upstream +- Refresh openucx-s390x-support.patch against latest sources + +------------------------------------------------------------------- +Wed Jun 13 12:45:34 UTC 2018 - nmoreychaisemartin@suse.com + +- Remove libnuma-devel on s390x for older releases + +------------------------------------------------------------------- +Tue Mar 27 07:12:37 UTC 2018 - nmoreychaisemartin@suse.com + +- Add UCT-UD-fixed-compilation-by-gcc8.patch to fix compilation + with GCC8 (bsc#1084635) + +------------------------------------------------------------------- +Sat Jan 20 15:40:43 UTC 2018 - jengelh@inai.de + +- Use right documentation path. + +------------------------------------------------------------------- +Fri Jan 19 10:12:04 UTC 2018 - nmoreychaisemartin@suse.com + +- Update to 1.2.2 + - Support including UCX API headers from C++ code + - UD transport to handle unicast flood on RoCE fabric + - Compilation fixes for gcc 7.1.1, clang 3.6, clang 5 + - When UD transport is used with RoCE, packets intended for other peers may + arrive on different adapters (as a result of unicast flooding). + - This change adds packet filtering based on destination GIDs. Now the packet + is silently dropped, if its destination GID does not match the local GID. + - Added a new device ID for InfiniBand HCA + +------------------------------------------------------------------- +Fri Dec 8 21:19:11 UTC 2017 - dimstar@opensuse.org + +- Drop doxygen BuildRequires: The documentation was already not + built with this enabled. 
Removing the BR causes no regression in + the package but eliminates a build cycle + boost -> curl -> doxygen -> openucx -> boost + +------------------------------------------------------------------- +Tue Sep 19 13:52:13 UTC 2017 - jengelh@inai.de + +- Rediff openucx-s390x-support.patch as p1 to be in line with + potential git-generated patches. + +------------------------------------------------------------------- +Tue Sep 19 09:26:07 UTC 2017 - nmoreychaisemartin@suse.com + +- Switch to version 1.2.1 (Fate#324050) + Previous 1.3+ version was based on a development branch. + + Supported platforms + - Shared memory: KNEM, CMA, XPMEM, SYSV, Posix + - VERBs over InfiniBand and RoCE. + VERBS over other RDMA interconnects (iWarp, OmniPath, etc.) is available + for community evaluation and has not been tested in context of this release + - Cray Gemini and Aries + - Architectures: x86_64, ARMv8 (64bit), Power64 + Features: + - Added support for InfiniBand DC and UD transports, including accelerated verbs for Mellanox devices + - Full support for PGAS/SHMEM interfaces, blocking and non-blocking APIs + - Support for MPI tag matching, both in software and offload mode + - Zero copy protocols and rendezvous, registration cache + - Handling transport errors + - Flow control for DC/RC + - Datatypes support: contiguous, IOV, generic + - Multi-threading support + - Support for ARMv8 64bit architecture + - A new API for efficient memory polling + - Support for malloc-hooks and memory registration caching + +------------------------------------------------------------------- +Fri Jun 30 09:30:58 UTC 2017 - nmoreychaisemartin@suse.com + +- Disable avx at configure level + +------------------------------------------------------------------- +Wed Jun 28 16:46:31 UTC 2017 - nmoreychaisemartin@suse.com + +- Add openucx-s390x-support.patch to fix compilation on s390x +- Compile openucx on s390x + +------------------------------------------------------------------- +Thu Jun 8 12:12:59 
UTC 2017 - nmoreychaisemartin@suse.com + +- Fix compilation on ppc + +------------------------------------------------------------------- +Fri May 26 08:29:51 UTC 2017 - jengelh@inai.de + +- Update to snapshot 1.3+git44 + * No changelog was found +- Add -Wno-error and disable AVX/SSE as it is not guaranteed + to exist. + +------------------------------------------------------------------- +Sat Jun 18 07:36:59 UTC 2016 - jengelh@inai.de + +- Update to snapshot 0~git1727 +* New: libucm. libucm is a standalone non-unloadable library which + installs hooks for virtual memory changes in the current process. + +------------------------------------------------------------------- +Sun Sep 13 18:35:15 UTC 2015 - jengelh@inai.de + +- Update to snapshot 0~git862 +* License clarification on upstream's behalf + +------------------------------------------------------------------- +Mon Jul 27 18:32:48 UTC 2015 - jengelh@inai.de + +- Initial package for build.opensuse.org (version 0~git713) diff --git a/openucx.spec b/openucx.spec new file mode 100644 index 0000000..99ef109 --- /dev/null +++ b/openucx.spec @@ -0,0 +1,245 @@ +# +# spec file for package openucx +# +# Copyright (c) 2023 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +# Use for pulling RC versions +%define version_suf %{nil} + +Name: openucx +Version: 1.15.0 +Release: 0 +Summary: Communication layer for Message Passing (MPI) +License: BSD-3-Clause +Group: Development/Libraries/C and C++ +URL: http://openucx.org/ + +#Git-Clone: git://github.com/openucx/ucx +#Git-Web: https://github.com/openucx/ucx +Source: https://github.com/openucx/ucx/releases/download/v%version%{?version_suf}/ucx-%version.tar.gz +Patch1: openucx-s390x-support.patch +Patch2: ucm-fix-UCX_MEM_MALLOC_RELOC.patch +BuildRequires: autoconf >= 2.63 +BuildRequires: automake >= 1.10 +BuildRequires: binutils-devel +BuildRequires: gcc-c++ +BuildRequires: libibverbs-devel +%if 0%{?suse_version} < 1330 +%ifnarch s390x +BuildRequires: libnuma-devel +%endif +%else +BuildRequires: libnuma-devel +%endif +BuildRequires: libtool +BuildRequires: pkg-config +BuildRequires: zlib-devel +BuildRoot: %{_tmppath}/%{name}-%{version}-build +ExclusiveArch: aarch64 %power64 x86_64 s390x + +%description +UCX stands for Unified Communication X. UCX provides a communication +layer for Message Passing (MPI), PGAS/OpenSHMEM libraries and +RPC/data-centric applications. UCX utilizes networks such as RDMA +(InfiniBand, RoCE, etc), Cray Gemini or Aries, for inter-node +communication, or TCP. UCX supports transfer of data in either main +memory (RAM) or GPU memory (through CUDA and ROCm libraries). UCX +provides intra-node communication by using shared memory mechanisms. + +%package tools +Summary: OpenUCX utilities +Group: System/Console + +%description tools +Miscellaneous utilities for Unified Communication X. + +%package -n libucm0 +Summary: Memory (un)mapping hooks for Unified Communication X +Group: System/Libraries + +%description -n libucm0 +libucm is a standalone non-unloadable library which installs hooks +for virtual memory changes in the current process. 
+ +%package -n libucm-devel +Summary: Development files for Unified Communication X Memory Hooks +Group: Development/Libraries/C and C++ +Requires: libucm0 = %version + +%description -n libucm-devel +libucm is a standalone non-unloadable library which installs hooks +for virtual memory changes in the current process. + +%package -n libucp0 +Summary: Infiniband Unified Communication Protocols +Group: System/Libraries + +%description -n libucp0 +High-level API uses UCT framework to construct protocols commonly +found in applications (MPI, OpenSHMEM, PGAS, etc.) + +%package -n libucp-devel +Summary: Development files for Unified Communication Protocols (UC-P) +Group: Development/Libraries/C and C++ +Requires: libucp0 = %version + +%description -n libucp-devel +High-level API uses UCT framework to construct protocols commonly +found in applications (MPI, OpenSHMEM, PGAS, etc.) + +%package -n libucs0 +Summary: Infiniband Unified Communication Services +Group: System/Libraries + +%description -n libucs0 +This framework provides infrastructure for component based +programming, memory management, and system utilities. + +%package -n libucs-devel +Summary: Development files for Unified Communication Services (UC-S) +Group: Development/Libraries/C and C++ +Requires: libucs0 = %version + +%description -n libucs-devel +This framework provides infrastructure for component based +programming, memory management, and system utilities. + +%package -n libuct0 +Summary: Infiniband Unified Communication Transport +Group: System/Libraries + +%description -n libuct0 +Low-level API that exposes network operations supported by underlying +hardware. + +%package -n libuct-devel +Summary: Development files for Unified Communication Transport (UC-T) +Group: Development/Libraries/C and C++ +Requires: libuct0 = %version + +%description -n libuct-devel +Low-level API that exposes network operations supported by underlying +hardware. 
+ +%prep +%setup -qn ucx-%version +%ifarch s390x +%patch1 +%endif +%patch2 + +%build +autoreconf -fi +export UCX_CFLAGS="%optflags -Wno-error" +%ifarch x86_64 +export UCX_CFLAGS="$UCX_CFLAGS -mno-avx" +%endif +%ifarch %ix86 +export UCX_CFLAGS="$UCX_CFLAGS -mno-sse -mno-sse2" +%endif +%configure --disable-static --without-avx \ +%if 0%{?suse_version} < 1330 +%ifarch s390x + --disable-numa \ +%endif +%endif + --docdir="%_docdir/%name" \ + --disable-debug --disable-assertions \ + --disable-params-check \ + --with-rc --with-ud --with-dc \ + --with-mlx5-dv --with-rdmacm + +# Override BASE_CFLAGS to disable Werror (boo#1121267) +make %{?_smp_mflags} V=1 BASE_CFLAGS="-g -Wall" + +%post -n libucp0 -p /sbin/ldconfig +%postun -n libucp0 -p /sbin/ldconfig +%post -n libucs0 -p /sbin/ldconfig +%postun -n libucs0 -p /sbin/ldconfig +%post -n libuct0 -p /sbin/ldconfig +%postun -n libuct0 -p /sbin/ldconfig +%post -n libucm0 -p /sbin/ldconfig +%postun -n libucm0 -p /sbin/ldconfig + +%install +%make_install +rm -fv "%buildroot/%_libdir"/*.la +rm -fv "%buildroot/%_libdir"/ucx/*.la +# Rename example dir for consistency with the package name +mv %buildroot/%_datadir/ucx %buildroot/%_datadir/openucx +# io_demo should not be in %{bindir} and more likely in %{libexec} +mkdir -p %buildroot/%_libexecdir/%{name}/ +mv %buildroot/%_bindir/io_demo %buildroot/%_libexecdir/%{name}/ + +%files tools +%defattr(-,root,root) +%_bindir/ucx_* +%_datadir/%{name}/ +%_libexecdir/%{name} +%_libdir/pkgconfig/ucx.pc +%dir %_libdir/cmake/ +%_libdir/cmake/ucx/ +%license LICENSE +%doc NEWS + +%files -n libucm0 +%defattr(-,root,root) +%_libdir/libucm.so.* + +%files -n libucm-devel +%defattr(-,root,root) +%_includedir/ucm/ +%_libdir/libucm.so + +%files -n libucp0 +%defattr(-,root,root) +%_libdir/libucp.so.* + +%files -n libucp-devel +%defattr(-,root,root) +%_includedir/ucp/ +%_libdir/libucp.so + +%files -n libucs0 +%defattr(-,root,root) +%_libdir/libucs.so.* +%_libdir/libucs_signal.so.* + +%files -n libucs-devel 
+%defattr(-,root,root) +%_includedir/ucs/ +%_libdir/libucs.so +%_libdir/libucs_signal.so +%_libdir/pkgconfig/ucx-ucs.pc + +%files -n libuct0 +%defattr(-,root,root) +%_libdir/libuct.so.* +%dir %_libdir/ucx/ +%_libdir/ucx/libuct_*.so.* + +%files -n libuct-devel +%defattr(-,root,root) +%_includedir/uct/ +%_libdir/libuct.so +%dir %_libdir/ucx/ +%_libdir/ucx/libuct_*.so +%_libdir/pkgconfig/ucx-uct.pc +%_libdir/pkgconfig/ucx-cma.pc +%_libdir/pkgconfig/ucx-ib.pc +%_libdir/pkgconfig/ucx-rdmacm.pc + +%changelog diff --git a/ucm-fix-UCX_MEM_MALLOC_RELOC.patch b/ucm-fix-UCX_MEM_MALLOC_RELOC.patch new file mode 100644 index 0000000..90f16af --- /dev/null +++ b/ucm-fix-UCX_MEM_MALLOC_RELOC.patch @@ -0,0 +1,23 @@ +commit 57ca2c53bcef6de7ca33767cedab120afd9a7601 +Author: Nicolas Morey-Chaisemartin +Date: Thu Jan 13 12:00:19 2022 +0100 + + ucm: fix UCX_MEM_MALLOC_RELOC + + Should be disabled by default but is enabled by the parser default value + + Signed-off-by: Nicolas Morey-Chaisemartin + +diff --git src/ucs/config/ucm_opts.c src/ucs/config/ucm_opts.c +index 89cccc487231..b13360980793 100644 +--- src/ucs/config/ucm_opts.c ++++ src/ucs/config/ucm_opts.c +@@ -62,7 +62,7 @@ static ucs_config_field_t ucm_global_config_table[] = { + ucs_offsetof(ucm_global_config_t, enable_malloc_hooks), + UCS_CONFIG_TYPE_BOOL}, + +- {"MALLOC_RELOC", "yes", ++ {"MALLOC_RELOC", "no", + "Enable installing malloc symbols in the relocation table.\n" + "This is unsafe and off by default, because sometimes glibc\n" + "calls malloc/free without going through the relocation table,\n" diff --git a/ucx-1.15.0.tar.gz b/ucx-1.15.0.tar.gz new file mode 100644 index 0000000..feeb6be --- /dev/null +++ b/ucx-1.15.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b202087076bc1c98f9249144f0c277a8ea88ad4ca6f404f94baa9cb3aebda6d +size 3090730