Accepting request 811726 from science:HPC

- Update to v1.8.0

OBS-URL: https://build.opensuse.org/request/show/811726
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openucx?expand=0&rev=16
This commit is contained in:
Dominique Leuenberger 2020-06-09 22:33:43 +00:00 committed by Git OBS Bridge
commit 46c68d5620
5 changed files with 276 additions and 41 deletions

View File

@ -1,4 +1,4 @@
commit 7efd75794d17351fbcfdd2759fc9abf22af0d631 commit 02e9e9daa1adf26573bfb0d35c26cca46fb0e777
Author: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com> Author: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
Date: Thu Aug 9 07:41:24 2018 +0200 Date: Thu Aug 9 07:41:24 2018 +0200
@ -32,6 +32,20 @@ index 9c7c820d9fff..8297fc7e6ec2 100644
AS_IF([test "x$bistro_hooks_happy" = "xyes"], AS_IF([test "x$bistro_hooks_happy" = "xyes"],
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])], [AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c
index 435e5a6feb8b..9b3beed68ea4 100644
--- src/tools/info/sys_info.c
+++ src/tools/info/sys_info.c
@@ -37,7 +37,8 @@ static const char* cpu_vendor_names[] = {
[UCS_CPU_VENDOR_INTEL] = "Intel",
[UCS_CPU_VENDOR_AMD] = "AMD",
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
- [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC"
+ [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
+ [UCS_CPU_VENDOR_GENERIC_IBM] = "Generic IBM"
};
static double measure_memcpy_bandwidth(size_t size)
diff --git src/ucm/Makefile.am src/ucm/Makefile.am diff --git src/ucm/Makefile.am src/ucm/Makefile.am
index e53a30a46916..21bce848045d 100644 index e53a30a46916..21bce848045d 100644
--- src/ucm/Makefile.am --- src/ucm/Makefile.am
@ -79,10 +93,10 @@ index 000000000000..334c0474e8b6
+ +
+#endif +#endif
diff --git src/ucm/mmap/install.c src/ucm/mmap/install.c diff --git src/ucm/mmap/install.c src/ucm/mmap/install.c
index c58afb37e029..a9cfd5865278 100644 index 6824a6247bef..81d0375a4132 100644
--- src/ucm/mmap/install.c --- src/ucm/mmap/install.c
+++ src/ucm/mmap/install.c +++ src/ucm/mmap/install.c
@@ -254,7 +254,11 @@ static ucs_status_t ucs_mmap_install_reloc(int events) @@ -258,7 +258,11 @@ static ucs_status_t ucs_mmap_install_reloc(int events)
status = ucm_reloc_modify(&entry->patch); status = ucm_reloc_modify(&entry->patch);
} else { } else {
ucs_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO); ucs_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO);
@ -95,10 +109,18 @@ index c58afb37e029..a9cfd5865278 100644
if (status != UCS_OK) { if (status != UCS_OK) {
ucm_warn("failed to install %s hook for '%s'", ucm_warn("failed to install %s hook for '%s'",
diff --git src/ucs/Makefile.am src/ucs/Makefile.am diff --git src/ucs/Makefile.am src/ucs/Makefile.am
index 7e8153a4fd07..d41842711a5e 100644 index 2bd0a8d50c4b..4a60658f34d8 100644
--- src/ucs/Makefile.am --- src/ucs/Makefile.am
+++ src/ucs/Makefile.am +++ src/ucs/Makefile.am
@@ -56,6 +56,8 @@ noinst_HEADERS = \ @@ -58,6 +58,7 @@ nobase_dist_libucs_la_HEADERS = \
arch/x86_64/global_opts.h \
arch/aarch64/global_opts.h \
arch/ppc64/global_opts.h \
+ arch/s390x/global_opts.h \
arch/global_opts.h
noinst_HEADERS = \
@@ -67,6 +68,8 @@ noinst_HEADERS = \
arch/generic/cpu.h \ arch/generic/cpu.h \
arch/ppc64/bitops.h \ arch/ppc64/bitops.h \
arch/ppc64/cpu.h \ arch/ppc64/cpu.h \
@ -107,8 +129,16 @@ index 7e8153a4fd07..d41842711a5e 100644
arch/x86_64/atomic.h \ arch/x86_64/atomic.h \
arch/x86_64/bitops.h \ arch/x86_64/bitops.h \
arch/x86_64/cpu.h \ arch/x86_64/cpu.h \
@@ -108,6 +111,7 @@ libucs_la_SOURCES = \
algorithm/qsort_r.c \
arch/aarch64/cpu.c \
arch/aarch64/global_opts.c \
+ arch/s390x/global_opts.c \
arch/ppc64/timebase.c \
arch/ppc64/global_opts.c \
arch/x86_64/cpu.c \
diff --git src/ucs/arch/atomic.h src/ucs/arch/atomic.h diff --git src/ucs/arch/atomic.h src/ucs/arch/atomic.h
index 0caea9b1f3ba..d9afa780bbc5 100644 index 99e53ca5d0f7..dfa83b757bb0 100644
--- src/ucs/arch/atomic.h --- src/ucs/arch/atomic.h
+++ src/ucs/arch/atomic.h +++ src/ucs/arch/atomic.h
@@ -15,6 +15,8 @@ @@ -15,6 +15,8 @@
@ -121,7 +151,7 @@ index 0caea9b1f3ba..d9afa780bbc5 100644
# error "Unsupported architecture" # error "Unsupported architecture"
#endif #endif
diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h
index af7bb93392d4..6639045ae0b6 100644 index 91d4573c1faf..8b092b023639 100644
--- src/ucs/arch/bitops.h --- src/ucs/arch/bitops.h
+++ src/ucs/arch/bitops.h +++ src/ucs/arch/bitops.h
@@ -14,6 +14,8 @@ @@ -14,6 +14,8 @@
@ -133,11 +163,46 @@ index af7bb93392d4..6639045ae0b6 100644
#else #else
# error "Unsupported architecture" # error "Unsupported architecture"
#endif #endif
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
index e81f540a899d..40b89907664a 100644
--- src/ucs/arch/cpu.c
+++ src/ucs/arch/cpu.c
@@ -57,7 +57,11 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
[UCS_CPU_VENDOR_GENERIC_PPC] = {
.min = UCS_MEMUNITS_INF,
.max = UCS_MEMUNITS_INF
- }
+ },
+ [UCS_CPU_VENDOR_GENERIC_IBM] = {
+ .min = UCS_MEMUNITS_INF,
+ .max = UCS_MEMUNITS_INF
+ },
};
const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
@@ -65,7 +69,8 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
[UCS_CPU_VENDOR_INTEL] = 5800 * UCS_MBYTE,
[UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE,
[UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE,
- [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE
+ [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE,
+ [UCS_CPU_VENDOR_GENERIC_IBM] = 5800 * UCS_MBYTE,
};
static void ucs_sysfs_get_cache_size()
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
index 58a83825ee4a..a30456ba441d 100644 index c1f1cc826b6b..e9d5c5b32ca3 100644
--- src/ucs/arch/cpu.h --- src/ucs/arch/cpu.h
+++ src/ucs/arch/cpu.h +++ src/ucs/arch/cpu.h
@@ -59,6 +59,8 @@ typedef enum ucs_cpu_flag { @@ -58,6 +58,7 @@ typedef enum ucs_cpu_vendor {
UCS_CPU_VENDOR_AMD,
UCS_CPU_VENDOR_GENERIC_ARM,
UCS_CPU_VENDOR_GENERIC_PPC,
+ UCS_CPU_VENDOR_GENERIC_IBM,
UCS_CPU_VENDOR_LAST
} ucs_cpu_vendor_t;
@@ -91,6 +92,8 @@ typedef struct ucs_cpu_builtin_memcpy {
# include "ppc64/cpu.h" # include "ppc64/cpu.h"
#elif defined(__aarch64__) #elif defined(__aarch64__)
# include "aarch64/cpu.h" # include "aarch64/cpu.h"
@ -146,6 +211,19 @@ index 58a83825ee4a..a30456ba441d 100644
#else #else
# error "Unsupported architecture" # error "Unsupported architecture"
#endif #endif
diff --git src/ucs/arch/global_opts.h src/ucs/arch/global_opts.h
index 8786f130290a..0d251fb91868 100644
--- src/ucs/arch/global_opts.h
+++ src/ucs/arch/global_opts.h
@@ -15,6 +15,8 @@
# include "ppc64/global_opts.h"
#elif defined(__aarch64__)
# include "aarch64/global_opts.h"
+#elif defined(__s390x__)
+# include "s390x/global_opts.h"
#else
# error "Unsupported architecture"
#endif
diff --git src/ucs/arch/s390x/bitops.h src/ucs/arch/s390x/bitops.h diff --git src/ucs/arch/s390x/bitops.h src/ucs/arch/s390x/bitops.h
new file mode 100644 new file mode 100644
index 000000000000..39ad125107e9 index 000000000000..39ad125107e9
@ -186,10 +264,10 @@ index 000000000000..39ad125107e9
+#endif +#endif
diff --git src/ucs/arch/s390x/cpu.h src/ucs/arch/s390x/cpu.h diff --git src/ucs/arch/s390x/cpu.h src/ucs/arch/s390x/cpu.h
new file mode 100644 new file mode 100644
index 000000000000..b8ab713d6e21 index 000000000000..4f0a87006118
--- /dev/null --- /dev/null
+++ src/ucs/arch/s390x/cpu.h +++ src/ucs/arch/s390x/cpu.h
@@ -0,0 +1,53 @@ @@ -0,0 +1,84 @@
+/** +/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED.
@ -204,10 +282,14 @@ index 000000000000..b8ab713d6e21
+#include <ucs/sys/compiler.h> +#include <ucs/sys/compiler.h>
+#include <ucs/arch/generic/cpu.h> +#include <ucs/arch/generic/cpu.h>
+#include <stdint.h> +#include <stdint.h>
+#include <string.h>
+#include <ucs/type/status.h>
+ +
+ +
+#define UCS_ARCH_CACHE_LINE_SIZE 256 +#define UCS_ARCH_CACHE_LINE_SIZE 256
+ +
+BEGIN_C_DECLS
+
+/* Assume the worst - weak memory ordering */ +/* Assume the worst - weak memory ordering */
+#define ucs_memory_bus_fence() asm volatile (""::: "memory") +#define ucs_memory_bus_fence() asm volatile (""::: "memory")
+#define ucs_memory_bus_store_fence() ucs_memory_bus_fence() +#define ucs_memory_bus_store_fence() ucs_memory_bus_fence()
@ -233,6 +315,11 @@ index 000000000000..b8ab713d6e21
+ return UCS_CPU_MODEL_UNKNOWN; + return UCS_CPU_MODEL_UNKNOWN;
+} +}
+ +
+static inline ucs_cpu_vendor_t ucs_arch_get_cpu_vendor()
+{
+ return UCS_CPU_VENDOR_GENERIC_IBM;
+}
+
+static inline int ucs_arch_get_cpu_flag() +static inline int ucs_arch_get_cpu_flag()
+{ +{
+ return UCS_CPU_FLAG_UNKNOWN; + return UCS_CPU_FLAG_UNKNOWN;
@ -242,4 +329,87 @@ index 000000000000..b8ab713d6e21
+ +
+#define ucs_arch_wait_mem ucs_arch_generic_wait_mem +#define ucs_arch_wait_mem ucs_arch_generic_wait_mem
+ +
+static inline void ucs_cpu_init()
+{
+}
+
+static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len)
+{
+ return memcpy(dst, src, len);
+}
+
+static UCS_F_ALWAYS_INLINE void
+ucs_memcpy_nontemporal(void *dst, const void *src, size_t len)
+{
+ memcpy(dst, src, len);
+}
+
+static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes)
+{
+ return UCS_ERR_UNSUPPORTED;
+}
+
+END_C_DECLS
+
+#endif +#endif
diff --git src/ucs/arch/s390x/global_opts.c src/ucs/arch/s390x/global_opts.c
new file mode 100644
index 000000000000..4fa0c74034a7
--- /dev/null
+++ src/ucs/arch/s390x/global_opts.c
@@ -0,0 +1,24 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#if defined(__s390x__)
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <ucs/arch/global_opts.h>
+#include <ucs/config/parser.h>
+
+ucs_config_field_t ucs_arch_global_opts_table[] = {
+ {NULL}
+};
+
+void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config)
+{
+}
+
+#endif
diff --git src/ucs/arch/s390x/global_opts.h src/ucs/arch/s390x/global_opts.h
new file mode 100644
index 000000000000..225e4e5e896a
--- /dev/null
+++ src/ucs/arch/s390x/global_opts.h
@@ -0,0 +1,25 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+
+#ifndef UCS_PPC64_GLOBAL_OPTS_H_
+#define UCS_PPC64_GLOBAL_OPTS_H_
+
+#include <ucs/sys/compiler_def.h>
+
+BEGIN_C_DECLS
+
+#define UCS_ARCH_GLOBAL_OPTS_INITALIZER {}
+
+/* built-in memcpy config */
+typedef struct ucs_arch_global_opts {
+ char dummy;
+} ucs_arch_global_opts_t;
+
+END_C_DECLS
+
+#endif
+

View File

@ -1,3 +1,74 @@
-------------------------------------------------------------------
Fri Jun 5 09:38:40 UTC 2020 - Jan Engelhardt <jengelh@inai.de>
- Trim bias and filler wording from descriptions.
-------------------------------------------------------------------
Thu Jun 4 08:18:26 UTC 2020 - Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
- Update to v1.8.0
- Features:
- Improved detection for DEVX support
- Improved TCP scalability
- Added support for ROCM to perftest
- Added support for different source and target memory types to perftest
- Added optimized memcpy for ROCM devices
- Added hardware tag-matching for CUDA buffers
- Added support for CUDA and ROCM managed memories
- Added support for client/server disconnect protocol over rdma connection manager
- Added support for striding receive queue for hardware tag-matching
- Added XPMEM-based rendezvous protocol for shared memory
- Added support for shared memory communication between containers on the same machine
- Added support for multi-threaded RDMA memory registration for large regions
- Added new test cases to Azure CI
- Added support for multiple listening transports
- Added UCT socket-based connection manager transport
- Updated API for UCT component management
- Added API to retrieve the listening port
- Added UCP active message API
- Removed deprecated API for querying UCT memory domains
- Refactored server/client examples
- Added support for dlopen interception in UCM
- Added support for PCIe atomics
- Updated Java API: added support for most of UCP layer operations
- Updated support for Mellanox DevX API
- Added multiple UCT/TCP transport performance optimizations
- Optimized memcpy() for Intel platforms
- Added protection from non-UCX socket based app connections
- Improved search time for PKEY object
- Enabled gtest over IPv6 interfaces
- Updated Mellanox and Bull device IDs
- Added support for CUDA_VISIBLE_DEVICES
- Increased limits for CUDA IPC registration
- Bugfixes:
- Multiple fixes in JUCX
- Fixes in UCP thread safety
- Fixes for most recent versions of GCC, PGI, and ICC
- Fixes for CPU affinity on Azure instances
- Fixes in XPMEM support on PPC64
- Performance fixes in CUDA IPC
- Fixes in RDMA CM flows
- Multiple fixes in TCP transport
- Multiple fixes in documentation
- Fixes in transport lane selection logic
- Fixes in Java jar build
- Fixes in socket connection manager for Nvidia DGX-2 platform
- Multiple fixes in UCP, UCT, UCM libraries
- Multiple fixes for BSD and Mac OS systems
- Fixes for Clang compiler
- Fix CPU optimization configuration options
- Fix JUCX build on GPU nodes
- Fix in Azure release pipeline flow
- Fix in CUDA memory hooks management
- Fix in GPU memory peer direct gtest
- Fix in TCP connection establishment flow
- Fix in GPU IPC check
- Fix in CUDA Jenkins test flow
- Multiple fixes in CUDA IPC flow
- Fix adding missing header files
- Fix to prevent failures in presence of VPN-enabled Ethernet interfaces
- Refresh openucx-s390x-support.patch against new version
------------------------------------------------------------------- -------------------------------------------------------------------
Fri Oct 4 08:11:49 UTC 2019 - Jan Engelhardt <jengelh@inai.de> Fri Oct 4 08:11:49 UTC 2019 - Jan Engelhardt <jengelh@inai.de>

View File

@ -1,7 +1,7 @@
# #
# spec file for package openucx # spec file for package openucx
# #
# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # Copyright (c) 2020 SUSE LLC
# #
# All modifications and additions to the file contributed by third parties # All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed # remain the property of their copyright owners, unless otherwise agreed
@ -17,12 +17,12 @@
Name: openucx Name: openucx
Summary: Unifieid Communication X Version: 1.8.0
Release: 0
Summary: Communication layer for Message Passing (MPI)
License: BSD-3-Clause License: BSD-3-Clause
Group: Development/Libraries/C and C++ Group: Development/Libraries/C and C++
Version: 1.6.0 URL: http://openucx.org/
Release: 0
Url: http://openucx.org/
#Git-Clone: git://github.com/openucx/ucx #Git-Clone: git://github.com/openucx/ucx
#Git-Web: https://github.com/openucx/ucx #Git-Web: https://github.com/openucx/ucx
@ -48,14 +48,13 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build
ExclusiveArch: aarch64 %power64 x86_64 s390x ExclusiveArch: aarch64 %power64 x86_64 s390x
%description %description
UCX stands for Unified Communication X. UCX provides an optimized communication UCX stands for Unified Communication X. UCX provides a communication
layer for Message Passing (MPI), PGAS/OpenSHMEM libraries and RPC/data-centric layer for Message Passing (MPI), PGAS/OpenSHMEM libraries and
applications. UCX utilizes high-speed networks, such as RDMA (InfiniBand, RoCE, RPC/data-centric applications. UCX utilizes networks such as RDMA
etc), Cray Gemini or Aries, for inter-node communication. If no such network is (InfiniBand, RoCE, etc), Cray Gemini or Aries, for inter-node
available, TCP is used instead. UCX supports efficient transfer of data in communication, or TCP. UCX supports transfer of data in either main
either main memory (RAM) or GPU memory (through CUDA and ROCm libraries). memory (RAM) or GPU memory (through CUDA and ROCm libraries). UCX
In addition, UCX provides efficient intra-node communication, by leveraging the provides intra-node communication by using shared memory mechanisms.
following shared memory mechanisms: posix, sysv, cma, knem, and xpmem.
%package tools %package tools
Summary: OpenUCX utilities Summary: OpenUCX utilities
@ -70,12 +69,7 @@ Group: System/Libraries
%description -n libucm0 %description -n libucm0
libucm is a standalone non-unloadable library which installs hooks libucm is a standalone non-unloadable library which installs hooks
for virtual memory changes in the current process. Then, it calls for virtual memory changes in the current process.
user-defined callbacks, which may potentially override the default
behavior, or just passively listen and update their own data. libucm
does not use libuct, to avoid making it non-unloadable as well, and
impelements a basic logging service which is safe to use from malloc
hooks.
%package -n libucm-devel %package -n libucm-devel
Summary: Development files for Unified Communication X Memory Hooks Summary: Development files for Unified Communication X Memory Hooks
@ -108,8 +102,8 @@ Summary: Infiniband Unicified Communication Services
Group: System/Libraries Group: System/Libraries
%description -n libucs0 %description -n libucs0
This framework provides basic infrastructure for component based This framework provides infrastructure for component based
programming, memory management, and useful system utilities. programming, memory management, and system utilities.
%package -n libucs-devel %package -n libucs-devel
Summary: Development files for Unified Communication Services (UC-S) Summary: Development files for Unified Communication Services (UC-S)
@ -117,16 +111,16 @@ Group: Development/Libraries/C and C++
Requires: libucs0 = %version Requires: libucs0 = %version
%description -n libucs-devel %description -n libucs-devel
This framework provides basic infrastructure for component based This framework provides infrastructure for component based
programming, memory management, and useful system utilities. programming, memory management, and system utilities.
%package -n libuct0 %package -n libuct0
Summary: Infiniband Unified Communication Transport Summary: Infiniband Unified Communication Transport
Group: System/Libraries Group: System/Libraries
%description -n libuct0 %description -n libuct0
Low-level API that expose basic network operations supported by Low-level API that expose network operations supported by underlying
underlying hardware. hardware.
%package -n libuct-devel %package -n libuct-devel
Summary: Development files for Unified Communication Transport (UC-T) Summary: Development files for Unified Communication Transport (UC-T)
@ -134,8 +128,8 @@ Group: Development/Libraries/C and C++
Requires: libuct0 = %version Requires: libuct0 = %version
%description -n libuct-devel %description -n libuct-devel
Low-level API that expose basic network operations supported by Low-level API that expose network operations supported by underlying
underlying hardware. hardware.
%prep %prep
%setup -qn ucx-%version %setup -qn ucx-%version

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:360e885dd7f706a19b673035a3477397d100a02eb618371697c7f3ee4e143e2c
size 2078802

3
ucx-1.8.0.tar.gz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e400f7aa5354971c8f5ac6b881dc2846143851df868088c37d432c076445628d
size 2352265