openucx/openucx-s390x-support.patch
Jan Engelhardt 9033dd246f Accepting request 811684 from home:NMoreyChaisemartin:branches:science:HPC
- Update to v1.8.0
  - Features:
    - Improved detection for DEVX support
    - Improved TCP scalability
    - Added support for ROCM to perftest
    - Added support for different source and target memory types to perftest
    - Added optimized memcpy for ROCM devices
    - Added hardware tag-matching for CUDA buffers
    - Added support for CUDA and ROCM managed memories
    - Added support for client/server disconnect protocol over rdma connection manager
    - Added support for striding receive queue for hardware tag-matching
    - Added XPMEM-based rendezvous protocol for shared memory
    - Added support for shared memory communication between containers on the same machine
    - Added support for multi-threaded RDMA memory registration for large regions
    - Added new test cases to Azure CI
    - Added support for multiple listening transports
    - Added UCT socket-based connection manager transport
    - Updated API for UCT component management
    - Added API to retrieve the listening port
    - Added UCP active message API
    - Removed deprecated API for querying UCT memory domains
    - Refactored server/client examples
    - Added support for dlopen interception in UCM
    - Added support for PCIe atomics
    - Updated Java API: added support for most of UCP layer operations
    - Updated support for Mellanox DevX API
    - Added multiple UCT/TCP transport performance optimizations
    - Optimized memcpy() for Intel platforms
    - Added protection from non-UCX socket based app connections
    - Improved search time for PKEY object
    - Enabled gtest over IPv6 interfaces
    - Updated Mellanox and Bull device IDs
    - Added support for CUDA_VISIBLE_DEVICES
    - Increased limits for CUDA IPC registration
  - Bugfixes:
    - Multiple fixes in JUCX
    - Fixes in UCP thread safety
    - Fixes for most recent versions of GCC, PGI, and ICC
    - Fixes for CPU affinity on Azure instances
    - Fixes in XPMEM support on PPC64
    - Performance fixes in CUDA IPC
    - Fixes in RDMA CM flows
    - Multiple fixes in TCP transport
    - Multiple fixes in documentation
    - Fixes in transport lane selection logic
    - Fixes in Java jar build
    - Fixes in socket connection manager for Nvidia DGX-2 platform
    - Multiple fixes in UCP, UCT, UCM libraries
    - Multiple fixes for BSD and Mac OS systems
    - Fixes for Clang compiler
    - Fix CPU optimization configuration options
    - Fix JUCX build on GPU nodes
    - Fix in Azure release pipeline flow
    - Fix in CUDA memory hooks management
    - Fix in GPU memory peer direct gtest
    - Fix in TCP connection establishment flow
    - Fix in GPU IPC check
    - Fix in CUDA Jenkins test flow
    - Multiple fixes in CUDA IPC flow
    - Fix adding missing header files
    - Fix to prevent failures in presence of VPN enabled Ethernet interfaces
- Refresh openucx-s390x-support.patch against new version

OBS-URL: https://build.opensuse.org/request/show/811684
OBS-URL: https://build.opensuse.org/package/show/science:HPC/openucx?expand=0&rev=37
2020-06-05 08:02:58 +00:00

416 lines
11 KiB
Diff

commit 02e9e9daa1adf26573bfb0d35c26cca46fb0e777
Author: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
Date: Thu Aug 9 07:41:24 2018 +0200
openucx s390x support
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
diff --git config/m4/ucm.m4 config/m4/ucm.m4
index 9c7c820d9fff..8297fc7e6ec2 100644
--- config/m4/ucm.m4
+++ config/m4/ucm.m4
@@ -86,9 +86,20 @@ AC_CHECK_DECLS([SYS_ipc],
[ipc_hooks_happy=no],
[#include <sys/syscall.h>])
+
+SAVE_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS -Isrc/"
+bistro_arch_happy=yes
+AC_CHECK_DECLS([ucm_bistro_patch],
+ [],
+ [bistro_arch_happy=no],
+ [#include <ucm/bistro/bistro.h>])
+CFLAGS=$SAVE_CFLAGS
+
AS_IF([test "x$mmap_hooks_happy" = "xyes"],
AS_IF([test "x$ipc_hooks_happy" = "xyes" -o "x$shm_hooks_happy" = "xyes"],
- [bistro_hooks_happy=yes]))
+ AS_IF([test "x$bistro_arch_happy" = "xyes"],
+ [bistro_hooks_happy=yes])))
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
diff --git src/tools/info/sys_info.c src/tools/info/sys_info.c
index 435e5a6feb8b..9b3beed68ea4 100644
--- src/tools/info/sys_info.c
+++ src/tools/info/sys_info.c
@@ -37,7 +37,8 @@ static const char* cpu_vendor_names[] = {
[UCS_CPU_VENDOR_INTEL] = "Intel",
[UCS_CPU_VENDOR_AMD] = "AMD",
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
- [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC"
+ [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
+ [UCS_CPU_VENDOR_GENERIC_IBM] = "Generic IBM"
};
static double measure_memcpy_bandwidth(size_t size)
diff --git src/ucm/Makefile.am src/ucm/Makefile.am
index e53a30a46916..21bce848045d 100644
--- src/ucm/Makefile.am
+++ src/ucm/Makefile.am
@@ -30,7 +30,8 @@ noinst_HEADERS = \
bistro/bistro.h \
bistro/bistro_x86_64.h \
bistro/bistro_aarch64.h \
- bistro/bistro_ppc64.h
+ bistro/bistro_ppc64.h \
+ bistro/bistro_s390x.h
libucm_la_SOURCES = \
event/event.c \
diff --git src/ucm/bistro/bistro.h src/ucm/bistro/bistro.h
index 16e988700c35..b4c2762fb5b2 100644
--- src/ucm/bistro/bistro.h
+++ src/ucm/bistro/bistro.h
@@ -20,6 +20,8 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t;
# include "bistro_aarch64.h"
#elif defined(__x86_64__)
# include "bistro_x86_64.h"
+#elif defined(__s390x__)
+# include "bistro_s390x.h"
#else
# error "Unsupported architecture"
#endif
diff --git src/ucm/bistro/bistro_s390x.h src/ucm/bistro/bistro_s390x.h
new file mode 100644
index 000000000000..334c0474e8b6
--- /dev/null
+++ src/ucm/bistro/bistro_s390x.h
@@ -0,0 +1,13 @@
+#ifndef UCM_BISTRO_BISTRO_S390X_H_
+#define UCM_BISTRO_BISTRO_S390X_H_
+
+#include <stdint.h>
+
+#include <ucs/type/status.h>
+#include <ucs/sys/compiler_def.h>
+
+#define UCM_BISTRO_PROLOGUE
+#define UCM_BISTRO_EPILOGUE
+
+
+#endif
diff --git src/ucm/mmap/install.c src/ucm/mmap/install.c
index 6824a6247bef..81d0375a4132 100644
--- src/ucm/mmap/install.c
+++ src/ucm/mmap/install.c
@@ -258,7 +258,11 @@ static ucs_status_t ucs_mmap_install_reloc(int events)
status = ucm_reloc_modify(&entry->patch);
} else {
ucs_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO);
+#if UCM_BISTRO_HOOKS
status = ucm_bistro_patch(entry->patch.symbol, entry->patch.value, NULL);
+#else
+ status = UCS_ERR_UNSUPPORTED;
+#endif
}
if (status != UCS_OK) {
ucm_warn("failed to install %s hook for '%s'",
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
index 2bd0a8d50c4b..4a60658f34d8 100644
--- src/ucs/Makefile.am
+++ src/ucs/Makefile.am
@@ -58,6 +58,7 @@ nobase_dist_libucs_la_HEADERS = \
arch/x86_64/global_opts.h \
arch/aarch64/global_opts.h \
arch/ppc64/global_opts.h \
+ arch/s390x/global_opts.h \
arch/global_opts.h
noinst_HEADERS = \
@@ -67,6 +68,8 @@ noinst_HEADERS = \
arch/generic/cpu.h \
arch/ppc64/bitops.h \
arch/ppc64/cpu.h \
+ arch/s390x/bitops.h \
+ arch/s390x/cpu.h \
arch/x86_64/atomic.h \
arch/x86_64/bitops.h \
arch/x86_64/cpu.h \
@@ -108,6 +111,7 @@ libucs_la_SOURCES = \
algorithm/qsort_r.c \
arch/aarch64/cpu.c \
arch/aarch64/global_opts.c \
+ arch/s390x/global_opts.c \
arch/ppc64/timebase.c \
arch/ppc64/global_opts.c \
arch/x86_64/cpu.c \
diff --git src/ucs/arch/atomic.h src/ucs/arch/atomic.h
index 99e53ca5d0f7..dfa83b757bb0 100644
--- src/ucs/arch/atomic.h
+++ src/ucs/arch/atomic.h
@@ -15,6 +15,8 @@
# include "generic/atomic.h"
#elif defined(__aarch64__)
# include "generic/atomic.h"
+#elif defined(__s390x__)
+# include "generic/atomic.h"
#else
# error "Unsupported architecture"
#endif
diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h
index 91d4573c1faf..8b092b023639 100644
--- src/ucs/arch/bitops.h
+++ src/ucs/arch/bitops.h
@@ -14,6 +14,8 @@
# include "ppc64/bitops.h"
#elif defined(__aarch64__)
# include "aarch64/bitops.h"
+#elif defined(__s390x__)
+# include "s390x/bitops.h"
#else
# error "Unsupported architecture"
#endif
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
index e81f540a899d..40b89907664a 100644
--- src/ucs/arch/cpu.c
+++ src/ucs/arch/cpu.c
@@ -57,7 +57,11 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
[UCS_CPU_VENDOR_GENERIC_PPC] = {
.min = UCS_MEMUNITS_INF,
.max = UCS_MEMUNITS_INF
- }
+ },
+ [UCS_CPU_VENDOR_GENERIC_IBM] = {
+ .min = UCS_MEMUNITS_INF,
+ .max = UCS_MEMUNITS_INF
+ },
};
const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
@@ -65,7 +69,8 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
[UCS_CPU_VENDOR_INTEL] = 5800 * UCS_MBYTE,
[UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE,
[UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE,
- [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE
+ [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE,
+ [UCS_CPU_VENDOR_GENERIC_IBM] = 5800 * UCS_MBYTE,
};
static void ucs_sysfs_get_cache_size()
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
index c1f1cc826b6b..e9d5c5b32ca3 100644
--- src/ucs/arch/cpu.h
+++ src/ucs/arch/cpu.h
@@ -58,6 +58,7 @@ typedef enum ucs_cpu_vendor {
UCS_CPU_VENDOR_AMD,
UCS_CPU_VENDOR_GENERIC_ARM,
UCS_CPU_VENDOR_GENERIC_PPC,
+ UCS_CPU_VENDOR_GENERIC_IBM,
UCS_CPU_VENDOR_LAST
} ucs_cpu_vendor_t;
@@ -91,6 +92,8 @@ typedef struct ucs_cpu_builtin_memcpy {
# include "ppc64/cpu.h"
#elif defined(__aarch64__)
# include "aarch64/cpu.h"
+#elif defined(__s390x__)
+# include "s390x/cpu.h"
#else
# error "Unsupported architecture"
#endif
diff --git src/ucs/arch/global_opts.h src/ucs/arch/global_opts.h
index 8786f130290a..0d251fb91868 100644
--- src/ucs/arch/global_opts.h
+++ src/ucs/arch/global_opts.h
@@ -15,6 +15,8 @@
# include "ppc64/global_opts.h"
#elif defined(__aarch64__)
# include "aarch64/global_opts.h"
+#elif defined(__s390x__)
+# include "s390x/global_opts.h"
#else
# error "Unsupported architecture"
#endif
diff --git src/ucs/arch/s390x/bitops.h src/ucs/arch/s390x/bitops.h
new file mode 100644
index 000000000000..39ad125107e9
--- /dev/null
+++ src/ucs/arch/s390x/bitops.h
@@ -0,0 +1,44 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCS_S390X_BITOPS_H_
+#define UCS_S390X_BITOPS_H_
+
+#include <stdint.h>
+
+
+/**
+ * Index of the highest set bit of a 32-bit value, i.e. floor(log2(n)).
+ * Returns 0 for n == 0, where __builtin_clz() would be undefined.
+ */
+static inline unsigned __ucs_ilog2_u32(uint32_t n)
+{
+    if (!n)
+        return 0;
+    return 31 - __builtin_clz(n);
+}
+
+/**
+ * Index of the highest set bit of a 64-bit value, i.e. floor(log2(n)).
+ * Returns 0 for n == 0, where __builtin_clzll() would be undefined.
+ */
+static inline unsigned __ucs_ilog2_u64(uint64_t n)
+{
+    if (!n)
+        return 0;
+    /* __builtin_clz() takes unsigned int and would truncate n to 32 bits */
+    return 63 - __builtin_clzll(n);
+}
+
+/**
+ * Index of the lowest set bit of n; returns 0 when n == 0.
+ */
+static inline unsigned ucs_ffs64(uint64_t n)
+{
+    return __ucs_ilog2_u64(n & -n);
+}
+
+#endif
diff --git src/ucs/arch/s390x/cpu.h src/ucs/arch/s390x/cpu.h
new file mode 100644
index 000000000000..4f0a87006118
--- /dev/null
+++ src/ucs/arch/s390x/cpu.h
@@ -0,0 +1,84 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+
+#ifndef UCS_S390X_CPU_H_
+#define UCS_S390X_CPU_H_
+
+#include <ucs/sys/compiler.h>
+#include <ucs/arch/generic/cpu.h>
+#include <stdint.h>
+#include <string.h>
+#include <ucs/type/status.h>
+
+
+#define UCS_ARCH_CACHE_LINE_SIZE 256
+
+BEGIN_C_DECLS
+
+/* NOTE(review): compiler-only barrier, no CPU fence emitted - confirm for s390x */
+#define ucs_memory_bus_fence() asm volatile (""::: "memory")
+#define ucs_memory_bus_store_fence() ucs_memory_bus_fence()
+#define ucs_memory_bus_load_fence() ucs_memory_bus_fence()
+#define ucs_memory_bus_wc_flush() ucs_memory_bus_fence()
+#define ucs_memory_cpu_fence() ucs_memory_bus_fence()
+#define ucs_memory_cpu_store_fence() ucs_memory_bus_fence()
+#define ucs_memory_cpu_load_fence() ucs_memory_bus_fence()
+#define ucs_memory_cpu_wc_fence() ucs_memory_bus_fence()
+
+
+static inline uint64_t ucs_arch_read_hres_clock()
+{
+ unsigned long clk;
+ asm volatile("stck %0" : "=Q" (clk) : : "cc");
+ return clk >> 2;
+}
+#define ucs_arch_get_clocks_per_sec ucs_arch_generic_get_clocks_per_sec
+
+
+static inline ucs_cpu_model_t ucs_arch_get_cpu_model()
+{
+ return UCS_CPU_MODEL_UNKNOWN;
+}
+
+static inline ucs_cpu_vendor_t ucs_arch_get_cpu_vendor()
+{
+ return UCS_CPU_VENDOR_GENERIC_IBM;
+}
+
+static inline int ucs_arch_get_cpu_flag()
+{
+ return UCS_CPU_FLAG_UNKNOWN;
+}
+
+double ucs_arch_get_clocks_per_sec();
+
+#define ucs_arch_wait_mem ucs_arch_generic_wait_mem
+
+static inline void ucs_cpu_init()
+{
+}
+
+static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len)
+{
+ return memcpy(dst, src, len);
+}
+
+static UCS_F_ALWAYS_INLINE void
+ucs_memcpy_nontemporal(void *dst, const void *src, size_t len)
+{
+ memcpy(dst, src, len);
+}
+
+static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes)
+{
+ return UCS_ERR_UNSUPPORTED;
+}
+
+END_C_DECLS
+
+#endif
diff --git src/ucs/arch/s390x/global_opts.c src/ucs/arch/s390x/global_opts.c
new file mode 100644
index 000000000000..4fa0c74034a7
--- /dev/null
+++ src/ucs/arch/s390x/global_opts.c
@@ -0,0 +1,24 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#if defined(__s390x__)
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <ucs/arch/global_opts.h>
+#include <ucs/config/parser.h>
+
+ucs_config_field_t ucs_arch_global_opts_table[] = {
+ {NULL}
+};
+
+void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config)
+{
+}
+
+#endif
diff --git src/ucs/arch/s390x/global_opts.h src/ucs/arch/s390x/global_opts.h
new file mode 100644
index 000000000000..225e4e5e896a
--- /dev/null
+++ src/ucs/arch/s390x/global_opts.h
@@ -0,0 +1,25 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+
+#ifndef UCS_S390X_GLOBAL_OPTS_H_
+#define UCS_S390X_GLOBAL_OPTS_H_
+
+#include <ucs/sys/compiler_def.h>
+
+BEGIN_C_DECLS
+
+#define UCS_ARCH_GLOBAL_OPTS_INITALIZER {}
+
+/* no s390x-specific options; placeholder member avoids an empty struct */
+typedef struct ucs_arch_global_opts {
+    char dummy;
+} ucs_arch_global_opts_t;
+
+END_C_DECLS
+
+#endif
+