forked from pool/openucx
Compare commits
81 Commits
Author | SHA256 | Date | |
---|---|---|---|
44641d0495 | |||
1a9eb16483 | |||
daadbbb50e | |||
e2fae99408 | |||
89e2c1bb0f | |||
99815d77b5 | |||
aa486005dd | |||
46d315ac9e | |||
7905fb8b39 | |||
145da08ae6 | |||
222004fc02 | |||
50926fe318 | |||
1aaa6114cd | |||
8094d4b34d | |||
2b4398a74a | |||
cfaa4352a9 | |||
d7ff57612d | |||
0835e04bcc | |||
0a42199aad | |||
2a1a111b03 | |||
ba3eec4113 | |||
7d6841ca26 | |||
8a8941ab4f | |||
a42d04ee36 | |||
b714ee86f4 | |||
6a412379a9 | |||
61b71445ce | |||
1c9eb00a8e | |||
1c024f5f2d | |||
dfc3070ec1 | |||
ec8c3382db | |||
|
8322be19fe | ||
|
d485735431 | ||
54dbb80402 | |||
|
878438d42d | ||
98063d874c | |||
|
6e22959692 | ||
21f5083b95 | |||
|
643404b991 | ||
|
b01e11bc13 | ||
|
cc6c36d10f | ||
3b5acc2b06 | |||
|
f10927b874 | ||
2da20e0a3c | |||
|
b4e3d46395 | ||
46c68d5620 | |||
b3b5e27527 | |||
9033dd246f | |||
455518e131 | |||
f5ac91c2bc | |||
6488ec11a4 | |||
|
de6138b03e | ||
c6d47e9fb8 | |||
|
47949112e3 | ||
d2263e3b21 | |||
|
ca246a454a | ||
|
85725747e0 | ||
|
fd1e5380fe | ||
50735531ff | |||
56befa2187 | |||
|
4774502643 | ||
|
6cb716aaee | ||
6ff0a2a930 | |||
|
6c87d0bee6 | ||
28b9a25066 | |||
1bb8a7934f | |||
61591a8cc8 | |||
|
37de8011ef | ||
c5d83e6e59 | |||
936151ea1e | |||
|
33fb347489 | ||
bbb8bc7682 | |||
d0cd7dd0be | |||
57691e3478 | |||
c648941319 | |||
2b8d3bdf06 | |||
|
8c6efa2743 | ||
|
6a966f5112 | ||
42718c30e4 | |||
ec0b537606 | |||
|
c69bff694e |
@@ -1,18 +0,0 @@
|
|||||||
commit c49bd7a5d183a57f41c801c7f5c9691bcd7d23da
|
|
||||||
Author: Thomas Vegas <tvegas@nvidia.com>
|
|
||||||
Date: Mon Jun 24 16:52:04 2024 +0300
|
|
||||||
|
|
||||||
UCS/TIME: Add math.h to provide INFINITY
|
|
||||||
|
|
||||||
diff --git src/ucs/time/time.h src/ucs/time/time.h
|
|
||||||
index cff9810cdad8..c51362273f8d 100644
|
|
||||||
--- src/ucs/time/time.h
|
|
||||||
+++ src/ucs/time/time.h
|
|
||||||
@@ -11,6 +11,7 @@
|
|
||||||
#include <ucs/time/time_def.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
#include <limits.h>
|
|
||||||
+#include <math.h>
|
|
||||||
|
|
||||||
BEGIN_C_DECLS
|
|
||||||
|
|
@@ -1,6 +1,6 @@
|
|||||||
commit 70e243c8a6685a03d5faa65e706d318196ad712b
|
commit ba1d7048df80ee535e01335992f70568e2f88c80
|
||||||
Author: Nicolas Morey <nmorey@suse.com>
|
Author: Nicolas Morey <nmorey@suse.com>
|
||||||
Date: Wed Jun 26 17:36:58 2024 +0200
|
Date: Wed Feb 19 16:46:33 2025 +0100
|
||||||
|
|
||||||
openucx s390x support
|
openucx s390x support
|
||||||
|
|
||||||
@@ -33,10 +33,10 @@ index e5e66266d695..ef7e4ede93ce 100644
|
|||||||
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
|
AS_IF([test "x$bistro_hooks_happy" = "xyes"],
|
||||||
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
|
[AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])],
|
||||||
diff --git src/ucm/Makefile.am src/ucm/Makefile.am
|
diff --git src/ucm/Makefile.am src/ucm/Makefile.am
|
||||||
index fa7a722f2d31..e6df414a4ecb 100644
|
index 7866aa0ac13b..2d44e20f124d 100644
|
||||||
--- src/ucm/Makefile.am
|
--- src/ucm/Makefile.am
|
||||||
+++ src/ucm/Makefile.am
|
+++ src/ucm/Makefile.am
|
||||||
@@ -34,6 +34,7 @@ noinst_HEADERS = \
|
@@ -35,6 +35,7 @@ noinst_HEADERS = \
|
||||||
bistro/bistro_aarch64.h \
|
bistro/bistro_aarch64.h \
|
||||||
bistro/bistro_ppc64.h \
|
bistro/bistro_ppc64.h \
|
||||||
bistro/bistro_rv64.h
|
bistro/bistro_rv64.h
|
||||||
@@ -45,7 +45,7 @@ index fa7a722f2d31..e6df414a4ecb 100644
|
|||||||
libucm_la_SOURCES = \
|
libucm_la_SOURCES = \
|
||||||
event/event.c \
|
event/event.c \
|
||||||
diff --git src/ucm/bistro/bistro.h src/ucm/bistro/bistro.h
|
diff --git src/ucm/bistro/bistro.h src/ucm/bistro/bistro.h
|
||||||
index 8d0b90751676..a0b9d3f064c3 100644
|
index fffbe738b116..31859a84b159 100644
|
||||||
--- src/ucm/bistro/bistro.h
|
--- src/ucm/bistro/bistro.h
|
||||||
+++ src/ucm/bistro/bistro.h
|
+++ src/ucm/bistro/bistro.h
|
||||||
@@ -23,6 +23,8 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t;
|
@@ -23,6 +23,8 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t;
|
||||||
@@ -91,7 +91,7 @@ index 000000000000..2beb5de54fab
|
|||||||
+
|
+
|
||||||
+#endif
|
+#endif
|
||||||
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
||||||
index 4a05f47b6369..c1cd2fb2cb57 100644
|
index 86a469a60bcc..6751bad764b8 100644
|
||||||
--- src/ucs/Makefile.am
|
--- src/ucs/Makefile.am
|
||||||
+++ src/ucs/Makefile.am
|
+++ src/ucs/Makefile.am
|
||||||
@@ -24,6 +24,7 @@ nobase_dist_libucs_la_HEADERS = \
|
@@ -24,6 +24,7 @@ nobase_dist_libucs_la_HEADERS = \
|
||||||
@@ -140,7 +140,7 @@ index 849647902fab..a328c37e2020 100644
|
|||||||
# error "Unsupported architecture"
|
# error "Unsupported architecture"
|
||||||
#endif
|
#endif
|
||||||
diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h
|
diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h
|
||||||
index 3e0e530f1336..f887e03ebac0 100644
|
index f8e51c45888a..476631d95eb6 100644
|
||||||
--- src/ucs/arch/bitops.h
|
--- src/ucs/arch/bitops.h
|
||||||
+++ src/ucs/arch/bitops.h
|
+++ src/ucs/arch/bitops.h
|
||||||
@@ -23,6 +23,8 @@ BEGIN_C_DECLS
|
@@ -23,6 +23,8 @@ BEGIN_C_DECLS
|
||||||
@@ -153,7 +153,7 @@ index 3e0e530f1336..f887e03ebac0 100644
|
|||||||
# error "Unsupported architecture"
|
# error "Unsupported architecture"
|
||||||
#endif
|
#endif
|
||||||
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
|
diff --git src/ucs/arch/cpu.c src/ucs/arch/cpu.c
|
||||||
index 307fb61bfc4a..4356fff36f8b 100644
|
index 6fe5e31dba31..f92c53f303cd 100644
|
||||||
--- src/ucs/arch/cpu.c
|
--- src/ucs/arch/cpu.c
|
||||||
+++ src/ucs/arch/cpu.c
|
+++ src/ucs/arch/cpu.c
|
||||||
@@ -64,6 +64,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
@@ -64,6 +64,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
||||||
@@ -167,15 +167,15 @@ index 307fb61bfc4a..4356fff36f8b 100644
|
|||||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = {
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = {
|
||||||
.min = UCS_MEMUNITS_INF,
|
.min = UCS_MEMUNITS_INF,
|
||||||
.max = UCS_MEMUNITS_INF
|
.max = UCS_MEMUNITS_INF
|
||||||
@@ -89,6 +93,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = {
|
@@ -82,7 +86,6 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = {
|
||||||
[UCS_CPU_VENDOR_GENERIC_ARM] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
}
|
||||||
[UCS_CPU_VENDOR_GENERIC_PPC] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
};
|
||||||
[UCS_CPU_VENDOR_GENERIC_RV64G] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
|
||||||
+ [UCS_CPU_VENDOR_GENERIC_IBM] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
-
|
||||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = UCS_CPU_EST_BCOPY_BW_FUJITSU_ARM,
|
static void ucs_sysfs_get_cache_size()
|
||||||
[UCS_CPU_VENDOR_ZHAOXIN] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
|
{
|
||||||
[UCS_CPU_VENDOR_NVIDIA] = UCS_CPU_EST_BCOPY_BW_DEFAULT
|
char type_str[32]; /* Data/Instruction/Unified */
|
||||||
@@ -183,6 +188,7 @@ const char *ucs_cpu_vendor_name()
|
@@ -167,6 +170,7 @@ const char *ucs_cpu_vendor_name()
|
||||||
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
|
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
|
||||||
[UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
|
[UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
|
||||||
[UCS_CPU_VENDOR_GENERIC_RV64G] = "Generic RV64G",
|
[UCS_CPU_VENDOR_GENERIC_RV64G] = "Generic RV64G",
|
||||||
@@ -183,7 +183,7 @@ index 307fb61bfc4a..4356fff36f8b 100644
|
|||||||
[UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM",
|
[UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM",
|
||||||
[UCS_CPU_VENDOR_ZHAOXIN] = "Zhaoxin",
|
[UCS_CPU_VENDOR_ZHAOXIN] = "Zhaoxin",
|
||||||
[UCS_CPU_VENDOR_NVIDIA] = "Nvidia"
|
[UCS_CPU_VENDOR_NVIDIA] = "Nvidia"
|
||||||
@@ -212,6 +218,7 @@ const char *ucs_cpu_model_name()
|
@@ -197,6 +201,7 @@ const char *ucs_cpu_model_name()
|
||||||
[UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU] = "Wudaokou",
|
[UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU] = "Wudaokou",
|
||||||
[UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI] = "Lujiazui",
|
[UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI] = "Lujiazui",
|
||||||
[UCS_CPU_MODEL_RV64G] = "RV64G",
|
[UCS_CPU_MODEL_RV64G] = "RV64G",
|
||||||
@@ -192,10 +192,10 @@ index 307fb61bfc4a..4356fff36f8b 100644
|
|||||||
};
|
};
|
||||||
|
|
||||||
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
|
diff --git src/ucs/arch/cpu.h src/ucs/arch/cpu.h
|
||||||
index ca25e714d141..e97405c30d52 100644
|
index 857b8b804cf7..89461d52d406 100644
|
||||||
--- src/ucs/arch/cpu.h
|
--- src/ucs/arch/cpu.h
|
||||||
+++ src/ucs/arch/cpu.h
|
+++ src/ucs/arch/cpu.h
|
||||||
@@ -39,6 +39,7 @@ typedef enum ucs_cpu_model {
|
@@ -41,6 +41,7 @@ typedef enum ucs_cpu_model {
|
||||||
UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU,
|
UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU,
|
||||||
UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI,
|
UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI,
|
||||||
UCS_CPU_MODEL_RV64G,
|
UCS_CPU_MODEL_RV64G,
|
||||||
@@ -203,7 +203,7 @@ index ca25e714d141..e97405c30d52 100644
|
|||||||
UCS_CPU_MODEL_NVIDIA_GRACE,
|
UCS_CPU_MODEL_NVIDIA_GRACE,
|
||||||
UCS_CPU_MODEL_LAST
|
UCS_CPU_MODEL_LAST
|
||||||
} ucs_cpu_model_t;
|
} ucs_cpu_model_t;
|
||||||
@@ -68,6 +69,7 @@ typedef enum ucs_cpu_vendor {
|
@@ -70,6 +71,7 @@ typedef enum ucs_cpu_vendor {
|
||||||
UCS_CPU_VENDOR_AMD,
|
UCS_CPU_VENDOR_AMD,
|
||||||
UCS_CPU_VENDOR_GENERIC_ARM,
|
UCS_CPU_VENDOR_GENERIC_ARM,
|
||||||
UCS_CPU_VENDOR_GENERIC_PPC,
|
UCS_CPU_VENDOR_GENERIC_PPC,
|
||||||
@@ -211,7 +211,7 @@ index ca25e714d141..e97405c30d52 100644
|
|||||||
UCS_CPU_VENDOR_FUJITSU_ARM,
|
UCS_CPU_VENDOR_FUJITSU_ARM,
|
||||||
UCS_CPU_VENDOR_ZHAOXIN,
|
UCS_CPU_VENDOR_ZHAOXIN,
|
||||||
UCS_CPU_VENDOR_GENERIC_RV64G,
|
UCS_CPU_VENDOR_GENERIC_RV64G,
|
||||||
@@ -107,6 +109,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
@@ -109,6 +111,8 @@ typedef struct ucs_cpu_builtin_memcpy {
|
||||||
# include "aarch64/cpu.h"
|
# include "aarch64/cpu.h"
|
||||||
#elif defined(__riscv)
|
#elif defined(__riscv)
|
||||||
# include "rv64/cpu.h"
|
# include "rv64/cpu.h"
|
||||||
@@ -278,10 +278,10 @@ index 000000000000..ce48ff1ff451
|
|||||||
+#endif
|
+#endif
|
||||||
diff --git src/ucs/arch/s390x/cpu.h src/ucs/arch/s390x/cpu.h
|
diff --git src/ucs/arch/s390x/cpu.h src/ucs/arch/s390x/cpu.h
|
||||||
new file mode 100644
|
new file mode 100644
|
||||||
index 000000000000..0aee278010d2
|
index 000000000000..e1d41a0ef8b8
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ src/ucs/arch/s390x/cpu.h
|
+++ src/ucs/arch/s390x/cpu.h
|
||||||
@@ -0,0 +1,84 @@
|
@@ -0,0 +1,86 @@
|
||||||
+/**
|
+/**
|
||||||
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
|
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
|
||||||
+* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED.
|
+* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED.
|
||||||
@@ -347,7 +347,9 @@ index 000000000000..0aee278010d2
|
|||||||
+{
|
+{
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
+static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len)
|
+static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len,
|
||||||
|
+ ucs_arch_memcpy_hint_t hint,
|
||||||
|
+ size_t total_len)
|
||||||
+{
|
+{
|
||||||
+ return memcpy(dst, src, len);
|
+ return memcpy(dst, src, len);
|
||||||
+}
|
+}
|
||||||
@@ -428,7 +430,7 @@ index 000000000000..225e4e5e896a
|
|||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c
|
diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c
|
||||||
index 42ff75f64af5..b22418e3f4b0 100644
|
index d0b5effe11a3..ce22a2097f18 100644
|
||||||
--- src/ucs/sys/sys.c
|
--- src/ucs/sys/sys.c
|
||||||
+++ src/ucs/sys/sys.c
|
+++ src/ucs/sys/sys.c
|
||||||
@@ -1258,8 +1258,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
@@ -1258,8 +1258,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
||||||
|
210
openucx.changes
210
openucx.changes
@@ -1,3 +1,213 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Jun 25 15:49:50 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||||
|
|
||||||
|
- Update to ucx 1.19.0
|
||||||
|
- UCP
|
||||||
|
- Enabled multi-GPU support within a single process
|
||||||
|
- Added dynamic selection between strong and weak fences in RMA flush operations
|
||||||
|
- Improved endpoint reconfiguration capabilities
|
||||||
|
- Added All2All lane selection for multi-NIC-GPU systems
|
||||||
|
- Improved rkey debug info when config cache limit is reached
|
||||||
|
- Improved UCP protocol selection based on available memory types
|
||||||
|
- Removed dummy memory key from irrelevant transports (TCP, CMA and CUDA)
|
||||||
|
- Improved RNDV performance with device-local staging buffers
|
||||||
|
- Enabled error handling for RMA get_offload protocols
|
||||||
|
- Made UCX_TLS=^ib disable all transports including auxiliary
|
||||||
|
- Fixed send request status handling
|
||||||
|
- Fixed performance degradation in RNDV by optimizing md cache updates
|
||||||
|
- Fixed protocol selection when first lane is filtered out by fragment size
|
||||||
|
- Fixed rkey selection by using memory registration flag
|
||||||
|
- UCT
|
||||||
|
- Defined uct_rkey_unpack_v2 API to support passing sys-dev
|
||||||
|
- RDMA CORE (IB, ROCE, etc.)
|
||||||
|
- Added SRD transport support in EFA with reordering, AM, and control operations
|
||||||
|
- Removed XGVMI BF2 support (umem)
|
||||||
|
- Removed device memory indirect key
|
||||||
|
- Fixed VFS objects for DCIs and pools
|
||||||
|
- Added routing table cache to the reachability check
|
||||||
|
- Fixed strict order usage in IB auxiliary rkeys
|
||||||
|
- Improved various init logging messages
|
||||||
|
- Improved reliability of DC transport by adding DCI validation and separating connection logic
|
||||||
|
- Fixed segfault in DC fence operation
|
||||||
|
- UCS
|
||||||
|
- Removed compilation warnings
|
||||||
|
- Use UCS function for counting leading zeros on x86 architecture
|
||||||
|
- Fixed a compilation warning
|
||||||
|
- Shared Memory
|
||||||
|
- Fixed FIFO availability check for sm transport
|
||||||
|
- Tools
|
||||||
|
- Added name filter option (-F 'str') to ucx_info for config and feature dumps
|
||||||
|
- Improved ucx_info input validation
|
||||||
|
- Documentation
|
||||||
|
- Fixed open-mpi clone instruction
|
||||||
|
- Build
|
||||||
|
- Fixed enum-int-mismatch warnings with GCC 15
|
||||||
|
- Drop patches merged upstream:
|
||||||
|
- UCT-IB-UD-Use-GRH-to-detect-address-family-on-non-Mellanox-hardware.patch
|
||||||
|
- openucx-extern-c.patch
|
||||||
|
- openucx-strict-headers-additional.patch
|
||||||
|
- openucx-strict-headers.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Jun 12 08:28:59 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||||
|
|
||||||
|
- Update to ucx 1.18.1
|
||||||
|
- CUDA
|
||||||
|
- Added config keys to update cuda_copy bandwidth for coherent platforms
|
||||||
|
- Improved cache invalidation of memory allocated using CUDA memory pool
|
||||||
|
- AZP
|
||||||
|
- Added Ubuntu 24.04 to build and release pipeline
|
||||||
|
- UCP
|
||||||
|
- Fixed assertion failure when maximum lane fragment is smaller than AM header
|
||||||
|
- Fixed potential active message user header use after free with protocol reconfiguration
|
||||||
|
- CUDA
|
||||||
|
- Fixed registration of CUDA Fabric memory allocated by UCT
|
||||||
|
- Fixed VA recycling check of memory allocated using VMM and CUDA memory pool
|
||||||
|
- RDMA CORE (IB, ROCE, etc.)
|
||||||
|
- Do not use ConnectX-8 SMI subdevices for communication
|
||||||
|
- Fixed remote access error by disabling ODP when the device supports DDP
|
||||||
|
- Fixed configuration logic by disabling DDP when AR is disabled
|
||||||
|
- UCM
|
||||||
|
- Fixed crash with bistro hooks for CUDA 12.9 on amd64
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri May 2 14:16:35 UTC 2025 - Friedrich Haubensak <hsk17@mail.de>
|
||||||
|
|
||||||
|
- Add openucx-strict-headers.patch and openucx-extern-c.patch from
|
||||||
|
upstream and additional openucx-strict-headers-additional.patch
|
||||||
|
to build w/ gcc-15 (boo#1241939)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Apr 1 12:31:11 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||||
|
|
||||||
|
- Add UCT-IB-UD-Use-GRH-to-detect-address-family-on-non-Mellanox-hardware.patch
|
||||||
|
to fix a UD init issue on non-Mellanox RDMA HW (bsc#1240204).
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Feb 19 15:47:23 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||||
|
|
||||||
|
- Update to ucx 1.18.0
|
||||||
|
- UCP
|
||||||
|
- Enabled using CUDA staging buffers for pipeline protocols by default
|
||||||
|
- Added endpoint reconfiguration support for non-reused p2p scenarios
|
||||||
|
- Enabled non-cacheable memory domains, activated for gdr_copy
|
||||||
|
- Added user_data parameter to ucp_ep_query
|
||||||
|
- Added support for host memory pipeline through CUDA buffers for rendezvous protocol
|
||||||
|
- Added global VA infrastructure and memory region in absence of error handling
|
||||||
|
- Made protocol performance node names more informative
|
||||||
|
- Enforced always running on the same thread in single thread mode
|
||||||
|
- Multiple improvements in protocols selection infrastructure
|
||||||
|
- Added UCP_MEM_MAP_LOCK API flag to enforce locked memory mapping
|
||||||
|
- Allowed up to 64 endpoint lanes for systems with many transports or devices
|
||||||
|
- Added usage tracker to worker
|
||||||
|
- Improved various logging messages
|
||||||
|
- Fixed stack overflow in exported rkey unpack
|
||||||
|
- Removed extra remote-cpu overhead from protocol estimation for zcopy
|
||||||
|
- Fixed performance estimation for rndv pipeline protocols
|
||||||
|
- Fixed ATP sending by picking the correct lane
|
||||||
|
- Fixed missing reg_id on memh creation
|
||||||
|
- Fixed repeated invalidations by retaining existing access flags
|
||||||
|
- Fixed abort reason propagation for rendezvous RTR mtype
|
||||||
|
- Do not check transport availability if it is disabled by UCX_TLS environment variable
|
||||||
|
- Fixed wrong flag being used for checking BCOPY capability
|
||||||
|
- Fixed sending too many ATPs for small messages
|
||||||
|
- Enforced 16 bits size for Active Messages identifiers
|
||||||
|
- Fixed unnecessary status check for emulated AMO
|
||||||
|
- Fixed more than one fragment sending in rendezvous pipeline
|
||||||
|
- Fixed crash by using biggest max frag across all lanes
|
||||||
|
- Fixed missing memory handle flags by copying from parent to child
|
||||||
|
- Fixed worker interface activate count
|
||||||
|
- Fixed flush requests by replacing ATP/flush lane map with lane indexes
|
||||||
|
- Fixed lost uct_flags when merging memory regions
|
||||||
|
- UCT
|
||||||
|
- Fixed memory domain UCT flags description
|
||||||
|
- RDMA CORE (IB, ROCE, etc.)
|
||||||
|
- Added environment variable to manage DC initiator capacity
|
||||||
|
- Added DC dcs_hybrid policy
|
||||||
|
- Reduced MLX5/DV stack size consumption
|
||||||
|
- Added ODP support for verbs and mlx5dv
|
||||||
|
- Added support of CUDA managed memory on IB when ODP is available
|
||||||
|
- Added support of Adaptive Routing on RoCE
|
||||||
|
- Enabled use of implicit ODP with relaxed ordering
|
||||||
|
- Improved GPU-Direct detection in IB transport
|
||||||
|
- Increased DC initiator default count to 32 for performance optimization
|
||||||
|
- Added ConnectX-8 device support with DDP
|
||||||
|
- Added support for subnet filter list for RoCE interfaces
|
||||||
|
- Enhanced the error message to provide more details when a connection cannot be
|
||||||
|
established due to unreachable transports
|
||||||
|
- Added IB MLX5 as a separate UCX module with separate RPM sub-package
|
||||||
|
- Added initial support for GGA transport, for fast DPU memory access
|
||||||
|
- Set IB DevX atomic mode based on device capabilities
|
||||||
|
- Removed DC keepalive mechanism, since the keepalive is done on UCP layer
|
||||||
|
- Optimized cross-gVMI memory registration using indirect memory keys cache
|
||||||
|
- Improved various logging messages
|
||||||
|
- Fixed FETCH_ADD remote access error for ODP/KSM case
|
||||||
|
- Fixed missing conditional compilation checks for DM
|
||||||
|
- Fixed IB MD allocation naming typo
|
||||||
|
- Fixed invalid GIDs filter in IB
|
||||||
|
- Fixed flags usage in MLX5 zcopy_post
|
||||||
|
- Do not limit ODP registration retries
|
||||||
|
- Fixed JUCX failures by considering the number of supported completion vectors
|
||||||
|
- UCS
|
||||||
|
- Added support for wildcards in configuration parameter names
|
||||||
|
- Added ASAN protection to several internal data structures
|
||||||
|
- Reduced stack usage in topology detection code
|
||||||
|
- Improved bitmaps configuration parsing with wider bitfield
|
||||||
|
- Added options to set topology distance between devices
|
||||||
|
- Optimized VFS unix socket watch by using user private folder
|
||||||
|
- Added general IP subnet matching infrastructure
|
||||||
|
- Extend array data structure to support user-provided array copy routine
|
||||||
|
- Improved time units description
|
||||||
|
- Fixed a crash by using heap allocation to process expired timers in batch
|
||||||
|
- Fixed allocation issue on memtrack dump
|
||||||
|
- Fixed deletion of the monitored folder in VFS
|
||||||
|
- Fixed unsafe resize for DC initiator array
|
||||||
|
- Fixed function macro invocation to match C standard
|
||||||
|
- Fixed calling async handler on already released resource
|
||||||
|
- Fixed performance by setting higher bandwidth for different NUMA nodes on Grace
|
||||||
|
- Fixed undeclared value error in timer conversion routine
|
||||||
|
- Fixed uninitialized value access in registration cache
|
||||||
|
- UCM
|
||||||
|
- Extend CUDA memory hooks to include memory mapping APIs
|
||||||
|
- Fixed race condition in parsing proc maps
|
||||||
|
- Fixed mremap failure while parsing /proc/self/maps
|
||||||
|
- TCP
|
||||||
|
- Always bind endpoint to interface
|
||||||
|
- Tools
|
||||||
|
- Improved performance by increasing window size for put_bw and add get_bw in ucx_perftest
|
||||||
|
- Added multi-send flag for receive operations in bandwidth benchmarks in ucx_perftest
|
||||||
|
- Improved ucx_perftest uni-directional test with added fence
|
||||||
|
- Detailed ucx_perftest batch section of command-line documentation
|
||||||
|
- Fixed buffer size potential overflow in ucx_perftest
|
||||||
|
- Fixed missing address when packing memory keys on ucx_perftest
|
||||||
|
- Fixed memory leak for endpoint report in ucx_info
|
||||||
|
- Fixed build without openmp in ucx_perftest
|
||||||
|
- Fixed UCT device override on server side on ucx_perftest
|
||||||
|
- Documentation
|
||||||
|
- Added a section regarding adaptive routing on RoCE
|
||||||
|
- Architecture
|
||||||
|
- Added CPU Model for MI300A
|
||||||
|
- Added Fujitsu ARM specific values to ucx.conf
|
||||||
|
- Added AMD Turin support
|
||||||
|
- Added an optimized non-temporal memory copy implementation for AMD CPU
|
||||||
|
- Build
|
||||||
|
- Improved compiler error reporting with added flag
|
||||||
|
- Improved coverity script to allow faster turnaround time
|
||||||
|
- Improved Intel Compiler detection and support
|
||||||
|
- Fixed using correct ASAN version for running tests
|
||||||
|
- Configuration
|
||||||
|
- Used POSIX bourne syntax to check equality
|
||||||
|
- Fixed build failure by using proper flags in compiler.m4
|
||||||
|
- Fixed perftest MAD support default guessing
|
||||||
|
- GO
|
||||||
|
- Added multi-send flag and user memh support in request params
|
||||||
|
- Added serialized thread mode to avoid subtle races between threads
|
||||||
|
- Fixed make distcheck
|
||||||
|
- Packaging
|
||||||
|
- Improved dpkg-buildpackage sample command by explicitly adding mlx5 related arguments
|
||||||
|
- Delete UCS-TIME-Add-math.h-to-provide-INFINITY.patch which was merged upstream
|
||||||
|
- Refresh openucx-s390x-support.patch due to API changes
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Sat Sep 7 14:22:20 UTC 2024 - Nicolas Morey <nicolas.morey@suse.com>
|
Sat Sep 7 14:22:20 UTC 2024 - Nicolas Morey <nicolas.morey@suse.com>
|
||||||
|
|
||||||
|
12
openucx.spec
12
openucx.spec
@@ -1,7 +1,7 @@
|
|||||||
#
|
#
|
||||||
# spec file for package openucx
|
# spec file for package openucx
|
||||||
#
|
#
|
||||||
# Copyright (c) 2024 SUSE LLC
|
# Copyright (c) 2025 SUSE LLC
|
||||||
#
|
#
|
||||||
# All modifications and additions to the file contributed by third parties
|
# All modifications and additions to the file contributed by third parties
|
||||||
# remain the property of their copyright owners, unless otherwise agreed
|
# remain the property of their copyright owners, unless otherwise agreed
|
||||||
@@ -20,7 +20,7 @@
|
|||||||
%define version_suf %{nil}
|
%define version_suf %{nil}
|
||||||
|
|
||||||
Name: openucx
|
Name: openucx
|
||||||
Version: 1.17.0
|
Version: 1.19.0
|
||||||
Release: 0
|
Release: 0
|
||||||
Summary: Communication layer for Message Passing (MPI)
|
Summary: Communication layer for Message Passing (MPI)
|
||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
@@ -32,7 +32,6 @@ URL: http://openucx.org/
|
|||||||
Source: https://github.com/openucx/ucx/releases/download/v%version%{?version_suf}/ucx-%version.tar.gz
|
Source: https://github.com/openucx/ucx/releases/download/v%version%{?version_suf}/ucx-%version.tar.gz
|
||||||
Patch1: openucx-s390x-support.patch
|
Patch1: openucx-s390x-support.patch
|
||||||
Patch2: ucm-fix-UCX_MEM_MALLOC_RELOC.patch
|
Patch2: ucm-fix-UCX_MEM_MALLOC_RELOC.patch
|
||||||
Patch3: UCS-TIME-Add-math.h-to-provide-INFINITY.patch
|
|
||||||
BuildRequires: autoconf >= 2.63
|
BuildRequires: autoconf >= 2.63
|
||||||
BuildRequires: automake >= 1.10
|
BuildRequires: automake >= 1.10
|
||||||
BuildRequires: binutils-devel
|
BuildRequires: binutils-devel
|
||||||
@@ -158,7 +157,8 @@ export UCX_CFLAGS="$UCX_CFLAGS -mno-sse -mno-sse2"
|
|||||||
--disable-debug --disable-assertions \
|
--disable-debug --disable-assertions \
|
||||||
--disable-params-check \
|
--disable-params-check \
|
||||||
--with-rc --with-ud --with-dc \
|
--with-rc --with-ud --with-dc \
|
||||||
--with-mlx5-dv --with-rdmacm
|
--with-ib-hw-tm --with-dm --with-devx \
|
||||||
|
--with-mlx5 --with-rdmacm
|
||||||
|
|
||||||
# Override BASE_CFLAGS to disable Werror (boo#1121267)
|
# Override BASE_CFLAGS to disable Werror (boo#1121267)
|
||||||
make %{?_smp_mflags} V=1 BASE_CFLAGS="-g -Wall"
|
make %{?_smp_mflags} V=1 BASE_CFLAGS="-g -Wall"
|
||||||
@@ -230,6 +230,7 @@ mv %buildroot/%_bindir/io_demo %buildroot/%_libexecdir/%{name}/
|
|||||||
%_libdir/libuct.so.*
|
%_libdir/libuct.so.*
|
||||||
%dir %_libdir/ucx/
|
%dir %_libdir/ucx/
|
||||||
%_libdir/ucx/libuct_*.so.*
|
%_libdir/ucx/libuct_*.so.*
|
||||||
|
%_libdir/ucx/libucx_perftest_mad.so.*
|
||||||
|
|
||||||
%files -n libuct-devel
|
%files -n libuct-devel
|
||||||
%defattr(-,root,root)
|
%defattr(-,root,root)
|
||||||
@@ -237,9 +238,12 @@ mv %buildroot/%_bindir/io_demo %buildroot/%_libexecdir/%{name}/
|
|||||||
%_libdir/libuct.so
|
%_libdir/libuct.so
|
||||||
%dir %_libdir/ucx/
|
%dir %_libdir/ucx/
|
||||||
%_libdir/ucx/libuct_*.so
|
%_libdir/ucx/libuct_*.so
|
||||||
|
%_libdir/ucx/libucx_perftest_mad.so
|
||||||
%_libdir/pkgconfig/ucx-uct.pc
|
%_libdir/pkgconfig/ucx-uct.pc
|
||||||
%_libdir/pkgconfig/ucx-cma.pc
|
%_libdir/pkgconfig/ucx-cma.pc
|
||||||
%_libdir/pkgconfig/ucx-ib.pc
|
%_libdir/pkgconfig/ucx-ib.pc
|
||||||
|
%_libdir/pkgconfig/ucx-ib-efa.pc
|
||||||
|
%_libdir/pkgconfig/ucx-ib-mlx5.pc
|
||||||
%_libdir/pkgconfig/ucx-rdmacm.pc
|
%_libdir/pkgconfig/ucx-rdmacm.pc
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
BIN
ucx-1.17.0.tar.gz
(Stored with Git LFS)
BIN
ucx-1.17.0.tar.gz
(Stored with Git LFS)
Binary file not shown.
3
ucx-1.19.0.tar.gz
Normal file
3
ucx-1.19.0.tar.gz
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9af07d55281059542f20c5b411db668643543174e51ac71f53f7ac839164f285
|
||||||
|
size 3391294
|
Reference in New Issue
Block a user