forked from pool/openucx
Compare commits
22 Commits
| Author | SHA256 | Date | |
|---|---|---|---|
|
|
3ed2204149 | ||
|
|
ed9e44370b | ||
|
|
7690a30a01 | ||
|
|
a1035f1e89 | ||
| 2e169061f4 | |||
| 0e3357c05c | |||
| 5f25c6c29c | |||
| f8b8d435cc | |||
| e6035d1f52 | |||
| f22c7e86d8 | |||
| d8d8c7c955 | |||
| 77c5e72d38 | |||
| 28afc5599d | |||
| ad2b6e1eb3 | |||
| 47635a7117 | |||
| 9a474b25ce | |||
| 83523eaad4 | |||
| 68685ed0da | |||
| a5f1adbb12 | |||
| de09e2a891 | |||
| 49c5ede7c9 | |||
| b79725a512 |
12
README.md
Normal file
12
README.md
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
## Build Results
|
||||
|
||||
Current state of openucx in openSUSE:Factory is
|
||||
|
||||

|
||||
|
||||
The current state of openucx in the devel project build (science:HPC)
|
||||
|
||||

|
||||
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
commit 2d79ffee423fd4570599258e00689cc745e8785e
|
||||
Author: Nicolas Morey <nmorey@suse.com>
|
||||
Date: Fri Nov 7 17:19:54 2025 +0100
|
||||
|
||||
UCP/CORE: Fix config type for dynamic_tl_progress_factor
|
||||
|
||||
Signed-off-by: Nicolas Morey <nmorey@suse.com>
|
||||
|
||||
diff --git src/ucp/core/ucp_context.c src/ucp/core/ucp_context.c
|
||||
index 8b9dbeaca9ea..4cbae096ed93 100644
|
||||
--- src/ucp/core/ucp_context.c
|
||||
+++ src/ucp/core/ucp_context.c
|
||||
@@ -440,7 +440,7 @@ static ucs_config_field_t ucp_context_config_table[] = {
|
||||
"Number of usage tracker rounds performed for each progress operation. Must be\n"
|
||||
"non-zero value.",
|
||||
ucs_offsetof(ucp_context_config_t, dynamic_tl_progress_factor),
|
||||
- UCS_CONFIG_TYPE_TIME_UNITS},
|
||||
+ UCS_CONFIG_TYPE_UINT},
|
||||
|
||||
{"RESOLVE_REMOTE_EP_ID", "n",
|
||||
"Defines whether resolving remote endpoint ID is required or not when\n"
|
||||
@@ -1,224 +0,0 @@
|
||||
commit d437b65a6df080416048067141b1c206a52bdc78
|
||||
Author: Nathan Hjelm <hjelmn@google.com>
|
||||
Date: Wed Oct 16 20:32:48 2024 +0000
|
||||
|
||||
UCT/IB/UD: Use GRH to detect address family on non-Mellanox hardware
|
||||
|
||||
Setting the service level in the work completion is a Mellanox-specific feature,
|
||||
so it can not be relied on to detect IPv4 vs IPv6. This commit fixes the
|
||||
detection logic for non-Mellanox providers by detecting the address class from
|
||||
the grh instead. This is done by detecting either 0x6a (IPv6) at offset 0 or
|
||||
0x45 (IPv4) at offset 20 of the receive buffer. Since the first 20B of IPv4
|
||||
packets are undefined ud_verbs sets the first byte of each posted receive to a
|
||||
known value (0xff) since the provider is unlikely to touch these bytes. This
|
||||
commit makes no changes to the mlx5 code which continues to rely on the CQE data
|
||||
to determine if a packet is IPv4 or IPv6. It can be updated to use the non-mlx5
|
||||
logic but since the IP version is present in the CQE there is no need.
|
||||
|
||||
Signed-off-by: Nathan Hjelm <hjelmn@google.com>
|
||||
|
||||
diff --git src/uct/ib/mlx5/ib_mlx5.h src/uct/ib/mlx5/ib_mlx5.h
|
||||
index 3183ea460a8a..3ec48b7197d8 100644
|
||||
--- src/uct/ib/mlx5/ib_mlx5.h
|
||||
+++ src/uct/ib/mlx5/ib_mlx5.h
|
||||
@@ -1,6 +1,7 @@
|
||||
/**
|
||||
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2014. ALL RIGHTS RESERVED.
|
||||
* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
|
||||
+* Copyright (c) Google, LLC, 2024. ALL RIGHTS RESERVED.
|
||||
*
|
||||
* See file LICENSE for terms.
|
||||
*/
|
||||
@@ -66,6 +67,9 @@
|
||||
#define UCT_IB_MLX5_ATOMIC_MODE_EXT 3
|
||||
#define UCT_IB_MLX5_CQE_FLAG_L3_IN_DATA UCS_BIT(28) /* GRH/IP in the receive buffer */
|
||||
#define UCT_IB_MLX5_CQE_FLAG_L3_IN_CQE UCS_BIT(29) /* GRH/IP in the CQE */
|
||||
+/* Bits 24-26 of flags_rqpn indicate the packet type */
|
||||
+#define UCT_IB_MLX5_RQPN_ROCE_FLAG_IPV6 UCS_BIT(24)
|
||||
+#define UCT_IB_MLX5_RQPN_ROCE_FLAG_IPV4 UCS_BIT(25)
|
||||
#define UCT_IB_MLX5_CQE_FORMAT_MASK 0xc
|
||||
#define UCT_IB_MLX5_MINICQE_ARR_MAX_SIZE 7
|
||||
#define UCT_IB_MLX5_MP_RQ_BYTE_CNT_MASK 0x0000FFFF /* Byte count mask for multi-packet RQs */
|
||||
diff --git src/uct/ib/mlx5/ib_mlx5.inl src/uct/ib/mlx5/ib_mlx5.inl
|
||||
index 6602143c8bf5..2aa58455d5cd 100644
|
||||
--- src/uct/ib/mlx5/ib_mlx5.inl
|
||||
+++ src/uct/ib/mlx5/ib_mlx5.inl
|
||||
@@ -1,5 +1,6 @@
|
||||
/**
|
||||
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2016. ALL RIGHTS RESERVED.
|
||||
+ * Copyright (c) Google, LLC, 2024. ALL RIGHTS RESERVED.
|
||||
*
|
||||
* See file LICENSE for terms.
|
||||
*/
|
||||
@@ -88,6 +89,35 @@ uct_ib_mlx5_cqe_is_grh_present(struct mlx5_cqe64* cqe)
|
||||
UCT_IB_MLX5_CQE_FLAG_L3_IN_CQE);
|
||||
}
|
||||
|
||||
+static UCS_F_ALWAYS_INLINE size_t
|
||||
+uct_ib_mlx5_cqe_roce_gid_len(struct mlx5_cqe64* cqe)
|
||||
+{
|
||||
+ /*
|
||||
+ * Take the packet type from CQE, because:
|
||||
+ * 1. According to Annex17_RoCEv2 (A17.4.5.1):
|
||||
+ * For UD, the Completion Queue Entry (CQE) includes remote address
|
||||
+ * information (InfiniBand Specification Vol. 1 Rev 1.2.1 Section 11.4.2.1).
|
||||
+ * For RoCEv2, the remote address information comprises the source L2
|
||||
+ * Address and a flag that indicates if the received frame is an IPv4,
|
||||
+ * IPv6 or RoCE packet.
|
||||
+ *
|
||||
+ * 2. According to PRM, for responder UD/DC over RoCE sl represents RoCE
|
||||
+ * packet type as:
|
||||
+ * bit 3 : when set R-RoCE frame contains an UDP header otherwise not
|
||||
+ * Bits[2:0]: L3_Header_Type, as defined below
|
||||
+ * - 0x0 : GRH - (RoCE v1.0)
|
||||
+ * - 0x1 : IPv6 - (RoCE v1.5/v2.0)
|
||||
+ * - 0x2 : IPv4 - (RoCE v1.5/v2.0)
|
||||
+ *
|
||||
+ * The service level is the most significant byte of cqe->flags_rqpn.
|
||||
+ *
|
||||
+ * Alternatively, this could be detected by examining the packet contents
|
||||
+ * as is done for non-mlx5 transports.
|
||||
+ */
|
||||
+ return (cqe->flags_rqpn & htonl(UCT_IB_MLX5_RQPN_ROCE_FLAG_IPV4)) ?
|
||||
+ UCS_IPV4_ADDR_LEN : UCS_IPV6_ADDR_LEN;
|
||||
+}
|
||||
+
|
||||
static UCS_F_ALWAYS_INLINE void*
|
||||
uct_ib_mlx5_gid_from_cqe(struct mlx5_cqe64* cqe)
|
||||
{
|
||||
diff --git src/uct/ib/mlx5/ud/ud_mlx5.c src/uct/ib/mlx5/ud/ud_mlx5.c
|
||||
index 58f4ae6446a3..27a96b1b615b 100644
|
||||
--- src/uct/ib/mlx5/ud/ud_mlx5.c
|
||||
+++ src/uct/ib/mlx5/ud/ud_mlx5.c
|
||||
@@ -2,6 +2,7 @@
|
||||
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2019. ALL RIGHTS RESERVED.
|
||||
* Copyright (C) ARM Ltd. 2017. ALL RIGHTS RESERVED.
|
||||
* Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED.
|
||||
+* Copyright (c) Google, LLC, 2024. ALL RIGHTS RESERVED.
|
||||
*
|
||||
* See file LICENSE for terms.
|
||||
*/
|
||||
@@ -521,7 +522,7 @@ uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface, int is_async)
|
||||
|
||||
if (!uct_ud_iface_check_grh(&iface->super, packet,
|
||||
uct_ib_mlx5_cqe_is_grh_present(cqe),
|
||||
- cqe->flags_rqpn & 0xFF)) {
|
||||
+ uct_ib_mlx5_cqe_roce_gid_len(cqe))) {
|
||||
ucs_mpool_put_inline(desc);
|
||||
goto out_polled;
|
||||
}
|
||||
diff --git src/uct/ib/ud/base/ud_iface.h src/uct/ib/ud/base/ud_iface.h
|
||||
index 1efecd291d98..89fa7e3810fc 100644
|
||||
--- src/uct/ib/ud/base/ud_iface.h
|
||||
+++ src/uct/ib/ud/base/ud_iface.h
|
||||
@@ -1,5 +1,6 @@
|
||||
/**
|
||||
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2020. ALL RIGHTS RESERVED.
|
||||
+* Copyright (c) Google, LLC, 2024. ALL RIGHTS RESERVED.
|
||||
*
|
||||
* See file LICENSE for terms.
|
||||
*/
|
||||
@@ -395,10 +396,9 @@ static UCS_F_ALWAYS_INLINE void uct_ud_leave(uct_ud_iface_t *iface)
|
||||
|
||||
static UCS_F_ALWAYS_INLINE int
|
||||
uct_ud_iface_check_grh(uct_ud_iface_t *iface, void *packet, int is_grh_present,
|
||||
- uint8_t roce_pkt_type)
|
||||
+ size_t gid_len)
|
||||
{
|
||||
struct ibv_grh *grh = (struct ibv_grh *)packet;
|
||||
- size_t gid_len;
|
||||
union ibv_gid *gid;
|
||||
khiter_t khiter;
|
||||
char gid_str[128] UCS_V_UNUSED;
|
||||
@@ -412,25 +412,6 @@ uct_ud_iface_check_grh(uct_ud_iface_t *iface, void *packet, int is_grh_present,
|
||||
return 1;
|
||||
}
|
||||
|
||||
- /*
|
||||
- * Take the packet type from CQE, because:
|
||||
- * 1. According to Annex17_RoCEv2 (A17.4.5.1):
|
||||
- * For UD, the Completion Queue Entry (CQE) includes remote address
|
||||
- * information (InfiniBand Specification Vol. 1 Rev 1.2.1 Section 11.4.2.1).
|
||||
- * For RoCEv2, the remote address information comprises the source L2
|
||||
- * Address and a flag that indicates if the received frame is an IPv4,
|
||||
- * IPv6 or RoCE packet.
|
||||
- * 2. According to PRM, for responder UD/DC over RoCE sl represents RoCE
|
||||
- * packet type as:
|
||||
- * bit 3 : when set R-RoCE frame contains an UDP header otherwise not
|
||||
- * Bits[2:0]: L3_Header_Type, as defined below
|
||||
- * - 0x0 : GRH - (RoCE v1.0)
|
||||
- * - 0x1 : IPv6 - (RoCE v1.5/v2.0)
|
||||
- * - 0x2 : IPv4 - (RoCE v1.5/v2.0)
|
||||
- */
|
||||
- gid_len = ((roce_pkt_type & UCT_IB_CQE_SL_PKTYPE_MASK) == 0x2) ?
|
||||
- UCS_IPV4_ADDR_LEN : UCS_IPV6_ADDR_LEN;
|
||||
-
|
||||
if (ucs_likely((gid_len == iface->gid_table.last_len) &&
|
||||
uct_ud_gid_equal(&grh->dgid, &iface->gid_table.last,
|
||||
gid_len))) {
|
||||
diff --git src/uct/ib/ud/verbs/ud_verbs.c src/uct/ib/ud/verbs/ud_verbs.c
|
||||
index 989bdb59d08f..848dc4e5cd66 100644
|
||||
--- src/uct/ib/ud/verbs/ud_verbs.c
|
||||
+++ src/uct/ib/ud/verbs/ud_verbs.c
|
||||
@@ -1,5 +1,6 @@
|
||||
/**
|
||||
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2019. ALL RIGHTS RESERVED.
|
||||
+* Copyright (c) Google, LLC, 2024. ALL RIGHTS RESERVED.
|
||||
*
|
||||
* See file LICENSE for terms.
|
||||
*/
|
||||
@@ -393,6 +394,20 @@ uct_ud_verbs_iface_poll_tx(uct_ud_verbs_iface_t *iface, int is_async)
|
||||
return 1;
|
||||
}
|
||||
|
||||
+static UCS_F_ALWAYS_INLINE size_t uct_ud_verbs_iface_get_gid_len(void *packet)
|
||||
+{
|
||||
+ /* The GRH will contain either an IPv4 or IPv6 header. If the former is
|
||||
+ * present the header will start at offset 20 in the buffer otherwise it
|
||||
+ * will start at offset 0. Since the two headers are of fixed size (20 or
|
||||
+ * 40 bytes) this means we will either see 0x6? at offset 0 (IPv6) or 0x45
|
||||
+ * at offset 20. The detection is a little tricky for IPv6 given that the
|
||||
+ * first 20B are undefined for IPv4. To overcome this the first byte of
|
||||
+ * the posted receive buffer is set to 0xff.
|
||||
+ */
|
||||
+ return ((((uint8_t*)packet)[0] & 0xf0) == 0x60) ? UCS_IPV6_ADDR_LEN :
|
||||
+ UCS_IPV4_ADDR_LEN;
|
||||
+}
|
||||
+
|
||||
static UCS_F_ALWAYS_INLINE unsigned
|
||||
uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface, int is_async)
|
||||
{
|
||||
@@ -413,7 +428,8 @@ uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface, int is_async)
|
||||
|
||||
UCT_IB_IFACE_VERBS_FOREACH_RXWQE(&iface->super.super, i, packet, wc, num_wcs) {
|
||||
if (!uct_ud_iface_check_grh(&iface->super, packet,
|
||||
- wc[i].wc_flags & IBV_WC_GRH, wc[i].sl)) {
|
||||
+ wc[i].wc_flags & IBV_WC_GRH,
|
||||
+ uct_ud_verbs_iface_get_gid_len(packet))) {
|
||||
ucs_mpool_put_inline((void*)wc[i].wr_id);
|
||||
continue;
|
||||
}
|
||||
@@ -696,7 +712,7 @@ uct_ud_verbs_iface_post_recv_always(uct_ud_verbs_iface_t *iface, int max)
|
||||
struct ibv_recv_wr *bad_wr;
|
||||
uct_ib_recv_wr_t *wrs;
|
||||
unsigned count;
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
wrs = ucs_alloca(sizeof *wrs * max);
|
||||
|
||||
@@ -706,6 +722,14 @@ uct_ud_verbs_iface_post_recv_always(uct_ud_verbs_iface_t *iface, int max)
|
||||
return;
|
||||
}
|
||||
|
||||
+ /* Set the first byte in the receive buffer grh to a known value not equal to
|
||||
+ * 0x6?. This should aid in the detection of IPv6 vs IPv4 because the first
|
||||
+ * byte is undefined in the latter and 0x6? in the former. It is unlikely
|
||||
+ * this byte is touched with IPv4. */
|
||||
+ for (i = 0; i < count; ++i) {
|
||||
+ ((uint8_t*)wrs[i].sg.addr)[0] = 0xff;
|
||||
+ }
|
||||
+
|
||||
ret = ibv_post_recv(iface->super.qp, &wrs[0].ibwr, &bad_wr);
|
||||
if (ret != 0) {
|
||||
ucs_fatal("ibv_post_recv() returned %d: %m", ret);
|
||||
23
UCT-SELF-Fix-config-type-for-num_devices.patch
Normal file
23
UCT-SELF-Fix-config-type-for-num_devices.patch
Normal file
@@ -0,0 +1,23 @@
|
||||
commit 9655ec674b1d6278a80705eeb1e5bf0a36d7a211
|
||||
Author: Nicolas Morey <nmorey@suse.com>
|
||||
Date: Fri Nov 7 17:51:31 2025 +0100
|
||||
|
||||
UCT/SELF: Fix config type for num_devices
|
||||
|
||||
size_t may be larger than an int. This causes issues on big-endian systems
|
||||
|
||||
Signed-off-by: Nicolas Morey <nmorey@suse.com>
|
||||
|
||||
diff --git src/uct/sm/self/self.c src/uct/sm/self/self.c
|
||||
index 6e7815c21dfa..1986e9cde290 100644
|
||||
--- src/uct/sm/self/self.c
|
||||
+++ src/uct/sm/self/self.c
|
||||
@@ -57,7 +57,7 @@ static ucs_config_field_t uct_self_md_config_table[] = {
|
||||
UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},
|
||||
|
||||
{"NUM_DEVICES", "1", "Number of \"self\" devices to create",
|
||||
- ucs_offsetof(uct_self_md_config_t, num_devices), UCS_CONFIG_TYPE_INT},
|
||||
+ ucs_offsetof(uct_self_md_config_t, num_devices), UCS_CONFIG_TYPE_ULONG},
|
||||
|
||||
{NULL}
|
||||
};
|
||||
@@ -1,4 +1,4 @@
|
||||
commit ba1d7048df80ee535e01335992f70568e2f88c80
|
||||
commit e5fd9ff24191cfd99b5759bdaf291cc36aaa6346
|
||||
Author: Nicolas Morey <nmorey@suse.com>
|
||||
Date: Wed Feb 19 16:46:33 2025 +0100
|
||||
|
||||
@@ -91,7 +91,7 @@ index 000000000000..2beb5de54fab
|
||||
+
|
||||
+#endif
|
||||
diff --git src/ucs/Makefile.am src/ucs/Makefile.am
|
||||
index 86a469a60bcc..6751bad764b8 100644
|
||||
index 699a4addcd29..2f20f9945411 100644
|
||||
--- src/ucs/Makefile.am
|
||||
+++ src/ucs/Makefile.am
|
||||
@@ -24,6 +24,7 @@ nobase_dist_libucs_la_HEADERS = \
|
||||
@@ -118,7 +118,7 @@ index 86a469a60bcc..6751bad764b8 100644
|
||||
arch/x86_64/cpu.h \
|
||||
arch/cpu.h \
|
||||
config/ucm_opts.h \
|
||||
@@ -149,6 +152,7 @@ libucs_la_SOURCES = \
|
||||
@@ -150,6 +153,7 @@ libucs_la_SOURCES = \
|
||||
algorithm/string_distance.c \
|
||||
arch/aarch64/cpu.c \
|
||||
arch/aarch64/global_opts.c \
|
||||
@@ -140,7 +140,7 @@ index 849647902fab..a328c37e2020 100644
|
||||
# error "Unsupported architecture"
|
||||
#endif
|
||||
diff --git src/ucs/arch/bitops.h src/ucs/arch/bitops.h
|
||||
index f8e51c45888a..476631d95eb6 100644
|
||||
index ae531834451e..d4228b135641 100644
|
||||
--- src/ucs/arch/bitops.h
|
||||
+++ src/ucs/arch/bitops.h
|
||||
@@ -23,6 +23,8 @@ BEGIN_C_DECLS
|
||||
@@ -235,7 +235,7 @@ index 550d22b8b751..d8e4a7cca694 100644
|
||||
#endif
|
||||
diff --git src/ucs/arch/s390x/bitops.h src/ucs/arch/s390x/bitops.h
|
||||
new file mode 100644
|
||||
index 000000000000..ce48ff1ff451
|
||||
index 000000000000..88b74558f333
|
||||
--- /dev/null
|
||||
+++ src/ucs/arch/s390x/bitops.h
|
||||
@@ -0,0 +1,37 @@
|
||||
@@ -262,7 +262,7 @@ index 000000000000..ce48ff1ff451
|
||||
+{
|
||||
+ if (!n)
|
||||
+ return 0;
|
||||
+ return 63 - __builtin_clz(n);
|
||||
+ return 63 - __builtin_clzll(n);
|
||||
+}
|
||||
+
|
||||
+static UCS_F_ALWAYS_INLINE unsigned ucs_ffs32(uint32_t n)
|
||||
@@ -400,7 +400,7 @@ index 000000000000..4fa0c74034a7
|
||||
+#endif
|
||||
diff --git src/ucs/arch/s390x/global_opts.h src/ucs/arch/s390x/global_opts.h
|
||||
new file mode 100644
|
||||
index 000000000000..225e4e5e896a
|
||||
index 000000000000..b7c5693266d9
|
||||
--- /dev/null
|
||||
+++ src/ucs/arch/s390x/global_opts.h
|
||||
@@ -0,0 +1,25 @@
|
||||
@@ -411,8 +411,8 @@ index 000000000000..225e4e5e896a
|
||||
+*/
|
||||
+
|
||||
+
|
||||
+#ifndef UCS_PPC64_GLOBAL_OPTS_H_
|
||||
+#define UCS_PPC64_GLOBAL_OPTS_H_
|
||||
+#ifndef UCS_S390X_GLOBAL_OPTS_H_
|
||||
+#define UCS_S390X_GLOBAL_OPTS_H_
|
||||
+
|
||||
+#include <ucs/sys/compiler_def.h>
|
||||
+
|
||||
@@ -430,10 +430,10 @@ index 000000000000..225e4e5e896a
|
||||
+#endif
|
||||
+
|
||||
diff --git src/ucs/sys/sys.c src/ucs/sys/sys.c
|
||||
index d0b5effe11a3..ce22a2097f18 100644
|
||||
index 7cd875e8f7b2..b8b2d3c026be 100644
|
||||
--- src/ucs/sys/sys.c
|
||||
+++ src/ucs/sys/sys.c
|
||||
@@ -1258,8 +1258,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
||||
@@ -1265,8 +1265,19 @@ void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length)
|
||||
if (old_ptr == NULL) {
|
||||
/* Note: Must pass the 0 offset as "long", otherwise it will be
|
||||
* partially undefined when converted to syscall arguments */
|
||||
|
||||
101
openucx.changes
101
openucx.changes
@@ -1,3 +1,104 @@
|
||||
-------------------------------------------------------------------
|
||||
Fri Jan 02 14:32:48 UTC 2026 - Nicolas Morey <nmorey@suse.com>
|
||||
|
||||
- Update to v1.19.1
|
||||
- Features
|
||||
- UCP
|
||||
- Do not require transport memory support if rendezvous protocol is not used
|
||||
- Build
|
||||
- Added CUDA 13 support to the release pipeline
|
||||
- Added Rocky OS support to the release pipeline
|
||||
- Bugfixes
|
||||
- UCS
|
||||
- Fixed Netlink fetch mechanism
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Nov 5 16:48:53 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Minor fixes to openucx-s390x-support.patch
|
||||
- Add patches to fix a badly initialized value in settings
|
||||
- UCP-CORE-Fix-config-type-for-dynamic_tl_progress_factor.patch
|
||||
- UCT-SELF-Fix-config-type-for-num_devices.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Jun 25 15:49:50 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Update to ucx 1.19.0
|
||||
- UCP
|
||||
- Enabled multi-GPU support within a single process
|
||||
- Added dynamic selection between strong and weak fences in RMA flush operations
|
||||
- Improved endpoint reconfiguration capabilities
|
||||
- Added All2All lane selection for multi-NIC-GPU systems
|
||||
- Improved rkey debug info when config cache limit is reached
|
||||
- Improved UCP protocol selection based on available memory types
|
||||
- Removed dummy memory key from irrelevant transports (TCP, CMA and CUDA)
|
||||
- Improved RNDV performance with device-local staging buffers
|
||||
- Enabled error handling for RMA get_offload protocols
|
||||
- Made UCX_TLS=^ib disable all transports including auxiliary
|
||||
- Fixed send request status handling
|
||||
- Fixed performance degradation in RNDV by optimizing md cache updates
|
||||
- Fixed protocol selection when first lane is filtered out by fragment size
|
||||
- Fixed rkey selection by using memory registration flag
|
||||
- UCT
|
||||
- Defined uct_rkey_unpack_v2 API to support passing sys-dev
|
||||
- RDMA CORE (IB, ROCE, etc.)
|
||||
- Added SRD transport support in EFA with reordering, AM, and control operations
|
||||
- Removed XGVMI BF2 support (umem)
|
||||
- Removed device memory indirect key
|
||||
- Fixed VFS objects for DCIs and pools
|
||||
- Added routing table cache to the reachability check
|
||||
- Fixed strict order usage in IB auxiliary rkeys
|
||||
- Improved various init logging messages
|
||||
- Improved reliability of DC transport by adding DCI validation and separating connection logic
|
||||
- Fixed segfault in DC fence operation
|
||||
- UCS
|
||||
- Removed compilation warnings
|
||||
- Use UCS function for counting leading zeros on x86 architecture
|
||||
- Fixed a compilation warning
|
||||
- Shared Memory
|
||||
- Fixed FIFO availability check for sm transport
|
||||
- Tools
|
||||
- Added name filter option (-F 'str') to ucx_info for config and feature dumps
|
||||
- Improved ucx_info input validation
|
||||
- Documentation
|
||||
- Fixed open-mpi clone instruction
|
||||
- Build
|
||||
- Fixed enum-int-mismatch warnings with GCC 15
|
||||
- Drop patches merged upstream:
|
||||
- UCT-IB-UD-Use-GRH-to-detect-address-family-on-non-Mellanox-hardware.patch
|
||||
- openucx-extern-c.patch
|
||||
- openucx-strict-headers-additional.patch
|
||||
- openucx-strict-headers.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Jun 12 08:28:59 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Update to ucx 1.18.1
|
||||
- CUDA
|
||||
- Added config keys to update cuda_copy bandwidth for coherent platforms
|
||||
- Improved cache invalidation of memory allocated using CUDA memory pool
|
||||
- AZP
|
||||
- Added Ubuntu 24.04 to build and release pipeline
|
||||
- UCP
|
||||
- Fixed assertion failure when maximum lane fragment is smaller than AM header
|
||||
- Fixed potential active message user header use after free with protocol reconfiguration
|
||||
- CUDA
|
||||
- Fixed registration of CUDA Fabric memory allocated by UCT
|
||||
- Fixed VA recycling check of memory allocated using VMM and CUDA memory pool
|
||||
- RDMA CORE (IB, ROCE, etc.)
|
||||
- Do not use ConnectX-8 SMI subdevices for communication
|
||||
- Fixed remote access error by disabling ODP when the device supports DDP
|
||||
- Fixed configuration logic by disabling DDP when AR is disabled
|
||||
- UCM
|
||||
- Fixed crash with bistro hooks for CUDA 12.9 on amd64
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed May 2 14:16:35 UTC 2025 - Friedrich Haubensak <hsk17@mail.de>
|
||||
|
||||
- Add openucx-strict-headers.patch and openucx-extern-c.patch from
|
||||
upstream and additional openucx-strict-headers-additional.patch
|
||||
to build w/ gcc-15 (boo#1241939)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Tue Apr 1 12:31:11 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
%define version_suf %{nil}
|
||||
|
||||
Name: openucx
|
||||
Version: 1.18.0
|
||||
Version: 1.19.1
|
||||
Release: 0
|
||||
Summary: Communication layer for Message Passing (MPI)
|
||||
License: BSD-3-Clause
|
||||
@@ -30,9 +30,11 @@ URL: http://openucx.org/
|
||||
#Git-Clone: git://github.com/openucx/ucx
|
||||
#Git-Web: https://github.com/openucx/ucx
|
||||
Source: https://github.com/openucx/ucx/releases/download/v%version%{?version_suf}/ucx-%version.tar.gz
|
||||
Source100: README.md
|
||||
Patch1: openucx-s390x-support.patch
|
||||
Patch2: ucm-fix-UCX_MEM_MALLOC_RELOC.patch
|
||||
Patch3: UCT-IB-UD-Use-GRH-to-detect-address-family-on-non-Mellanox-hardware.patch
|
||||
Patch3: UCP-CORE-Fix-config-type-for-dynamic_tl_progress_factor.patch
|
||||
Patch4: UCT-SELF-Fix-config-type-for-num_devices.patch
|
||||
BuildRequires: autoconf >= 2.63
|
||||
BuildRequires: automake >= 1.10
|
||||
BuildRequires: binutils-devel
|
||||
@@ -243,6 +245,7 @@ mv %buildroot/%_bindir/io_demo %buildroot/%_libexecdir/%{name}/
|
||||
%_libdir/pkgconfig/ucx-uct.pc
|
||||
%_libdir/pkgconfig/ucx-cma.pc
|
||||
%_libdir/pkgconfig/ucx-ib.pc
|
||||
%_libdir/pkgconfig/ucx-ib-efa.pc
|
||||
%_libdir/pkgconfig/ucx-ib-mlx5.pc
|
||||
%_libdir/pkgconfig/ucx-rdmacm.pc
|
||||
|
||||
|
||||
BIN
ucx-1.18.0.tar.gz
LFS
BIN
ucx-1.18.0.tar.gz
LFS
Binary file not shown.
BIN
ucx-1.19.1.tar.gz
LFS
Normal file
BIN
ucx-1.19.1.tar.gz
LFS
Normal file
Binary file not shown.
Reference in New Issue
Block a user