Accepting request 558744 from home:NMoreyChaisemartin:branches:science:HPC
- Update to v1.5.3 - Core - Handle malloc failures - Ensure global lock is initialized on Windows - Fix spelling and formatting errors in man pages - PSM - Fix print format mismatches - Remove 15 second startup delay when no hardware is installed - Preserve FI_MR_SCALABLE mode bit for backwards compatibility - PSM2 - Fix print format mismatches - Allow all to all communication between scalable endpoints - Preserve FI_MR_SCALABLE mode bit for backwards compatibility - Fix reference counting issue with opened domains - Fix segfault for RMA/atomic operations to local scalable endpoints - Fix resource counting related issues for Tx/Rx contexts - Allow completion suppression when fi_context is non-NULL - Use correct queue for triggered operations with scalable endpoints - Sockets - Fix check for invalid connection handle - Fix crash in fi_av_remove - Util - Fix number of bits used for connection index - Verbs - Fix incorrect CQ entry data for MSG endpoints - Properly check for errors from getifaddrs - Retry getifaddrs on failure because of busy netlink sockets - Ack CM events on error paths - Remove 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch as it was merged upstream OBS-URL: https://build.opensuse.org/request/show/558744 OBS-URL: https://build.opensuse.org/package/show/science:HPC/libfabric?expand=0&rev=28
This commit is contained in:
parent
ad87934964
commit
c053ee0122
@ -1,86 +0,0 @@
|
|||||||
From 6c8c40ad84fa790831407e5cd25375af898d929b Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jianxin Xiong <jianxin.xiong@intel.com>
|
|
||||||
Date: Mon, 9 Oct 2017 15:10:45 -0700
|
|
||||||
Subject: [PATCH] prov/psm: Eliminate psm2-compat library delay with hfi
|
|
||||||
devices missing
|
|
||||||
|
|
||||||
The PSM2 library may introduce a 15 second delay at device initialization
|
|
||||||
time if the hfi devices are missing. This has been handled in the psm2
|
|
||||||
provider by checking the existence of the device files before initializing
|
|
||||||
the device.
|
|
||||||
|
|
||||||
The psm provider didn't handle this situation because the issue doesn't
|
|
||||||
exist with native PSM library over TrueScale. However, when PSM is
|
|
||||||
supported via the psm2-compat library over PSM2, the same delay can be
|
|
||||||
observed.
|
|
||||||
|
|
||||||
Now add the same mechanism to the psm provider.
|
|
||||||
|
|
||||||
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
|
|
||||||
---
|
|
||||||
prov/psm/src/psmx_init.c | 34 ++++++++++++++++++++++++++++++++++
|
|
||||||
1 file changed, 34 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/prov/psm/src/psmx_init.c b/prov/psm/src/psmx_init.c
|
|
||||||
index 118ef1a81..c4b06160c 100644
|
|
||||||
--- a/prov/psm/src/psmx_init.c
|
|
||||||
+++ b/prov/psm/src/psmx_init.c
|
|
||||||
@@ -32,10 +32,12 @@
|
|
||||||
|
|
||||||
#include "psmx.h"
|
|
||||||
#include "prov.h"
|
|
||||||
+#include <glob.h>
|
|
||||||
|
|
||||||
static int psmx_init_count = 0;
|
|
||||||
static int psmx_lib_initialized = 0;
|
|
||||||
static pthread_mutex_t psmx_lib_mutex;
|
|
||||||
+static int psmx_compat_lib = 0;
|
|
||||||
|
|
||||||
struct psmx_env psmx_env = {
|
|
||||||
.name_server = 1,
|
|
||||||
@@ -103,6 +105,12 @@ static int psmx_init_lib(void)
|
|
||||||
PSM_VERNO_MAJOR, PSM_VERNO_MINOR, major, minor);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ if (major > 1) {
|
|
||||||
+ psmx_compat_lib = 1;
|
|
||||||
+ FI_INFO(&psmx_prov, FI_LOG_CORE,
|
|
||||||
+ "PSM is supported via the psm2-compat library over PSM2.\n");
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
psmx_lib_initialized = 1;
|
|
||||||
|
|
||||||
out:
|
|
||||||
@@ -197,6 +205,32 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service,
|
|
||||||
if (psmx_init_lib())
|
|
||||||
return -FI_ENODATA;
|
|
||||||
|
|
||||||
+ if (psmx_compat_lib) {
|
|
||||||
+ /*
|
|
||||||
+ * native PSM running over TrueScale doesn't have the issue handled
|
|
||||||
+ * here. it's only present when PSM is supported via the psm2-compat
|
|
||||||
+ * library, where the PSM functions are just wrappers around the PSM2
|
|
||||||
+ * counterparts.
|
|
||||||
+ *
|
|
||||||
+ * psm2_ep_num_devunits() may wait for 15 seconds before return
|
|
||||||
+ * when /dev/hfi1_0 is not present. Check the existence of any hfi1
|
|
||||||
+ * device interface first to avoid this delay. Note that the devices
|
|
||||||
+ * don't necessarily appear consecutively so we need to check all
|
|
||||||
+ * possible device names before returning "no device found" error.
|
|
||||||
+ * This also means if "/dev/hfi1_0" doesn't exist but other devices
|
|
||||||
+ * exist, we are still going to see the delay; but that's a rare case.
|
|
||||||
+ */
|
|
||||||
+ glob_t glob_buf;
|
|
||||||
+
|
|
||||||
+ if ((glob("/dev/hfi1_[0-9]", 0, NULL, &glob_buf) != 0) &&
|
|
||||||
+ (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &glob_buf) != 0)) {
|
|
||||||
+ FI_INFO(&psmx_prov, FI_LOG_CORE,
|
|
||||||
+ "no hfi1 device is found.\n");
|
|
||||||
+ return -FI_ENODATA;
|
|
||||||
+ }
|
|
||||||
+ globfree(&glob_buf);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (psm_ep_num_devunits(&cnt) || !cnt) {
|
|
||||||
FI_INFO(&psmx_prov, FI_LOG_CORE,
|
|
||||||
"no PSM device is found.\n");
|
|
2
_service
2
_service
@ -8,7 +8,7 @@
|
|||||||
<param name="versionformat">@PARENT_TAG@.@TAG_OFFSET@.%h</param>
|
<param name="versionformat">@PARENT_TAG@.@TAG_OFFSET@.%h</param>
|
||||||
<param name="versionrewrite-pattern">v(.*)</param>
|
<param name="versionrewrite-pattern">v(.*)</param>
|
||||||
<param name="versionrewrite-replacement">\1</param>
|
<param name="versionrewrite-replacement">\1</param>
|
||||||
<param name="revision">480a6db351fbe8ee38077902c8df875e3cd13205</param>
|
<param name="revision">09aaaf78c239df7fb57f19de44cc96b25701bcb0</param>
|
||||||
</service>
|
</service>
|
||||||
<service name="recompress" mode="disabled">
|
<service name="recompress" mode="disabled">
|
||||||
<param name="file">libfabric*.tar</param>
|
<param name="file">libfabric*.tar</param>
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:c2ebdb6376c35b4c88935648843e633bb0a2549eed96d8e610d4ce7ea39d2754
|
|
||||||
size 1068927
|
|
3
libfabric-1.5.3.0.09aaaf78c239.tar.bz2
Normal file
3
libfabric-1.5.3.0.09aaaf78c239.tar.bz2
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:eeca796583059a9d3454648b585a98e790ea9c16f8ebe75872e824e410eb8010
|
||||||
|
size 1072867
|
@ -1,3 +1,37 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Dec 20 08:49:03 UTC 2017 - nmoreychaisemartin@suse.com
|
||||||
|
|
||||||
|
- Update to v1.5.3
|
||||||
|
- Core
|
||||||
|
- Handle malloc failures
|
||||||
|
- Ensure global lock is initialized on Windows
|
||||||
|
- Fix spelling and formatting errors in man pages
|
||||||
|
- PSM
|
||||||
|
- Fix print format mismatches
|
||||||
|
- Remove 15 second startup delay when no hardware is installed
|
||||||
|
- Preserve FI_MR_SCALABLE mode bit for backwards compatibility
|
||||||
|
- PSM2
|
||||||
|
- Fix print format mismatches
|
||||||
|
- Allow all to all communication between scalable endpoints
|
||||||
|
- Preserve FI_MR_SCALABLE mode bit for backwards compatibility
|
||||||
|
- Fix reference counting issue with opened domains
|
||||||
|
- Fix segfault for RMA/atomic operations to local scalable endpoints
|
||||||
|
- Fix resource counting related issues for Tx/Rx contexts
|
||||||
|
- Allow completion suppression when fi_context is non-NULL
|
||||||
|
- Use correct queue for triggered operations with scalable endpoints
|
||||||
|
- Sockets
|
||||||
|
- Fix check for invalid connection handle
|
||||||
|
- Fix crash in fi_av_remove
|
||||||
|
- Util
|
||||||
|
- Fix number of bits used for connection index
|
||||||
|
- Verbs
|
||||||
|
- Fix incorrect CQ entry data for MSG endpoints
|
||||||
|
- Properly check for errors from getifaddrs
|
||||||
|
- Retry getifaddrs on failure because of busy netlink sockets
|
||||||
|
- Ack CM events on error paths
|
||||||
|
- Remove 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch
|
||||||
|
as it was merged upstream
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Mon Nov 20 16:27:13 UTC 2017 - nmoreychaisemartin@suse.com
|
Mon Nov 20 16:27:13 UTC 2017 - nmoreychaisemartin@suse.com
|
||||||
|
|
||||||
|
@ -17,18 +17,17 @@
|
|||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
%define git_ver .0.480a6db3
|
%define git_ver .0.09aaaf78c239
|
||||||
|
|
||||||
Name: libfabric
|
Name: libfabric
|
||||||
Summary: User-space RDMA Fabric Interfaces
|
Summary: User-space RDMA Fabric Interfaces
|
||||||
License: GPL-2.0 or BSD-2-Clause
|
License: GPL-2.0 or BSD-2-Clause
|
||||||
Group: Development/Libraries/C and C++
|
Group: Development/Libraries/C and C++
|
||||||
Version: 1.5.2
|
Version: 1.5.3
|
||||||
Release: 0
|
Release: 0
|
||||||
Source: %{name}-%{version}%{git_ver}.tar.bz2
|
Source: %{name}-%{version}%{git_ver}.tar.bz2
|
||||||
Source1: baselibs.conf
|
Source1: baselibs.conf
|
||||||
Patch0: libfabric-libtool.patch
|
Patch0: libfabric-libtool.patch
|
||||||
Patch1: 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch
|
|
||||||
Url: http://www.github.com/ofiwg/libfabric
|
Url: http://www.github.com/ofiwg/libfabric
|
||||||
Prefix: ${_prefix}
|
Prefix: ${_prefix}
|
||||||
BuildRequires: autoconf
|
BuildRequires: autoconf
|
||||||
@ -72,7 +71,6 @@ services, such as RDMA. This package contains the development files.
|
|||||||
%prep
|
%prep
|
||||||
%setup -q -n %{name}-%{version}%{git_ver}
|
%setup -q -n %{name}-%{version}%{git_ver}
|
||||||
%patch0 -p1
|
%patch0 -p1
|
||||||
%patch1 -p1
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
rm -f config/libtool.m4
|
rm -f config/libtool.m4
|
||||||
|
Loading…
x
Reference in New Issue
Block a user