Accepting request 538233 from home:StefanBruens:branches:science:HPC
- Fix github issue #3393: Add 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch
OBS-URL: https://build.opensuse.org/request/show/538233
OBS-URL: https://build.opensuse.org/package/show/science:HPC/libfabric?expand=0&rev=24
This commit is contained in:
parent
9d33a72309
commit
779aa64bf6
@ -0,0 +1,86 @@
|
||||
From 6c8c40ad84fa790831407e5cd25375af898d929b Mon Sep 17 00:00:00 2001
|
||||
From: Jianxin Xiong <jianxin.xiong@intel.com>
|
||||
Date: Mon, 9 Oct 2017 15:10:45 -0700
|
||||
Subject: [PATCH] prov/psm: Eliminate psm2-compat library delay with hfi
|
||||
devices missing
|
||||
|
||||
The PSM2 library may introduce a 15 second delay at device initialization
|
||||
time if the hfi devices are missing. This has been handled in the psm2
|
||||
provider by checking the existence of the device files before initializing
|
||||
the device.
|
||||
|
||||
The psm provider didn't handle this situation because the issue doesn't
|
||||
exist with native PSM library over TrueScale. However, when PSM is
|
||||
supported via the psm2-compat library over PSM2, the same delay can be
|
||||
observed.
|
||||
|
||||
Now add the same mechanism to the psm provider.
|
||||
|
||||
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
|
||||
---
|
||||
prov/psm/src/psmx_init.c | 34 ++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 34 insertions(+)
|
||||
|
||||
diff --git a/prov/psm/src/psmx_init.c b/prov/psm/src/psmx_init.c
|
||||
index 118ef1a81..c4b06160c 100644
|
||||
--- a/prov/psm/src/psmx_init.c
|
||||
+++ b/prov/psm/src/psmx_init.c
|
||||
@@ -32,10 +32,12 @@
|
||||
|
||||
#include "psmx.h"
|
||||
#include "prov.h"
|
||||
+#include <glob.h>
|
||||
|
||||
static int psmx_init_count = 0;
|
||||
static int psmx_lib_initialized = 0;
|
||||
static pthread_mutex_t psmx_lib_mutex;
|
||||
+static int psmx_compat_lib = 0;
|
||||
|
||||
struct psmx_env psmx_env = {
|
||||
.name_server = 1,
|
||||
@@ -103,6 +105,12 @@ static int psmx_init_lib(void)
|
||||
PSM_VERNO_MAJOR, PSM_VERNO_MINOR, major, minor);
|
||||
}
|
||||
|
||||
+ if (major > 1) {
|
||||
+ psmx_compat_lib = 1;
|
||||
+ FI_INFO(&psmx_prov, FI_LOG_CORE,
|
||||
+ "PSM is supported via the psm2-compat library over PSM2.\n");
|
||||
+ }
|
||||
+
|
||||
psmx_lib_initialized = 1;
|
||||
|
||||
out:
|
||||
@@ -197,6 +205,32 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service,
|
||||
if (psmx_init_lib())
|
||||
return -FI_ENODATA;
|
||||
|
||||
+ if (psmx_compat_lib) {
|
||||
+ /*
|
||||
+ * Native PSM running over TrueScale doesn't have the issue handled
|
||||
+ * here. It's only present when PSM is supported via the psm2-compat
|
||||
+ * library, where the PSM functions are just wrappers around the PSM2
|
||||
+ * counterparts.
|
||||
+ *
|
||||
+ * psm2_ep_num_devunits() may wait for 15 seconds before returning
|
||||
+ * when /dev/hfi1_0 is not present. Check the existence of any hfi1
|
||||
+ * device interface first to avoid this delay. Note that the devices
|
||||
+ * don't necessarily appear consecutively so we need to check all
|
||||
+ * possible device names before returning "no device found" error.
|
||||
+ * This also means if "/dev/hfi1_0" doesn't exist but other devices
|
||||
+ * exist, we are still going to see the delay; but that's a rare case.
|
||||
+ */
|
||||
+ glob_t glob_buf;
|
||||
+
|
||||
+ if ((glob("/dev/hfi1_[0-9]", 0, NULL, &glob_buf) != 0) &&
|
||||
+ (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &glob_buf) != 0)) {
|
||||
+ FI_INFO(&psmx_prov, FI_LOG_CORE,
|
||||
+ "no hfi1 device is found.\n");
|
||||
+ return -FI_ENODATA;
|
||||
+ }
|
||||
+ globfree(&glob_buf);
|
||||
+ }
|
||||
+
|
||||
if (psm_ep_num_devunits(&cnt) || !cnt) {
|
||||
FI_INFO(&psmx_prov, FI_LOG_CORE,
|
||||
"no PSM device is found.\n");
|
@ -1,3 +1,9 @@
|
||||
-------------------------------------------------------------------
|
||||
Mon Oct 9 23:28:31 UTC 2017 - stefan.bruens@rwth-aachen.de
|
||||
|
||||
- Fix github issue #3393:
|
||||
Add 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Oct 5 07:10:28 UTC 2017 - nmoreychaisemartin@suse.com
|
||||
|
||||
|
@ -28,6 +28,7 @@ Release: 0
|
||||
Source: %{name}-%{version}%{git_ver}.tar.bz2
|
||||
Source1: baselibs.conf
|
||||
Patch0: libfabric-libtool.patch
|
||||
Patch1: 0001-prov-psm-Eliminate-psm2-compat-library-delay-with-hf.patch
|
||||
Url: http://www.github.com/ofiwg/libfabric
|
||||
Prefix: ${_prefix}
|
||||
BuildRequires: autoconf
|
||||
@ -71,6 +72,7 @@ services, such as RDMA. This package contains the development files.
|
||||
%prep
|
||||
%setup -q -n %{name}-%{version}%{git_ver}
|
||||
%patch0 -p1
|
||||
%patch1 -p1
|
||||
|
||||
%build
|
||||
rm -f config/libtool.m4
|
||||
|
Loading…
x
Reference in New Issue
Block a user