diff --git a/_service b/_service index b9a4e67..a4b190f 100644 --- a/_service +++ b/_service @@ -8,7 +8,7 @@ @PARENT_TAG@.@TAG_OFFSET@.%h v(.*) \1 - bf2450ea9afd7ec10c3f108927e2978e39823d62 + 9b91e2e5287160025f6fc0b555c8f0debfaf9b12 suse/rdma-core.spec diff --git a/rdma-core-16.0.bf2450ea.tar.gz b/rdma-core-16.0.bf2450ea.tar.gz deleted file mode 100644 index 3507f25..0000000 --- a/rdma-core-16.0.bf2450ea.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1826eba8aa2311202c72d3410fe7e7c07e434904737c680e3d9b40d0fc50a9a7 -size 942833 diff --git a/rdma-core-16.1.0.9b91e2e52871.tar.gz b/rdma-core-16.1.0.9b91e2e52871.tar.gz new file mode 100644 index 0000000..ee5cbd2 --- /dev/null +++ b/rdma-core-16.1.0.9b91e2e52871.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b83a503e07fefbdec6eefdde9e102b73f2fd34389838ff5e5e5e5c3069a13e8 +size 1059514 diff --git a/rdma-core.changes b/rdma-core.changes index 70c6df7..d439269 100644 --- a/rdma-core.changes +++ b/rdma-core.changes @@ -1,3 +1,16 @@ +------------------------------------------------------------------- +Thu Jan 4 11:41:20 UTC 2018 - nmoreychaisemartin@suse.com + +- Update to rdma-core v16.1: + * Backport fixes: + * srp_daemon: Don't create async_ev_thread if only run once + * srp_daemon: handle SM lid change + * srp_daemon: fix CQ handling +- Drop srp_daemon-Don-t-create-async_ev_thread-if-only-run-once.patch, + srp_daemon-fix-CQ-handling.patch, and + srp_daemon-handle-SM-lid-change.patch as they were merged upstream. + + ------------------------------------------------------------------- Wed Jan 3 09:17:10 UTC 2018 - nmoreychaisemartin@suse.com diff --git a/rdma-core.spec b/rdma-core.spec index 2832887..c1dba75 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -17,9 +17,9 @@ %bcond_without systemd -%define git_ver .0.bf2450ea +%define git_ver .0.9b91e2e52871 Name: rdma-core -Version: 16 +Version: 16.1 Release: 0 Summary: RDMA core userspace libraries and daemons License: GPL-2.0 or BSD-2-Clause @@ -50,9 +50,6 @@ Group: Productivity/Networking/Other Url: https://github.com/linux-rdma/rdma-core Source: rdma-core-%{version}%{git_ver}.tar.gz Source1: baselibs.conf -Patch1: srp_daemon-handle-SM-lid-change.patch -Patch2: srp_daemon-fix-CQ-handling.patch -Patch3: srp_daemon-Don-t-create-async_ev_thread-if-only-run-once.patch BuildRequires: binutils BuildRequires: cmake >= 2.8.11 BuildRequires: gcc @@ -328,9 +325,6 @@ on those changes. %prep %setup -q -n %{name}-%{version}%{git_ver} -%patch1 -%patch2 -%patch3 %build diff --git a/srp_daemon-Don-t-create-async_ev_thread-if-only-run-once.patch b/srp_daemon-Don-t-create-async_ev_thread-if-only-run-once.patch deleted file mode 100644 index dad81ef..0000000 --- a/srp_daemon-Don-t-create-async_ev_thread-if-only-run-once.patch +++ /dev/null @@ -1,40 +0,0 @@ -commit b1a51eeee28c14dbba332cf59a0e85a182374ed6 -Author: Honggang Li -Date: Wed Dec 20 03:09:58 2017 +0800 - - srp_daemon: Don't create async_ev_thread if only run once - - fd3005f0cd34 moves the signal handler setup from ibsrpdm path. So, - default signal handler will be used when the main pthread send signal - SIGINT to pthread async_ev_thread. ibsrpdm will exit with non-zero - exit code as default signal handler killed it. ibsrpdm should return - with exit code zero, if no error emerged. - - We should not create async_ev_thread for ibsrpdm. - - Fixes: fd3005f0cd34 ("srp_daemon: Move the setup of the wakeup_pipe after openlog") - Reviewed-by: Bart Van Assche - Signed-off-by: Honggang Li - -diff --git srp_daemon/srp_daemon.c srp_daemon/srp_daemon.c -index 36df5c3bfe79..a7e7807774c5 100644 ---- srp_daemon/srp_daemon.c -+++ srp_daemon/srp_daemon.c -@@ -1945,12 +1945,12 @@ static struct resources *alloc_res(void) - run_thread_get_trap_notices, &res->res); - if (ret) - goto err; -- } - -- ret = pthread_create(&res->res.async_ev_thread, NULL, -- run_thread_listen_to_events, &res->res); -- if (ret) -- goto err; -+ ret = pthread_create(&res->res.async_ev_thread, NULL, -+ run_thread_listen_to_events, &res->res); -+ if (ret) -+ goto err; -+ } - - if (config->retry_timeout && !config->once) { - ret = pthread_create(&res->res.reconnect_thread, NULL, diff --git a/srp_daemon-fix-CQ-handling.patch b/srp_daemon-fix-CQ-handling.patch deleted file mode 100644 index 79fef71..0000000 --- a/srp_daemon-fix-CQ-handling.patch +++ /dev/null @@ -1,105 +0,0 @@ -commit c1c584c34d249987d7c36ff061bc5f2eedec38fe -Author: Nicolas Morey-Chaisemartin -Date: Mon Dec 11 15:37:28 2017 +0100 - - srp_daemon: fix CQ handling - - SM traps are polled through poll_cq which waited for a CQ event - before polling the CQ itself. - However it may happens that multiple completions are attached - to a single event. As stated by the ibv_get_cq_event man page, - it is required to poll the the CQ to get those completions - after the call to ibv_req_notify_cq. - - As completions need to be handled one by one in an outer function, - start by polling the CQ and return the completion (if any) before - waiting for the next completion event. - This will allow emptying all pending completions, through multiple calls - to poll_cq, before waiting for a new event. - - The buggy use case seems to appear when the master SM is switched multiple - times between two nodes. As the number of ping-pong between the SMs increases, - the number of traps sent to notify that the SM just became master increases - too. This causes burst of completions linked to a single event. - Note that the race condition is also possible in other scenario. - - Signed-off-by: Nicolas Morey-Chaisemartin - Cc: stable@linux-rdma.org # v14, v15, v16 - -diff --git srp_daemon/srp_handle_traps.c srp_daemon/srp_handle_traps.c -index 25f2b9ab..77a47db3 100644 ---- srp_daemon/srp_handle_traps.c -+++ srp_daemon/srp_handle_traps.c -@@ -496,6 +496,34 @@ static int stop_threads(struct sync_resources *sync_res) - return result; - } - -+/***************************************************************************** -+* Function: poll_cq_once -+* Poll a CQ once. -+* Returns the number of completion polled (0 or 1). -+* Returns a negative value on error. -+*****************************************************************************/ -+static int poll_cq_once(struct sync_resources *sync_res, struct ibv_cq *cq, -+ struct ibv_wc *wc) -+{ -+ int ret; -+ -+ ret = ibv_poll_cq(cq, 1, wc); -+ if (ret < 0) { -+ pr_err("poll CQ failed\n"); -+ return ret; -+ } -+ -+ if (ret > 0 && wc->status != IBV_WC_SUCCESS) { -+ if (!stop_threads(sync_res)) -+ pr_err("got bad completion with status: 0x%x\n", -+ wc->status); -+ return -ret; -+ } -+ -+ return ret; -+} -+ -+ - static int poll_cq(struct sync_resources *sync_res, struct ibv_cq *cq, - struct ibv_wc *wc, struct ibv_comp_channel *channel) - { -@@ -504,6 +532,16 @@ static int poll_cq(struct sync_resources *sync_res, struct ibv_cq *cq, - void *ev_ctx; - - if (channel) { -+ /* There may be extra completions that -+ * were associated to the previous event. -+ * Only poll for the first one. If there are more than one, -+ * they will be handled by later call to poll_cq */ -+ ret = poll_cq_once(sync_res, cq, wc); -+ /* return directly if there was an error or -+ * 1 completion polled */ -+ if (ret) -+ return ret; -+ - if (ibv_get_cq_event(channel, &ev_cq, &ev_ctx)) { - pr_err("Failed to get cq_event\n"); - return -1; -@@ -524,18 +562,9 @@ static int poll_cq(struct sync_resources *sync_res, struct ibv_cq *cq, - } - - do { -- ret = ibv_poll_cq(cq, 1, wc); -- if (ret < 0) { -- pr_err("poll CQ failed\n"); -+ ret = poll_cq_once(sync_res, cq, wc); -+ if (ret < 0) - return ret; -- } -- -- if (ret > 0 && wc->status != IBV_WC_SUCCESS) { -- if (!stop_threads(sync_res)) -- pr_err("got bad completion with status: 0x%x\n", -- wc->status); -- return -ret; -- } - - if (ret == 0 && channel) { - pr_err("Weird poll returned no cqe after CQ event\n"); diff --git a/srp_daemon-handle-SM-lid-change.patch b/srp_daemon-handle-SM-lid-change.patch deleted file mode 100644 index 2313929..0000000 --- a/srp_daemon-handle-SM-lid-change.patch +++ /dev/null @@ -1,100 +0,0 @@ -commit 2fbc501061218e7df8e37bb2df6db73e00005e9b -Author: Nicolas Morey-Chaisemartin -Date: Mon Dec 4 15:15:55 2017 +0100 - - srp_daemon: handle SM lid change - - When srp_daemon was running and the master SM host changes, - srp_daemon output these errors at every scan: - srp_daemon[25394]: No response to inform info registration - srp_daemon[25394]: Fail to register to traps, maybe there is no opensm - running on fabric or IB port is down - - This was introduced by commit 4952e5f Fix a memory leak. - A side effect of this patch was that create_ah was only called when the - port lid changes. Which meant register_to_traps used an older, obsolete, - version of sm_lid and failed to connect to it. - - This patch fixes this behaviour by checking for both local lid changes and - SM lid changes, and calling create_ah on any of these events. - - Fixes: 4952e5f7 (Fix a memory leak) - Signed-off-by: Nicolas Morey-Chaisemartin - Cc: stable@linux-rdma.org # v14, v15, v16 - -diff --git srp_daemon/srp_daemon.c srp_daemon/srp_daemon.c -index 2465ccd9..36df5c3b 100644 ---- srp_daemon/srp_daemon.c -+++ srp_daemon/srp_daemon.c -@@ -1103,7 +1103,7 @@ static int get_shared_pkeys(struct resources *res, - int i, num_pkeys = 0; - uint16_t pkey; - uint16_t local_port_lid = get_port_lid(res->ud_res->ib_ctx, -- config->port_num); -+ config->port_num, NULL); - - in_mad_buf = malloc(sizeof(struct ib_user_mad) + - node_table_response_size); -@@ -2092,7 +2092,7 @@ int main(int argc, char *argv[]) - { - int ret; - struct resources *res; -- uint16_t lid; -+ uint16_t lid, sm_lid; - uint16_t pkey; - union umad_gid gid; - struct target_details *target; -@@ -2196,8 +2196,10 @@ catas_start: - - pr_debug("Starting a recalculation\n"); - port_lid = get_port_lid(res->ud_res->ib_ctx, -- config->port_num); -- if (port_lid != res->ud_res->port_attr.lid) { -+ config->port_num, &sm_lid); -+ if (port_lid != res->ud_res->port_attr.lid || -+ sm_lid != res->ud_res->port_attr.sm_lid) { -+ - if (res->ud_res->ah) { - ibv_destroy_ah(res->ud_res->ah); - res->ud_res->ah = NULL; -diff --git srp_daemon/srp_daemon.h srp_daemon/srp_daemon.h -index 5d268ed3..864b3d42 100644 ---- srp_daemon/srp_daemon.h -+++ srp_daemon/srp_daemon.h -@@ -299,7 +299,7 @@ void *run_thread_listen_to_events(void *res_in); - int get_node(struct umad_resources *umad_res, uint16_t dlid, uint64_t *guid); - int create_trap_resources(struct ud_resources *ud_res); - int register_to_traps(struct resources *res, int subscribe); --uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num); -+uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num, uint16_t *sm_lid); - int create_ah(struct ud_resources *ud_res); - void push_gid_to_list(struct sync_resources *res, union umad_gid *gid, - uint16_t pkey); -diff --git srp_daemon/srp_handle_traps.c srp_daemon/srp_handle_traps.c -index 6d94634e..25f2b9ab 100644 ---- srp_daemon/srp_handle_traps.c -+++ srp_daemon/srp_handle_traps.c -@@ -340,12 +340,20 @@ int ud_resources_create(struct ud_resources *res) - return 0; - } - --uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num) -+uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num, uint16_t *sm_lid) - { - struct ibv_port_attr port_attr; -+ int ret; -+ -+ ret = ibv_query_port(ib_ctx, port_num, &port_attr); - -- return ibv_query_port(ib_ctx, port_num, &port_attr) == 0 ? -- port_attr.lid : 0; -+ if (!ret) { -+ if (sm_lid) -+ *sm_lid = port_attr.sm_lid; -+ return port_attr.lid; -+ } -+ -+ return 0; - } - - int create_ah(struct ud_resources *ud_res)