open-iscsi/open-iscsi-login-retry-update

185 lines
6.4 KiB
Plaintext

diff --git a/etc/iscsid.conf b/etc/iscsid.conf
index f2691ee..4f6e08a 100644
--- a/etc/iscsid.conf
+++ b/etc/iscsid.conf
@@ -107,14 +107,31 @@ node.session.err_timeo.lu_reset_timeout = 20
# Retry
#******
-# To speficy the number of times iscsiadm should retry a login
-# to the target when we first login, modify the following line.
-# The default is 4. Valid values are any integer value. This only
+# To specify the number of times iscsid should retry a login
+# if the login attempt fails due to the node.conn[0].timeo.login_timeout
+# expiring, modify the following line. Note that if the login fails
+# quickly (before node.conn[0].timeo.login_timeout fires) because the network
+# layer or the target returns an error, iscsid may retry the login more than
+# node.session.initial_login_retry_max times.
+#
+# This retry count along with node.conn[0].timeo.login_timeout
+# determines the maximum amount of time iscsid will try to
+# establish the initial login. node.session.initial_login_retry_max is
+# multiplied by the node.conn[0].timeo.login_timeout to determine the
+# maximum amount of time.
+#
+# The default node.session.initial_login_retry_max is 8 and
+# node.conn[0].timeo.login_timeout is 15 so we have:
+#
+# node.conn[0].timeo.login_timeout * node.session.initial_login_retry_max =
+# 120 seconds
+#
+# Valid values are any integer value. This only
# affects the initial login. Setting it to a high value can slow
# down the iscsi service startup. Setting it to a low value can
# cause a session to not get logged into, if there are distuptions
# during startup or if the network is not ready at that time.
-node.session.initial_login_retry_max = 4
+node.session.initial_login_retry_max = 8
################################
# session and device queue depth
diff --git a/usr/initiator.c b/usr/initiator.c
index 2f29ffc..b85416e 100644
--- a/usr/initiator.c
+++ b/usr/initiator.c
@@ -24,6 +24,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
+#include <sys/time.h>
#include "initiator.h"
#include "transport.h"
@@ -773,13 +774,45 @@ session_conn_reopen(iscsi_conn_t *conn, queue_task_t *qtask, int do_stop)
__session_conn_reopen(conn, qtask, do_stop, 0);
}
+/*
+ * Decide whether a failed initial login should be retried: returns 1 while
+ * fewer than initial_login_retry_max * login_timeout seconds have passed
+ * since conn->initial_connect_time, 0 once that window is over. If the
+ * clock cannot be read, falls back to comparing the retry limit against
+ * session->reopen_cnt.
+ */
+static int iscsi_retry_initial_login(struct iscsi_conn *conn)
+{
+	int initial_login_retry_max =
+		conn->session->nrec.session.initial_login_retry_max;
+	struct timeval now, timeout, fail_time;
+
+	/* widen before multiplying so large settings cannot overflow int */
+	timeout.tv_sec = (time_t)initial_login_retry_max * conn->login_timeout;
+	timeout.tv_usec = 0;
+
+	if (gettimeofday(&now, NULL)) {
+		log_error("Could not get time of day. Dropping down to "
+			  "max retry check.\n");
+		return initial_login_retry_max > conn->session->reopen_cnt;
+	}
+
+	/* give up once the deadline (first connect time + window) has passed */
+	timeradd(&conn->initial_connect_time, &timeout, &fail_time);
+	if (timercmp(&now, &fail_time, >)) {
+		log_debug(1, "Giving up on initial login attempt after "
+			  "%ld seconds.\n", (long)timeout.tv_sec);
+		return 0;
+	}
+
+	return 1;
+}
+
static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask,
mgmt_ipc_err_e err)
{
struct iscsi_session *session = conn->session;
- int initial_login_retry_max;
- initial_login_retry_max = session->nrec.session.initial_login_retry_max;
log_debug(3, "iscsi_login_eh");
/*
* Flush polls and other events
@@ -791,12 +824,10 @@ static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask,
switch (session->r_stage) {
case R_STAGE_NO_CHANGE:
log_debug(6, "login failed STATE_XPT_WAIT/"
- "R_STAGE_NO_CHANGE (%d/%d)",
- session->reopen_cnt,
- initial_login_retry_max);
+ "R_STAGE_NO_CHANGE");
/* timeout during initial connect.
* clean connection. write ipc rsp or retry */
- if (initial_login_retry_max < session->reopen_cnt + 1)
+ if (!iscsi_retry_initial_login(conn))
session_conn_shutdown(conn, qtask, err);
else {
session->reopen_cnt++;
@@ -808,12 +839,10 @@ static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask,
break;
case R_STAGE_SESSION_REDIRECT:
log_debug(6, "login failed STATE_XPT_WAIT/"
- "R_STAGE_SESSION_REDIRECT (%d/%d)",
- session->reopen_cnt,
- initial_login_retry_max);
+ "R_STAGE_SESSION_REDIRECT");
/* timeout during initial redirect connect
* clean connection. write ipc rsp or retry */
- if (initial_login_retry_max < session->reopen_cnt + 1)
+ if (!iscsi_retry_initial_login(conn))
session_conn_shutdown(conn, qtask, err);
else
session_conn_reopen(conn, qtask, 0);
@@ -845,7 +874,7 @@ static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask,
* initial redirected connect. Clean connection
* and write rsp or retry.
*/
- if (initial_login_retry_max < session->reopen_cnt + 1)
+ if (!iscsi_retry_initial_login(conn))
session_conn_shutdown(conn, qtask, err);
else
session_conn_reopen(conn, qtask,
@@ -1997,6 +2026,11 @@ session_login_task(node_rec_t *rec, queue_task_t *qtask)
return MGMT_IPC_ERR_TRANS_FAILURE;
}
+ if (gettimeofday(&conn->initial_connect_time, NULL))
+ log_error("Could not get initial connect time. If "
+ "login errors iscsid may give up the initial "
+ "login early. You should manually login.");
+
qtask->rsp.command = MGMT_IPC_SESSION_LOGIN;
qtask->rsp.err = MGMT_IPC_OK;
return MGMT_IPC_OK;
diff --git a/usr/initiator.h b/usr/initiator.h
index 594e8f8..682ebfc 100644
--- a/usr/initiator.h
+++ b/usr/initiator.h
@@ -22,6 +22,7 @@
#include <stdint.h>
#include <net/if.h>
+#include <sys/time.h>
#include "types.h"
#include "iscsi_proto.h"
@@ -125,6 +126,7 @@ typedef struct iscsi_conn {
iscsi_conn_state_e state;
int userspace_nop;
+ struct timeval initial_connect_time;
actor_t login_timer;
actor_t nop_out_timer;
diff --git a/usr/iscsistart.c b/usr/iscsistart.c
index eb11bcc..cc57f1c 100644
--- a/usr/iscsistart.c
+++ b/usr/iscsistart.c
@@ -130,7 +130,7 @@ static int setup_session(void)
* For root boot we cannot change this so increase to account
* for boot using static setup.
*/
- config_rec.session.initial_login_retry_max = 120;
+ config_rec.session.initial_login_retry_max = 30;
/* we cannot answer so turn off */
config_rec.conn[0].timeo.noop_out_interval = 0;
config_rec.conn[0].timeo.noop_out_timeout = 0;