diff --git a/etc/iscsid.conf b/etc/iscsid.conf index f2691ee..4f6e08a 100644 --- a/etc/iscsid.conf +++ b/etc/iscsid.conf @@ -107,14 +107,31 @@ node.session.err_timeo.lu_reset_timeout = 20 # Retry #****** -# To speficy the number of times iscsiadm should retry a login -# to the target when we first login, modify the following line. -# The default is 4. Valid values are any integer value. This only +# To specify the number of times iscsid should retry a login +# if the login attempt fails due to the node.conn[0].timeo.login_timeout +# expiring, modify the following line. Note that if the login fails +# quickly (before node.conn[0].timeo.login_timeout fires) because the network +# layer or the target returns an error, iscsid may retry the login more than +# node.session.initial_login_retry_max times. +# +# This retry count along with node.conn[0].timeo.login_timeout +# determines the maximum amount of time iscsid will try to +# establish the initial login. node.session.initial_login_retry_max is +# multiplied by the node.conn[0].timeo.login_timeout to determine the +# maximum amount of time. +# +# The default node.session.initial_login_retry_max is 8 and +# node.conn[0].timeo.login_timeout is 15 so we have: +# +# node.conn[0].timeo.login_timeout * node.session.initial_login_retry_max = +# 120 seconds +# +# Valid values are any integer value. This only # affects the initial login. Setting it to a high value can slow # down the iscsi service startup. Setting it to a low value can -# cause a session to not get logged into, if there are distuptions +# cause a session to not get logged into, if there are disruptions # during startup or if the network is not ready at that time. 
-node.session.initial_login_retry_max = 4 +node.session.initial_login_retry_max = 8 ################################ # session and device queue depth diff --git a/usr/initiator.c b/usr/initiator.c index 2f29ffc..b85416e 100644 --- a/usr/initiator.c +++ b/usr/initiator.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "initiator.h" #include "transport.h" @@ -773,13 +774,45 @@ session_conn_reopen(iscsi_conn_t *conn, queue_task_t *qtask, int do_stop) __session_conn_reopen(conn, qtask, do_stop, 0); } +/* + * Decide whether a failed initial login should be retried. Returns 1 while + * initial_login_retry_max * login_timeout seconds have not yet elapsed + * since conn->initial_connect_time, and 0 once they have. If the current + * time cannot be read, compares reopen_cnt with the retry maximum instead. + */ +static int iscsi_retry_initial_login(struct iscsi_conn *conn) +{ + int initial_login_retry_max = + conn->session->nrec.session.initial_login_retry_max; + struct timeval now, timeout, fail_time; + + if (gettimeofday(&now, NULL)) { + log_error("Could not get time of day. 
Dropping down to " + "max retry check."); + return initial_login_retry_max > conn->session->reopen_cnt; + } + + /* only tv_sec holds the retry window; tv_usec must start at zero */ + timerclear(&timeout); + timeout.tv_sec = initial_login_retry_max * conn->login_timeout; + timeradd(&conn->initial_connect_time, &timeout, &fail_time); + + /* the retry window has elapsed, so it is time to give up */ + if (timercmp(&now, &fail_time, >)) { + log_debug(1, "Giving up on initial login attempt after " + "%u seconds.", + initial_login_retry_max * conn->login_timeout); + return 0; + } + + return 1; +} + static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask, mgmt_ipc_err_e err) { struct iscsi_session *session = conn->session; - int initial_login_retry_max; - initial_login_retry_max = session->nrec.session.initial_login_retry_max; log_debug(3, "iscsi_login_eh"); /* * Flush polls and other events @@ -791,12 +824,10 @@ static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask, switch (session->r_stage) { case R_STAGE_NO_CHANGE: log_debug(6, "login failed STATE_XPT_WAIT/" - "R_STAGE_NO_CHANGE (%d/%d)", - session->reopen_cnt, - initial_login_retry_max); + "R_STAGE_NO_CHANGE"); /* timeout during initial connect. * clean connection. write ipc rsp or retry */ - if (initial_login_retry_max < session->reopen_cnt + 1) + if (!iscsi_retry_initial_login(conn)) session_conn_shutdown(conn, qtask, err); else { session->reopen_cnt++; @@ -808,12 +839,10 @@ static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask, break; case R_STAGE_SESSION_REDIRECT: log_debug(6, "login failed STATE_XPT_WAIT/" - "R_STAGE_SESSION_REDIRECT (%d/%d)", - session->reopen_cnt, - initial_login_retry_max); + "R_STAGE_SESSION_REDIRECT"); /* timeout during initial redirect connect * clean connection. 
write ipc rsp or retry */ - if (initial_login_retry_max < session->reopen_cnt + 1) + if (!iscsi_retry_initial_login(conn)) session_conn_shutdown(conn, qtask, err); else session_conn_reopen(conn, qtask, 0); @@ -845,7 +874,7 @@ static void iscsi_login_eh(struct iscsi_conn *conn, struct queue_task *qtask, * initial redirected connect. Clean connection * and write rsp or retry. */ - if (initial_login_retry_max < session->reopen_cnt + 1) + if (!iscsi_retry_initial_login(conn)) session_conn_shutdown(conn, qtask, err); else session_conn_reopen(conn, qtask, @@ -1997,6 +2026,11 @@ session_login_task(node_rec_t *rec, queue_task_t *qtask) return MGMT_IPC_ERR_TRANS_FAILURE; } + if (gettimeofday(&conn->initial_connect_time, NULL)) + log_error("Could not get initial connect time. If " + "login errors iscsid may give up the initial " + "login early. You should manually login."); + qtask->rsp.command = MGMT_IPC_SESSION_LOGIN; qtask->rsp.err = MGMT_IPC_OK; return MGMT_IPC_OK; diff --git a/usr/initiator.h b/usr/initiator.h index 594e8f8..682ebfc 100644 --- a/usr/initiator.h +++ b/usr/initiator.h @@ -22,6 +22,7 @@ #include #include +#include #include "types.h" #include "iscsi_proto.h" @@ -125,6 +126,7 @@ typedef struct iscsi_conn { iscsi_conn_state_e state; int userspace_nop; + struct timeval initial_connect_time; actor_t login_timer; actor_t nop_out_timer; diff --git a/usr/iscsistart.c b/usr/iscsistart.c index eb11bcc..cc57f1c 100644 --- a/usr/iscsistart.c +++ b/usr/iscsistart.c @@ -130,7 +130,7 @@ static int setup_session(void) * For root boot we cannot change this so increase to account * for boot using static setup. */ - config_rec.session.initial_login_retry_max = 120; + config_rec.session.initial_login_retry_max = 30; /* we cannot answer so turn off */ config_rec.conn[0].timeo.noop_out_interval = 0; config_rec.conn[0].timeo.noop_out_timeout = 0;