Index: LVM2.2.02.98/daemons/clvmd/clvmd-corosync.c
===================================================================
--- LVM2.2.02.98.orig/daemons/clvmd/clvmd-corosync.c	2014-04-14 14:08:40.547558968 +0800
+++ LVM2.2.02.98/daemons/clvmd/clvmd-corosync.c	2014-04-14 14:09:30.440619510 +0800
@@ -251,8 +251,12 @@ static void corosync_cpg_confchg_callbac
 		ninfo = dm_hash_lookup_binary(node_hash,
 					      (char *)&left_list[i].nodeid,
 					      COROSYNC_CSID_LEN);
-		if (ninfo)
+		if (ninfo) {
+			char name[MAX_CLUSTER_MEMBER_NAME_LEN];
 			ninfo->state = NODE_DOWN;
+			snprintf(name, sizeof(name), "%x", ninfo->nodeid);
+			decrease_inflight_expected_reply(name);
+		}
 	}
 
 	num_nodes = member_list_entries;
Index: LVM2.2.02.98/daemons/clvmd/clvmd.c
===================================================================
--- LVM2.2.02.98.orig/daemons/clvmd/clvmd.c	2014-04-14 14:08:40.582559010 +0800
+++ LVM2.2.02.98/daemons/clvmd/clvmd.c	2014-04-14 14:09:59.751655009 +0800
@@ -1602,6 +1602,64 @@ static void process_remote_command(struc
 	free(replyargs);
 }
 
+/*
+ * A remote node has left the cluster.  For every in-flight local request
+ * that has no reply from that node yet, stop expecting one; when nothing
+ * is outstanding any more, clear all_success and wake the request's
+ * pre_and_post thread so it moves on to POST_COMMAND.
+ */
+void decrease_inflight_expected_reply(char *nodename)
+{
+	struct local_client *thisfd;
+	struct node_reply *reply;
+
+	DEBUGLOG("remote node %s down", nodename);
+
+	for (thisfd = &local_client_head; thisfd != NULL;
+	     thisfd = thisfd->next) {
+		/* in-flight request */
+		if (thisfd->type == LOCAL_SOCK
+		    && thisfd->bits.localsock.sent_out
+		    && thisfd->bits.localsock.in_progress
+		    && ! thisfd->bits.localsock.finished
+		    && thisfd->bits.localsock.expected_replies >
+		    thisfd->bits.localsock.num_replies) {
+
+			pthread_mutex_lock(&thisfd->bits.localsock.reply_mutex);
+
+			reply = thisfd->bits.localsock.replies;
+			while (reply && strcmp(reply->node, nodename) != 0) {
+				reply = reply->next;
+			}
+			/* node already replied: keep expected_replies, but release reply_mutex before skipping */
+			if (reply) {
+				pthread_mutex_unlock(&thisfd->bits.localsock.reply_mutex);
+				continue;
+			}
+
+			thisfd->bits.localsock.expected_replies--;
+			DEBUGLOG
+			    ("remote node down, decrement the expected replies to (%ld),num_replies(%ld)",
+			     thisfd->bits.localsock.expected_replies,
+			     thisfd->bits.localsock.num_replies);
+
+			if (thisfd->bits.localsock.expected_replies <= thisfd->bits.localsock.num_replies) {
+				/* tell pre_and_post thread to finish */
+				if (thisfd->bits.localsock.threadid) {
+					thisfd->bits.localsock.all_success = 0;
+					pthread_mutex_lock(&thisfd->bits.localsock.mutex);
+					thisfd->bits.localsock.state = POST_COMMAND;
+					pthread_cond_signal(&thisfd->bits.localsock.cond);
+					pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+				}
+			}
+			pthread_mutex_unlock(&thisfd->bits.localsock.reply_mutex);
+
+		}
+	}
+
+}
+
 /* Add a reply to a command to the list of replies for this client.
    If we have got a full set then send them to the waiting client down the local
    socket */
@@ -1643,7 +1701,7 @@ static void add_reply_to_list(struct loc
 		 client->bits.localsock.expected_replies);
 
 	/* If we have the whole lot then do the post-process */
-	if (++client->bits.localsock.num_replies ==
+	if (++client->bits.localsock.num_replies >=
 	    client->bits.localsock.expected_replies) {
 		/* Post-process the command */
 		if (client->bits.localsock.threadid) {
Index: LVM2.2.02.98/daemons/clvmd/clvmd.h
===================================================================
--- LVM2.2.02.98.orig/daemons/clvmd/clvmd.h	2014-04-14 14:08:40.564558988 +0800
+++ LVM2.2.02.98/daemons/clvmd/clvmd.h	2014-04-14 14:09:30.442619512 +0800
@@ -112,6 +112,8 @@ extern int do_post_command(struct local_
 extern void cmd_client_cleanup(struct local_client *client);
 extern int add_client(struct local_client *new_client);
+
+extern void decrease_inflight_expected_reply(char *nodename);
 
 extern void clvmd_cluster_init_completed(void);
 extern void process_message(struct local_client *client, char *buf,
 			    int len, const char *csid);