corosync/0003-totemrrp-Fix-situation-when-all-rings-are-faulty.patch
Bin Liu 800ab2e64c Accepting request 495134 from home:BinLiu:branches:network:ha-clustering:Factory
- totemrrp: Fix situation when all rings are faulty (bsc#1039215)
   Added: 0003-totemrrp-Fix-situation-when-all-rings-are-faulty.patch

OBS-URL: https://build.opensuse.org/request/show/495134
OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/corosync?expand=0&rev=87
2017-05-16 03:09:05 +00:00

174 lines
5.4 KiB
Diff

From eaa92765f1226f16ddb58f1baf3494b817726cb4 Mon Sep 17 00:00:00 2001
From: Bin Liu <bliu@suse.com>
Date: Tue, 16 May 2017 10:49:23 +0800
Subject: [PATCH] totemrrp: Fix situation when all rings are faulty
Previously when all rings were marked as failed no message was sent via
any interface. This is problem because totemsrp relies on messages
delivered via localhost multicast loop socket so it never moved to
single node mode and it got stuck.
Solution is to send message via one of the interfaces (first one seems
to be obvious choice) if it was not sent via any interface.
Strictly speaking it would be enough to change just the *_mcast_flush_send
functions, but the others are changed as well to be on the safe side.
---
exec/totemrrp.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 62 insertions(+), 9 deletions(-)
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
index 0d9a58d5..15ba7766 100644
--- a/exec/totemrrp.c
+++ b/exec/totemrrp.c
@@ -1002,7 +1002,7 @@ static void passive_monitor (
}
/*
- * Max is larger then threshold -> start adjusting process
+ * Max is larger than threshold -> start adjusting process
*/
if (max > PASSIVE_RECV_COUNT_THRESHOLD) {
min_all = min_active = recv_count[iface_no];
@@ -1117,9 +1117,15 @@ static void passive_mcast_flush_send (
i++;
} while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
- if (i <= instance->interface_count) {
- totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
+ if (i > instance->interface_count) {
+ /*
+ * All interfaces are faulty. It's still needed to send mcast
+ * message to local host so use first interface.
+ */
+ passive_instance->msg_xmit_iface = 0;
}
+
+ totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
}
static void passive_mcast_noflush_send (
@@ -1135,9 +1141,15 @@ static void passive_mcast_noflush_send (
i++;
} while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
- if (i <= instance->interface_count) {
- totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
+ if (i > instance->interface_count) {
+ /*
+ * All interfaces are faulty. It's still needed to send mcast
+ * message to local host so use first interface.
+ */
+ passive_instance->msg_xmit_iface = 0;
}
+
+ totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
}
static void passive_token_recv (
@@ -1179,12 +1191,17 @@ static void passive_token_send (
i++;
} while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->token_xmit_iface] == 1));
- if (i <= instance->interface_count) {
- totemnet_token_send (
- instance->net_handles[passive_instance->token_xmit_iface],
- msg, msg_len);
+ if (i > instance->interface_count) {
+ /*
+ * All interfaces are faulty. It's still needed to send token
+ * message to (potentially) local host so use first interface.
+ */
+ passive_instance->token_xmit_iface = 0;
}
+ totemnet_token_send (
+ instance->net_handles[passive_instance->token_xmit_iface],
+ msg, msg_len);
}
static void passive_recv_flush (struct totemrrp_instance *instance)
@@ -1568,13 +1585,24 @@ static void active_mcast_flush_send (
unsigned int msg_len)
{
int i;
+ int msg_sent;
struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
+ msg_sent = 0;
+
for (i = 0; i < instance->interface_count; i++) {
if (rrp_algo_instance->faulty[i] == 0) {
+ msg_sent = 1;
totemnet_mcast_flush_send (instance->net_handles[i], msg, msg_len);
}
}
+ if (!msg_sent) {
+ /*
+ * All interfaces are faulty. It's still needed to send mcast
+ * message to local host so use first interface.
+ */
+ totemnet_mcast_flush_send (instance->net_handles[0], msg, msg_len);
+ }
}
static void active_mcast_noflush_send (
@@ -1583,13 +1611,24 @@ static void active_mcast_noflush_send (
unsigned int msg_len)
{
int i;
+ int msg_sent;
struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
+ msg_sent = 0;
for (i = 0; i < instance->interface_count; i++) {
if (rrp_algo_instance->faulty[i] == 0) {
+ msg_sent = 1;
totemnet_mcast_noflush_send (instance->net_handles[i], msg, msg_len);
}
}
+
+ if (!msg_sent) {
+ /*
+ * All interfaces are faulty. It's still needed to send mcast
+ * message to local host so use first interface.
+ */
+ totemnet_mcast_noflush_send (instance->net_handles[0], msg, msg_len);
+ }
}
static void active_token_recv (
@@ -1645,15 +1684,29 @@ static void active_token_send (
{
struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
int i;
+ int msg_sent;
+
+ msg_sent = 0;
for (i = 0; i < instance->interface_count; i++) {
if (rrp_algo_instance->faulty[i] == 0) {
+ msg_sent = 1;
totemnet_token_send (
instance->net_handles[i],
msg, msg_len);
}
}
+
+ if (!msg_sent) {
+ /*
+ * All interfaces are faulty. It's still needed to send token
+ * message to (potentially) local host so use first interface.
+ */
+ totemnet_token_send (
+ instance->net_handles[0],
+ msg, msg_len);
+ }
}
static void active_recv_flush (struct totemrrp_instance *instance)
--
2.12.0