diff --git a/0003-totemrrp-Fix-situation-when-all-rings-are-faulty.patch b/0003-totemrrp-Fix-situation-when-all-rings-are-faulty.patch new file mode 100644 index 0000000..4692c12 --- /dev/null +++ b/0003-totemrrp-Fix-situation-when-all-rings-are-faulty.patch @@ -0,0 +1,173 @@ +From eaa92765f1226f16ddb58f1baf3494b817726cb4 Mon Sep 17 00:00:00 2001 +From: Bin Liu +Date: Tue, 16 May 2017 10:49:23 +0800 +Subject: [PATCH] totemrrp: Fix situation when all rings are faulty + +Previously when all rings were marked as failed no message was sent via +any interface. This is problem because totemsrp relies on messages +delivered via localhost multicast loop socket so it never moved to +single node mode and it got stuck. + +Solution is to send message via one of the interfaces (first one seems +to be obvious choice) if it was not sent via any interface. + +Strictly speaking it should be enough to change just *_mcast_flush_send +functions, but changing others is just for sure. +--- + exec/totemrrp.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 62 insertions(+), 9 deletions(-) + +diff --git a/exec/totemrrp.c b/exec/totemrrp.c +index 0d9a58d5..15ba7766 100644 +--- a/exec/totemrrp.c ++++ b/exec/totemrrp.c +@@ -1002,7 +1002,7 @@ static void passive_monitor ( + } + + /* +- * Max is larger then threshold -> start adjusting process ++ * Max is larger than threshold -> start adjusting process + */ + if (max > PASSIVE_RECV_COUNT_THRESHOLD) { + min_all = min_active = recv_count[iface_no]; +@@ -1117,9 +1117,15 @@ static void passive_mcast_flush_send ( + i++; + } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1)); + +- if (i <= instance->interface_count) { +- totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); ++ if (i > instance->interface_count) { ++ /* ++ * All interfaces are faulty. 
It's still needed to send mcast ++ message to local host so use first interface. ++ */ ++ passive_instance->msg_xmit_iface = 0; + } ++ ++ totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); + } + + static void passive_mcast_noflush_send ( +@@ -1135,9 +1141,15 @@ static void passive_mcast_noflush_send ( + i++; + } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1)); + +- if (i <= instance->interface_count) { +- totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); ++ if (i > instance->interface_count) { ++ /* ++ * All interfaces are faulty. It's still needed to send mcast ++ * message to local host so use first interface. ++ */ ++ passive_instance->msg_xmit_iface = 0; + } ++ ++ totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); + } + + static void passive_token_recv ( +@@ -1179,12 +1191,17 @@ static void passive_token_send ( + i++; + } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->token_xmit_iface] == 1)); + +- if (i <= instance->interface_count) { +- totemnet_token_send ( +- instance->net_handles[passive_instance->token_xmit_iface], +- msg, msg_len); ++ if (i > instance->interface_count) { ++ /* ++ * All interfaces are faulty. It's still needed to send token ++ * message to (potentially) local host so use first interface. 
++ */ ++ passive_instance->token_xmit_iface = 0; + } + ++ totemnet_token_send ( ++ instance->net_handles[passive_instance->token_xmit_iface], ++ msg, msg_len); + } + + static void passive_recv_flush (struct totemrrp_instance *instance) +@@ -1568,13 +1585,24 @@ static void active_mcast_flush_send ( + unsigned int msg_len) + { + int i; ++ int msg_sent; + struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; + ++ msg_sent = 0; ++ + for (i = 0; i < instance->interface_count; i++) { + if (rrp_algo_instance->faulty[i] == 0) { ++ msg_sent = 1; + totemnet_mcast_flush_send (instance->net_handles[i], msg, msg_len); + } + } ++ if (!msg_sent) { ++ /* ++ * All interfaces are faulty. It's still needed to send mcast ++ * message to local host so use first interface. ++ */ ++ totemnet_mcast_flush_send (instance->net_handles[0], msg, msg_len); ++ } + } + + static void active_mcast_noflush_send ( +@@ -1583,13 +1611,24 @@ static void active_mcast_noflush_send ( + unsigned int msg_len) + { + int i; ++ int msg_sent; + struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; + ++ msg_sent = 0; + for (i = 0; i < instance->interface_count; i++) { + if (rrp_algo_instance->faulty[i] == 0) { ++ msg_sent = 1; + totemnet_mcast_noflush_send (instance->net_handles[i], msg, msg_len); + } + } ++ ++ if (!msg_sent) { ++ /* ++ * All interfaces are faulty. It's still needed to send mcast ++ * message to local host so use first interface. 
++ */ ++ totemnet_mcast_noflush_send (instance->net_handles[0], msg, msg_len); ++ } + } + + static void active_token_recv ( +@@ -1645,15 +1684,29 @@ static void active_token_send ( + { + struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; + int i; ++ int msg_sent; ++ ++ msg_sent = 0; + + for (i = 0; i < instance->interface_count; i++) { + if (rrp_algo_instance->faulty[i] == 0) { ++ msg_sent = 1; + totemnet_token_send ( + instance->net_handles[i], + msg, msg_len); + + } + } ++ ++ if (!msg_sent) { ++ /* ++ * All interfaces are faulty. It's still needed to send token ++ * message to (potentially) local host so use first interface. ++ */ ++ totemnet_token_send ( ++ instance->net_handles[0], ++ msg, msg_len); ++ } + } + + static void active_recv_flush (struct totemrrp_instance *instance) +-- +2.12.0 + diff --git a/corosync.changes b/corosync.changes index 97ab87d..e7826c2 100644 --- a/corosync.changes +++ b/corosync.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Tue May 16 03:05:05 UTC 2017 - bliu@suse.com + +- totemrrp: Fix situation when all rings are faulty (bsc#1039215) + Added: 0003-totemrrp-Fix-situation-when-all-rings-are-faulty.patch + ------------------------------------------------------------------- Tue May 9 04:17:35 UTC 2017 - bliu@suse.com diff --git a/corosync.spec b/corosync.spec index 5997f4d..227dd33 100644 --- a/corosync.spec +++ b/corosync.spec @@ -64,6 +64,7 @@ Patch9: 0001-Logsys-Change-logsys-syslog_priority-priority.patch Patch10: 0001-logconfig.c-make-logging.syslog_priority-and-logging.patch Patch11: 0001-totemconfig.c-Fixed-Evicted-from-CPG-membership.patch Patch12: 0002-Main-call-mlock-after-fork.patch +Patch13: 0003-totemrrp-Fix-situation-when-all-rings-are-faulty.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build # openais is indeed gone and should be uninstalled. 
Yes, we do not @@ -135,6 +136,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build %patch10 -p1 %patch11 -p1 %patch12 -p1 +%patch13 -p1 %build %if %{with_runautogen}