From ec6bf0b62e04b35c6146ec8a5783faba25bc47719c1e47d6c93601509e139444 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 2 May 2018 02:49:52 +0000 Subject: [PATCH] Accepting request 602893 from network:ha-clustering:Unstable - cpg: Inform clients about left nodes during pause(bsc#1091593) Added: 0012-cpg-Inform-clients-about-left-nodes-during-pause.patch OBS-URL: https://build.opensuse.org/request/show/602893 OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/corosync?expand=0&rev=148 --- ...lients-about-left-nodes-during-pause.patch | 289 ++++++++++++++++++ corosync.changes | 6 + corosync.spec | 2 + 3 files changed, 297 insertions(+) create mode 100644 0012-cpg-Inform-clients-about-left-nodes-during-pause.patch diff --git a/0012-cpg-Inform-clients-about-left-nodes-during-pause.patch b/0012-cpg-Inform-clients-about-left-nodes-during-pause.patch new file mode 100644 index 0000000..cd8697e --- /dev/null +++ b/0012-cpg-Inform-clients-about-left-nodes-during-pause.patch @@ -0,0 +1,289 @@ +Subject: [PATCH] cpg: Inform clients about left nodes during pause + +Patch tries to fix incorrect behaviour during following test-case: +- 3 nodes +- Node 1 is paused +- Node 2 and 3 detects node 1 as failed and informs CPG clients +- Node 1 is unpaused +- Node 1 clients are informed about new membership, but not about Node 1 + being paused, so from Node 1 point-of-view, Node 2 and 3 failure + +Solution is to: +- Remove downlist master choose and always choose local node downlist. + For Node 1 in example above, downlist contains Node 2 and 3. +- Keep code which informs clients about left nodes +- Use joinlist as a authoritative source of nodes/clients which exists + in membership + +--- + exec/cpg.c | 164 +++++-------------------------------------------------------- + 1 file changed, 11 insertions(+), 153 deletions(-) + +diff --git a/exec/cpg.c b/exec/cpg.c +index 78ac1e9e..b851cba3 100644 +--- a/exec/cpg.c ++++ b/exec/cpg.c +@@ -139,13 +139,6 @@ enum cpg_sync_state { + CPGSYNC_JOINLIST + }; + +-enum cpg_downlist_state_e { +- CPG_DOWNLIST_NONE, +- CPG_DOWNLIST_WAITING_FOR_MESSAGES, +- CPG_DOWNLIST_APPLYING, +-}; +-static enum cpg_downlist_state_e downlist_state; +-static struct list_head downlist_messages_head; + static struct list_head joinlist_messages_head; + + struct cpg_pd { +@@ -295,9 +288,7 @@ static int cpg_exec_send_downlist(void); + + static int cpg_exec_send_joinlist(void); + +-static void downlist_messages_delete (void); +- +-static void downlist_master_choose_and_send (void); ++static void downlist_inform_clients (void); + + static void joinlist_inform_clients (void); + +@@ -499,14 +490,6 @@ struct req_exec_cpg_downlist { + mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); + }; + +-struct downlist_msg { +- mar_uint32_t sender_nodeid; +- mar_uint32_t old_members __attribute__((aligned(8))); +- mar_uint32_t left_nodes __attribute__((aligned(8))); +- mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); +- struct list_head list; +-}; +- + struct joinlist_msg { + mar_uint32_t sender_nodeid; + uint32_t pid; +@@ -566,8 +549,6 @@ static void cpg_sync_init ( + last_sync_ring_id.nodeid = ring_id->rep.nodeid; + last_sync_ring_id.seq = ring_id->seq; + +- downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES; +- + entries = 0; + /* + * Determine list of nodeids for downlist message +@@ -611,14 +592,10 @@ static void cpg_sync_activate (void) + my_member_list_entries * sizeof (unsigned int)); + my_old_member_list_entries = my_member_list_entries; + +- if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) { +- downlist_master_choose_and_send (); +- } ++ downlist_inform_clients (); + + joinlist_inform_clients (); + +- downlist_messages_delete (); +- downlist_state = CPG_DOWNLIST_NONE; + joinlist_messages_delete (); + + notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); +@@ -626,8 +603,7 @@ static void cpg_sync_activate (void) + + static void cpg_sync_abort (void) + { +- downlist_state = CPG_DOWNLIST_NONE; +- downlist_messages_delete (); ++ + joinlist_messages_delete (); + } + +@@ -800,76 +776,17 @@ static int notify_lib_joinlist( + return CS_OK; + } + +-static void downlist_log(const char *msg, struct downlist_msg* dl) ++static void downlist_log(const char *msg, struct req_exec_cpg_downlist *dl) + { + log_printf (LOG_DEBUG, +- "%s: sender %s; members(old:%d left:%d)", ++ "%s: members(old:%d left:%d)", + msg, +- api->totem_ifaces_print(dl->sender_nodeid), + dl->old_members, + dl->left_nodes); + } + +-static struct downlist_msg* downlist_master_choose (void) ++static void downlist_inform_clients (void) + { +- struct downlist_msg *cmp; +- struct downlist_msg *best = NULL; +- struct list_head *iter; +- uint32_t cmp_members; +- uint32_t best_members; +- uint32_t i; +- int ignore_msg; +- +- for (iter = downlist_messages_head.next; +- iter != &downlist_messages_head; +- iter = iter->next) { +- +- cmp = list_entry(iter, struct downlist_msg, list); +- downlist_log("comparing", cmp); +- +- ignore_msg = 0; +- for (i = 0; i < cmp->left_nodes; i++) { +- if (cmp->nodeids[i] == api->totem_nodeid_get()) { +- log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n"); +- +- ignore_msg = 1; +- break; +- } +- } +- +- if (ignore_msg) { +- continue ; +- } +- +- if (best == NULL) { +- best = cmp; +- continue; +- } +- +- best_members = best->old_members - best->left_nodes; +- cmp_members = cmp->old_members - cmp->left_nodes; +- +- if (cmp_members > best_members) { +- best = cmp; +- } else if (cmp_members == best_members) { +- if (cmp->old_members > best->old_members) { +- best = cmp; +- } else if (cmp->old_members == best->old_members) { +- if (cmp->sender_nodeid < best->sender_nodeid) { +- best = cmp; +- } +- } +- } +- } +- +- assert (best != NULL); +- +- return best; +-} +- +-static void downlist_master_choose_and_send (void) +-{ +- struct downlist_msg *stored_msg; + struct list_head *iter; + struct process_info *left_pi; + qb_map_t *group_map; +@@ -884,14 +801,7 @@ static void downlist_master_choose_and_send (void) + qb_map_iter_t *miter; + int i, size; + +- downlist_state = CPG_DOWNLIST_APPLYING; +- +- stored_msg = downlist_master_choose (); +- if (!stored_msg) { +- log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist"); +- return; +- } +- downlist_log("chosen downlist", stored_msg); ++ downlist_log("my downlist", &g_req_exec_cpg_downlist); + + group_map = qb_skiplist_create(); + +@@ -905,9 +815,9 @@ static void downlist_master_choose_and_send (void) + iter = iter->next; + + left_pi = NULL; +- for (i = 0; i < stored_msg->left_nodes; i++) { ++ for (i = 0; i < g_req_exec_cpg_downlist.left_nodes; i++) { + +- if (pi->nodeid == stored_msg->nodeids[i]) { ++ if (pi->nodeid == g_req_exec_cpg_downlist.nodeids[i]) { + left_pi = pi; + break; + } +@@ -1039,23 +949,6 @@ static void joinlist_inform_clients (void) + joinlist_remove_zombie_pi_entries (); + } + +-static void downlist_messages_delete (void) +-{ +- struct downlist_msg *stored_msg; +- struct list_head *iter, *iter_next; +- +- for (iter = downlist_messages_head.next; +- iter != &downlist_messages_head; +- iter = iter_next) { +- +- iter_next = iter->next; +- +- stored_msg = list_entry(iter, struct downlist_msg, list); +- list_del (&stored_msg->list); +- free (stored_msg); +- } +-} +- + static void joinlist_messages_delete (void) + { + struct joinlist_msg *stored_msg; +@@ -1076,7 +969,6 @@ static void joinlist_messages_delete (void) + + static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) + { +- list_init (&downlist_messages_head); + list_init (&joinlist_messages_head); + api = corosync_api; + return (NULL); +@@ -1338,43 +1230,9 @@ static void message_handler_req_exec_cpg_downlist( + unsigned int nodeid) + { + const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message; +- int i; +- struct list_head *iter; +- struct downlist_msg *stored_msg; +- int found; +- +- if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) { +- log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d", +- req_exec_cpg_downlist->left_nodes, downlist_state); +- return; +- } +- +- stored_msg = malloc (sizeof (struct downlist_msg)); +- stored_msg->sender_nodeid = nodeid; +- stored_msg->old_members = req_exec_cpg_downlist->old_members; +- stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes; +- memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids, +- req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t)); +- list_init (&stored_msg->list); +- list_add (&stored_msg->list, &downlist_messages_head); +- +- for (i = 0; i < my_member_list_entries; i++) { +- found = 0; +- for (iter = downlist_messages_head.next; +- iter != &downlist_messages_head; +- iter = iter->next) { +- +- stored_msg = list_entry(iter, struct downlist_msg, list); +- if (my_member_list[i] == stored_msg->sender_nodeid) { +- found = 1; +- } +- } +- if (!found) { +- return; +- } +- } + +- downlist_master_choose_and_send (); ++ log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received", ++ req_exec_cpg_downlist->left_nodes); + } + + +-- +2.13.6 + diff --git a/corosync.changes b/corosync.changes index db56d19..bb37ff9 100644 --- a/corosync.changes +++ b/corosync.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Wed May 2 02:36:56 UTC 2018 - bliu@suse.com + +- cpg: Inform clients about left nodes during pause(bsc#1091593) + Added: 0012-cpg-Inform-clients-about-left-nodes-during-pause.patch + ------------------------------------------------------------------- Thu Apr 26 06:43:54 UTC 2018 - bliu@suse.com diff --git a/corosync.spec b/corosync.spec index d61cff3..1f1e6a4 100644 --- a/corosync.spec +++ b/corosync.spec @@ -73,6 +73,7 @@ Patch15: 0008-bsc#1083561-upgrade-from-1-x-y.patch Patch16: 0009-bsc#1088619-add-version.patch Patch17: 0010-qdevice-net-instance.c-optarg-should-be-str.patch Patch18: 0011-NSS_NoDB_Init-the-parameter-is-reserved-must-be-NULL.patch +Patch19: 0012-cpg-Inform-clients-about-left-nodes-during-pause.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build # openais is indeed gone and should be uninstalled. Yes, we do not @@ -156,6 +157,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build %patch16 -p1 %patch17 -p1 %patch18 -p1 +%patch19 -p1 %build %if %{with runautogen}