Accepting request 602893 from network:ha-clustering:Unstable

- cpg: Inform clients about left nodes during pause (bsc#1091593)
    Added: 0012-cpg-Inform-clients-about-left-nodes-during-pause.patch

OBS-URL: https://build.opensuse.org/request/show/602893
OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/corosync?expand=0&rev=148
This commit is contained in:
Bin Liu 2018-05-02 02:49:52 +00:00 committed by Git OBS Bridge
parent b98c1fc972
commit ec6bf0b62e
3 changed files with 297 additions and 0 deletions

View File

@ -0,0 +1,289 @@
Subject: [PATCH] cpg: Inform clients about left nodes during pause
This patch fixes incorrect behaviour in the following test case:
- 3 nodes
- Node 1 is paused
- Nodes 2 and 3 detect Node 1 as failed and inform CPG clients
- Node 1 is unpaused
- Node 1 clients are informed about the new membership, but not about Node 1
having been paused, i.e. from Node 1's point of view, the failure of Nodes 2 and 3
Solution is to:
- Remove the downlist master selection and always use the local node's downlist.
For Node 1 in the example above, the downlist contains Nodes 2 and 3.
- Keep code which informs clients about left nodes
- Use the joinlist as the authoritative source of nodes/clients which exist
in the membership
---
exec/cpg.c | 164 +++++--------------------------------------------------------
1 file changed, 11 insertions(+), 153 deletions(-)
diff --git a/exec/cpg.c b/exec/cpg.c
index 78ac1e9e..b851cba3 100644
--- a/exec/cpg.c
+++ b/exec/cpg.c
@@ -139,13 +139,6 @@ enum cpg_sync_state {
CPGSYNC_JOINLIST
};
-enum cpg_downlist_state_e {
- CPG_DOWNLIST_NONE,
- CPG_DOWNLIST_WAITING_FOR_MESSAGES,
- CPG_DOWNLIST_APPLYING,
-};
-static enum cpg_downlist_state_e downlist_state;
-static struct list_head downlist_messages_head;
static struct list_head joinlist_messages_head;
struct cpg_pd {
@@ -295,9 +288,7 @@ static int cpg_exec_send_downlist(void);
static int cpg_exec_send_joinlist(void);
-static void downlist_messages_delete (void);
-
-static void downlist_master_choose_and_send (void);
+static void downlist_inform_clients (void);
static void joinlist_inform_clients (void);
@@ -499,14 +490,6 @@ struct req_exec_cpg_downlist {
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
};
-struct downlist_msg {
- mar_uint32_t sender_nodeid;
- mar_uint32_t old_members __attribute__((aligned(8)));
- mar_uint32_t left_nodes __attribute__((aligned(8)));
- mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
- struct list_head list;
-};
-
struct joinlist_msg {
mar_uint32_t sender_nodeid;
uint32_t pid;
@@ -566,8 +549,6 @@ static void cpg_sync_init (
last_sync_ring_id.nodeid = ring_id->rep.nodeid;
last_sync_ring_id.seq = ring_id->seq;
- downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES;
-
entries = 0;
/*
* Determine list of nodeids for downlist message
@@ -611,14 +592,10 @@ static void cpg_sync_activate (void)
my_member_list_entries * sizeof (unsigned int));
my_old_member_list_entries = my_member_list_entries;
- if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
- downlist_master_choose_and_send ();
- }
+ downlist_inform_clients ();
joinlist_inform_clients ();
- downlist_messages_delete ();
- downlist_state = CPG_DOWNLIST_NONE;
joinlist_messages_delete ();
notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
@@ -626,8 +603,7 @@ static void cpg_sync_activate (void)
static void cpg_sync_abort (void)
{
- downlist_state = CPG_DOWNLIST_NONE;
- downlist_messages_delete ();
+
joinlist_messages_delete ();
}
@@ -800,76 +776,17 @@ static int notify_lib_joinlist(
return CS_OK;
}
-static void downlist_log(const char *msg, struct downlist_msg* dl)
+static void downlist_log(const char *msg, struct req_exec_cpg_downlist *dl)
{
log_printf (LOG_DEBUG,
- "%s: sender %s; members(old:%d left:%d)",
+ "%s: members(old:%d left:%d)",
msg,
- api->totem_ifaces_print(dl->sender_nodeid),
dl->old_members,
dl->left_nodes);
}
-static struct downlist_msg* downlist_master_choose (void)
+static void downlist_inform_clients (void)
{
- struct downlist_msg *cmp;
- struct downlist_msg *best = NULL;
- struct list_head *iter;
- uint32_t cmp_members;
- uint32_t best_members;
- uint32_t i;
- int ignore_msg;
-
- for (iter = downlist_messages_head.next;
- iter != &downlist_messages_head;
- iter = iter->next) {
-
- cmp = list_entry(iter, struct downlist_msg, list);
- downlist_log("comparing", cmp);
-
- ignore_msg = 0;
- for (i = 0; i < cmp->left_nodes; i++) {
- if (cmp->nodeids[i] == api->totem_nodeid_get()) {
- log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n");
-
- ignore_msg = 1;
- break;
- }
- }
-
- if (ignore_msg) {
- continue ;
- }
-
- if (best == NULL) {
- best = cmp;
- continue;
- }
-
- best_members = best->old_members - best->left_nodes;
- cmp_members = cmp->old_members - cmp->left_nodes;
-
- if (cmp_members > best_members) {
- best = cmp;
- } else if (cmp_members == best_members) {
- if (cmp->old_members > best->old_members) {
- best = cmp;
- } else if (cmp->old_members == best->old_members) {
- if (cmp->sender_nodeid < best->sender_nodeid) {
- best = cmp;
- }
- }
- }
- }
-
- assert (best != NULL);
-
- return best;
-}
-
-static void downlist_master_choose_and_send (void)
-{
- struct downlist_msg *stored_msg;
struct list_head *iter;
struct process_info *left_pi;
qb_map_t *group_map;
@@ -884,14 +801,7 @@ static void downlist_master_choose_and_send (void)
qb_map_iter_t *miter;
int i, size;
- downlist_state = CPG_DOWNLIST_APPLYING;
-
- stored_msg = downlist_master_choose ();
- if (!stored_msg) {
- log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist");
- return;
- }
- downlist_log("chosen downlist", stored_msg);
+ downlist_log("my downlist", &g_req_exec_cpg_downlist);
group_map = qb_skiplist_create();
@@ -905,9 +815,9 @@ static void downlist_master_choose_and_send (void)
iter = iter->next;
left_pi = NULL;
- for (i = 0; i < stored_msg->left_nodes; i++) {
+ for (i = 0; i < g_req_exec_cpg_downlist.left_nodes; i++) {
- if (pi->nodeid == stored_msg->nodeids[i]) {
+ if (pi->nodeid == g_req_exec_cpg_downlist.nodeids[i]) {
left_pi = pi;
break;
}
@@ -1039,23 +949,6 @@ static void joinlist_inform_clients (void)
joinlist_remove_zombie_pi_entries ();
}
-static void downlist_messages_delete (void)
-{
- struct downlist_msg *stored_msg;
- struct list_head *iter, *iter_next;
-
- for (iter = downlist_messages_head.next;
- iter != &downlist_messages_head;
- iter = iter_next) {
-
- iter_next = iter->next;
-
- stored_msg = list_entry(iter, struct downlist_msg, list);
- list_del (&stored_msg->list);
- free (stored_msg);
- }
-}
-
static void joinlist_messages_delete (void)
{
struct joinlist_msg *stored_msg;
@@ -1076,7 +969,6 @@ static void joinlist_messages_delete (void)
static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
- list_init (&downlist_messages_head);
list_init (&joinlist_messages_head);
api = corosync_api;
return (NULL);
@@ -1338,43 +1230,9 @@ static void message_handler_req_exec_cpg_downlist(
unsigned int nodeid)
{
const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message;
- int i;
- struct list_head *iter;
- struct downlist_msg *stored_msg;
- int found;
-
- if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
- log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
- req_exec_cpg_downlist->left_nodes, downlist_state);
- return;
- }
-
- stored_msg = malloc (sizeof (struct downlist_msg));
- stored_msg->sender_nodeid = nodeid;
- stored_msg->old_members = req_exec_cpg_downlist->old_members;
- stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes;
- memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids,
- req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t));
- list_init (&stored_msg->list);
- list_add (&stored_msg->list, &downlist_messages_head);
-
- for (i = 0; i < my_member_list_entries; i++) {
- found = 0;
- for (iter = downlist_messages_head.next;
- iter != &downlist_messages_head;
- iter = iter->next) {
-
- stored_msg = list_entry(iter, struct downlist_msg, list);
- if (my_member_list[i] == stored_msg->sender_nodeid) {
- found = 1;
- }
- }
- if (!found) {
- return;
- }
- }
- downlist_master_choose_and_send ();
+ log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received",
+ req_exec_cpg_downlist->left_nodes);
}
--
2.13.6

View File

@ -1,3 +1,9 @@
-------------------------------------------------------------------
Wed May 2 02:36:56 UTC 2018 - bliu@suse.com
- cpg: Inform clients about left nodes during pause (bsc#1091593)
Added: 0012-cpg-Inform-clients-about-left-nodes-during-pause.patch
-------------------------------------------------------------------
Thu Apr 26 06:43:54 UTC 2018 - bliu@suse.com

View File

@ -73,6 +73,7 @@ Patch15: 0008-bsc#1083561-upgrade-from-1-x-y.patch
Patch16: 0009-bsc#1088619-add-version.patch
Patch17: 0010-qdevice-net-instance.c-optarg-should-be-str.patch
Patch18: 0011-NSS_NoDB_Init-the-parameter-is-reserved-must-be-NULL.patch
Patch19: 0012-cpg-Inform-clients-about-left-nodes-during-pause.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-build
# openais is indeed gone and should be uninstalled. Yes, we do not
@ -156,6 +157,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build
%patch16 -p1
%patch17 -p1
%patch18 -p1
%patch19 -p1
%build
%if %{with runautogen}