libdlm/bug-1191734_0016-Revert-dlm_controld-use-new-quorum-api-to-detect-mis.patch

187 lines
5.3 KiB
Diff
Raw Normal View History

From fd4aa4e5eec8477b08b279fbf6ed0eb1406c8fa8 Mon Sep 17 00:00:00 2001
From: Heming Zhao <heming.zhao@suse.com>
Date: Mon, 18 Oct 2021 16:20:32 +0800
Subject: [PATCH 2/2] Revert "dlm_controld: use new quorum api to detect missed
 failures"

This reverts commit 2e893b981b19430aeca983dd63fb3ac0979d0b35.
---
dlm_controld/member.c | 106 ++++++------------------------------------
1 file changed, 15 insertions(+), 91 deletions(-)
diff --git a/dlm_controld/member.c b/dlm_controld/member.c
index d567c114b259..1d5bfa3d9166 100644
--- a/dlm_controld/member.c
+++ b/dlm_controld/member.c
@@ -20,8 +20,6 @@ static int old_node_count;
static uint32_t quorum_nodes[MAX_NODES];
static int quorum_node_count;
static struct list_head cluster_nodes;
-static uint32_t leavejoin_nodes[MAX_NODES];
-static int leavejoin_count;
struct node_cluster {
struct list_head list;
@@ -105,51 +103,15 @@ int is_cluster_member(uint32_t nodeid)
return is_member(quorum_nodes, quorum_node_count, nodeid);
}
-static int is_leavejoin_node(uint32_t nodeid)
-{
- return is_member(leavejoin_nodes, leavejoin_count, nodeid);
-}
-
-static void quorum_nodelist_callback(quorum_handle_t cbhandle, struct quorum_ring_id ring_id,
- uint32_t member_list_entries, const uint32_t *member_list,
- uint32_t joined_list_entries, const uint32_t *joined_list,
- uint32_t left_list_entries, const uint32_t *left_list)
-{
- uint64_t ring_seq = ring_id.seq;
- int i, j;
-
- for (i = 0; i < left_list_entries; i++) {
- log_debug("cluster left_list %u seq %llu",
- left_list[i], (unsigned long long)ring_seq);
- }
-
- for (j = 0; j < joined_list_entries; j++) {
- log_debug("cluster joined_list %u seq %llu",
- joined_list[j], (unsigned long long)ring_seq);
- }
-
- for (i = 0; i < left_list_entries; i++) {
- for (j = 0; j < joined_list_entries; j++) {
- if (joined_list[j] == left_list[i]) {
- log_debug("cluster node %d left and joined", joined_list[j]);
- if (!is_leavejoin_node(joined_list[j]))
- leavejoin_nodes[leavejoin_count++] = joined_list[j];
- }
- }
- }
-}
-
-static void quorum_callback(quorum_handle_t cbhandle, uint32_t quorate,
- struct quorum_ring_id ring_id, uint32_t node_list_entries,
- const uint32_t *node_list)
+static void quorum_callback(quorum_handle_t h, uint32_t quorate,
+ uint64_t ring_seq, uint32_t node_list_entries,
+ uint32_t *node_list)
{
corosync_cfg_node_address_t addrs[MAX_NODE_ADDRESSES];
corosync_cfg_node_address_t *addrptr = addrs;
const struct node_config *nc;
cs_error_t err;
int i, j, num_addrs;
- uint32_t nodeid;
- uint64_t ring_seq = ring_id.seq;
uint64_t now = monotime();
if (!cluster_joined_monotime) {
@@ -180,55 +142,15 @@ static void quorum_callback(quorum_handle_t cbhandle, uint32_t quorate,
if (!is_cluster_member(old_nodes[i])) {
log_debug("cluster node %u removed seq %llu",
old_nodes[i], (unsigned long long)cluster_ringid_seq);
-
rem_cluster_node(old_nodes[i], now);
del_configfs_node(old_nodes[i]);
}
}
- for (i = 0; i < leavejoin_count; i++) {
- nodeid = leavejoin_nodes[i];
-
- log_debug("cluster node %u leavejoin seq %llu",
- nodeid, (unsigned long long)cluster_ringid_seq);
-
- /* remove */
-
- rem_cluster_node(nodeid, now);
- del_configfs_node(nodeid);
-
- /* add */
-
- add_cluster_node(nodeid, now);
-
- fence_delay_begin = now;
-
- err = corosync_cfg_get_node_addrs(ch, nodeid,
- MAX_NODE_ADDRESSES,
- &num_addrs, addrs);
- if (err != CS_OK) {
- log_error("corosync_cfg_get_node_addrs failed nodeid %u", nodeid);
- continue;
- }
-
- nc = node_config_get(nodeid);
-
- for (j = 0; j < num_addrs; j++) {
- add_configfs_node(nodeid,
- addrptr[j].address,
- addrptr[j].address_length,
- (nodeid == our_nodeid),
- nc->mark);
- }
- }
-
for (i = 0; i < quorum_node_count; i++) {
- if (is_leavejoin_node(quorum_nodes[i]))
- continue;
if (!is_old_member(quorum_nodes[i])) {
log_debug("cluster node %u added seq %llu",
quorum_nodes[i], (unsigned long long)cluster_ringid_seq);
-
add_cluster_node(quorum_nodes[i], now);
fence_delay_begin = now;
@@ -254,11 +176,13 @@ static void quorum_callback(quorum_handle_t cbhandle, uint32_t quorate,
}
}
}
-
- memset(leavejoin_nodes, 0, sizeof(leavejoin_nodes));
- leavejoin_count = 0;
}
+static quorum_callbacks_t quorum_callbacks =
+{
+ .quorum_notify_fn = quorum_callback,
+};
+
void process_cluster(int ci)
{
cs_error_t err;
@@ -284,23 +208,23 @@ void update_cluster(void)
int setup_cluster(void)
{
- quorum_model_v1_data_t model_data;
cs_error_t err;
int fd;
- uint32_t quorum_type = 0;
+ uint32_t quorum_type;
INIT_LIST_HEAD(&cluster_nodes);
- memset(&model_data, 0, sizeof(model_data));
- model_data.quorum_notify_fn = quorum_callback;
- model_data.nodelist_notify_fn = quorum_nodelist_callback;
-
- err = quorum_model_initialize(&qh, QUORUM_MODEL_V1, (quorum_model_data_t *)&model_data, &quorum_type, NULL);
+ err = quorum_initialize(&qh, &quorum_callbacks, &quorum_type);
if (err != CS_OK) {
log_error("quorum init error %d", err);
return -1;
}
+ if (quorum_type == QUORUM_FREE) {
+ log_error("no quorum provider configured in corosync, unable to operate");
+ goto fail;
+ }
+
err = quorum_fd_get(qh, &fd);
if (err != CS_OK) {
log_error("quorum fd_get error %d", err);
--
2.33.0