f4fa43815b
- fenced: Handle fencing requested with nodeid by utilizing the
  membership cache of known nodes (bsc#1094208)
  * bsc-1094208-Refactor-fenced-Handle-fencing-requested-with-nodeid.patch
- controld: able to manually confirm unseen nodes are down (bsc#1094208)
  * bsc-1094208-Fix-controld-able-to-manually-confirm-unseen-nodes-a.patch
- Update to version 2.0.0+20180927.b67d8d0de:
  - logrotate: set a maximum size for logs
  - tools: ensure crm_resource --force-* commands get stderr messages
  - libcrmcommon: properly check whether resource supports parameters
  - tools: "return" from crm_mon after calling functions that don't
  - alerts: send all MIB OIDs with all SNMP alerts
  - resource-agents: add "s"-suffix where missing in metadata
  - libcommon: do not write to /proc/sys/kernel/sysrq when unneeded
  - pacemaker-based: drop declared, errant option never backed in tree
  - crm_mon: don't exit directly from cib_connect on error
  - scheduler: honor asymmetric orderings even when restarting

OBS-URL: https://build.opensuse.org/request/show/640081
OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/pacemaker?expand=0&rev=311
From 73a0ee287cd48ee10ed28f9071459d40d74e8801 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Fri, 1 Jun 2018 15:23:49 +0200
Subject: [PATCH 1/2] Fix: controld: able to manually confirm unseen nodes are
 down

9045bacb4 prevented manual fencing confirmations from creating node
entries for random unknown nodes, but it also disabled the ability to do
manual fencing confirmations for the nodes that are already known in the
CIB but not yet in the membership cache.

This commit fixes it by maintaining and utilizing an additional
membership cache of known nodes based on the CIB.
---
 daemons/controld/controld_schedulerd.c |   5 +-
 daemons/controld/controld_te_utils.c   |   2 +-
 include/crm/cluster/internal.h         |   3 +
 lib/cluster/membership.c               | 164 +++++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+), 3 deletions(-)
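
Note on usage (not part of the patch): the hunks below switch the controller
over to the two new helpers. As a rough sketch of the intended call pattern,
a caller confirming that a fence target is down could refresh the caches from
the current CIB and then resolve the node even if it has never appeared in the
corosync membership. The wrapper function below is hypothetical and assumes
pacemaker's internal headers and libcrmcluster are available.

#include <crm/cluster/internal.h>  /* crm_peer_caches_refresh(), crm_find_known_peer_full() */

/* Hypothetical wrapper: resolve a fence target using the CIB-based
 * known node cache introduced by this patch. */
static const char *
lookup_fence_target_uuid(xmlNode *current_cib, const char *target_uname)
{
    crm_node_t *peer = NULL;

    /* Rebuild both the remote node cache and the known node cache from the CIB */
    crm_peer_caches_refresh(current_cib);

    /* Checks the regular membership caches first, then falls back to the
     * known node cache for nodes that are in the CIB but have never been
     * seen in the membership (CRM_GET_PEER_ANY includes CRM_GET_PEER_CLUSTER,
     * which enables that fallback) */
    peer = crm_find_known_peer_full(0, target_uname, CRM_GET_PEER_ANY);

    return (peer != NULL)? peer->uuid : NULL;
}
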
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
index e5d5f69b0..4b53aaa97 100644
--- a/daemons/controld/controld_schedulerd.c
+++ b/daemons/controld/controld_schedulerd.c
@@ -355,8 +355,9 @@ do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
 
     CRM_LOG_ASSERT(output != NULL);
 
-    // Refresh the remote node cache when the scheduler is invoked
-    crm_remote_peer_cache_refresh(output);
+    /* Refresh the remote node cache and the known node cache when the
+     * scheduler is invoked */
+    crm_peer_caches_refresh(output);
 
     crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
     crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
index 3f538b9bc..5606ed654 100644
--- a/daemons/controld/controld_te_utils.c
+++ b/daemons/controld/controld_te_utils.c
@@ -269,7 +269,7 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
                st_event->origin, st_event->id);
 
     if (st_event->result == pcmk_ok) {
-        crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
+        crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
         const char *uuid = NULL;
         gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
 
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index 369f22700..12bf41ab0 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -329,4 +329,7 @@ gboolean node_name_is_valid(const char *key, const char *name);
 crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags);
 crm_node_t * crm_find_peer(unsigned int id, const char *uname);
 
+void crm_peer_caches_refresh(xmlNode *cib);
+crm_node_t *crm_find_known_peer_full(unsigned int id, const char *uname, int flags);
+
 #endif
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index a487e762a..e5151f2b7 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -50,6 +50,8 @@ GHashTable *crm_peer_cache = NULL;
  */
 GHashTable *crm_remote_peer_cache = NULL;
 
+GHashTable *crm_known_peer_cache = NULL;
+
 unsigned long long crm_peer_seq = 0;
 gboolean crm_have_quorum = FALSE;
 static gboolean crm_autoreap = TRUE;
@@ -394,6 +396,10 @@ crm_peer_init(void)
     if (crm_remote_peer_cache == NULL) {
         crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
     }
+
+    if (crm_known_peer_cache == NULL) {
+        crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
+    }
 }
 
 void
@@ -410,6 +416,13 @@ crm_peer_destroy(void)
         g_hash_table_destroy(crm_remote_peer_cache);
         crm_remote_peer_cache = NULL;
     }
+
+    if (crm_known_peer_cache != NULL) {
+        crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
+        g_hash_table_destroy(crm_known_peer_cache);
+        crm_known_peer_cache = NULL;
+    }
+
 }
 
 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
@@ -1001,3 +1014,154 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
 {
     return stonith_api_kick(nodeid, uname, 120, TRUE);
 }
+
+static void
+known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
+{
+    const char *id = crm_element_value(xml_node, XML_ATTR_ID);
+    const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
+    crm_node_t * node = NULL;
+
+    CRM_CHECK(id != NULL && uname !=NULL, return);
+    node = g_hash_table_lookup(crm_known_peer_cache, id);
+
+    if (node == NULL) {
+        node = calloc(1, sizeof(crm_node_t));
+        if (node == NULL) {
+            errno = -ENOMEM;
+            return;
+        }
+
+        node->uname = strdup(uname);
+        node->uuid = strdup(id);
+        if (node->uname == NULL || node->uuid == NULL) {
+            free(node);
+            errno = -ENOMEM;
+            return;
+        }
+
+        g_hash_table_replace(crm_known_peer_cache, node->uuid, node);
+
+    } else if (is_set(node->flags, crm_node_dirty)) {
+        if (safe_str_neq(uname, node->uname)) {
+            free(node->uname);
+            node->uname = strdup(uname);
+            CRM_ASSERT(node->uname != NULL);
+        }
+
+        /* Node is in cache and hasn't been updated already, so mark it clean */
+        clear_bit(node->flags, crm_node_dirty);
+    }
+
+}
+
+#define XPATH_MEMBER_NODE_CONFIG \
+    "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
+    "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
+
+static void
+crm_known_peer_cache_refresh(xmlNode *cib)
+{
+    crm_peer_init();
+
+    g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
+
+    crm_foreach_xpath_result(cib, XPATH_MEMBER_NODE_CONFIG,
+                             known_peer_cache_refresh_helper, NULL);
+
+    /* Remove all old cache entries that weren't seen in the CIB */
+    g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
+}
+
+void
+crm_peer_caches_refresh(xmlNode *cib)
+{
+    crm_remote_peer_cache_refresh(cib);
+    crm_known_peer_cache_refresh(cib);
+}
+
+crm_node_t *
+crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
+{
+    GHashTableIter iter;
+    crm_node_t *node = NULL;
+    crm_node_t *by_id = NULL;
+    crm_node_t *by_name = NULL;
+
+    CRM_ASSERT(id > 0 || uname != NULL);
+
+    node = crm_find_peer_full(id, uname, flags);
+
+    if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
+        return node;
+    }
+
+    if (uname != NULL) {
+        g_hash_table_iter_init(&iter, crm_known_peer_cache);
+        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+            if (node->uname && strcasecmp(node->uname, uname) == 0) {
+                crm_trace("Name match: %s = %p", node->uname, node);
+                by_name = node;
+                break;
+            }
+        }
+    }
+
+    if (id > 0) {
+        char * id_str = crm_strdup_printf("%u", id);
+
+        g_hash_table_iter_init(&iter, crm_known_peer_cache);
+        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+            if(node->id == id || strcasecmp(node->uuid, id_str) == 0) {
+                crm_trace("ID match: %u = %p", id, node);
+                by_id = node;
+                break;
+            }
+        }
+        free(id_str);
+    }
+
+    node = by_id; /* Good default */
+    if (by_id == by_name) {
+        /* Nothing to do if they match (both NULL counts) */
+        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
+
+    } else if (by_id == NULL && by_name) {
+        crm_trace("Only one: %p for %u/%s", by_name, id, uname);
+
+        if (id && by_name->id) {
+            crm_notice("Node %u and %u share the same name '%s'",
+                       id, by_name->id, uname);
+            node = NULL;
+
+        } else if (id && by_name->uuid) {
+            crm_notice("Node %u and %s share the same name '%s'",
+                       id, by_name->uuid, uname);
+            node = NULL;
+
+        } else {
+            node = by_name;
+        }
+
+    } else if (by_name == NULL && by_id) {
+        crm_trace("Only one: %p for %u/%s", by_id, id, uname);
+
+        if (uname && by_id->uname) {
+            crm_notice("Node '%s' and '%s' share the same cluster nodeid %u",
+                       uname, by_id->uname, id);
+        }
+
+    } else if (uname && by_id->uname) {
+        if (safe_str_eq(uname, by_id->uname)) {
+            crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
+
+        } else {
+            crm_notice("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
+        }
+
+    } else if (id && by_name->id) {
+        crm_notice("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
+    }
+
+    return node;
+}
-- 
2.16.4