diff --git a/_servicedata b/_servicedata
index e217e29..b733bc9 100644
--- a/_servicedata
+++ b/_servicedata
@@ -1,6 +1,6 @@
 git://github.com/ClusterLabs/pacemaker.git
-  f902897b62f8594f3788c0f95df68e002124a1f0
+  91415da4e67e6424a28ae394fc0f61a0d64dbdc8
\ No newline at end of file
diff --git a/bsc-1094208-Fix-controld-able-to-manually-confirm-unseen-nodes-a.patch b/bsc-1094208-Fix-controld-able-to-manually-confirm-unseen-nodes-a.patch
new file mode 100644
index 0000000..54ac2f0
--- /dev/null
+++ b/bsc-1094208-Fix-controld-able-to-manually-confirm-unseen-nodes-a.patch
@@ -0,0 +1,257 @@
+From 73a0ee287cd48ee10ed28f9071459d40d74e8801 Mon Sep 17 00:00:00 2001
+From: "Gao,Yan" <ygao@suse.com>
+Date: Fri, 1 Jun 2018 15:23:49 +0200
+Subject: [PATCH 1/2] Fix: controld: able to manually confirm unseen nodes are
+ down
+
+9045bacb4 prevented manual fencing confirmations from creating node
+entries for random unknown nodes, but it also disabled the ability to do
+manual fencing confirmations for the nodes that are already known in the
+CIB but not yet in the membership cache.
+
+This commit fixes it by maintaining and utilizing an additional
+membership cache of known nodes based on the CIB.
+---
+ daemons/controld/controld_schedulerd.c | 5 +-
+ daemons/controld/controld_te_utils.c | 2 +-
+ include/crm/cluster/internal.h | 3 +
+ lib/cluster/membership.c | 164 +++++++++++++++++++++++++++++++++
+ 4 files changed, 171 insertions(+), 3 deletions(-)
+
+diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
+index e5d5f69b0..4b53aaa97 100644
+--- a/daemons/controld/controld_schedulerd.c
++++ b/daemons/controld/controld_schedulerd.c
+@@ -355,8 +355,9 @@ do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
+ 
+     CRM_LOG_ASSERT(output != NULL);
+ 
+-    // Refresh the remote node cache when the scheduler is invoked
+-    crm_remote_peer_cache_refresh(output);
++    /* Refresh the remote node cache and the known node cache when the
++     * scheduler is invoked */
++    crm_peer_caches_refresh(output);
+ 
+     crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
+     crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
+diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
+index 3f538b9bc..5606ed654 100644
+--- a/daemons/controld/controld_te_utils.c
++++ b/daemons/controld/controld_te_utils.c
+@@ -269,7 +269,7 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
+               st_event->origin, st_event->id);
+ 
+     if (st_event->result == pcmk_ok) {
+-        crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
++        crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
+         const char *uuid = NULL;
+         gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
+diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
+index 369f22700..12bf41ab0 100644
+--- a/include/crm/cluster/internal.h
++++ b/include/crm/cluster/internal.h
+@@ -329,4 +329,7 @@ gboolean node_name_is_valid(const char *key, const char *name);
+ crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags);
+ crm_node_t * crm_find_peer(unsigned int id, const char *uname);
+ 
++void crm_peer_caches_refresh(xmlNode *cib);
++crm_node_t *crm_find_known_peer_full(unsigned int id, const char *uname, int flags);
++
+ #endif
+diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
+index a487e762a..e5151f2b7 100644
+--- a/lib/cluster/membership.c
++++ b/lib/cluster/membership.c
+@@ -50,6 +50,8 @@ GHashTable *crm_peer_cache = NULL;
+  */
+ GHashTable *crm_remote_peer_cache = NULL;
+ 
++GHashTable *crm_known_peer_cache = NULL;
++
+ unsigned long long crm_peer_seq = 0;
+ gboolean crm_have_quorum = FALSE;
+ static gboolean crm_autoreap = TRUE;
+@@ -394,6 +396,10 @@ crm_peer_init(void)
+     if (crm_remote_peer_cache == NULL) {
+         crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
+     }
++
++    if (crm_known_peer_cache == NULL) {
++        crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
++    }
+ }
+ 
+ void
+@@ -410,6 +416,13 @@ crm_peer_destroy(void)
+         g_hash_table_destroy(crm_remote_peer_cache);
+         crm_remote_peer_cache = NULL;
+     }
++
++    if (crm_known_peer_cache != NULL) {
++        crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
++        g_hash_table_destroy(crm_known_peer_cache);
++        crm_known_peer_cache = NULL;
++    }
++
+ }
+ 
+ void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
+@@ -1001,3 +1014,154 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
+ {
+     return stonith_api_kick(nodeid, uname, 120, TRUE);
+ }
++
++static void
++known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
++{
++    const char *id = crm_element_value(xml_node, XML_ATTR_ID);
++    const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
++    crm_node_t * node = NULL;
++
++    CRM_CHECK(id != NULL && uname !=NULL, return);
++    node = g_hash_table_lookup(crm_known_peer_cache, id);
++
++    if (node == NULL) {
++        node = calloc(1, sizeof(crm_node_t));
++        if (node == NULL) {
++            errno = -ENOMEM;
++            return;
++        }
++
++        node->uname = strdup(uname);
++        node->uuid = strdup(id);
++        if (node->uname == NULL || node->uuid == NULL) {
++            free(node);
++            errno = -ENOMEM;
++            return;
++        }
++
++        g_hash_table_replace(crm_known_peer_cache, node->uuid, node);
++
++    } else if (is_set(node->flags, crm_node_dirty)) {
++        if (safe_str_neq(uname, node->uname)) {
++            free(node->uname);
++            node->uname = strdup(uname);
++            CRM_ASSERT(node->uname != NULL);
++        }
++
++        /* Node is in cache and hasn't been updated already, so mark it clean */
++        clear_bit(node->flags, crm_node_dirty);
++    }
++
++}
++
++#define XPATH_MEMBER_NODE_CONFIG \
++    "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
++    "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
++
++static void
++crm_known_peer_cache_refresh(xmlNode *cib)
++{
++    crm_peer_init();
++
++    g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
++
++    crm_foreach_xpath_result(cib, XPATH_MEMBER_NODE_CONFIG,
++                             known_peer_cache_refresh_helper, NULL);
++
++    /* Remove all old cache entries that weren't seen in the CIB */
++    g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
++}
++
++void
++crm_peer_caches_refresh(xmlNode *cib)
++{
++    crm_remote_peer_cache_refresh(cib);
++    crm_known_peer_cache_refresh(cib);
++}
++
++crm_node_t *
++crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
++{
++    GHashTableIter iter;
++    crm_node_t *node = NULL;
++    crm_node_t *by_id = NULL;
++    crm_node_t *by_name = NULL;
++
++    CRM_ASSERT(id > 0 || uname != NULL);
++
++    node = crm_find_peer_full(id, uname, flags);
++
++    if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
++        return node;
++    }
++
++    if (uname != NULL) {
++        g_hash_table_iter_init(&iter, crm_known_peer_cache);
++        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
++            if (node->uname && strcasecmp(node->uname, uname) == 0) {
++                crm_trace("Name match: %s = %p", node->uname, node);
++                by_name = node;
++                break;
++            }
++        }
++    }
++
++    if (id > 0) {
++        char * id_str = crm_strdup_printf("%u", id);
++
++        g_hash_table_iter_init(&iter, crm_known_peer_cache);
++        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
++            if(node->id == id || strcasecmp(node->uuid, id_str) == 0) {
++                crm_trace("ID match: %u = %p", id, node);
++                by_id = node;
++                break;
++            }
++        }
++        free(id_str);
++    }
++
++    node = by_id; /* Good default */
++    if (by_id == by_name) {
++        /* Nothing to do if they match (both NULL counts) */
++        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
++
++    } else if (by_id == NULL && by_name) {
++        crm_trace("Only one: %p for %u/%s", by_name, id, uname);
++
++        if (id && by_name->id) {
++            crm_notice("Node %u and %u share the same name '%s'",
++                       id, by_name->id, uname);
++            node = NULL;
++
++        } else if (id && by_name->uuid) {
++            crm_notice("Node %u and %s share the same name '%s'",
++                       id, by_name->uuid, uname);
++            node = NULL;
++
++        } else {
++            node = by_name;
++        }
++
++    } else if (by_name == NULL && by_id) {
++        crm_trace("Only one: %p for %u/%s", by_id, id, uname);
++
++        if (uname && by_id->uname) {
++            crm_notice("Node '%s' and '%s' share the same cluster nodeid %u",
++                       uname, by_id->uname, id);
++        }
++
++    } else if (uname && by_id->uname) {
++        if (safe_str_eq(uname, by_id->uname)) {
++            crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
++
++        } else {
++            crm_notice("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
++        }
++
++    } else if (id && by_name->id) {
++        crm_notice("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
++    }
++
++    return node;
++}
+-- 
+2.16.4
+
diff --git a/bsc-1094208-Refactor-fenced-Handle-fencing-requested-with-nodeid.patch b/bsc-1094208-Refactor-fenced-Handle-fencing-requested-with-nodeid.patch
new file mode 100644
index 0000000..b2b7fe3
--- /dev/null
+++ b/bsc-1094208-Refactor-fenced-Handle-fencing-requested-with-nodeid.patch
@@ -0,0 +1,227 @@
+From a3cfd279fdb88d54e325ebd46a15c1df8e16d351 Mon Sep 17 00:00:00 2001
+From: "Gao,Yan" <ygao@suse.com>
+Date: Mon, 1 Oct 2018 17:23:14 +0200
+Subject: [PATCH 2/2] Refactor: fenced: Handle fencing requested with nodeid by
+ utilizing the membership cache of known nodes
+
+This partially reverts c52267dfb.
+---
+ daemons/fenced/fenced_commands.c | 9 +++------
+ daemons/fenced/fenced_history.c | 8 +++-----
+ daemons/fenced/fenced_remote.c | 32 ++++----------------------------
+ daemons/fenced/pacemaker-fenced.c | 20 +++++---------------
+ daemons/fenced/pacemaker-fenced.h | 4 ----
+ 5 files changed, 15 insertions(+), 58 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 1fdcee7f4..8e6f1b634 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -2298,22 +2298,19 @@ stonith_fence(xmlNode * msg)
+ 
+     } else {
+         const char *host = crm_element_value(dev, F_STONITH_TARGET);
+-        char *nodename = NULL;
+ 
+         if (cmd->options & st_opt_cs_nodeid) {
+             int nodeid = crm_atoi(host, NULL);
++            crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
+ 
+-            nodename = stonith_get_peer_name(nodeid);
+-            if (nodename) {
+-                host = nodename;
++            if (node) {
++                host = node->uname;
+             }
+         }
+ 
+         /* If we get to here, then self-fencing is implicitly allowed */
+         get_capable_devices(host, cmd->action, cmd->default_timeout,
+                             TRUE, cmd, stonith_fence_get_devices_cb);
+-
+-        free(nodename);
+     }
+ 
+     return -EINPROGRESS;
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
+index 7e1631fa9..767aeb337 100644
+--- a/daemons/fenced/fenced_history.c
++++ b/daemons/fenced/fenced_history.c
+@@ -384,17 +384,16 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
+     int rc = 0;
+     const char *target = NULL;
+     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE);
+-    char *nodename = NULL;
+     xmlNode *out_history = NULL;
+ 
+     if (dev) {
+         target = crm_element_value(dev, F_STONITH_TARGET);
+         if (target && (options & st_opt_cs_nodeid)) {
+             int nodeid = crm_atoi(target, NULL);
++            crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
+ 
+-            nodename = stonith_get_peer_name(nodeid);
+-            if (nodename) {
+-                target = nodename;
++            if (node) {
++                target = node->uname;
+             }
+         }
+     }
+@@ -463,7 +462,6 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
+                                  stonith_remote_op_list);
+         *output = stonith_local_history_diff(NULL, FALSE, target);
+     }
+-    free(nodename);
+     free_xml(out_history);
+     return rc;
+ }
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index f1812dac2..110684473 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -931,30 +931,6 @@ stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
+     return -EINPROGRESS;
+ }
+ 
+-char *
+-stonith_get_peer_name(unsigned int nodeid)
+-{
+-    crm_node_t *node = crm_find_peer(nodeid, NULL);
+-    char *nodename = NULL;
+-
+-    if (node && node->uname) {
+-        return strdup(node->uname);
+-
+-    } else if ((nodename = get_node_name(nodeid))) {
+-        return nodename;
+-
+-    } else {
+-        const char *last_known_name = g_hash_table_lookup(known_peer_names, GUINT_TO_POINTER(nodeid));
+-
+-        if (last_known_name) {
+-            crm_debug("Use the last known name %s for nodeid %u", last_known_name, nodeid);
+-            return strdup(last_known_name);
+-        }
+-    }
+-
+-    return NULL;
+-}
+-
+ /*!
+  * \internal
+  * \brief Create a new remote stonith operation
+@@ -1035,17 +1011,17 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
+ 
+     if (op->call_options & st_opt_cs_nodeid) {
+         int nodeid = crm_atoi(op->target, NULL);
+-        char *nodename = stonith_get_peer_name(nodeid);
++        crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
+ 
+         /* Ensure the conversion only happens once */
+         op->call_options &= ~st_opt_cs_nodeid;
+ 
+-        if (nodename) {
++        if (node && node->uname) {
+             free(op->target);
+-            op->target = nodename;
++            op->target = strdup(node->uname);
+ 
+         } else {
+-            crm_warn("Could not expand nodeid '%s' into a host name", op->target);
++            crm_warn("Could not expand nodeid '%s' into a host name (%p)", op->target, node);
+         }
+     }
+ 
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index 0e0ac96e1..e67580059 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -50,8 +50,6 @@ static gboolean stonith_shutdown_flag = FALSE;
+ static qb_ipcs_service_t *ipcs = NULL;
+ static xmlNode *local_cib = NULL;
+ 
+-GHashTable *known_peer_names = NULL;
+-
+ static cib_t *cib_api = NULL;
+ static void *cib_library = NULL;
+ 
+@@ -1049,6 +1047,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
+         stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
+     }
+ 
++    crm_peer_caches_refresh(local_cib);
++
+     stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE);
+     if (stonith_enabled_xml) {
+         stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
+@@ -1103,6 +1103,8 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us
+         have_cib_devices = TRUE;
+         local_cib = copy_xml(output);
+ 
++        crm_peer_caches_refresh(local_cib);
++
+         fencing_topology_init();
+         cib_devices_update();
+     }
+@@ -1147,11 +1149,6 @@ stonith_cleanup(void)
+         qb_ipcs_destroy(ipcs);
+     }
+ 
+-    if (known_peer_names != NULL) {
+-        g_hash_table_destroy(known_peer_names);
+-        known_peer_names = NULL;
+-    }
+-
+     crm_peer_destroy();
+     crm_client_cleanup();
+     free_stonith_remote_op_list();
+@@ -1239,17 +1236,11 @@ static void
+ st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
+ {
+     if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) {
+-        xmlNode *query = NULL;
+-
+-        if (node->id && node->uname) {
+-            g_hash_table_insert(known_peer_names, GUINT_TO_POINTER(node->id), strdup(node->uname));
+-        }
+-
+         /*
+          * This is a hack until we can send to a nodeid and/or we fix node name lookups
+          * These messages are ignored in stonith_peer_callback()
+          */
+-        query = create_xml_node(NULL, "stonith_command");
++        xmlNode *query = create_xml_node(NULL, "stonith_command");
+ 
+         crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
+         crm_xml_add(query, F_TYPE, T_STONITH_NG);
+@@ -1444,7 +1435,6 @@ main(int argc, char **argv)
+     mainloop_add_signal(SIGTERM, stonith_shutdown);
+ 
+     crm_peer_init();
+-    known_peer_names = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
+ 
+     if (stand_alone == FALSE) {
+ 
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index 7a51e95f6..3194e358c 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -255,14 +255,10 @@ schedule_internal_command(const char *origin,
+                           void (*done_cb) (GPid pid, int rc, const char *output,
+                                            gpointer user_data));
+ 
+-char *stonith_get_peer_name(unsigned int nodeid);
+-
+ extern char *stonith_our_uname;
+ extern gboolean stand_alone;
+ extern GHashTable *device_list;
+ extern GHashTable *topology;
+ extern long stonith_watchdog_timeout_ms;
+ 
+-extern GHashTable *known_peer_names;
+-
+ extern GHashTable *stonith_remote_op_list;
+-- 
+2.16.4
+
diff --git a/pacemaker-2.0.0+20180911.b7803d27c.tar.xz b/pacemaker-2.0.0+20180911.b7803d27c.tar.xz
deleted file mode 100644
index 82cc9dd..0000000
--- a/pacemaker-2.0.0+20180911.b7803d27c.tar.xz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b188e2166656fa48639b97947cbcc506a386494b2bca4f2444e77e7bdab3c522
-size 3286436
diff --git a/pacemaker-2.0.0+20180927.b67d8d0de.tar.xz b/pacemaker-2.0.0+20180927.b67d8d0de.tar.xz
new file mode 100644
index 0000000..a687772
--- /dev/null
+++ b/pacemaker-2.0.0+20180927.b67d8d0de.tar.xz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd84f7549dd70fa8ffde0567b8a3dc6a8e484b62ea4474ebcb507f697fba4576
+size 3469064
diff --git a/pacemaker.changes b/pacemaker.changes
index 96924ad..016e51b 100644
--- a/pacemaker.changes
+++ b/pacemaker.changes
@@ -1,3 +1,27 @@
+-------------------------------------------------------------------
+Fri Oct 5 12:57:16 UTC 2018 - ygao@suse.com
+
+- fenced: Handle fencing requested with nodeid by utilizing the membership cache of known nodes (bsc#1094208)
+ * bsc-1094208-Refactor-fenced-Handle-fencing-requested-with-nodeid.patch
+
+- controld: able to manually confirm unseen nodes are down (bsc#1094208)
+ * bsc-1094208-Fix-controld-able-to-manually-confirm-unseen-nodes-a.patch
+
+-------------------------------------------------------------------
+Thu Oct 4 09:35:38 UTC 2018 - ygao@suse.com
+
+- Update to version 2.0.0+20180927.b67d8d0de:
+- logrotate: set a maximum size for logs
+- tools: ensure crm_resource --force-* commands get stderr messages
+- libcrmcommon: properly check whether resource supports parameters
+- tools: "return" from crm_mon after calling functions that don't
+- alerts: send all MIB OIDs with all SNMP alerts
+- resource-agents: add "s"-suffix where missing in metadata
+- libcommon: do not write to /proc/sys/kernel/sysrq when unneeded
+- pacemaker-based: drop declared, errant option never backed in tree
+- crm_mon: don't exit directly from cib_connect on error
+- scheduler: honor asymmetric orderings even when restarting
+
 -------------------------------------------------------------------
 Thu Sep 13 09:59:39 UTC 2018 - ygao@suse.com
diff --git a/pacemaker.spec b/pacemaker.spec
index c389efa..2e4888b 100644
--- a/pacemaker.spec
+++ b/pacemaker.spec
@@ -74,7 +74,7 @@
 %endif
 Name: pacemaker
-Version: 2.0.0+20180911.b7803d27c
+Version: 2.0.0+20180927.b67d8d0de
 Release: 0
 Summary: Scalable High-Availability cluster resource manager
 # AGPL-3.0 licensed extra/clustermon.sh is not present in the binary
@@ -95,6 +95,8 @@ Patch6: bug-943295_pacemaker-lrmd-log-notice.patch
 Patch7: bug-977201_pacemaker-controld-self-fencing.patch
 Patch8: bug-995365_pacemaker-cts-restart-systemd-journald.patch
 Patch9: pacemaker-cts-StartCmd.patch
+Patch10: bsc-1094208-Fix-controld-able-to-manually-confirm-unseen-nodes-a.patch
+Patch11: bsc-1094208-Refactor-fenced-Handle-fencing-requested-with-nodeid.patch
 # Required for core functionality
 BuildRequires: autoconf
 BuildRequires: automake
@@ -313,6 +315,8 @@ manager.
 %patch7 -p1
 %patch8 -p1
 %patch9 -p1
+%patch10 -p1
+%patch11 -p1
 %build
@@ -346,7 +350,7 @@ autoreconf -fvi
 %if !%{enable_fatal_warnings}
 --enable-fatal-warnings=no \
 %endif
- %{?python_path: PYTHON=%{python_path}} \
+ PYTHON=%{python_path} \
 %{!?with_hardening: --disable-hardening} \
 %{!?with_legacy_links: --disable-legacy-links} \
 %{?with_profiling: --with-profiling} \