Sync from SUSE:SLFO:Main pacemaker revision 3ecb5dbcf7355e5bf1f05b573a39eded

This commit is contained in:
Adrian Schröter 2025-02-26 11:48:05 +01:00
parent 54869ed28e
commit 1648f3aa58
35 changed files with 3076 additions and 6 deletions

View File

@ -11,7 +11,7 @@
<param name="version">3.0.0</param>
-->
<param name="versionformat">3.0.0+%cd.%h</param>
<param name="revision">fa492f5181</param>
<param name="revision">64cd85422c</param>
<param name="changesgenerate">enable</param>
</service>

View File

@ -5,4 +5,4 @@
</service>
<service name="tar_scm">
<param name="url">https://github.com/ClusterLabs/pacemaker.git</param>
<param name="changesrevision">dc802bfe4bebd89448b53e42dcc5d022905a4215</param></service></servicedata>
<param name="changesrevision">d60b9407f2db9c9eca5f4dea0726478e068db910</param></service></servicedata>

View File

@ -0,0 +1,116 @@
From a1d94f7ab57a71a36faa8282dd7db6af4cb25f39 Mon Sep 17 00:00:00 2001
From: Aleksei Burlakov <aburlakov@suse.com>
Date: Sat, 2 Nov 2024 12:49:01 +0100
Subject: [PATCH] Fix: crmadmin: return error if DC is not elected #2902 #3606
If the DC is not yet elected, crmadmin will return an error.
(This change complements #3606).
---
cts/cli/regression.error_codes.exp | 4 ++++
include/crm/common/results.h | 2 ++
lib/common/results.c | 5 +++++
lib/pacemaker/pcmk_cluster_queries.c | 2 +-
4 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/cts/cli/regression.error_codes.exp b/cts/cli/regression.error_codes.exp
index f620845b9e..3c330eda3f 100644
--- a/cts/cli/regression.error_codes.exp
+++ b/cts/cli/regression.error_codes.exp
@@ -404,6 +404,7 @@ CRM_EX_ERROR - Error occurred
111: Requested item is not yet in effect
112: Could not determine status
113: Not applicable under current conditions
+ 114: DC is not yet elected
124: Timeout occurred
190: Service is active but might fail soon
191: Service is promoted but might fail soon
@@ -451,6 +452,7 @@ CRM_EX_ERROR - Error occurred
<result-code code="111" description="Requested item is not yet in effect"/>
<result-code code="112" description="Could not determine status"/>
<result-code code="113" description="Not applicable under current conditions"/>
+ <result-code code="114" description="DC is not yet elected"/>
<result-code code="124" description="Timeout occurred"/>
<result-code code="190" description="Service is active but might fail soon"/>
<result-code code="191" description="Service is promoted but might fail soon"/>
@@ -499,6 +501,7 @@ CRM_EX_ERROR - Error occurred
111: CRM_EX_NOT_YET_IN_EFFECT Requested item is not yet in effect
112: CRM_EX_INDETERMINATE Could not determine status
113: CRM_EX_UNSATISFIED Not applicable under current conditions
+ 114: CRM_EX_NO_DC DC is not yet elected
124: CRM_EX_TIMEOUT Timeout occurred
190: CRM_EX_DEGRADED Service is active but might fail soon
191: CRM_EX_DEGRADED_PROMOTED Service is promoted but might fail soon
@@ -546,6 +549,7 @@ CRM_EX_ERROR - Error occurred
<result-code code="111" name="CRM_EX_NOT_YET_IN_EFFECT" description="Requested item is not yet in effect"/>
<result-code code="112" name="CRM_EX_INDETERMINATE" description="Could not determine status"/>
<result-code code="113" name="CRM_EX_UNSATISFIED" description="Not applicable under current conditions"/>
+ <result-code code="114" name="CRM_EX_NO_DC" description="DC is not yet elected"/>
<result-code code="124" name="CRM_EX_TIMEOUT" description="Timeout occurred"/>
<result-code code="190" name="CRM_EX_DEGRADED" description="Service is active but might fail soon"/>
<result-code code="191" name="CRM_EX_DEGRADED_PROMOTED" description="Service is promoted but might fail soon"/>
diff --git a/include/crm/common/results.h b/include/crm/common/results.h
index 2fedb7c736..a671cb8efd 100644
--- a/include/crm/common/results.h
+++ b/include/crm/common/results.h
@@ -111,6 +111,7 @@ enum pcmk_rc_e {
/* When adding new values, use consecutively lower numbers, update the array
* in lib/common/results.c, and test with crm_error.
*/
+ pcmk_rc_no_dc = -1040,
pcmk_rc_compression = -1039,
pcmk_rc_ns_resolution = -1038,
pcmk_rc_no_transaction = -1037,
@@ -273,6 +274,7 @@ typedef enum crm_exit_e {
CRM_EX_NOT_YET_IN_EFFECT = 111, //!< Requested item is not in effect
CRM_EX_INDETERMINATE = 112, //!< Could not determine status
CRM_EX_UNSATISFIED = 113, //!< Requested item does not satisfy constraints
+ CRM_EX_NO_DC = 114, //!< DC is not yet elected, e.g. right after cluster restart
// Other
CRM_EX_TIMEOUT = 124, //!< Convention from timeout(1)
diff --git a/lib/common/results.c b/lib/common/results.c
index 507280492c..359d1eeccc 100644
--- a/lib/common/results.c
+++ b/lib/common/results.c
@@ -734,6 +734,7 @@ crm_exit_name(crm_exit_t exit_code)
case CRM_EX_NOT_YET_IN_EFFECT: return "CRM_EX_NOT_YET_IN_EFFECT";
case CRM_EX_INDETERMINATE: return "CRM_EX_INDETERMINATE";
case CRM_EX_UNSATISFIED: return "CRM_EX_UNSATISFIED";
+ case CRM_EX_NO_DC: return "CRM_EX_NO_DC";
case CRM_EX_OLD: return "CRM_EX_OLD";
case CRM_EX_TIMEOUT: return "CRM_EX_TIMEOUT";
case CRM_EX_DEGRADED: return "CRM_EX_DEGRADED";
@@ -786,6 +787,7 @@ crm_exit_str(crm_exit_t exit_code)
case CRM_EX_NOT_YET_IN_EFFECT: return "Requested item is not yet in effect";
case CRM_EX_INDETERMINATE: return "Could not determine status";
case CRM_EX_UNSATISFIED: return "Not applicable under current conditions";
+ case CRM_EX_NO_DC: return "DC is not yet elected";
case CRM_EX_OLD: return "Update was older than existing configuration";
case CRM_EX_TIMEOUT: return "Timeout occurred";
case CRM_EX_DEGRADED: return "Service is active but might fail soon";
@@ -922,6 +924,9 @@ pcmk_rc2exitc(int rc)
case pcmk_rc_bad_xml_patch:
return CRM_EX_DATAERR;
+ case pcmk_rc_no_dc:
+ return CRM_EX_NO_DC;
+
default:
return CRM_EX_ERROR;
}
diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c
index 3f7584e9ff..e99d91f7d0 100644
--- a/lib/pacemaker/pcmk_cluster_queries.c
+++ b/lib/pacemaker/pcmk_cluster_queries.c
@@ -235,7 +235,7 @@ designated_controller_event_cb(pcmk_ipc_api_t *controld_api,
reply = (const pcmk_controld_api_reply_t *) event_data;
out->message(out, "dc", reply->host_from);
- data->rc = pcmk_rc_ok;
+ data->rc = reply->host_from ? pcmk_rc_ok : pcmk_rc_no_dc;
}
/*!
--
2.43.0

View File

@ -0,0 +1,45 @@
From 20b1aaff9bb0e5fe8682c9c9630f2f34ec8b9086 Mon Sep 17 00:00:00 2001
From: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
Date: Mon, 2 Dec 2024 16:26:42 +0900
Subject: [PATCH 1/2] Mid: systemd: Fix when monitor of systemd resource
continues to be pending.
---
daemons/execd/execd_commands.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index 5356c5cc97..8ade0ac345 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -903,6 +903,27 @@ action_complete(svc_action_t * action)
}
}
}
+ } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
+ (cmd->interval_ms > 0)) {
+ /* For monitors, excluding follow-up monitors, */
+ /* if the pending state persists from the first notification until its timeout, */
+ /* it will be treated as a timeout. */
+
+ if ((cmd->result.execution_status == PCMK_EXEC_PENDING) &&
+ (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) {
+ int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000));
+
+ if (time_left >= 0) {
+ crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout (first pending notification=%s timeout=%ds)",
+ cmd->rsc_id, cmd->action,
+ cmd->result.exit_status, pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "Investigate reason for timeout, and adjust "
+ "configured operation timeout if necessary");
+ cmd_original_times(cmd);
+ }
+ }
}
}
#endif
--
2.43.0

View File

@ -0,0 +1,36 @@
From 6fee07dfc482a0069184813437a84ebd7f11a7b4 Mon Sep 17 00:00:00 2001
From: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
Date: Mon, 2 Dec 2024 16:27:34 +0900
Subject: [PATCH 2/2] Mid: systemd: If the state is Pending at the time of
probe, execute follow up monitor.
---
daemons/execd/execd_commands.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index 8ade0ac345..ece2315e36 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -869,8 +869,16 @@ action_complete(svc_action_t * action)
cmd->real_action = cmd->action;
cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
- } else if (cmd->real_action != NULL) {
- // This is follow-up monitor to check whether start/stop completed
+ } else if (cmd->result.execution_status == PCMK_EXEC_PENDING &&
+ pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
+ cmd->interval_ms == 0 &&
+ cmd->real_action == NULL) {
+ /* If the state is Pending at the time of probe, execute follow-up monitor. */
+ goagain = true;
+ cmd->real_action = cmd->action;
+ cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
+ } else if (cmd->real_action != NULL) {
+ // This is follow-up monitor to check whether start/stop/probe(monitor) completed
if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
goagain = true;
--
2.43.0

View File

@ -0,0 +1,104 @@
From 4193fb1d8e5023289e7a600a8dabc864b99f24a2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 10 Dec 2024 14:56:20 -0600
Subject: [PATCH] Low: controller: round timeout when checking remaining remote
command time
Use pcmk__timeout_ms2s() to ensure that remote command timeout values
are properly rounded.
Also, refactor to drop remaining_timeout from remote_ra_cmd_t. The value
was only ever used locally when calculated and doesn't need to be
remembered.
---
daemons/controld/controld_remote_ra.c | 33 +++++++++++++++++----------
1 file changed, 21 insertions(+), 12 deletions(-)
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 060a231d74..df4e0bd6fc 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -55,7 +55,6 @@ typedef struct remote_ra_cmd_s {
int delay_id;
/*! timeout in ms for cmd */
int timeout;
- int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
@@ -512,10 +511,18 @@ report_remote_ra_result(remote_ra_cmd_t * cmd)
lrmd__reset_result(&op);
}
-static void
-update_remaining_timeout(remote_ra_cmd_t * cmd)
+/*!
+ * \internal
+ * \brief Return a remote command's remaining timeout in seconds
+ *
+ * \param[in] cmd Remote command to check
+ *
+ * \return Command's remaining timeout in seconds
+ */
+static int
+remaining_timeout_sec(const remote_ra_cmd_t *cmd)
{
- cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
+ return pcmk__timeout_ms2s(cmd->timeout) - (time(NULL) - cmd->start_time);
}
static gboolean
@@ -525,6 +532,7 @@ retry_start_cmd_cb(gpointer data)
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = ETIME;
+ int remaining = 0;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
@@ -534,10 +542,10 @@ retry_start_cmd_cb(gpointer data)
PCMK_ACTION_MIGRATE_FROM, NULL)) {
return FALSE;
}
- update_remaining_timeout(cmd);
- if (cmd->remaining_timeout > 0) {
- rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
+ remaining = remaining_timeout_sec(cmd);
+ if (remaining > 0) {
+ rc = handle_remote_ra_start(lrm_state, cmd, remaining * 1000);
} else {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
@@ -723,7 +731,7 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
if (op->connection_rc < 0) {
- update_remaining_timeout(cmd);
+ int remaining = remaining_timeout_sec(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
@@ -732,14 +740,15 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
- } else if (cmd->remaining_timeout > 3000) {
- crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
+ } else if (remaining > 3) {
+ crm_trace("Rescheduling start (%ds remains before timeout)",
+ remaining);
pcmk__create_timer(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
- crm_trace("can't reschedule start, remaining timeout too small %d",
- cmd->remaining_timeout);
+ crm_trace("Not enough time before timeout (%ds) "
+ "to reschedule start", remaining);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
--
2.43.0

View File

@ -0,0 +1,23 @@
From 78e6b46a2bbb80af24a804045313370a6404a251 Mon Sep 17 00:00:00 2001
From: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
Date: Thu, 9 Jan 2025 08:32:48 +0900
Subject: [PATCH] Mid: schedulerd: Resetting error and warning flags.
---
lib/pengine/status.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
Index: pacemaker-3.0.0+20250128.fa492f5181/lib/pengine/status.c
===================================================================
--- pacemaker-3.0.0+20250128.fa492f5181.orig/lib/pengine/status.c
+++ pacemaker-3.0.0+20250128.fa492f5181/lib/pengine/status.c
@@ -447,6 +447,9 @@ set_working_set_defaults(pcmk_scheduler_
|pcmk__sched_stop_removed_resources
|pcmk__sched_cancel_removed_actions);
#endif
+
+ pcmk__config_has_error = false;
+ pcmk__config_has_warning = false;
}
pcmk_resource_t *

View File

@ -0,0 +1,374 @@
From c120c1ebbcb68966e229d4c3647fdbb511150f94 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Wed, 8 Jan 2025 20:44:25 -0800
Subject: [PATCH 1/2] Low: various: Correct some printf specifiers
As of 18a93372, we can use the 'z' modifier for size_t and ssize_t. So
here we take advantage of that to avoid some (long long) casts.
We also correct some incorrect specifiers (signed vs. unsigned, and
using proper macros from inttypes.h).
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
daemons/controld/controld_fsa.c | 12 +++---
daemons/controld/controld_messages.c | 8 ++--
lib/cluster/cpg.c | 24 +++++------
lib/common/ipc_server.c | 25 ++++++-----
lib/common/remote.c | 62 ++++++++++++----------------
lib/services/services_linux.c | 17 ++++----
6 files changed, 67 insertions(+), 81 deletions(-)
Index: pacemaker-3.0.0+20250128.fa492f5181/daemons/controld/controld_fsa.c
===================================================================
--- pacemaker-3.0.0+20250128.fa492f5181.orig/daemons/controld/controld_fsa.c
+++ pacemaker-3.0.0+20250128.fa492f5181/daemons/controld/controld_fsa.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -9,6 +9,7 @@
#include <crm_internal.h>
+#include <inttypes.h> // PRIx64
#include <sys/param.h>
#include <stdio.h>
#include <stdint.h> // uint64_t
@@ -279,9 +280,10 @@ s_crmd_fsa(enum crmd_fsa_cause cause)
|| (controld_globals.fsa_actions != A_NOTHING)
|| pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
- crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s",
+ crm_debug("Exiting the FSA: queue=%d, fsa_actions=%" PRIx64
+ ", stalled=%s",
g_list_length(controld_globals.fsa_message_queue),
- (unsigned long long) controld_globals.fsa_actions,
+ controld_globals.fsa_actions,
pcmk__flag_text(controld_globals.flags,
controld_fsa_is_stalled));
} else {
@@ -505,9 +507,9 @@ s_crmd_fsa_actions(fsa_data_t * fsa_data
/* Error checking and reporting */
} else {
- crm_err("Action %s not supported " QB_XS " %#llx",
+ crm_err("Action %s not supported " QB_XS " %" PRIx64,
fsa_action2string(controld_globals.fsa_actions),
- (unsigned long long) controld_globals.fsa_actions);
+ controld_globals.fsa_actions);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL,
__func__);
}
Index: pacemaker-3.0.0+20250128.fa492f5181/daemons/controld/controld_messages.c
===================================================================
--- pacemaker-3.0.0+20250128.fa492f5181.orig/daemons/controld/controld_messages.c
+++ pacemaker-3.0.0+20250128.fa492f5181/daemons/controld/controld_messages.c
@@ -9,8 +9,10 @@
#include <crm_internal.h>
-#include <sys/param.h>
+#include <inttypes.h> // PRIx64
+#include <stdint.h> // uint64_t
#include <string.h>
+#include <sys/param.h>
#include <time.h>
#include <crm/crm.h>
@@ -110,8 +112,7 @@ register_fsa_input_adv(enum crmd_fsa_cau
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
- crm_trace("Adding actions %.16llx to input",
- (unsigned long long) with_actions);
+ crm_trace("Adding actions %.16" PRIx64 " to input", with_actions);
}
if (data != NULL) {
@@ -1382,4 +1383,3 @@ broadcast_remote_state_message(const cha
pcmk__cluster_send_message(NULL, pcmk_ipc_controld, msg);
pcmk__xml_free(msg);
}
-
Index: pacemaker-3.0.0+20250128.fa492f5181/lib/cluster/cpg.c
===================================================================
--- pacemaker-3.0.0+20250128.fa492f5181.orig/lib/cluster/cpg.c
+++ pacemaker-3.0.0+20250128.fa492f5181/lib/cluster/cpg.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -237,8 +237,7 @@ crm_cs_flush(gpointer data)
}
sent++;
- crm_trace("CPG message sent, size=%llu",
- (unsigned long long) iov->iov_len);
+ crm_trace("CPG message sent, size=%zu", iov->iov_len);
cs_message_queue = g_list_remove(cs_message_queue, iov);
free(iov->iov_base);
@@ -360,10 +359,9 @@ check_message_sanity(const pcmk__cpg_msg
(((msg->size > 1) && (msg->data[msg->size - 2] == '\0'))
|| (msg->data[msg->size - 1] != '\0'))) {
crm_err("CPG message %d from %s invalid: "
- "Payload does not end at byte %llu "
+ "Payload does not end at byte %" PRIu32 " "
QB_XS " from %s[%u] to %s@%s",
- msg->id, ais_dest(&(msg->sender)),
- (unsigned long long) msg->size,
+ msg->id, ais_dest(&(msg->sender)), msg->size,
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
@@ -1009,15 +1007,13 @@ send_cpg_text(const char *data, const pc
iov->iov_len = msg->header.size;
if (msg->compressed_size > 0) {
- crm_trace("Queueing CPG message %u to %s "
- "(%llu bytes, %d bytes compressed payload): %.200s",
- msg->id, target, (unsigned long long) iov->iov_len,
- msg->compressed_size, data);
+ crm_trace("Queueing CPG message %" PRIu32 " to %s "
+ "(%zu bytes, %" PRIu32 " bytes compressed payload): %.200s",
+ msg->id, target, iov->iov_len, msg->compressed_size, data);
} else {
- crm_trace("Queueing CPG message %u to %s "
- "(%llu bytes, %d bytes payload): %.200s",
- msg->id, target, (unsigned long long) iov->iov_len,
- msg->size, data);
+ crm_trace("Queueing CPG message %" PRIu32 " to %s "
+ "(%zu bytes, %" PRIu32 " bytes payload): %.200s",
+ msg->id, target, iov->iov_len, msg->size, data);
}
free(target);
Index: pacemaker-3.0.0+20250128.fa492f5181/lib/common/ipc_server.c
===================================================================
--- pacemaker-3.0.0+20250128.fa492f5181.orig/lib/common/ipc_server.c
+++ pacemaker-3.0.0+20250128.fa492f5181/lib/common/ipc_server.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -540,9 +540,8 @@ crm_ipcs_flush_events(pcmk__client_t *c)
queue_len -= sent;
if (sent > 0 || queue_len) {
- crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)",
- sent, queue_len, c->ipcs, c->pid,
- pcmk_rc_str(rc), (long long) qb_rc);
+ crm_trace("Sent %u events (%u remaining) for %p[%d]: %s (%zd)",
+ sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), qb_rc);
}
if (queue_len) {
Index: pacemaker-3.0.0+20250128.fa492f5181/lib/common/remote.c
===================================================================
--- pacemaker-3.0.0+20250128.fa492f5181.orig/lib/common/remote.c
+++ pacemaker-3.0.0+20250128.fa492f5181/lib/common/remote.c
@@ -137,29 +137,26 @@ send_tls(gnutls_session_t session, struc
return EINVAL;
}
- crm_trace("Sending TLS message of %llu bytes",
- (unsigned long long) unsent_len);
+ crm_trace("Sending TLS message of %zu bytes", unsent_len);
+
while (true) {
gnutls_rc = gnutls_record_send(session, unsent, unsent_len);
if (gnutls_rc == GNUTLS_E_INTERRUPTED || gnutls_rc == GNUTLS_E_AGAIN) {
- crm_trace("Retrying to send %llu bytes remaining",
- (unsigned long long) unsent_len);
+ crm_trace("Retrying to send %zu bytes remaining", unsent_len);
} else if (gnutls_rc < 0) {
// Caller can log as error if necessary
- crm_info("TLS connection terminated: %s " QB_XS " rc=%lld",
- gnutls_strerror((int) gnutls_rc),
- (long long) gnutls_rc);
+ crm_info("TLS connection terminated: %s " QB_XS " rc=%zd",
+ gnutls_strerror((int) gnutls_rc), gnutls_rc);
return ECONNABORTED;
} else if (gnutls_rc < unsent_len) {
- crm_trace("Sent %lld of %llu bytes remaining",
- (long long) gnutls_rc, (unsigned long long) unsent_len);
+ crm_trace("Sent %zd of %zu bytes remaining", gnutls_rc, unsent_len);
unsent_len -= gnutls_rc;
unsent += gnutls_rc;
} else {
- crm_trace("Sent all %lld bytes remaining", (long long) gnutls_rc);
+ crm_trace("Sent all %zd bytes remaining", gnutls_rc);
break;
}
}
@@ -178,16 +175,16 @@ send_plaintext(int sock, struct iovec *i
return EINVAL;
}
- crm_debug("Sending plaintext message of %llu bytes to socket %d",
- (unsigned long long) unsent_len, sock);
+ crm_debug("Sending plaintext message of %zu bytes to socket %d",
+ unsent_len, sock);
while (true) {
write_rc = write(sock, unsent, unsent_len);
if (write_rc < 0) {
int rc = errno;
if ((errno == EINTR) || (errno == EAGAIN)) {
- crm_trace("Retrying to send %llu bytes remaining to socket %d",
- (unsigned long long) unsent_len, sock);
+ crm_trace("Retrying to send %zu bytes remaining to socket %d",
+ unsent_len, sock);
continue;
}
@@ -197,15 +194,14 @@ send_plaintext(int sock, struct iovec *i
return rc;
} else if (write_rc < unsent_len) {
- crm_trace("Sent %lld of %llu bytes remaining",
- (long long) write_rc, (unsigned long long) unsent_len);
+ crm_trace("Sent %zd of %zu bytes remaining", write_rc, unsent_len);
unsent += write_rc;
unsent_len -= write_rc;
continue;
} else {
- crm_trace("Sent all %lld bytes remaining: %.100s",
- (long long) write_rc, (char *) (iov->iov_base));
+ crm_trace("Sent all %zd bytes remaining: %.100s",
+ write_rc, (char *) (iov->iov_base));
break;
}
}
@@ -456,8 +452,7 @@ pcmk__read_available_remote_data(pcmk__r
/* automatically grow the buffer when needed */
if(remote->buffer_size < read_len) {
remote->buffer_size = 2 * read_len;
- crm_trace("Expanding buffer to %llu bytes",
- (unsigned long long) remote->buffer_size);
+ crm_trace("Expanding buffer to %zu bytes", remote->buffer_size);
remote->buffer = pcmk__realloc(remote->buffer, remote->buffer_size + 1);
}
@@ -470,8 +465,8 @@ pcmk__read_available_remote_data(pcmk__r
} else if (read_rc == GNUTLS_E_AGAIN) {
rc = EAGAIN;
} else if (read_rc < 0) {
- crm_debug("TLS receive failed: %s (%lld)",
- gnutls_strerror(read_rc), (long long) read_rc);
+ crm_debug("TLS receive failed: %s (%zd)",
+ gnutls_strerror((int) read_rc), read_rc);
rc = EIO;
}
} else if (remote->tcp_socket) {
@@ -491,35 +486,32 @@ pcmk__read_available_remote_data(pcmk__r
remote->buffer_offset += read_rc;
/* always null terminate buffer, the +1 to alloc always allows for this. */
remote->buffer[remote->buffer_offset] = '\0';
- crm_trace("Received %lld more bytes (%llu total)",
- (long long) read_rc,
- (unsigned long long) remote->buffer_offset);
+ crm_trace("Received %zd more bytes (%zu total)",
+ read_rc, remote->buffer_offset);
} else if ((rc == EINTR) || (rc == EAGAIN)) {
crm_trace("No data available for non-blocking remote read: %s (%d)",
pcmk_rc_str(rc), rc);
} else if (read_rc == 0) {
- crm_debug("End of remote data encountered after %llu bytes",
- (unsigned long long) remote->buffer_offset);
+ crm_debug("End of remote data encountered after %zu bytes",
+ remote->buffer_offset);
return ENOTCONN;
} else {
- crm_debug("Error receiving remote data after %llu bytes: %s (%d)",
- (unsigned long long) remote->buffer_offset,
- pcmk_rc_str(rc), rc);
+ crm_debug("Error receiving remote data after %zu bytes: %s (%d)",
+ remote->buffer_offset, pcmk_rc_str(rc), rc);
return ENOTCONN;
}
header = localized_remote_header(remote);
if(header) {
if(remote->buffer_offset < header->size_total) {
- crm_trace("Read partial remote message (%llu of %u bytes)",
- (unsigned long long) remote->buffer_offset,
- header->size_total);
+ crm_trace("Read partial remote message (%zu of %" PRIu32 " bytes)",
+ remote->buffer_offset, header->size_total);
} else {
- crm_trace("Read full remote message of %llu bytes",
- (unsigned long long) remote->buffer_offset);
+ crm_trace("Read full remote message of %zu bytes",
+ remote->buffer_offset);
return pcmk_rc_ok;
}
}
Index: pacemaker-3.0.0+20250128.fa492f5181/lib/services/services_linux.c
===================================================================
--- pacemaker-3.0.0+20250128.fa492f5181.orig/lib/services/services_linux.c
+++ pacemaker-3.0.0+20250128.fa492f5181/lib/services/services_linux.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2024 the Pacemaker project contributors
+ * Copyright 2010-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -305,14 +305,12 @@ svc_read_output(int fd, svc_action_t * o
if (is_stderr && op->stderr_data) {
len = strlen(op->stderr_data);
data = op->stderr_data;
- crm_trace("Reading %s stderr into offset %lld",
- op->id, (long long) len);
+ crm_trace("Reading %s stderr into offset %zu", op->id, len);
} else if (is_stderr == FALSE && op->stdout_data) {
len = strlen(op->stdout_data);
data = op->stdout_data;
- crm_trace("Reading %s stdout into offset %lld",
- op->id, (long long) len);
+ crm_trace("Reading %s stdout into offset %zu", op->id, len);
} else {
crm_trace("Reading %s %s", op->id, out_type(is_stderr));
@@ -324,8 +322,8 @@ svc_read_output(int fd, svc_action_t * o
if (rc > 0) {
if (len < MAX_OUTPUT) {
buf[rc] = 0;
- crm_trace("Received %lld bytes of %s %s: %.80s",
- (long long) rc, op->id, out_type(is_stderr), buf);
+ crm_trace("Received %zd bytes of %s %s: %.80s",
+ rc, op->id, out_type(is_stderr), buf);
data = pcmk__realloc(data, len + rc + 1);
strcpy(data + len, buf);
len += rc;
@@ -340,9 +338,8 @@ svc_read_output(int fd, svc_action_t * o
} while ((rc == buf_read_len) || (rc < 0));
if (discarded > 0) {
- crm_warn("Truncated %s %s to %lld bytes (discarded %lld)",
- op->id, out_type(is_stderr), (long long) len,
- (long long) discarded);
+ crm_warn("Truncated %s %s to %zu bytes (discarded %zu)",
+ op->id, out_type(is_stderr), len, discarded);
}
if (is_stderr) {

View File

@ -0,0 +1,121 @@
From d8564d24f704fa6d5b1f6e9963b5b3587953a598 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Wed, 8 Jan 2025 22:49:08 -0800
Subject: [PATCH 2/2] Low: libcrmcommon: Catch correct errors for remote
connection sockets
connect() can be interrupted by a signal, and we can treat that the same
as EAGAIN.
select() doesn't set errno to EINPROGRESS, but it can set EINTR.
read() and write() can set EWOULDBLOCK, which is not guaranteed to have
the same value as EAGAIN. However, it might (and in fact it does on my
system), which causes a compiler error ("duplicate case value") if we
use a switch statement for those two cases.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
lib/common/remote.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/lib/common/remote.c b/lib/common/remote.c
index 6a4a4ff79a..bb29ad938c 100644
--- a/lib/common/remote.c
+++ b/lib/common/remote.c
@@ -169,7 +169,6 @@ send_plaintext(int sock, struct iovec *iov)
{
const char *unsent = iov->iov_base;
size_t unsent_len = iov->iov_len;
- ssize_t write_rc;
if (unsent == NULL) {
return EINVAL;
@@ -178,11 +177,12 @@ send_plaintext(int sock, struct iovec *iov)
crm_debug("Sending plaintext message of %zu bytes to socket %d",
unsent_len, sock);
while (true) {
- write_rc = write(sock, unsent, unsent_len);
+ ssize_t write_rc = write(sock, unsent, unsent_len);
+
if (write_rc < 0) {
int rc = errno;
- if ((errno == EINTR) || (errno == EAGAIN)) {
+ if ((rc == EINTR) || (rc == EAGAIN) || (rc == EWOULDBLOCK)) {
crm_trace("Retrying to send %zu bytes remaining to socket %d",
unsent_len, sock);
continue;
@@ -197,15 +197,13 @@ send_plaintext(int sock, struct iovec *iov)
crm_trace("Sent %zd of %zu bytes remaining", write_rc, unsent_len);
unsent += write_rc;
unsent_len -= write_rc;
- continue;
} else {
crm_trace("Sent all %zd bytes remaining: %.100s",
write_rc, (char *) (iov->iov_base));
- break;
+ return pcmk_rc_ok;
}
}
- return pcmk_rc_ok;
}
// \return Standard Pacemaker return code
@@ -485,15 +483,15 @@ pcmk__read_available_remote_data(pcmk__remote_t *remote)
crm_trace("Received %zd more bytes (%zu total)",
read_rc, remote->buffer_offset);
- } else if ((rc == EINTR) || (rc == EAGAIN)) {
- crm_trace("No data available for non-blocking remote read: %s (%d)",
- pcmk_rc_str(rc), rc);
-
} else if (read_rc == 0) {
crm_debug("End of remote data encountered after %zu bytes",
remote->buffer_offset);
return ENOTCONN;
+ } else if ((rc == EINTR) || (rc == EAGAIN) || (rc == EWOULDBLOCK)) {
+ crm_trace("No data available for non-blocking remote read: %s (%d)",
+ pcmk_rc_str(rc), rc);
+
} else {
crm_debug("Error receiving remote data after %zu bytes: %s (%d)",
remote->buffer_offset, pcmk_rc_str(rc), rc);
@@ -608,7 +606,7 @@ check_connect_finished(gpointer userdata)
if (rc < 0) { // select() error
rc = errno;
- if ((rc == EINPROGRESS) || (rc == EAGAIN)) {
+ if ((rc == EINTR) || (rc == EAGAIN)) {
if ((time(NULL) - cb_data->start) < pcmk__timeout_ms2s(cb_data->timeout_ms)) {
return TRUE; // There is time left, so reschedule timer
} else {
@@ -704,11 +702,19 @@ connect_socket_retry(int sock, const struct sockaddr *addr, socklen_t addrlen,
}
rc = connect(sock, addr, addrlen);
- if (rc < 0 && (errno != EINPROGRESS) && (errno != EAGAIN)) {
+ if (rc < 0) {
rc = errno;
- crm_warn("Could not connect socket: %s " QB_XS " rc=%d",
- pcmk_rc_str(rc), rc);
- return rc;
+ switch (rc) {
+ case EINTR:
+ case EINPROGRESS:
+ case EAGAIN:
+ break;
+
+ default:
+ crm_warn("Could not connect socket: %s " QB_XS " rc=%d",
+ pcmk_rc_str(rc), rc);
+ return rc;
+ }
}
cb_data = pcmk__assert_alloc(1, sizeof(struct tcp_async_cb_data));
--
2.43.0

View File

@ -0,0 +1,38 @@
From 961be70139c04beb19d4b514afa73eadee54d23c Mon Sep 17 00:00:00 2001
From: Athos Ribeiro <athos.ribeiro@canonical.com>
Date: Thu, 9 Jan 2025 11:37:14 -0300
Subject: [PATCH] Low: controller: address format-overflow warnings
When using -O3 to build pacemaker, gcc (14) will throw format-overflow
warnings for some possibly null '%s' directive arguments. While we could
address the current instance of such warning by introducing checks for
null pointers, let's just remove this log entry here since it is not
meaningful.
alerts.c:153:19: error: '%s' directive argument is null [-Werror=format-overflow=]
153 | crm_trace("Inserting alert key %s = '%s'", *key, value);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
alerts.c:153:46: note: format string is defined here
153 | crm_trace("Inserting alert key %s = '%s'", *key, value);
|
Signed-off-by: Athos Ribeiro <athos.ribeiro@canonical.com>
---
daemons/controld/controld_te_events.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index 9ac0a2c4a2..b65f43ab10 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -327,7 +327,6 @@ get_cancel_action(const char *id, const char *node)
task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
- crm_trace("Wrong key %s for %s on %s", task, id, node);
continue;
}
--
2.43.0

View File

@ -0,0 +1,35 @@
From 9e68cb64f6660fe9e40c4ef75e1a891aa0804dbb Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Oct 2024 11:18:18 -0500
Subject: [PATCH 01/16] Refactor: pacemaker-attrd: always add remoteness to
attribute value XML
... even if false, for code consistency and simplicity
---
daemons/attrd/attrd_attributes.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index b3eda6e2f9..74301d678a 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -143,14 +143,13 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
pcmk__xe_add_node(xml, v->nodename, v->nodeid);
- if (pcmk_is_set(v->flags, attrd_value_remote)) {
- crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
- }
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING,
pcmk__timeout_ms2s(a->timeout_ms));
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE,
pcmk_is_set(a->flags, attrd_attr_is_private));
+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE,
+ pcmk_is_set(v->flags, attrd_value_remote));
crm_xml_add_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write);
return xml;
--
2.43.0

View File

@ -0,0 +1,85 @@
From a1a2e20080688865b2f49e7f84b98e41e5b0381f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Oct 2024 12:13:54 -0500
Subject: [PATCH 02/16] Refactor: pacemaker-attrd: don't use "uuid" to mean
"XML ID"
... in write_attribute()
---
daemons/attrd/attrd_cib.c | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index a40e7a1087..b8f509ab7d 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -543,21 +543,22 @@ write_attribute(attribute_t *a, bool ignore_delay)
/* Iterate over each peer value of this attribute */
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
- const char *uuid = NULL;
+ const char *node_xml_id = NULL;
+ // Try to get the XML ID used for the node in the CIB
if (pcmk_is_set(v->flags, attrd_value_remote)) {
- /* If this is a Pacemaker Remote node, the node's UUID is the same
- * as its name, which we already have.
- */
- uuid = v->nodename;
+ // A Pacemaker Remote node's XML ID is the same as its name
+ node_xml_id = v->nodename;
} else {
- // This will create a cluster node cache entry if none exists
+ /* Get cluster node XML IDs from the peer caches.
+ * This will create a cluster node cache entry if none exists.
+ */
pcmk__node_status_t *peer = pcmk__get_node(v->nodeid, v->nodename,
NULL,
pcmk__node_search_any);
- uuid = peer->xml_id;
+ node_xml_id = peer->xml_id;
// Remember peer's node ID if we're just now learning it
if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) {
@@ -574,27 +575,27 @@ write_attribute(attribute_t *a, bool ignore_delay)
}
// Defer write if this is a cluster node that's never been seen
- if (uuid == NULL) {
+ if (node_xml_id == NULL) {
attrd_set_attr_flags(a, attrd_attr_uuid_missing);
- crm_notice("Cannot update %s[%s]='%s' now because node's UUID is "
- "unknown (will retry if learned)",
+ crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID "
+ "is unknown (will retry if learned)",
a->id, v->nodename, v->current);
continue;
}
// Update this value as part of the CIB transaction we're building
- rc = add_attr_update(a, v->current, uuid);
+ rc = add_attr_update(a, v->current, node_xml_id);
if (rc != pcmk_rc_ok) {
- crm_err("Failed to update %s[%s]='%s': %s "
- QB_XS " node uuid=%s id=%" PRIu32,
+ crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s "
+ QB_XS " node XML ID %s",
a->id, v->nodename, v->current, pcmk_rc_str(rc),
- uuid, v->nodeid);
+ node_xml_id);
continue;
}
- crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")",
+ crm_debug("Added %s[%s]=%s to CIB transaction (node XML ID %s)",
a->id, v->nodename, pcmk__s(v->current, "(unset)"),
- uuid, v->nodeid);
+ node_xml_id);
cib_updates++;
/* Preservation of the attribute to transmit alert */
--
2.43.0

View File

@ -0,0 +1,42 @@
From 704b42f153f060af814ae83e1193d383f14088c4 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Oct 2024 12:29:33 -0500
Subject: [PATCH 03/16] Low: pacemaker-attrd: use API to get peer XML ID
... for cleaner separation, and to ensure it is set whenever possible.
---
daemons/attrd/attrd_cib.c | 2 +-
daemons/attrd/attrd_corosync.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index b8f509ab7d..6129f54c75 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -558,7 +558,7 @@ write_attribute(attribute_t *a, bool ignore_delay)
NULL,
pcmk__node_search_any);
- node_xml_id = peer->xml_id;
+ node_xml_id = pcmk__cluster_node_uuid(peer);
// Remember peer's node ID if we're just now learning it
if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) {
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index eeb2b9b1df..72ebc1843b 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -215,8 +215,8 @@ record_peer_nodeid(attribute_value_t *v, const char *host)
pcmk__node_status_t *known_peer =
pcmk__get_node(v->nodeid, host, NULL, pcmk__node_search_cluster_member);
- crm_trace("Learned %s has node id %s",
- known_peer->name, known_peer->xml_id);
+ crm_trace("Learned %s has XML ID %s",
+ known_peer->name, pcmk__cluster_node_uuid(known_peer));
if (attrd_election_won()) {
attrd_write_attributes(attrd_write_changed);
}
--
2.43.0

View File

@ -0,0 +1,48 @@
From 28faf9cdd3cd79b0290b9b457c5192421fc5c52f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Oct 2024 14:46:36 -0500
Subject: [PATCH 04/16] Low: pacemaker-attrd: bail earlier if value won't be
written
We only need the node XML ID for writing values to the CIB, so if a
value will never be written, skip looking for the XML ID.
This does mean that cluster nodes won't be added to the peer cache for
unwritten attributes, but that shouldn't matter for them.
---
daemons/attrd/attrd_cib.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 6129f54c75..808a7bc7e3 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -545,6 +545,12 @@ write_attribute(attribute_t *a, bool ignore_delay)
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
const char *node_xml_id = NULL;
+ // Private attributes (or any in standalone mode) are not written to CIB
+ if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
+ private_updates++;
+ continue;
+ }
+
// Try to get the XML ID used for the node in the CIB
if (pcmk_is_set(v->flags, attrd_value_remote)) {
// A Pacemaker Remote node's XML ID is the same as its name
@@ -568,12 +574,6 @@ write_attribute(attribute_t *a, bool ignore_delay)
}
}
- /* If this is a private attribute, no update needs to be sent */
- if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
- private_updates++;
- continue;
- }
-
// Defer write if this is a cluster node that's never been seen
if (node_xml_id == NULL) {
attrd_set_attr_flags(a, attrd_attr_uuid_missing);
--
2.43.0

View File

@ -0,0 +1,244 @@
From ee01715f3ae7ff64da6f8aad0d3537faa84b013b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 28 Oct 2024 11:24:08 -0500
Subject: [PATCH 05/16] Refactor: libcrmcluster: allow searching by XML ID in
pcmk__search_node_caches()
---
daemons/attrd/attrd_ipc.c | 2 +-
daemons/based/based_messages.c | 2 +-
daemons/controld/controld_corosync.c | 2 +-
daemons/controld/controld_fencing.c | 2 +-
daemons/controld/controld_messages.c | 8 ++++---
daemons/fenced/fenced_commands.c | 2 +-
daemons/fenced/fenced_history.c | 2 +-
daemons/fenced/fenced_remote.c | 2 +-
include/crm/cluster/internal.h | 1 +
lib/cluster/cpg.c | 2 +-
lib/cluster/membership.c | 35 ++++++++++++++++++----------
11 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 4b26cdb3d7..5ab2763dbf 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -155,7 +155,7 @@ attrd_client_peer_remove(pcmk__request_t *request)
pcmk__node_status_t *node = NULL;
char *host_alloc = NULL;
- node = pcmk__search_node_caches(nodeid, NULL,
+ node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->name != NULL)) {
// Use cached name if available
diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c
index 25d31f49ac..e8a85904f7 100644
--- a/daemons/based/based_messages.c
+++ b/daemons/based/based_messages.c
@@ -254,7 +254,7 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml
// Notify originating peer so it can notify its local clients
pcmk__node_status_t *origin = NULL;
- origin = pcmk__search_node_caches(0, host,
+ origin = pcmk__search_node_caches(0, host, NULL,
pcmk__node_search_cluster_member);
crm_info("Rejecting upgrade request from %s: %s "
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
index 02b0e823ad..61cf6293cc 100644
--- a/daemons/controld/controld_corosync.c
+++ b/daemons/controld/controld_corosync.c
@@ -119,7 +119,7 @@ cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
if (controld_globals.dc_name != NULL) {
pcmk__node_status_t *peer = NULL;
- peer = pcmk__search_node_caches(0, controld_globals.dc_name,
+ peer = pcmk__search_node_caches(0, controld_globals.dc_name, NULL,
pcmk__node_search_cluster_member);
if (peer != NULL) {
for (int i = 0; i < left_list_entries; ++i) {
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index e24523cbb0..093f48eb45 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -591,7 +591,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
|pcmk__node_search_cluster_cib;
pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target,
- flags);
+ NULL, flags);
const char *uuid = NULL;
if (peer == NULL) {
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index 1f4b3891ce..65b1b829ce 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -501,7 +501,7 @@ relay_message(xmlNode * msg, gboolean originated_locally)
}
if (!broadcast) {
- node_to = pcmk__search_node_caches(0, host_to,
+ node_to = pcmk__search_node_caches(0, host_to, NULL,
pcmk__node_search_cluster_member);
if (node_to == NULL) {
crm_warn("Ignoring message %s because node %s is unknown",
@@ -943,7 +943,8 @@ handle_node_info_request(const xmlNode *msg)
value = controld_globals.cluster->priv->node_name;
}
- node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any);
+ node = pcmk__search_node_caches(node_id, value, NULL,
+ pcmk__node_search_any);
if (node) {
crm_xml_add(reply_data, PCMK_XA_ID, node->xml_id);
crm_xml_add(reply_data, PCMK_XA_UNAME, node->name);
@@ -1070,7 +1071,8 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
const char *from = crm_element_value(stored_msg, PCMK__XA_SRC);
pcmk__node_status_t *node =
- pcmk__search_node_caches(0, from, pcmk__node_search_cluster_member);
+ pcmk__search_node_caches(0, from, NULL,
+ pcmk__node_search_cluster_member);
pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
if(AM_I_DC == FALSE) {
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 082a4f5af3..9205ec727d 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2875,7 +2875,7 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result)
pcmk__node_status_t *node = NULL;
pcmk__scan_min_int(host, &nodeid, 0);
- node = pcmk__search_node_caches(nodeid, NULL,
+ node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_any
|pcmk__node_search_cluster_cib);
if (node != NULL) {
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index a5285209be..d1e088a617 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -487,7 +487,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
pcmk__node_status_t *node = NULL;
pcmk__scan_min_int(target, &nodeid, 0);
- node = pcmk__search_node_caches(nodeid, NULL,
+ node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_any
|pcmk__node_search_cluster_cib);
if (node != NULL) {
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 1e19c51dc3..0f92ed5f30 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1245,7 +1245,7 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
pcmk__node_status_t *node = NULL;
pcmk__scan_min_int(op->target, &nodeid, 0);
- node = pcmk__search_node_caches(nodeid, NULL,
+ node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_any
|pcmk__node_search_cluster_cib);
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index 11a82beee3..0afca28950 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -309,6 +309,7 @@ void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
void pcmk__cluster_forget_remote_node(const char *node_name);
pcmk__node_status_t *pcmk__search_node_caches(unsigned int id,
const char *uname,
+ const char *xml_id,
uint32_t flags);
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
index 559dd408e0..fa9892e993 100644
--- a/lib/cluster/cpg.c
+++ b/lib/cluster/cpg.c
@@ -576,7 +576,7 @@ node_left(const char *cpg_group_name, int event_counter,
size_t member_list_entries)
{
pcmk__node_status_t *peer =
- pcmk__search_node_caches(cpg_peer->nodeid, NULL,
+ pcmk__search_node_caches(cpg_peer->nodeid, NULL, NULL,
pcmk__node_search_cluster_member);
const struct cpg_address **rival = NULL;
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index 0705b6570d..bccbe12aa7 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -156,7 +156,7 @@ pcmk__cluster_lookup_remote_node(const char *node_name)
* entry unless it has a node ID, which means the name actually is
* associated with a cluster node. (@TODO return an error in that case?)
*/
- node = pcmk__search_node_caches(0, node_name,
+ node = pcmk__search_node_caches(0, node_name, NULL,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->xml_id == NULL)) {
/* node_name could be a pointer into the cache entry being removed, so
@@ -791,36 +791,47 @@ search_cluster_member_cache(unsigned int id, const char *uname,
* \internal
* \brief Search caches for a node (cluster or Pacemaker Remote)
*
- * \param[in] id If not 0, cluster node ID to search for
- * \param[in] uname If not NULL, node name to search for
- * \param[in] flags Group of enum pcmk__node_search_flags
+ * \param[in] id If not 0, cluster node ID to search for
+ * \param[in] uname If not NULL, node name to search for
+ * \param[in] xml_id If not NULL, CIB XML ID of node to search for
+ * \param[in] flags Group of enum pcmk__node_search_flags
*
* \return Node cache entry if found, otherwise NULL
*/
pcmk__node_status_t *
-pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
+pcmk__search_node_caches(unsigned int id, const char *uname,
+ const char *xml_id, uint32_t flags)
{
pcmk__node_status_t *node = NULL;
- pcmk__assert((id > 0) || (uname != NULL));
+ pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL));
pcmk__cluster_init_node_caches();
- if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
- node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
+ if (pcmk_is_set(flags, pcmk__node_search_remote)) {
+ if (uname != NULL) {
+ node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
+ } else if (xml_id != NULL) {
+ node = g_hash_table_lookup(pcmk__remote_peer_cache, xml_id);
+ }
}
if ((node == NULL)
&& pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
- node = search_cluster_member_cache(id, uname, NULL);
+ node = search_cluster_member_cache(id, uname, xml_id);
}
if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
- char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
+ if (xml_id != NULL) {
+ node = find_cib_cluster_node(xml_id, uname);
+ } else {
+ // Assumes XML ID is node ID as string (as with Corosync)
+ char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
- node = find_cib_cluster_node(id_str, uname);
- free(id_str);
+ node = find_cib_cluster_node(id_str, uname);
+ free(id_str);
+ }
}
return node;
--
2.43.0

View File

@ -0,0 +1,155 @@
From 634ce35492459b03f26ecb217033fc033264287c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 28 Oct 2024 12:26:12 -0500
Subject: [PATCH 06/16] Refactor: libcrmcluster: rename
pcmk__cluster_node_uuid()
... to pcmk__cluster_get_xml_id(). It's getting the CIB XML ID; there's
no such thing as a cluster-layer UUID, and it can be used with Pacemaker
Remote nodes as well as cluster nodes.
---
daemons/attrd/attrd_cib.c | 2 +-
daemons/controld/controld_control.c | 2 +-
daemons/controld/controld_fencing.c | 4 ++--
daemons/controld/controld_join_dc.c | 2 +-
daemons/controld/controld_membership.c | 2 +-
include/crm/cluster/internal.h | 2 +-
lib/cluster/cluster.c | 9 ++++++---
lib/cluster/membership.c | 2 +-
8 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 808a7bc7e3..ad2bf2052c 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -564,7 +564,7 @@ write_attribute(attribute_t *a, bool ignore_delay)
NULL,
pcmk__node_search_any);
- node_xml_id = pcmk__cluster_node_uuid(peer);
+ node_xml_id = pcmk__cluster_get_xml_id(peer);
// Remember peer's node ID if we're just now learning it
if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) {
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index 4b00f894ef..ed4751b8d7 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -69,7 +69,7 @@ do_ha_control(long long action,
free(controld_globals.our_uuid);
controld_globals.our_uuid =
- pcmk__str_copy(pcmk__cluster_node_uuid(node));
+ pcmk__str_copy(pcmk__cluster_get_xml_id(node));
if (controld_globals.our_uuid == NULL) {
crm_err("Could not obtain local uuid");
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 093f48eb45..7565b6c6c4 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -384,7 +384,7 @@ execute_stonith_cleanup(void)
char *target = iter->data;
pcmk__node_status_t *target_node =
pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
- const char *uuid = pcmk__cluster_node_uuid(target_node);
+ const char *uuid = pcmk__cluster_get_xml_id(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
@@ -598,7 +598,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
return;
}
- uuid = pcmk__cluster_node_uuid(peer);
+ uuid = pcmk__cluster_get_xml_id(peer);
if (AM_I_DC) {
/* The DC always sends updates */
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index 7ada26949d..9960966c6b 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -920,7 +920,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
*/
crm_trace("Updating node name and UUID in CIB for %s", join_to);
tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
- crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
+ crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node));
crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
pcmk__xml_free(tmp1);
diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c
index 8075955953..daf0c5fd43 100644
--- a/daemons/controld/controld_membership.c
+++ b/daemons/controld/controld_membership.c
@@ -142,7 +142,7 @@ create_node_state_update(pcmk__node_status_t *node, int flags,
}
if (crm_xml_add(node_state, PCMK_XA_ID,
- pcmk__cluster_node_uuid(node)) == NULL) {
+ pcmk__cluster_get_xml_id(node)) == NULL) {
crm_info("Node update for %s cancelled: no ID", node->name);
pcmk__xml_free(node_state);
return NULL;
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index 0afca28950..bc722cb3de 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -260,7 +260,7 @@ char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id,
# endif
-const char *pcmk__cluster_node_uuid(pcmk__node_status_t *node);
+const char *pcmk__cluster_get_xml_id(pcmk__node_status_t *node);
char *pcmk__cluster_node_name(uint32_t nodeid);
const char *pcmk__cluster_local_node_name(void);
const char *pcmk__node_name_from_uuid(const char *uuid);
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 3427a409f3..87abcfc43e 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -34,14 +34,14 @@ CRM_TRACE_INIT_DATA(cluster);
/*!
* \internal
- * \brief Get a node's cluster-layer UUID, setting it if not already set
+ * \brief Get a node's XML ID in the CIB, setting it if not already set
*
* \param[in,out] node Node to check
*
- * \return Cluster-layer node UUID of \p node, or \c NULL if unknown
+ * \return CIB XML ID of \p node if known, otherwise \c NULL
*/
const char *
-pcmk__cluster_node_uuid(pcmk__node_status_t *node)
+pcmk__cluster_get_xml_id(pcmk__node_status_t *node)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
@@ -52,6 +52,9 @@ pcmk__cluster_node_uuid(pcmk__node_status_t *node)
return node->xml_id;
}
+ // xml_id is always set when a Pacemaker Remote node entry is created
+ CRM_CHECK(!pcmk_is_set(node->flags, pcmk__node_status_remote), return NULL);
+
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index bccbe12aa7..ad55658d78 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1000,7 +1000,7 @@ pcmk__get_node(unsigned int id, const char *uname, const char *xml_id,
}
if ((xml_id == NULL) && (node->xml_id == NULL)) {
- xml_id = pcmk__cluster_node_uuid(node);
+ xml_id = pcmk__cluster_get_xml_id(node);
if (xml_id == NULL) {
crm_debug("Cannot obtain an XML ID for node %s[%u] at this time",
node->name, id);
--
2.43.0

View File

@ -0,0 +1,102 @@
From 8b4d6075c778f374f7289a910dad03f425e27728 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 28 Oct 2024 12:40:14 -0500
Subject: [PATCH 07/16] Low: libcrmcluster: use pcmk__cluster_get_xml_id() when
possible
... rather than using node->xml_id directly, so it gets set whenever
possible. Also, make comparisons case-sensitive.
---
lib/cluster/cluster.c | 3 ++-
lib/cluster/election.c | 7 ++++---
lib/cluster/membership.c | 14 +++++++++-----
3 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 87abcfc43e..b560eaae52 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -337,7 +337,8 @@ pcmk__node_name_from_uuid(const char *uuid)
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
- if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) {
+ if (pcmk__str_eq(uuid, pcmk__cluster_get_xml_id(node),
+ pcmk__str_none)) {
return node->name;
}
}
diff --git a/lib/cluster/election.c b/lib/cluster/election.c
index 60a9156de9..51d4630b18 100644
--- a/lib/cluster/election.c
+++ b/lib/cluster/election.c
@@ -307,7 +307,8 @@ election_vote(pcmk_cluster_t *cluster)
NULL, message_type, CRM_OP_VOTE, NULL);
cluster->priv->election->count++;
- crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->xml_id);
+ crm_xml_add(vote, PCMK__XA_ELECTION_OWNER,
+ pcmk__cluster_get_xml_id(our_node));
crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, cluster->priv->election->count);
// Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
@@ -546,8 +547,8 @@ election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message,
our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
pcmk__node_search_cluster_member);
we_are_owner = (our_node != NULL)
- && pcmk__str_eq(our_node->xml_id, vote.election_owner,
- pcmk__str_none);
+ && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node),
+ vote.election_owner, pcmk__str_none);
if (!can_win) {
reason = "Not eligible";
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index ad55658d78..e033f4e754 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -153,7 +153,7 @@ pcmk__cluster_lookup_remote_node(const char *node_name)
/* It's theoretically possible that the node was added to the cluster peer
* cache before it was known to be a Pacemaker Remote node. Remove that
- * entry unless it has a node ID, which means the name actually is
+ * entry unless it has an XML ID, which means the name actually is
* associated with a cluster node. (@TODO return an error in that case?)
*/
node = pcmk__search_node_caches(0, node_name, NULL,
@@ -713,8 +713,11 @@ search_cluster_member_cache(unsigned int id, const char *uname,
} else if (uuid != NULL) {
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
- if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) {
- crm_trace("UUID match: %s", node->xml_id);
+ const char *this_xml_id = pcmk__cluster_get_xml_id(node);
+
+ if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) {
+ crm_trace("Found cluster node cache entry by XML ID %s",
+ this_xml_id);
by_id = node;
break;
}
@@ -1388,7 +1391,8 @@ find_cib_cluster_node(const char *id, const char *uname)
if (id) {
g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
- if (pcmk__str_eq(node->xml_id, id, pcmk__str_casei)) {
+ if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node),
+ pcmk__str_none)) {
crm_trace("ID match: %s= %p", id, node);
by_id = node;
break;
@@ -1424,7 +1428,7 @@ find_cib_cluster_node(const char *id, const char *uname)
* Return by_id. */
} else if ((id != NULL) && (by_name->xml_id != NULL)
- && pcmk__str_eq(id, by_name->xml_id, pcmk__str_casei)) {
+ && pcmk__str_eq(id, by_name->xml_id, pcmk__str_none)) {
/* Multiple nodes have the same id in the CIB.
* Return by_name. */
node = by_name;
--
2.43.0

View File

@ -0,0 +1,67 @@
From e3376f56cdb35a3f87389b71111bdfe29a0ea31b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 30 Oct 2024 10:43:51 -0500
Subject: [PATCH 08/16] Refactor: libcrmcluster: track local node XML ID in
cluster object
This effectively reverts 7afc16075
---
include/crm/cluster/internal.h | 1 +
lib/cluster/cluster.c | 1 +
lib/cluster/corosync.c | 9 +++++++--
3 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index bc722cb3de..0d0ed59f2a 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -91,6 +91,7 @@ typedef struct pcmk__election pcmk__election_t;
struct pcmk__cluster_private {
enum pcmk_ipc_server server; //!< Server this connection is for (if any)
char *node_name; //!< Local node name at cluster layer
+ char *node_xml_id; //!< Local node XML ID in CIB
pcmk__election_t *election; //!< Election state (if election is needed)
/* @TODO Corosync uses an integer node ID, but cluster layers in the
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index b560eaae52..dda4b8e89a 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -166,6 +166,7 @@ pcmk_cluster_free(pcmk_cluster_t *cluster)
return;
}
election_fini(cluster);
+ free(cluster->priv->node_xml_id);
free(cluster->priv->node_name);
free(cluster->priv);
free(cluster);
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 32443a1e07..2782b10067 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -460,6 +460,7 @@ pcmk__corosync_connect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
+ pcmk__node_status_t *local_node = NULL;
int rc = pcmk_rc_ok;
pcmk__cluster_init_node_caches();
@@ -490,8 +491,12 @@ pcmk__corosync_connect(pcmk_cluster_t *cluster)
}
// Ensure local node always exists in peer cache
- pcmk__get_node(cluster->priv->node_id, cluster->priv->node_name, NULL,
- pcmk__node_search_cluster_member);
+ local_node = pcmk__get_node(cluster->priv->node_id,
+ cluster->priv->node_name, NULL,
+ pcmk__node_search_cluster_member);
+
+ cluster->priv->node_xml_id = pcmk__corosync_uuid(local_node);
+ CRM_LOG_ASSERT(cluster->priv->node_xml_id != NULL);
return pcmk_rc_ok;
}
--
2.43.0

View File

@ -0,0 +1,350 @@
From 85d7a70916c5fd85d89ad34396be5df0ed151669 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Oct 2024 11:30:00 -0500
Subject: [PATCH 09/16] Refactor: pacemaker-attrd: track node CIB ID rather
than cluster ID
Previously, attribute_value_t had a nodeid member to track the cluster
ID of the node that the value is for. However, the only reason we need
it is to be able to get the node's XML ID in the CIB, for writing out
the value. Rename it to node_xml_id and track the XML ID directly.
In practice, there is no real change, since the CIB XML ID of Corosync
nodes is simply their cluster ID as a string. This allows us to keep
the same XML attribute and value in peer messages for backward
compatibility.
If we ever support node XML IDs that are *not* the string equivalent of
their cluster IDs, rolling upgrades will be possible only from versions
with this commit and later.
---
daemons/attrd/attrd_alerts.c | 12 ++++++++-
daemons/attrd/attrd_attributes.c | 11 +++++++-
daemons/attrd/attrd_cib.c | 45 ++++++++++++++++++++------------
daemons/attrd/attrd_corosync.c | 42 +++++++++++++++--------------
daemons/attrd/attrd_ipc.c | 14 +++++-----
daemons/attrd/attrd_messages.c | 6 +++--
daemons/attrd/attrd_utils.c | 1 +
daemons/attrd/pacemaker-attrd.h | 4 +--
8 files changed, 86 insertions(+), 49 deletions(-)
diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c
index 55cb477c22..81d02d2ce2 100644
--- a/daemons/attrd/attrd_alerts.c
+++ b/daemons/attrd/attrd_alerts.c
@@ -124,12 +124,22 @@ attrd_read_options(gpointer user_data)
}
int
-attrd_send_attribute_alert(const char *node, int nodeid,
+attrd_send_attribute_alert(const char *node, const char *node_xml_id,
const char *attr, const char *value)
{
+ uint32_t nodeid = 0U;
+ pcmk__node_status_t *node_status = NULL;
+
if (attrd_alert_list == NULL) {
return pcmk_ok;
}
+ node_status = pcmk__search_node_caches(0, node, node_xml_id,
+ pcmk__node_search_remote
+ |pcmk__node_search_cluster_member
+ |pcmk__node_search_cluster_cib);
+ if (node_status != NULL) {
+ nodeid = node_status->cluster_layer_id;
+ }
return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list,
node, nodeid, attr, value);
}
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 74301d678a..6d80acfce1 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -142,7 +142,16 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type);
crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
- pcmk__xe_add_node(xml, v->nodename, v->nodeid);
+ crm_xml_add(xml, PCMK__XA_ATTR_HOST, v->nodename);
+
+ /* @COMPAT Prior to 2.1.10 and 3.0.1, the node's cluster ID was added
+ * instead of its XML ID. For Corosync and Pacemaker Remote nodes, those are
+ * the same, but if we ever support node XML IDs that differ from their
+ * cluster IDs, we will have to drop support for rolling upgrades from
+ * versions before those.
+ */
+ crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, v->node_xml_id);
+
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING,
pcmk__timeout_ms2s(a->timeout_ms));
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index ad2bf2052c..665af3625d 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -10,7 +10,6 @@
#include <crm_internal.h>
#include <errno.h>
-#include <inttypes.h> // PRIu32
#include <stdbool.h>
#include <stdlib.h>
#include <glib.h>
@@ -450,10 +449,12 @@ send_alert_attributes_value(attribute_t *a, GHashTable *t)
g_hash_table_iter_init(&vIter, t);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
- rc = attrd_send_attribute_alert(at->nodename, at->nodeid,
+ rc = attrd_send_attribute_alert(at->nodename, at->node_xml_id,
a->id, at->current);
- crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d",
- a->id, at->nodename, at->current, at->nodeid, rc);
+ crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s "
+ "(%s agents failed)",
+ a->id, at->nodename, at->current, at->node_xml_id,
+ ((rc == 0)? "no" : ((rc == -1)? "some" : "all")));
}
}
@@ -462,7 +463,7 @@ set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
{
attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
- a_v->nodeid = v->nodeid;
+ a_v->node_xml_id = pcmk__str_copy(v->node_xml_id);
a_v->nodename = pcmk__str_copy(v->nodename);
a_v->current = pcmk__str_copy(v->current);
@@ -551,26 +552,25 @@ write_attribute(attribute_t *a, bool ignore_delay)
continue;
}
- // Try to get the XML ID used for the node in the CIB
+ /* We need the node's CIB XML ID to write out its attributes, so look
+ * for it now. Check the node caches first, even if the ID was
+ * previously known (in case it changed), but use any previous value as
+ * a fallback.
+ */
+
if (pcmk_is_set(v->flags, attrd_value_remote)) {
// A Pacemaker Remote node's XML ID is the same as its name
node_xml_id = v->nodename;
} else {
- /* Get cluster node XML IDs from the peer caches.
- * This will create a cluster node cache entry if none exists.
- */
- pcmk__node_status_t *peer = pcmk__get_node(v->nodeid, v->nodename,
- NULL,
+ // This creates a cluster node cache entry if none exists
+ pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename,
+ v->node_xml_id,
pcmk__node_search_any);
node_xml_id = pcmk__cluster_get_xml_id(peer);
-
- // Remember peer's node ID if we're just now learning it
- if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) {
- crm_trace("Learned ID %" PRIu32 " for node %s",
- peer->cluster_layer_id, v->nodename);
- v->nodeid = peer->cluster_layer_id;
+ if (node_xml_id == NULL) {
+ node_xml_id = v->node_xml_id;
}
}
@@ -583,6 +583,17 @@ write_attribute(attribute_t *a, bool ignore_delay)
continue;
}
+ /* Remember the XML ID and let peers know it (in case one of them
+ * becomes the writer later)
+ */
+ if (!pcmk__str_eq(v->node_xml_id, node_xml_id, pcmk__str_none)) {
+ crm_trace("Setting %s[%s] node XML ID to %s (was %s)",
+ a->id, v->nodename, node_xml_id,
+ pcmk__s(v->node_xml_id, "unknown"));
+ pcmk__str_update(&(v->node_xml_id), node_xml_id);
+ attrd_broadcast_value(a, v);
+ }
+
// Update this value as part of the CIB transaction we're building
rc = add_attr_update(a, v->current, node_xml_id);
if (rc != pcmk_rc_ok) {
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 72ebc1843b..02816b94d2 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -209,19 +209,6 @@ attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer,
}
}
-static void
-record_peer_nodeid(attribute_value_t *v, const char *host)
-{
- pcmk__node_status_t *known_peer =
- pcmk__get_node(v->nodeid, host, NULL, pcmk__node_search_cluster_member);
-
- crm_trace("Learned %s has XML ID %s",
- known_peer->name, pcmk__cluster_node_uuid(known_peer));
- if (attrd_election_won()) {
- attrd_write_attributes(attrd_write_changed);
- }
-}
-
#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
#define readable_peer(p) \
@@ -235,6 +222,7 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
+ const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID);
// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
@@ -245,6 +233,13 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
g_hash_table_replace(a->values, v->nodename, v);
}
+ /* If update doesn't contain the node XML ID, fall back to any previously
+ * known value (for logging)
+ */
+ if (node_xml_id == NULL) {
+ node_xml_id = v->node_xml_id;
+ }
+
// If value is for a Pacemaker Remote node, remember that
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
@@ -270,11 +265,12 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
- QB_XS " from %s with %s write delay",
+ QB_XS " from %s with %s write delay and node XML ID %s",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->name,
- (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
+ (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms),
+ pcmk__s(node_xml_id, "unknown"));
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);
@@ -319,11 +315,17 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);
- /* If this is a cluster node whose node ID we are learning, remember it */
- if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
- && (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID,
- (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
- record_peer_nodeid(v, host);
+ // Remember node's XML ID if we're just learning it
+ if ((node_xml_id != NULL)
+ && !pcmk__str_eq(node_xml_id, v->node_xml_id, pcmk__str_none)) {
+ crm_trace("Learned %s[%s] node XML ID is %s (was %s)",
+ a->id, v->nodename, node_xml_id,
+ pcmk__s(v->node_xml_id, "unknown"));
+ pcmk__str_update(&(v->node_xml_id), node_xml_id);
+ if (attrd_election_won()) {
+ // In case we couldn't write a value missing the XML ID before
+ attrd_write_attributes(attrd_write_changed);
+ }
}
}
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 5ab2763dbf..fd917a37bb 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -12,6 +12,7 @@
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
+#include <inttypes.h> // PRIu32
#include <sys/types.h>
#include <crm/cluster.h>
@@ -232,12 +233,13 @@ attrd_client_refresh(pcmk__request_t *request)
static void
handle_missing_host(xmlNode *xml)
{
- const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
-
- if (host == NULL) {
- crm_trace("Inferring host");
- pcmk__xe_add_node(xml, attrd_cluster->priv->node_name,
- attrd_cluster->priv->node_id);
+ if (crm_element_value(xml, PCMK__XA_ATTR_HOST) == NULL) {
+ crm_trace("Inferring local node %s with XML ID %s",
+ attrd_cluster->priv->node_name,
+ attrd_cluster->priv->node_xml_id);
+ crm_xml_add(xml, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name);
+ crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID,
+ attrd_cluster->priv->node_xml_id);
}
}
diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
index b6eebc66cb..e1038a820b 100644
--- a/daemons/attrd/attrd_messages.c
+++ b/daemons/attrd/attrd_messages.c
@@ -9,6 +9,7 @@
#include <crm_internal.h>
+#include <inttypes.h> // PRIu32
#include <glib.h>
#include <crm/common/messages_internal.h>
@@ -314,8 +315,9 @@ attrd_broadcast_protocol(void)
crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL);
crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION);
crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1);
- pcmk__xe_add_node(attrd_op, attrd_cluster->priv->node_name,
- attrd_cluster->priv->node_id);
+ crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name);
+ crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST_ID,
+ attrd_cluster->priv->node_xml_id);
crm_debug("Broadcasting attrd protocol version %s for node %s",
ATTRD_PROTOCOL_VERSION, attrd_cluster->priv->node_name);
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
index f219b8862d..3621f5f354 100644
--- a/daemons/attrd/attrd_utils.c
+++ b/daemons/attrd/attrd_utils.c
@@ -232,6 +232,7 @@ attrd_free_attribute_value(gpointer data)
free(v->nodename);
free(v->current);
free(v->requested);
+ free(v->node_xml_id);
free(v);
}
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 13646b8e51..07103a6b01 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -99,7 +99,7 @@ extern crm_trigger_t *attrd_config_read;
void attrd_lrmd_disconnect(void);
gboolean attrd_read_options(gpointer user_data);
-int attrd_send_attribute_alert(const char *node, int nodeid,
+int attrd_send_attribute_alert(const char *node, const char *node_xml_id,
const char *attr, const char *value);
// Elections
@@ -155,7 +155,7 @@ typedef struct attribute_value_s {
char *nodename; // Node that this value is for
char *current; // Attribute value
char *requested; // Value specified in pending CIB write, if any
- uint32_t nodeid; // Cluster node ID of node that this value is for
+ char *node_xml_id; // XML ID used for node in CIB
uint32_t flags; // Group of attrd_value_flags
} attribute_value_t;
--
2.43.0

View File

@ -0,0 +1,82 @@
From aee2a5af668b6e15d9da6ecbbba7521acd1f0ea1 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 7 Feb 2024 16:05:50 -0600
Subject: [PATCH 10/16] Refactor: pacemaker-attrd: rename flag to match recent
change
---
daemons/attrd/attrd_cib.c | 15 +++++++++------
daemons/attrd/pacemaker-attrd.h | 16 ++++++++++++----
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 665af3625d..e7eeaa96d9 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -532,10 +532,13 @@ write_attribute(attribute_t *a, bool ignore_delay)
}
}
- /* Attribute will be written shortly, so clear changed flag and force
- * write flag, and initialize UUID missing flag to false.
+ /* The changed and force-write flags apply only to the next write,
+ * which this is, so clear them now. Also clear the "node unknown" flag
+ * because we will check whether it is known below and reset if appropriate.
*/
- attrd_clear_attr_flags(a, attrd_attr_changed|attrd_attr_uuid_missing|attrd_attr_force_write);
+ attrd_clear_attr_flags(a, attrd_attr_changed
+ |attrd_attr_force_write
+ |attrd_attr_node_unknown);
/* Make the table for the attribute trap */
alert_attribute_value = pcmk__strikey_table(NULL,
@@ -576,7 +579,7 @@ write_attribute(attribute_t *a, bool ignore_delay)
// Defer write if this is a cluster node that's never been seen
if (node_xml_id == NULL) {
- attrd_set_attr_flags(a, attrd_attr_uuid_missing);
+ attrd_set_attr_flags(a, attrd_attr_node_unknown);
crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID "
"is unknown (will retry if learned)",
a->id, v->nodename, v->current);
@@ -668,8 +671,8 @@ attrd_write_attributes(uint32_t options)
pcmk_is_set(options, attrd_write_all)? "all" : "changed");
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
- if (!pcmk_is_set(options, attrd_write_all) &&
- pcmk_is_set(a->flags, attrd_attr_uuid_missing)) {
+ if (!pcmk_is_set(options, attrd_write_all)
+ && pcmk_is_set(a->flags, attrd_attr_node_unknown)) {
// Try writing this attribute again, in case peer ID was learned
attrd_set_attr_flags(a, attrd_attr_changed);
} else if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 07103a6b01..f0535eabaa 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -115,10 +115,18 @@ void attrd_xml_add_writer(xmlNode *xml);
enum attrd_attr_flags {
attrd_attr_none = 0U,
- attrd_attr_changed = (1U << 0), // Attribute value has changed since last write
- attrd_attr_uuid_missing = (1U << 1), // Whether we know we're missing a peer UUID
- attrd_attr_is_private = (1U << 2), // Whether to keep this attribute out of the CIB
- attrd_attr_force_write = (1U << 3), // Update attribute by ignoring delay
+
+ // At least one of attribute's values has changed since last write
+ attrd_attr_changed = (1U << 0),
+
+ // At least one of attribute's values has an unknown node XML ID
+ attrd_attr_node_unknown = (1U << 1),
+
+ // This attribute should never be written to the CIB
+ attrd_attr_is_private = (1U << 2),
+
+ // Ignore any configured delay for next write of this attribute
+ attrd_attr_force_write = (1U << 3),
};
typedef struct attribute_s {
--
2.43.0

View File

@ -0,0 +1,49 @@
From a045a72a7ea122c10c4ceacb0cf15ff7ecd125c0 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 7 Feb 2024 16:13:44 -0600
Subject: [PATCH 11/16] Refactor: pacemaker-attrd: use variable for whether to
write
... for readability and to reduce code duplication
---
daemons/attrd/attrd_cib.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index e7eeaa96d9..193b06739e 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -492,13 +492,19 @@ write_attribute(attribute_t *a, bool ignore_delay)
GHashTableIter iter;
GHashTable *alert_attribute_value = NULL;
int rc = pcmk_ok;
+ bool should_write = true;
if (a == NULL) {
return;
}
+ // Private attributes (or any in standalone mode) are not written to the CIB
+ if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
+ should_write = false;
+ }
+
/* If this attribute will be written to the CIB ... */
- if (!stand_alone && !pcmk_is_set(a->flags, attrd_attr_is_private)) {
+ if (should_write) {
/* Defer the write if now's not a good time */
if (a->update && (a->update < last_cib_op_done)) {
crm_info("Write out of '%s' continuing: update %d considered lost",
@@ -549,8 +555,7 @@ write_attribute(attribute_t *a, bool ignore_delay)
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
const char *node_xml_id = NULL;
- // Private attributes (or any in standalone mode) are not written to CIB
- if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
+ if (!should_write) {
private_updates++;
continue;
}
--
2.43.0

View File

@ -0,0 +1,306 @@
From 034c421a457b9dd5c654cb26292d9c05b1cd9244 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Oct 2024 17:36:40 -0500
Subject: [PATCH 12/16] Low: pacemaker-attrd: track node XML IDs independent of
attribute values
Previously, node XML IDs were kept in attribute_value_t. That meant that
they were duplicated for every value for a node, the IDs might be
known for some values and unknown or inconsistent for others, and newly
learned XML IDs would have to be broadcast per value.
Now, maintain a global node XML ID cache.
---
daemons/attrd/Makefile.am | 1 +
daemons/attrd/attrd_attributes.c | 2 +-
daemons/attrd/attrd_cib.c | 25 +++++-----
daemons/attrd/attrd_corosync.c | 11 +++--
daemons/attrd/attrd_nodes.c | 82 ++++++++++++++++++++++++++++++++
daemons/attrd/attrd_utils.c | 1 -
daemons/attrd/pacemaker-attrd.c | 2 +
daemons/attrd/pacemaker-attrd.h | 6 +++
8 files changed, 112 insertions(+), 18 deletions(-)
create mode 100644 daemons/attrd/attrd_nodes.c
diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am
index 47119679cf..a2c8fd1477 100644
--- a/daemons/attrd/Makefile.am
+++ b/daemons/attrd/Makefile.am
@@ -31,6 +31,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \
attrd_elections.c \
attrd_ipc.c \
attrd_messages.c \
+ attrd_nodes.c \
attrd_sync.c \
attrd_utils.c \
pacemaker-attrd.c
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 6d80acfce1..fdc238375e 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -150,7 +150,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
* cluster IDs, we will have to drop support for rolling upgrades from
* versions before those.
*/
- crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, v->node_xml_id);
+ crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_get_node_xml_id(v->nodename));
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING,
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 193b06739e..4231e4a668 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -449,11 +449,14 @@ send_alert_attributes_value(attribute_t *a, GHashTable *t)
g_hash_table_iter_init(&vIter, t);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
- rc = attrd_send_attribute_alert(at->nodename, at->node_xml_id,
+ const char *node_xml_id = attrd_get_node_xml_id(at->nodename);
+
+ rc = attrd_send_attribute_alert(at->nodename, node_xml_id,
a->id, at->current);
crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s "
"(%s agents failed)",
- a->id, at->nodename, at->current, at->node_xml_id,
+ a->id, at->nodename, at->current,
+ pcmk__s(node_xml_id, "unknown"),
((rc == 0)? "no" : ((rc == -1)? "some" : "all")));
}
}
@@ -463,7 +466,6 @@ set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
{
attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
- a_v->node_xml_id = pcmk__str_copy(v->node_xml_id);
a_v->nodename = pcmk__str_copy(v->nodename);
a_v->current = pcmk__str_copy(v->current);
@@ -554,6 +556,7 @@ write_attribute(attribute_t *a, bool ignore_delay)
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
const char *node_xml_id = NULL;
+ const char *prev_xml_id = NULL;
if (!should_write) {
private_updates++;
@@ -566,6 +569,8 @@ write_attribute(attribute_t *a, bool ignore_delay)
* a fallback.
*/
+ prev_xml_id = attrd_get_node_xml_id(v->nodename);
+
if (pcmk_is_set(v->flags, attrd_value_remote)) {
// A Pacemaker Remote node's XML ID is the same as its name
node_xml_id = v->nodename;
@@ -573,12 +578,12 @@ write_attribute(attribute_t *a, bool ignore_delay)
} else {
// This creates a cluster node cache entry if none exists
pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename,
- v->node_xml_id,
+ prev_xml_id,
pcmk__node_search_any);
node_xml_id = pcmk__cluster_get_xml_id(peer);
if (node_xml_id == NULL) {
- node_xml_id = v->node_xml_id;
+ node_xml_id = prev_xml_id;
}
}
@@ -591,15 +596,11 @@ write_attribute(attribute_t *a, bool ignore_delay)
continue;
}
- /* Remember the XML ID and let peers know it (in case one of them
- * becomes the writer later)
- */
- if (!pcmk__str_eq(v->node_xml_id, node_xml_id, pcmk__str_none)) {
+ if (!pcmk__str_eq(prev_xml_id, node_xml_id, pcmk__str_none)) {
crm_trace("Setting %s[%s] node XML ID to %s (was %s)",
a->id, v->nodename, node_xml_id,
- pcmk__s(v->node_xml_id, "unknown"));
- pcmk__str_update(&(v->node_xml_id), node_xml_id);
- attrd_broadcast_value(a, v);
+ pcmk__s(prev_xml_id, "unknown"));
+ attrd_set_node_xml_id(v->nodename, node_xml_id);
}
// Update this value as part of the CIB transaction we're building
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 02816b94d2..e97e09cb86 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -222,6 +222,7 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
+ const char *prev_xml_id = NULL;
const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID);
// Create entry for value if not already existing
@@ -236,8 +237,9 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
/* If update doesn't contain the node XML ID, fall back to any previously
* known value (for logging)
*/
+ prev_xml_id = attrd_get_node_xml_id(v->nodename);
if (node_xml_id == NULL) {
- node_xml_id = v->node_xml_id;
+ node_xml_id = prev_xml_id;
}
// If value is for a Pacemaker Remote node, remember that
@@ -317,11 +319,11 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
// Remember node's XML ID if we're just learning it
if ((node_xml_id != NULL)
- && !pcmk__str_eq(node_xml_id, v->node_xml_id, pcmk__str_none)) {
+ && !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) {
crm_trace("Learned %s[%s] node XML ID is %s (was %s)",
a->id, v->nodename, node_xml_id,
- pcmk__s(v->node_xml_id, "unknown"));
- pcmk__str_update(&(v->node_xml_id), node_xml_id);
+ pcmk__s(prev_xml_id, "unknown"));
+ attrd_set_node_xml_id(v->nodename, node_xml_id);
if (attrd_election_won()) {
// In case we couldn't write a value missing the XML ID before
attrd_write_attributes(attrd_write_changed);
@@ -540,6 +542,7 @@ attrd_peer_remove(const char *host, bool uncache, const char *source)
if (uncache) {
pcmk__purge_node_from_cache(host, 0);
+ attrd_forget_node_xml_id(host);
}
}
diff --git a/daemons/attrd/attrd_nodes.c b/daemons/attrd/attrd_nodes.c
new file mode 100644
index 0000000000..8fb7797f2d
--- /dev/null
+++ b/daemons/attrd/attrd_nodes.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2024-2025 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h> // NULL
+#include <glib.h> // GHashTable, etc.
+
+#include "pacemaker-attrd.h"
+
+// Track the last known node XML ID for each node name
+static GHashTable *node_xml_ids = NULL;
+
+/*!
+ * \internal
+ * \brief Get last known XML ID for a given node
+ *
+ * \param[in] node_name Name of node to check
+ *
+ * \return Last known XML ID for node (or NULL if none known)
+ *
+ * \note The return value may become invalid if attrd_set_node_xml_id() or
+ * attrd_forget_node_xml_id() is later called for \p node_name.
+ */
+const char *
+attrd_get_node_xml_id(const char *node_name)
+{
+ if (node_xml_ids == NULL) {
+ return NULL;
+ }
+ return g_hash_table_lookup(node_xml_ids, node_name);
+}
+
+/*!
+ * \internal
+ * \brief Set last known XML ID for a given node
+ *
+ * \param[in] node_name Name of node to set
+ * \param[in] node_xml_id New XML ID to set for node
+ */
+void
+attrd_set_node_xml_id(const char *node_name, const char *node_xml_id)
+{
+ if (node_xml_ids == NULL) {
+ node_xml_ids = pcmk__strikey_table(free, free);
+ }
+ pcmk__insert_dup(node_xml_ids, node_name, node_xml_id);
+}
+
+/*!
+ * \internal
+ * \brief Forget last known XML ID for a given node
+ *
+ * \param[in] node_name Name of node to forget
+ */
+void
+attrd_forget_node_xml_id(const char *node_name)
+{
+ if (node_xml_ids == NULL) {
+ return;
+ }
+ g_hash_table_remove(node_xml_ids, node_name);
+}
+
+/*!
+ * \internal
+ * \brief Free the node XML ID cache
+ */
+void
+attrd_cleanup_xml_ids(void)
+{
+ if (node_xml_ids != NULL) {
+ g_hash_table_destroy(node_xml_ids);
+ node_xml_ids = NULL;
+ }
+}
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
index 3621f5f354..f219b8862d 100644
--- a/daemons/attrd/attrd_utils.c
+++ b/daemons/attrd/attrd_utils.c
@@ -232,7 +232,6 @@ attrd_free_attribute_value(gpointer data)
free(v->nodename);
free(v->current);
free(v->requested);
- free(v->node_xml_id);
free(v);
}
diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c
index a5dac1272a..3c31bcd932 100644
--- a/daemons/attrd/pacemaker-attrd.c
+++ b/daemons/attrd/pacemaker-attrd.c
@@ -207,6 +207,8 @@ main(int argc, char **argv)
g_hash_table_destroy(attributes);
}
+ attrd_cleanup_xml_ids();
+
g_strfreev(processed_args);
pcmk__free_arg_context(context);
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index f0535eabaa..57d707c37c 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -252,4 +252,10 @@ bool attrd_request_has_sync_point(xmlNode *xml);
extern gboolean stand_alone;
+// Node utilities (from attrd_nodes.c)
+const char *attrd_get_node_xml_id(const char *node_name);
+void attrd_set_node_xml_id(const char *node_name, const char *node_xml_id);
+void attrd_forget_node_xml_id(const char *node_name);
+void attrd_cleanup_xml_ids(void);
+
#endif /* PACEMAKER_ATTRD__H */
--
2.43.0

View File

@ -0,0 +1,31 @@
From 5025e575cfb6118b4fa4d55e80e6425de03f41d2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 14 Jan 2025 09:24:15 -0600
Subject: [PATCH 13/16] Refactor: pacemaker-attrd: drop unused struct member
---
daemons/attrd/pacemaker-attrd.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 57d707c37c..cc0dcf29ee 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -163,7 +163,6 @@ typedef struct attribute_value_s {
char *nodename; // Node that this value is for
char *current; // Attribute value
char *requested; // Value specified in pending CIB write, if any
- char *node_xml_id; // XML ID used for node in CIB
uint32_t flags; // Group of attrd_value_flags
} attribute_value_t;
--
2.43.0

View File

@ -0,0 +1,32 @@
From c709b083d2e798970de0d4df5758764203d105b9 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 14 Jan 2025 14:11:04 -0600
Subject: [PATCH 14/16] Low: libcrmcluster: better detect remote nodes in peer
cache
---
lib/cluster/membership.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index e033f4e754..04bcc396f7 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -158,7 +158,13 @@ pcmk__cluster_lookup_remote_node(const char *node_name)
*/
node = pcmk__search_node_caches(0, node_name, NULL,
pcmk__node_search_cluster_member);
- if ((node != NULL) && (node->xml_id == NULL)) {
+ if ((node != NULL)
+ && ((node->xml_id == NULL)
+ /* This assumes only Pacemaker Remote nodes have their XML ID the
+ * same as their node name
+ */
+ || pcmk__str_eq(node->name, node->xml_id, pcmk__str_none))) {
+
/* node_name could be a pointer into the cache entry being removed, so
* reassign it to a copy before the original gets freed
*/
--
2.43.0

View File

@ -0,0 +1,62 @@
From 200b0896455e243f2840b9849eb3a9230315c85f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 14 Jan 2025 14:11:58 -0600
Subject: [PATCH 15/16] Refactor: controller: drop unused argument
---
daemons/controld/controld_fencing.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 7565b6c6c4..e5f03ef51c 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -208,8 +208,7 @@ cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
}
static void
-send_stonith_update(pcmk__graph_action_t *action, const char *target,
- const char *uuid)
+send_stonith_update(const char *target, const char *uuid)
{
int rc = pcmk_ok;
pcmk__node_status_t *peer = NULL;
@@ -387,7 +386,7 @@ execute_stonith_cleanup(void)
const char *uuid = pcmk__cluster_get_xml_id(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
- send_stonith_update(NULL, target, uuid);
+ send_stonith_update(target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
@@ -602,7 +601,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
if (AM_I_DC) {
/* The DC always sends updates */
- send_stonith_update(NULL, event->target, uuid);
+ send_stonith_update(event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
@@ -639,7 +638,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
* have them do so too after the election
*/
if (controld_is_local_node(event->executioner)) {
- send_stonith_update(NULL, event->target, uuid);
+ send_stonith_update(event->target, uuid);
}
add_stonith_cleanup(event->target);
}
@@ -887,7 +886,7 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
- send_stonith_update(action, target, uuid);
+ send_stonith_update(target, uuid);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_sent_update);
}
--
2.43.0

View File

@ -0,0 +1,147 @@
From 3c1145cc6b520cca5180fc91c8345e666b09ebce Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 14 Jan 2025 14:19:23 -0600
Subject: [PATCH 16/16] Refactor: controller: best practices for
send_stonith_update()
Add a doxygen block, rename function to
update_node_state_after_fencing() and uuid argument to target_xml_id for
readability, and improve log messages, comments, and formatting.
---
daemons/controld/controld_fencing.c | 53 ++++++++++++-----------------
1 file changed, 21 insertions(+), 32 deletions(-)
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index e5f03ef51c..49d1142cb3 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -207,11 +207,19 @@ cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
}
}
+/*!
+ * \internal
+ * \brief Update a fencing target's node state
+ *
+ * \param[in] target Node that was successfully fenced
+ * \param[in] target_xml_id CIB XML ID of target
+ */
static void
-send_stonith_update(const char *target, const char *uuid)
+update_node_state_after_fencing(const char *target, const char *target_xml_id)
{
int rc = pcmk_ok;
pcmk__node_status_t *peer = NULL;
+ xmlNode *node_state = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
@@ -219,18 +227,10 @@ send_stonith_update(const char *target, const char *uuid)
*/
int flags = node_update_join | node_update_expected;
- /* zero out the node-status & remove all LRM status info */
- xmlNode *node_state = NULL;
-
- CRM_CHECK(target != NULL, return);
- CRM_CHECK(uuid != NULL, return);
-
- /* Make sure the membership and join caches are accurate.
- * Try getting any existing node cache entry also by node uuid in case it
- * doesn't have an uname yet.
- */
- peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any);
+ CRM_CHECK((target != NULL) && (target_xml_id != NULL), return);
+ // Ensure target is cached
+ peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
@@ -242,16 +242,15 @@ send_stonith_update(const char *target, const char *uuid)
}
if (peer->xml_id == NULL) {
- crm_info("Recording XML ID '%s' for node '%s'", uuid, target);
- peer->xml_id = pcmk__str_copy(uuid);
+ crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target);
+ peer->xml_id = pcmk__str_copy(target_xml_id);
}
crmd_peer_down(peer, TRUE);
- /* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __func__);
+ crm_xml_add(node_state, PCMK_XA_ID, target_xml_id);
- /* we have to mark whether or not remote nodes have already been fenced */
if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
char *now_s = pcmk__ttoa(time(NULL));
@@ -259,25 +258,15 @@ send_stonith_update(const char *target, const char *uuid)
free(now_s);
}
- /* Force our known ID */
- crm_xml_add(node_state, PCMK_XA_ID, uuid);
-
rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
PCMK_XE_STATUS, node_state,
cib_can_create);
+ pcmk__xml_free(node_state);
- /* Delay processing the trigger until the update completes */
- crm_debug("Sending fencing update %d for %s", rc, target);
+ crm_debug("Updating node state for %s after fencing (call %d)", target, rc);
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
- // Make sure it sticks
- /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
- * cib_none);
- */
-
controld_delete_node_state(peer->name, controld_section_all, cib_none);
- pcmk__xml_free(node_state);
- return;
}
/*!
@@ -386,7 +375,7 @@ execute_stonith_cleanup(void)
const char *uuid = pcmk__cluster_get_xml_id(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
- send_stonith_update(target, uuid);
+ update_node_state_after_fencing(target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
@@ -601,7 +590,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
if (AM_I_DC) {
/* The DC always sends updates */
- send_stonith_update(event->target, uuid);
+ update_node_state_after_fencing(event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
@@ -638,7 +627,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
* have them do so too after the election
*/
if (controld_is_local_node(event->executioner)) {
- send_stonith_update(event->target, uuid);
+ update_node_state_after_fencing(event->target, uuid);
}
add_stonith_cleanup(event->target);
}
@@ -886,7 +875,7 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
- send_stonith_update(target, uuid);
+ update_node_state_after_fencing(target, uuid);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_sent_update);
}
--
2.43.0

View File

@ -0,0 +1,58 @@
From 2cad441b37a6aff8a695754332793ac569ad54ba Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Fri, 24 Jan 2025 13:20:46 -0800
Subject: [PATCH] Low: libcrmcommon: Fix memory leak in
text_end_list()/curses_end_list()
We were freeing the string members of text_list_data_t, but not the
text_list_data_t object itself. Similarly for curses_list_data_t.
Ref T704
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
lib/common/output_text.c | 3 ++-
tools/crm_mon_curses.c | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/common/output_text.c b/lib/common/output_text.c
index 5b557834b7..4e0024c95a 100644
--- a/lib/common/output_text.c
+++ b/lib/common/output_text.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2019-2024 the Pacemaker project contributors
+ * Copyright 2019-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -34,6 +34,7 @@ free_list_data(gpointer data) {
free(list_data->singular_noun);
free(list_data->plural_noun);
+ free(list_data);
}
static void
diff --git a/tools/crm_mon_curses.c b/tools/crm_mon_curses.c
index 325ae03515..0654322cbe 100644
--- a/tools/crm_mon_curses.c
+++ b/tools/crm_mon_curses.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2019-2024 the Pacemaker project contributors
+ * Copyright 2019-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -40,6 +40,7 @@ free_list_data(gpointer data) {
free(list_data->singular_noun);
free(list_data->plural_noun);
+ free(list_data);
}
static void
--
2.43.0

View File

@ -0,0 +1,69 @@
From e8ab7135f77f22ec494a202a66c7a5e9b74630d0 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Fri, 31 Jan 2025 10:06:04 -0500
Subject: [PATCH] Low: python: Add python value for new CRM_EX_NO_DC exit code.
---
doc/sphinx/Pacemaker_Development/c.rst | 2 ++
include/crm/common/results.h | 5 ++++-
python/pacemaker/exitstatus.py | 3 ++-
3 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/doc/sphinx/Pacemaker_Development/c.rst b/doc/sphinx/Pacemaker_Development/c.rst
index bfdff64633..8d879617f1 100644
--- a/doc/sphinx/Pacemaker_Development/c.rst
+++ b/doc/sphinx/Pacemaker_Development/c.rst
@@ -850,6 +850,8 @@ messages and converting from one to another, can be found in
* ``crm_exit_t`` (the ``CRM_EX_*`` enum values) is a system-independent code
suitable for the exit status of a process, or for interchange between nodes.
+ These values need to be kept in sync with the ``ExitStatus`` enum in
+ ``python/pacemaker/exitstatus.py``.
* Other special-purpose status codes exist, such as ``enum ocf_exitcode`` for
the possible exit statuses of OCF resource agents (along with some
diff --git a/include/crm/common/results.h b/include/crm/common/results.h
index a671cb8efd..60a88ddbc0 100644
--- a/include/crm/common/results.h
+++ b/include/crm/common/results.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2024 the Pacemaker project contributors
+ * Copyright 2012-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -226,6 +226,9 @@ enum ocf_exitcode {
* tldp.org recommends 64-113 for application use.
*
* We try to overlap with the above conventions when practical.
+ *
+ * NOTE: When new exit codes are added here, remember to also update
+ * python/pacemaker/exitstatus.py.
*/
typedef enum crm_exit_e {
// Common convention
diff --git a/python/pacemaker/exitstatus.py b/python/pacemaker/exitstatus.py
index 03f7d2c8e2..3f951ce465 100644
--- a/python/pacemaker/exitstatus.py
+++ b/python/pacemaker/exitstatus.py
@@ -1,7 +1,7 @@
"""A module providing constants relating to why a process or function exited."""
__all__ = ["ExitStatus"]
-__copyright__ = "Copyright 2023-2024 the Pacemaker project contributors"
+__copyright__ = "Copyright 2023-2025 the Pacemaker project contributors"
__license__ = "GNU Lesser General Public License version 2.1 or later (LGPLv2.1+)"
from enum import IntEnum, unique
@@ -57,6 +57,7 @@ class ExitStatus(IntEnum):
NOT_YET_IN_EFFECT = 111
INDETERMINATE = 112
UNSATISFIED = 113
+ NO_DC = 114
TIMEOUT = 124
DEGRADED = 190
DEGRADED_PROMOTED = 191
--
2.43.0

View File

@ -0,0 +1,34 @@
From 51a93e7716566f78ba6f3b8fda34d0547e49d449 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Mon, 3 Feb 2025 12:25:30 -0500
Subject: [PATCH 1/2] Low: libcrmservices: Don't leak msg if systemd_proxy is
NULL.
---
lib/services/systemd.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index b1ab9ea49c..282e588fdd 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -104,13 +104,15 @@ systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout)
static DBusMessage *
systemd_call_simple_method(const char *method)
{
- DBusMessage *msg = systemd_new_method(method);
+ DBusMessage *msg = NULL;
DBusMessage *reply = NULL;
DBusError error;
/* Don't call systemd_init() here, because that calls this */
CRM_CHECK(systemd_proxy, return NULL);
+ msg = systemd_new_method(method);
+
if (msg == NULL) {
crm_err("Could not create message to send %s to systemd", method);
return NULL;
--
2.43.0

View File

@ -0,0 +1,37 @@
From 4a4e721520a7c72afc42ab2ffa944327edab7325 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Mon, 3 Feb 2025 12:46:41 -0500
Subject: [PATCH 2/2] Refactor: libcrmservices: Unref the dbus connection...
...when we disconnect from the bus. We aren't allowed to close the
connection since we acquired it with dbus_bus_get which makes it a
shared connection. So, this is the best cleanup we can do.
---
lib/services/dbus.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/lib/services/dbus.c b/lib/services/dbus.c
index 8befef8d2e..9033d3cea6 100644
--- a/lib/services/dbus.c
+++ b/lib/services/dbus.c
@@ -294,12 +294,12 @@ pcmk_dbus_connect(void)
void
pcmk_dbus_disconnect(DBusConnection *connection)
{
- /* Per the DBus documentation, connections created with
- * dbus_connection_open() are owned by libdbus and should never be closed.
- *
- * @TODO Should we call dbus_connection_unref() here?
+ /* We acquire our dbus connection with dbus_bus_get(), which makes it a
+ * shared connection. Therefore, we can't close or free it here. The
+ * best we can do is decrement the reference count so dbus knows when
+ * there are no more clients connected to it.
*/
- return;
+ dbus_connection_unref(connection);
}
// Custom DBus error names to use
--
2.43.0

View File

@ -0,0 +1,41 @@
From e128ae183337327ff62012a0a11125ddbe71f06b Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Thu, 6 Feb 2025 16:45:29 +0100
Subject: [PATCH] Fix: libcrmcluster: prevent external callers from triggering
assertion when connecting to cluster
When sbd connects to the cluster by calling crm_cluster_connect() ->
pcmk_cluster_connect() -> pcmk__corosync_connect() ->
pcmk__cpg_connect() -> pcmk__server_message_type(),
it triggers an assertion:
error: log_assertion_as: pcmk__server_message_type: Triggered fatal
assertion at servers.c:165 : (server > 0) && (server <
PCMK__NELEM(server_info))
This fixes it by calling pcmk__server_message_type() in
pcmk__cpg_connect() only when the server type is not pcmk_ipc_unknown.
---
lib/cluster/cpg.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
index db051fc9e4..9d84286828 100644
--- a/lib/cluster/cpg.c
+++ b/lib/cluster/cpg.c
@@ -805,7 +805,10 @@ pcmk__cpg_connect(pcmk_cluster_t *cluster)
cpg_evicted = false;
- cpg_group_name = pcmk__server_message_type(cluster->priv->server);
+ if (cluster->priv->server != pcmk_ipc_unknown) {
+ cpg_group_name = pcmk__server_message_type(cluster->priv->server);
+ }
+
if (cpg_group_name == NULL) {
/* The name will already be non-NULL for Pacemaker servers. If a
* command-line tool or external caller connects to the cluster,
--
2.43.0

BIN
pacemaker-3.0.0+20250128.fa492f5181.tar.xz (Stored with Git LFS)

Binary file not shown.

BIN
pacemaker-3.0.0+20250218.64cd85422c.tar.xz (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -1,3 +1,111 @@
-------------------------------------------------------------------
Fri Feb 21 09:34:47 UTC 2025 - Yan Gao <ygao@suse.com>
- Update to version 3.0.0+20250218.64cd85422c:
- build: Fix default pacemaker-remoted path
-------------------------------------------------------------------
Fri Feb 21 09:12:35 UTC 2025 - Yan Gao <ygao@suse.com>
- libcrmcluster: prevent external callers from triggering assertion when connecting to cluster (gh#ClusterLabs/pacemaker#3821)
* pacemaker#3821-0001-Fix-libcrmcluster-prevent-external-callers-from-trig.patch
-------------------------------------------------------------------
Wed Feb 19 17:01:34 UTC 2025 - Yan Gao <ygao@suse.com>
- libcrmservices: Unref the dbus connection when we disconnect from the bus (gh#ClusterLabs/pacemaker#3816)
* pacemaker#3816-0002-Refactor-libcrmservices-Unref-the-dbus-connection.patch
- libcrmservices: Don't leak msg if systemd_proxy is NULL. (gh#ClusterLabs/pacemaker#3816)
* pacemaker#3816-0001-Low-libcrmservices-Don-t-leak-msg-if-systemd_proxy-i.patch
-------------------------------------------------------------------
Wed Feb 19 13:58:59 UTC 2025 - Yan Gao <ygao@suse.com>
- python: Add python value for new CRM_EX_NO_DC exit code. (gh#ClusterLabs/pacemaker#3815)
* pacemaker#3815-0001-Low-python-Add-python-value-for-new-CRM_EX_NO_DC-exi.patch
- libcrmcommon: Fix memory leak in text_end_list()/curses_end_list() (gh#ClusterLabs/pacemaker#3814)
* pacemaker#3814-0001-Low-libcrmcommon-Fix-memory-leak-in-text_end_list-cu.patch
- crmadmin: return error if DC is not elected #2902 #3606 (gh#ClusterLabs/pacemaker#3716)
* pacemaker#3716-0001-Fix-crmadmin-return-error-if-DC-is-not-elected-2902-.patch
-------------------------------------------------------------------
Mon Feb 17 20:29:03 UTC 2025 - Yan Gao <ygao@suse.com>
- controller: best practices for send_stonith_update() (gh#ClusterLabs/pacemaker#3796)
* pacemaker#3796-0016-Refactor-controller-best-practices-for-send_stonith_.patch
- controller: drop unused argument
* pacemaker#3796-0015-Refactor-controller-drop-unused-argument.patch
- libcrmcluster: better detect remote nodes in peer cache
* pacemaker#3796-0014-Low-libcrmcluster-better-detect-remote-nodes-in-peer.patch
- pacemaker-attrd: drop unused struct member
* pacemaker#3796-0013-Refactor-pacemaker-attrd-drop-unused-struct-member.patch
- pacemaker-attrd: track node XML IDs independent of attribute values
* pacemaker#3796-0012-Low-pacemaker-attrd-track-node-XML-IDs-independent-o.patch
- pacemaker-attrd: use variable for whether to write
* pacemaker#3796-0011-Refactor-pacemaker-attrd-use-variable-for-whether-to.patch
- pacemaker-attrd: rename flag to match recent change
* pacemaker#3796-0010-Refactor-pacemaker-attrd-rename-flag-to-match-recent.patch
- pacemaker-attrd: track node CIB ID rather than cluster ID
* pacemaker#3796-0009-Refactor-pacemaker-attrd-track-node-CIB-ID-rather-th.patch
- libcrmcluster: track local node XML ID in cluster object
* pacemaker#3796-0008-Refactor-libcrmcluster-track-local-node-XML-ID-in-cl.patch
- libcrmcluster: use pcmk__cluster_get_xml_id() when possible
* pacemaker#3796-0007-Low-libcrmcluster-use-pcmk__cluster_get_xml_id-when-.patch
- libcrmcluster: rename pcmk__cluster_node_uuid()
* pacemaker#3796-0006-Refactor-libcrmcluster-rename-pcmk__cluster_node_uui.patch
- libcrmcluster: allow searching by XML ID in pcmk__search_node_caches()
* pacemaker#3796-0005-Refactor-libcrmcluster-allow-searching-by-XML-ID-in-.patch
- pacemaker-attrd: bail earlier if value won't be written
* pacemaker#3796-0004-Low-pacemaker-attrd-bail-earlier-if-value-won-t-be-w.patch
- pacemaker-attrd: use API to get peer XML ID
* pacemaker#3796-0003-Low-pacemaker-attrd-use-API-to-get-peer-XML-ID.patch
- pacemaker-attrd: don't use "uuid" to mean "XML ID"
* pacemaker#3796-0002-Refactor-pacemaker-attrd-don-t-use-uuid-to-mean-XML-.patch
- pacemaker-attrd: always add remoteness to attribute value XML (gh#ClusterLabs/pacemaker#3796)
* pacemaker#3796-0001-Refactor-pacemaker-attrd-always-add-remoteness-to-at.patch
-------------------------------------------------------------------
Mon Feb 17 19:50:01 UTC 2025 - Yan Gao <ygao@suse.com>
- controller: address format-overflow warnings (gh#ClusterLabs/pacemaker#3794)
* pacemaker#3794-0001-Low-controller-address-format-overflow-warnings.patch
- libcrmcommon: Catch correct errors for remote connection sockets (gh#ClusterLabs/pacemaker#3793)
* pacemaker#3793-0002-Low-libcrmcommon-Catch-correct-errors-for-remote-con.patch
- various: Correct some printf specifiers (gh#ClusterLabs/pacemaker#3793)
* pacemaker#3793-0001-Low-various-Correct-some-printf-specifiers.patch
- schedulerd: Resetting error and warning flags. (gh#ClusterLabs/pacemaker#3791)
* pacemaker#3791-0001-Mid-schedulerd-Resetting-error-and-warning-flags.patch
- controller: round timeout when checking remaining remote command time (gh#ClusterLabs/pacemaker#3781)
* pacemaker#3781-0001-Low-controller-round-timeout-when-checking-remaining.patch
- systemd: If the state is Pending at the time of probe, execute follow up monitor. (gh#ClusterLabs/pacemaker#3746)
* pacemaker#3746-0002-Mid-systemd-If-the-state-is-Pending-at-the-time-of-p.patch
- systemd: Fix when monitor of systemd resource continues to be pending. (gh#ClusterLabs/pacemaker#3746)
* pacemaker#3746-0001-Mid-systemd-Fix-when-monitor-of-systemd-resource-con.patch
-------------------------------------------------------------------
Mon Feb 03 10:44:36 UTC 2025 - Yan Gao <ygao@suse.com>

View File

@ -121,7 +121,7 @@
%define with_regression_tests 0
Name: pacemaker
-Version: 3.0.0+20250128.fa492f5181
+Version: 3.0.0+20250218.64cd85422c
Release: 0
Summary: Scalable High-Availability cluster resource manager
# AGPL-3.0 licensed extra/clustermon.sh is not present in the binary
@ -140,6 +140,35 @@ Patch6: bug-977201_pacemaker-controld-self-fencing.patch
Patch7: bug-995365_pacemaker-cts-restart-systemd-journald.patch
Patch8: pacemaker-cts-StartCmd.patch
Patch9: bsc#1180966-0001-Log-pacemakerd-downgrade-the-warning-about-SBD_SYNC_.patch
Patch10: pacemaker#3746-0001-Mid-systemd-Fix-when-monitor-of-systemd-resource-con.patch
Patch11: pacemaker#3746-0002-Mid-systemd-If-the-state-is-Pending-at-the-time-of-p.patch
Patch12: pacemaker#3781-0001-Low-controller-round-timeout-when-checking-remaining.patch
Patch13: pacemaker#3791-0001-Mid-schedulerd-Resetting-error-and-warning-flags.patch
Patch14: pacemaker#3793-0001-Low-various-Correct-some-printf-specifiers.patch
Patch15: pacemaker#3793-0002-Low-libcrmcommon-Catch-correct-errors-for-remote-con.patch
Patch16: pacemaker#3794-0001-Low-controller-address-format-overflow-warnings.patch
Patch17: pacemaker#3796-0001-Refactor-pacemaker-attrd-always-add-remoteness-to-at.patch
Patch18: pacemaker#3796-0002-Refactor-pacemaker-attrd-don-t-use-uuid-to-mean-XML-.patch
Patch19: pacemaker#3796-0003-Low-pacemaker-attrd-use-API-to-get-peer-XML-ID.patch
Patch20: pacemaker#3796-0004-Low-pacemaker-attrd-bail-earlier-if-value-won-t-be-w.patch
Patch21: pacemaker#3796-0005-Refactor-libcrmcluster-allow-searching-by-XML-ID-in-.patch
Patch22: pacemaker#3796-0006-Refactor-libcrmcluster-rename-pcmk__cluster_node_uui.patch
Patch23: pacemaker#3796-0007-Low-libcrmcluster-use-pcmk__cluster_get_xml_id-when-.patch
Patch24: pacemaker#3796-0008-Refactor-libcrmcluster-track-local-node-XML-ID-in-cl.patch
Patch25: pacemaker#3796-0009-Refactor-pacemaker-attrd-track-node-CIB-ID-rather-th.patch
Patch26: pacemaker#3796-0010-Refactor-pacemaker-attrd-rename-flag-to-match-recent.patch
Patch27: pacemaker#3796-0011-Refactor-pacemaker-attrd-use-variable-for-whether-to.patch
Patch28: pacemaker#3796-0012-Low-pacemaker-attrd-track-node-XML-IDs-independent-o.patch
Patch29: pacemaker#3796-0013-Refactor-pacemaker-attrd-drop-unused-struct-member.patch
Patch30: pacemaker#3796-0014-Low-libcrmcluster-better-detect-remote-nodes-in-peer.patch
Patch31: pacemaker#3796-0015-Refactor-controller-drop-unused-argument.patch
Patch32: pacemaker#3796-0016-Refactor-controller-best-practices-for-send_stonith_.patch
Patch33: pacemaker#3716-0001-Fix-crmadmin-return-error-if-DC-is-not-elected-2902-.patch
Patch34: pacemaker#3814-0001-Low-libcrmcommon-Fix-memory-leak-in-text_end_list-cu.patch
Patch35: pacemaker#3815-0001-Low-python-Add-python-value-for-new-CRM_EX_NO_DC-exi.patch
Patch36: pacemaker#3816-0001-Low-libcrmservices-Don-t-leak-msg-if-systemd_proxy-i.patch
Patch37: pacemaker#3816-0002-Refactor-libcrmservices-Unref-the-dbus-connection.patch
Patch38: pacemaker#3821-0001-Fix-libcrmcluster-prevent-external-callers-from-trig.patch
# Required basic build tools
BuildRequires: autoconf
BuildRequires: automake