forked from pool/ocfs2-tools
488 lines
12 KiB
Diff
488 lines
12 KiB
Diff
diff --git a/ocfs2_controld/pacemaker.c b/ocfs2_controld/pacemaker.c
|
|
--- a/ocfs2_controld/pacemaker.c
|
|
+++ b/ocfs2_controld/pacemaker.c
|
|
@@ -20,20 +20,31 @@
|
|
#include <unistd.h>
|
|
#include <syslog.h>
|
|
|
|
-#include <crm/crm.h>
|
|
-#include <crm/common/cluster.h>
|
|
-#include <fencing/stonithd_api.h>
|
|
+#include <bzlib.h>
|
|
+
|
|
+#include <pacemaker/crm_config.h>
|
|
+
|
|
+/* heartbeat support is irrelevant here */
|
|
+#undef SUPPORT_HEARTBEAT
|
|
+#define SUPPORT_HEARTBEAT 0
|
|
+
|
|
+#include <pacemaker/crm/crm.h>
|
|
+#include <pacemaker/crm/ais.h>
|
|
+#include <pacemaker/crm/attrd.h>
|
|
+#include <pacemaker/crm/common/cluster.h>
|
|
+#include <pacemaker/crm/common/stack.h>
|
|
+#include <pacemaker/crm/common/ipc.h>
|
|
+#include <pacemaker/crm/msg_xml.h>
|
|
|
|
#include "ocfs2-kernel/kernel-list.h"
|
|
#include "o2cb/o2cb.h"
|
|
|
|
#include "ocfs2_controld.h"
|
|
|
|
-#include <bzlib.h>
|
|
-#include <crm/crm.h>
|
|
-#include <crm/ais.h>
|
|
#include <sys/utsname.h>
|
|
|
|
+#define log_printf(level, format, args...) syslog(level, "%s:%d " format "\n", __FILE__, __LINE__, ##args)
|
|
+
|
|
int our_nodeid = 0;
|
|
static int pcmk_ci;
|
|
static int stonithd_ci;
|
|
@@ -44,29 +55,57 @@ const char *stackname = "pcmk";
|
|
extern int ais_fd_async;
|
|
char *local_node_uname = NULL;
|
|
|
|
-int kill_stack_node(int nodeid)
|
|
-{
|
|
- int error = 1;
|
|
- stonith_ops_t st_op;
|
|
- char *target = nodeid2name(nodeid);
|
|
-
|
|
- log_debug("killing node %d (aka. %s)", nodeid, target);
|
|
-
|
|
- if(target) {
|
|
- st_op.timeout = 150;
|
|
- st_op.node_uuid = NULL;
|
|
- st_op.private_data = NULL;
|
|
- st_op.node_name = target;
|
|
- st_op.optype = POWEROFF;
|
|
-
|
|
- error = stonithd_node_fence(&st_op);
|
|
- }
|
|
+static IPC_Channel *attrd = NULL;
|
|
|
|
- if (error)
|
|
- log_debug("Unable to kill node %d, %d %d", nodeid, error,
|
|
- errno);
|
|
+static void attrd_deadfn(int ci)
|
|
+{
|
|
+ log_printf(LOG_ERR, "Lost connection to attrd");
|
|
+ attrd = NULL;
|
|
+ return;
|
|
+}
|
|
|
|
- return error;
|
|
+int kill_stack_node(int nodeid)
|
|
+{
|
|
+ gboolean rc = FALSE;
|
|
+ xmlNode *update = NULL;
|
|
+ time_t now = time(NULL);
|
|
+ crm_node_t *node = crm_get_peer(nodeid, NULL);
|
|
+
|
|
+ if(node == NULL || node->uname == NULL) {
|
|
+ log_printf(LOG_ERR, "%s: Don't know how to kick node %d/%p", __FUNCTION__, nodeid, node);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if(attrd == NULL) {
|
|
+ log_printf(LOG_INFO, "Connecting to attrd...");
|
|
+ attrd = init_client_ipc_comms_nodispatch(T_ATTRD);
|
|
+ if(attrd) {
|
|
+ connection_add(attrd->ops->get_recv_select_fd(attrd), NULL, attrd_deadfn);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if(attrd != NULL) {
|
|
+ update = create_xml_node(NULL, __FUNCTION__);
|
|
+ crm_xml_add(update, F_TYPE, T_ATTRD);
|
|
+ crm_xml_add(update, F_ORIG, crm_system_name);
|
|
+
|
|
+ crm_xml_add(update, F_ATTRD_TASK, "update");
|
|
+ crm_xml_add(update, F_ATTRD_SECTION, XML_CIB_TAG_STATUS);
|
|
+ crm_xml_add(update, F_ATTRD_ATTRIBUTE, "terminate");
|
|
+ crm_xml_add_int(update, F_ATTRD_VALUE, now);
|
|
+ crm_xml_add(update, F_ATTRD_HOST, node->uname);
|
|
+
|
|
+ rc = send_ipc_message(attrd, update);
|
|
+ free_xml(update);
|
|
+ }
|
|
+
|
|
+ if(rc) {
|
|
+ log_printf(LOG_INFO, "Requested that node %d/%s be kicked from the cluster", nodeid, node->uname);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ log_printf(LOG_ERR, "Could not kick node %d/%s from the cluster", nodeid, node->uname);
|
|
+ return 0;
|
|
}
|
|
|
|
char *nodeid2name(int nodeid) {
|
|
@@ -81,7 +120,7 @@ char *nodeid2name(int nodeid) {
|
|
int validate_cluster(const char *cluster)
|
|
{
|
|
if (!clustername) {
|
|
- log_error("Trying to validate before pacemaker is alive");
|
|
+ log_printf(LOG_ERR, "Trying to validate before pacemaker is alive");
|
|
return 0;
|
|
}
|
|
|
|
@@ -94,12 +133,12 @@ int validate_cluster(const char *cluster)
|
|
int get_clustername(const char **cluster)
|
|
{
|
|
if (!clustername) {
|
|
- log_error("Trying to validate before pacemaker is alive");
|
|
+ log_printf(LOG_ERR, "Trying to validate before pacemaker is alive");
|
|
return -EIO;
|
|
}
|
|
|
|
if (!cluster) {
|
|
- log_error("NULL passed!");
|
|
+ log_printf(LOG_ERR, "NULL passed!");
|
|
return -EINVAL;
|
|
}
|
|
|
|
@@ -110,316 +149,36 @@ int get_clustername(const char **cluster)
|
|
static void dead_pcmk(int ci)
|
|
{
|
|
if (ci != pcmk_ci) {
|
|
- log_error("Unknown connection %d", ci);
|
|
+ log_printf(LOG_ERR, "Unknown connection %d", ci);
|
|
return;
|
|
}
|
|
|
|
- log_error("pacemaker connection died");
|
|
+ log_printf(LOG_ERR, "pacemaker connection died");
|
|
shutdown_daemon();
|
|
connection_dead(ci);
|
|
}
|
|
|
|
+extern void terminate_ais_connection(void);
|
|
+
|
|
void exit_stack(void)
|
|
{
|
|
- log_debug("closing stonithd connection");
|
|
- stonithd_signoff();
|
|
-
|
|
log_debug("closing pacemaker connection");
|
|
- if (ais_fd_async) {
|
|
- close(ais_fd_async);
|
|
- ais_fd_async = 0;
|
|
- }
|
|
- if (ais_fd_sync) {
|
|
- close(ais_fd_sync);
|
|
- ais_fd_sync = 0;
|
|
- }
|
|
+ terminate_ais_connection();
|
|
}
|
|
|
|
static void process_pcmk(int ci)
|
|
{
|
|
- /* ci ::= client number */
|
|
- char *data = NULL;
|
|
- char *uncompressed = NULL;
|
|
- AIS_Message *msg = NULL;
|
|
- SaAisErrorT rc = SA_AIS_OK;
|
|
- mar_res_header_t *header = NULL;
|
|
- static int header_len = sizeof(mar_res_header_t);
|
|
-
|
|
- header = malloc(header_len);
|
|
- memset(header, 0, header_len);
|
|
-
|
|
- errno = 0;
|
|
- rc = saRecvRetry(ais_fd_async, header, header_len);
|
|
- if (rc != SA_AIS_OK) {
|
|
- cl_perror("Receiving message header failed: (%d) %s", rc,
|
|
- ais_error2text(rc));
|
|
- goto bail;
|
|
- } else if(header->size == header_len) {
|
|
- log_error("Empty message: id=%d, size=%d, error=%d, header_len=%d",
|
|
- header->id, header->size, header->error, header_len);
|
|
- goto done;
|
|
- } else if(header->size == 0 || header->size < header_len) {
|
|
- log_error("Mangled header: size=%d, header=%d, error=%d",
|
|
- header->size, header_len, header->error);
|
|
- goto done;
|
|
- } else if(header->error != 0) {
|
|
- log_error("Header contined error: %d", header->error);
|
|
- }
|
|
-
|
|
- header = realloc(header, header->size);
|
|
- /* Use a char* so we can store the remainder into an offset */
|
|
- data = (char*)header;
|
|
-
|
|
- errno = 0;
|
|
- rc = saRecvRetry(ais_fd_async, data+header_len, header->size - header_len);
|
|
- msg = (AIS_Message*)data;
|
|
-
|
|
- if (rc != SA_AIS_OK) {
|
|
- cl_perror("Receiving message body failed: (%d) %s", rc, ais_error2text(rc));
|
|
- goto bail;
|
|
- }
|
|
-
|
|
- data = msg->data;
|
|
- if(msg->is_compressed && msg->size > 0) {
|
|
- int rc = BZ_OK;
|
|
- unsigned int new_size = msg->size;
|
|
-
|
|
- if (check_message_sanity(msg, NULL) == FALSE)
|
|
- goto badmsg;
|
|
-
|
|
- log_debug("Decompressing message data");
|
|
- uncompressed = malloc(new_size);
|
|
- memset(uncompressed, 0, new_size);
|
|
-
|
|
- rc = BZ2_bzBuffToBuffDecompress(
|
|
- uncompressed, &new_size, data, msg->compressed_size,
|
|
- 1, 0);
|
|
-
|
|
- if(rc != BZ_OK) {
|
|
- log_error("Decompression failed: %d", rc);
|
|
- goto badmsg;
|
|
- }
|
|
-
|
|
- CRM_ASSERT(rc == BZ_OK);
|
|
- CRM_ASSERT(new_size == msg->size);
|
|
-
|
|
- data = uncompressed;
|
|
-
|
|
- } else if(check_message_sanity(msg, data) == FALSE) {
|
|
- goto badmsg;
|
|
-
|
|
- } else if(safe_str_eq("identify", data)) {
|
|
- int pid = getpid();
|
|
- char *pid_s = crm_itoa(pid);
|
|
-
|
|
- send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais);
|
|
- crm_free(pid_s);
|
|
- goto done;
|
|
- }
|
|
-
|
|
- if (msg->header.id == crm_class_members) {
|
|
- xmlNode *xml = string2xml(data);
|
|
-
|
|
- if(xml != NULL) {
|
|
- const char *value = crm_element_value(xml, "id");
|
|
- if(value)
|
|
- crm_peer_seq = crm_int_helper(value, NULL);
|
|
-
|
|
- log_debug("Updating membership %llu", crm_peer_seq);
|
|
- /* crm_log_xml_info(xml, __PRETTY_FUNCTION__); */
|
|
- xml_child_iter(xml, node, crm_update_ais_node(node, crm_peer_seq));
|
|
- crm_calculate_quorum();
|
|
- free_xml(xml);
|
|
- } else {
|
|
- log_error("Invalid peer update: %s", data);
|
|
- }
|
|
- } else {
|
|
- log_error("Unexpected AIS message type: %d", msg->header.id);
|
|
- }
|
|
-
|
|
-done:
|
|
- free(uncompressed);
|
|
- free(msg);
|
|
- return;
|
|
-
|
|
-badmsg:
|
|
- log_error("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
|
|
- " min=%d, total=%d, size=%d, bz2_size=%d",
|
|
- msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
|
|
- ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
|
|
- msg->sender.pid, (int)sizeof(AIS_Message),
|
|
- msg->header.size, msg->size, msg->compressed_size);
|
|
- free(uncompressed);
|
|
- free(msg);
|
|
- return;
|
|
-
|
|
-bail:
|
|
- log_error("AIS connection failed");
|
|
- return;
|
|
-}
|
|
-
|
|
-static void dead_stonithd(int ci)
|
|
-{
|
|
- if (ci != stonithd_ci) {
|
|
- log_error("Unknown connection %d", ci);
|
|
- return;
|
|
- }
|
|
-
|
|
- log_error("stonithd connection died");
|
|
- shutdown_daemon();
|
|
- connection_dead(ci);
|
|
-}
|
|
-
|
|
-static void process_stonithd(int ci)
|
|
-{
|
|
- IPC_Channel *stonithd_ch = stonithd_input_IPC_channel();
|
|
-
|
|
- while (stonithd_op_result_ready()) {
|
|
- if (stonithd_ch->ch_status != IPC_CONNECT) {
|
|
- /* The message which was pending for us is that
|
|
- * the IPC status is now IPC_DISCONNECT */
|
|
- break;
|
|
- }
|
|
-
|
|
- if (ST_FAIL == stonithd_receive_ops_result(FALSE)) {
|
|
- log_error("stonithd_receive_ops_result() failed");
|
|
- }
|
|
- }
|
|
-
|
|
- if (stonithd_ch->ch_status != IPC_CONNECT)
|
|
- dead_stonithd(stonithd_ci);
|
|
-}
|
|
-
|
|
-static void result_stonithd(stonith_ops_t *op)
|
|
-{
|
|
- if (op == NULL) {
|
|
- log_error("Called with a NULL op!");
|
|
- return;
|
|
- }
|
|
-
|
|
- log_debug("Stonithd result: call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s",
|
|
- op->call_id, op->optype, op->node_name, op->op_result,
|
|
- (char *)op->node_list, op->private_data);
|
|
-
|
|
- switch(op->op_result) {
|
|
- case STONITH_SUCCEEDED:
|
|
- break;
|
|
- case STONITH_CANNOT:
|
|
- case STONITH_TIMEOUT:
|
|
- case STONITH_GENERIC:
|
|
- log_error("Stonith of %s failed (%d)",
|
|
- op->node_name, op->op_result);
|
|
- break;
|
|
- default:
|
|
- log_error("Unsupported action result: %d", op->op_result);
|
|
- }
|
|
-}
|
|
-
|
|
-static gboolean setup_stonith(void)
|
|
-{
|
|
- int lpc = 0;
|
|
- int rc = ST_OK;
|
|
- int stonithd_fd;
|
|
- const char *reason = NULL;
|
|
- IPC_Channel *stonithd_ch = NULL;
|
|
-
|
|
- for(lpc = 0; lpc < 30; lpc++) {
|
|
- log_debug("Attempting connection to fencing daemon...");
|
|
-
|
|
- sleep(1);
|
|
- rc = stonithd_signon("ocfs2-tools");
|
|
- if(rc == ST_OK)
|
|
- break;
|
|
-
|
|
- log_error("Sign-in failed: pausing and trying again in 2s...");
|
|
- sleep(1);
|
|
- }
|
|
-
|
|
- if(rc != ST_OK) {
|
|
- reason = "Sign-in failed";
|
|
- goto bail;
|
|
- }
|
|
-
|
|
- rc = stonithd_set_stonith_ops_callback(result_stonithd);
|
|
- if(rc != ST_OK) {
|
|
- reason = "Setup failed";
|
|
- goto bail;
|
|
- }
|
|
-
|
|
- stonithd_ch = stonithd_input_IPC_channel();
|
|
- if(stonithd_ch == NULL) {
|
|
- reason = "No connection";
|
|
- goto bail;
|
|
- }
|
|
- stonithd_fd = stonithd_ch->ops->get_recv_select_fd(stonithd_ch);
|
|
- if(stonithd_ch <= 0) {
|
|
- reason = "No fd";
|
|
- goto bail;
|
|
- }
|
|
-
|
|
- stonithd_ci = connection_add(stonithd_fd, process_stonithd,
|
|
- dead_stonithd);
|
|
- if (stonithd_ci < 0) {
|
|
- log_error("Unable to add stonithd client: %s",
|
|
- strerror(-stonithd_ci));
|
|
- goto bail;
|
|
- }
|
|
-
|
|
- return TRUE;
|
|
-
|
|
-bail:
|
|
- log_error("Unable to add stonithd client: %s", reason);
|
|
- return FALSE;
|
|
+ ais_dispatch(ais_fd_async, NULL);
|
|
}
|
|
|
|
int setup_stack(void)
|
|
{
|
|
- int retries = 0;
|
|
- int pid;
|
|
- char *pid_s;
|
|
- int rc = SA_AIS_OK;
|
|
- struct utsname name;
|
|
-
|
|
crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL);
|
|
- crm_peer_init();
|
|
|
|
- if (local_node_uname == NULL) {
|
|
- if (uname(&name) < 0) {
|
|
- cl_perror("uname(2) call failed");
|
|
- exit(100);
|
|
+ if(init_ais_connection(NULL, NULL, NULL, &local_node_uname, &our_nodeid) == FALSE) {
|
|
+ log_printf(LOG_ERR, "Connection to our AIS plugin (%d) failed", CRM_SERVICE);
|
|
+ return -1;
|
|
}
|
|
- local_node_uname = crm_strdup(name.nodename);
|
|
- log_debug("Local node name: %s", local_node_uname);
|
|
- }
|
|
-
|
|
-retry:
|
|
- log_debug("Creating connection to our AIS plugin");
|
|
- rc = saServiceConnect (&ais_fd_sync, &ais_fd_async, CRM_SERVICE);
|
|
- if (rc != SA_AIS_OK)
|
|
- log_error("Connection to our AIS plugin (%d) failed: %s (%d)",
|
|
- CRM_SERVICE, ais_error2text(rc), rc);
|
|
-
|
|
- switch(rc) {
|
|
- case SA_AIS_OK:
|
|
- break;
|
|
- case SA_AIS_ERR_TRY_AGAIN:
|
|
- if(retries < 30) {
|
|
- sleep(1);
|
|
- retries++;
|
|
- goto retry;
|
|
- }
|
|
- log_error("Retry count exceeded");
|
|
- return 0;
|
|
- default:
|
|
- return 0;
|
|
- }
|
|
-
|
|
- log_debug("AIS connection established");
|
|
-
|
|
- pid = getpid();
|
|
- pid_s = crm_itoa(pid);
|
|
- send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais);
|
|
- crm_free(pid_s);
|
|
|
|
/* Sign up for membership updates */
|
|
send_ais_text(crm_class_notify, "true", TRUE, NULL, crm_msg_ais);
|
|
@@ -427,14 +186,11 @@ retry:
|
|
/* Requesting the current list of known nodes */
|
|
send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais);
|
|
|
|
- our_nodeid = get_ais_nodeid();
|
|
- log_debug("Local node id: %d", our_nodeid);
|
|
-
|
|
pcmk_ci = connection_add(ais_fd_async, process_pcmk, dead_pcmk);
|
|
- if (pcmk_ci >= 0 && setup_stonith())
|
|
+ if (pcmk_ci >= 0)
|
|
return ais_fd_async;
|
|
|
|
- log_error("Unable to add pacemaker client: %s", strerror(-pcmk_ci));
|
|
+ log_printf(LOG_ERR, "Unable to add pacemaker client: %s", strerror(-pcmk_ci));
|
|
exit_stack();
|
|
return pcmk_ci;
|
|
}
|