lvm2/clvmd-Avoid-a-3-way-deadlock-in-dead-client-cleanup.patch

253 lines
8.2 KiB
Diff
Raw Normal View History

From 25bed9968155f43ef0b8832383ef711e7ae1685d Mon Sep 17 00:00:00 2001
From: Petr Rockai <prockai@redhat.com>
Date: Mon, 9 Sep 2013 00:01:44 +0200
Subject: [PATCH] clvmd: Avoid a 3-way deadlock in dead-client cleanup.
---
daemons/clvmd/clvmd.c | 177 +++++++++++++++++++++++++++-----------------------
daemons/clvmd/clvmd.h | 1 +
2 files changed, 96 insertions(+), 82 deletions(-)
Index: LVM2.2.02.98/daemons/clvmd/clvmd.c
===================================================================
--- LVM2.2.02.98.orig/daemons/clvmd/clvmd.c 2014-03-18 16:14:07.237654914 +0800
+++ LVM2.2.02.98/daemons/clvmd/clvmd.c 2014-03-18 16:14:07.272654866 +0800
@@ -120,6 +120,7 @@ static void *pre_and_post_thread(void *a
static int send_message(void *buf, int msglen, const char *csid, int fd,
const char *errtext);
static int read_from_local_sock(struct local_client *thisfd);
+static int cleanup_zombie(struct local_client *thisfd);
static int process_local_command(struct clvm_header *msg, int msglen,
struct local_client *client,
unsigned short xid);
@@ -694,6 +695,7 @@ static int local_rendezvous_callback(str
newfd->bits.localsock.sent_out = FALSE;
newfd->bits.localsock.threadid = 0;
newfd->bits.localsock.finished = 0;
+ newfd->bits.localsock.cleanup_needed = 0;
newfd->bits.localsock.pipe_client = NULL;
newfd->bits.localsock.private = NULL;
newfd->bits.localsock.all_success = 1;
@@ -884,7 +886,7 @@ static void main_loop(int local_sock, in
for (thisfd = &local_client_head; thisfd != NULL;
thisfd = thisfd->next) {
- if (thisfd->removeme) {
+ if (thisfd->removeme && !cleanup_zombie(thisfd)) {
struct local_client *free_fd;
lastfd->next = thisfd->next;
free_fd = thisfd;
@@ -911,7 +913,6 @@ static void main_loop(int local_sock, in
/* Got error or EOF: Remove it from the list safely */
if (ret <= 0) {
- struct local_client *free_fd;
int type = thisfd->type;
/* If the cluster socket shuts down, so do we */
@@ -921,12 +922,7 @@ static void main_loop(int local_sock, in
DEBUGLOG("ret == %d, errno = %d. removing client\n",
ret, errno);
- lastfd->next = thisfd->next;
- free_fd = thisfd;
- safe_close(&(free_fd->fd));
-
- /* Queue cleanup, this also frees the client struct */
- add_to_lvmqueue(free_fd, NULL, 0, NULL);
+ thisfd->removeme = 1;
break;
}
@@ -1089,6 +1085,95 @@ static void be_daemon(int timeout)
}
+static int cleanup_zombie(struct local_client *thisfd)
+{
+ int *status;
+ /* Clean up a LOCAL_SOCK client flagged cleanup_needed. Returns 1 if the client mutex could not be taken (nothing freed yet), 0 otherwise. */
+ if (thisfd->type != LOCAL_SOCK)
+ return 0; /* other client types: nothing to clean up here */
+
+ if (!thisfd->bits.localsock.cleanup_needed)
+ return 0; /* not flagged, or a previous call already completed the cleanup */
+
+ DEBUGLOG("EOF on local socket: inprogress=%d\n",
+ thisfd->bits.localsock.in_progress);
+
+ thisfd->bits.localsock.finished = 1; /* flag telling the subthread to exit */
+
+ /* If the client went away in mid command: signal the subthread, move it to POST_COMMAND and tidy up */
+ if (thisfd->bits.localsock.in_progress) {
+ pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2);
+ if (pthread_mutex_trylock(&thisfd->bits.localsock.mutex))
+ goto bail; /* mutex is busy: do not block here; cleanup_needed stays set so the work is still pending */
+ thisfd->bits.localsock.state = POST_COMMAND;
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+
+ /* Free any unsent buffers */
+ free_reply(thisfd);
+ }
+
+ /* Signal the subthread's condition variable, wait for it to exit, then free its resources */
+ if (thisfd->bits.localsock.threadid) {
+ DEBUGLOG("Waiting for child thread\n");
+ pthread_mutex_lock(&thisfd->bits.localsock.mutex); /* NOTE(review): blocking lock, unlike the trylock above */
+ thisfd->bits.localsock.state = PRE_COMMAND;
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+
+ if ((errno = pthread_join(thisfd->bits.localsock.threadid,
+ (void **) &status)))
+ log_sys_error("pthread_join", ""); /* a join failure is only logged; cleanup continues */
+
+ DEBUGLOG("Joined child thread\n");
+
+ thisfd->bits.localsock.threadid = 0;
+ pthread_cond_destroy(&thisfd->bits.localsock.cond);
+ pthread_mutex_destroy(&thisfd->bits.localsock.mutex);
+
+ /* Remove the pipe client */
+ if (thisfd->bits.localsock.pipe_client != NULL) {
+ struct local_client *newfd;
+ struct local_client *lastfd = NULL;
+ struct local_client *free_fd = NULL;
+
+ (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
+ (void) close(thisfd->bits.localsock.pipe);
+
+ /* Find the pipe client in the list starting at local_client_head, unlink it and free it */
+ for (newfd = &local_client_head; newfd != NULL;
+ newfd = newfd->next) {
+ if (thisfd->bits.localsock.
+ pipe_client == newfd) {
+ thisfd->bits.localsock.
+ pipe_client = NULL;
+
+ lastfd->next = newfd->next; /* NOTE(review): lastfd is NULL if the match is the list head; presumably the head is never a pipe client */
+ free_fd = newfd;
+ newfd->next = lastfd;
+ free(free_fd);
+ break;
+ }
+ lastfd = newfd;
+ }
+ }
+ }
+
+ /* Free the command buffer */
+ free(thisfd->bits.localsock.cmd);
+
+ /* Clear out the cross-link (pipe_client is still set here only if it was not unlinked and freed above) */
+ if (thisfd->bits.localsock.pipe_client != NULL)
+ thisfd->bits.localsock.pipe_client->bits.pipe.client =
+ NULL;
+
+ safe_close(&(thisfd->fd));
+ thisfd->bits.localsock.cleanup_needed = 0; /* cleanup complete: later calls return 0 immediately */
+ return 0;
+bail:
+ return 1; /* cleanup incomplete */
+}
+
/* Called when we have a read from the local socket.
was in the main loop but it's grown up and is a big girl now */
static int read_from_local_sock(struct local_client *thisfd)
@@ -1106,80 +1191,8 @@ static int read_from_local_sock(struct l
/* EOF or error on socket */
if (len <= 0) {
- int *status;
-
- DEBUGLOG("EOF on local socket: inprogress=%d\n",
- thisfd->bits.localsock.in_progress);
-
- thisfd->bits.localsock.finished = 1;
-
- /* If the client went away in mid command then tidy up */
- if (thisfd->bits.localsock.in_progress) {
- pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2);
- pthread_mutex_lock(&thisfd->bits.localsock.mutex);
- thisfd->bits.localsock.state = POST_COMMAND;
- pthread_cond_signal(&thisfd->bits.localsock.cond);
- pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
-
- /* Free any unsent buffers */
- free_reply(thisfd);
- }
-
- /* Kill the subthread & free resources */
- if (thisfd->bits.localsock.threadid) {
- DEBUGLOG("Waiting for child thread\n");
- pthread_mutex_lock(&thisfd->bits.localsock.mutex);
- thisfd->bits.localsock.state = PRE_COMMAND;
- pthread_cond_signal(&thisfd->bits.localsock.cond);
- pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
-
- if ((errno = pthread_join(thisfd->bits.localsock.threadid,
- (void **) &status)))
- log_sys_error("pthread_join", "");
-
- DEBUGLOG("Joined child thread\n");
-
- thisfd->bits.localsock.threadid = 0;
- pthread_cond_destroy(&thisfd->bits.localsock.cond);
- pthread_mutex_destroy(&thisfd->bits.localsock.mutex);
-
- /* Remove the pipe client */
- if (thisfd->bits.localsock.pipe_client != NULL) {
- struct local_client *newfd;
- struct local_client *lastfd = NULL;
- struct local_client *free_fd = NULL;
-
- (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
- (void) close(thisfd->bits.localsock.pipe);
-
- /* Remove pipe client */
- for (newfd = &local_client_head; newfd != NULL;
- newfd = newfd->next) {
- if (thisfd->bits.localsock.
- pipe_client == newfd) {
- thisfd->bits.localsock.
- pipe_client = NULL;
-
- lastfd->next = newfd->next;
- free_fd = newfd;
- newfd->next = lastfd;
- free(free_fd);
- break;
- }
- lastfd = newfd;
- }
- }
- }
-
- /* Free the command buffer */
- free(thisfd->bits.localsock.cmd);
-
- /* Clear out the cross-link */
- if (thisfd->bits.localsock.pipe_client != NULL)
- thisfd->bits.localsock.pipe_client->bits.pipe.client =
- NULL;
-
- safe_close(&(thisfd->fd));
+ thisfd->bits.localsock.cleanup_needed = 1;
+ cleanup_zombie(thisfd); /* we ignore errors here */
return 0;
} else {
int comms_pipe[2];
Index: LVM2.2.02.98/daemons/clvmd/clvmd.h
===================================================================
--- LVM2.2.02.98.orig/daemons/clvmd/clvmd.h 2012-10-15 22:24:58.000000000 +0800
+++ LVM2.2.02.98/daemons/clvmd/clvmd.h 2014-03-18 16:14:07.272654866 +0800
@@ -53,6 +53,7 @@ struct localsock_bits {
int finished; /* Flag to tell subthread to exit */
int all_success; /* Set to 0 if any node (or the pre_command)
failed */
+ int cleanup_needed; /* helper for cleanup_zombie */
struct local_client *pipe_client;
pthread_t threadid;
enum { PRE_COMMAND, POST_COMMAND, QUIT } state;