253 lines
8.2 KiB
Diff
253 lines
8.2 KiB
Diff
|
From 25bed9968155f43ef0b8832383ef711e7ae1685d Mon Sep 17 00:00:00 2001
|
||
|
From: Petr Rockai <prockai@redhat.com>
|
||
|
Date: Mon, 9 Sep 2013 00:01:44 +0200
|
||
|
Subject: [PATCH] clvmd: Avoid a 3-way deadlock in dead-client cleanup.
|
||
|
|
||
|
---
|
||
|
daemons/clvmd/clvmd.c | 177 +++++++++++++++++++++++++++-----------------------
|
||
|
daemons/clvmd/clvmd.h | 1 +
|
||
|
2 files changed, 96 insertions(+), 82 deletions(-)
|
||
|
|
||
|
Index: LVM2.2.02.98/daemons/clvmd/clvmd.c
|
||
|
===================================================================
|
||
|
--- LVM2.2.02.98.orig/daemons/clvmd/clvmd.c 2014-03-18 16:14:07.237654914 +0800
|
||
|
+++ LVM2.2.02.98/daemons/clvmd/clvmd.c 2014-03-18 16:14:07.272654866 +0800
|
||
|
@@ -120,6 +120,7 @@ static void *pre_and_post_thread(void *a
|
||
|
static int send_message(void *buf, int msglen, const char *csid, int fd,
|
||
|
const char *errtext);
|
||
|
static int read_from_local_sock(struct local_client *thisfd);
|
||
|
+static int cleanup_zombie(struct local_client *thisfd);
|
||
|
static int process_local_command(struct clvm_header *msg, int msglen,
|
||
|
struct local_client *client,
|
||
|
unsigned short xid);
|
||
|
@@ -694,6 +695,7 @@ static int local_rendezvous_callback(str
|
||
|
newfd->bits.localsock.sent_out = FALSE;
|
||
|
newfd->bits.localsock.threadid = 0;
|
||
|
newfd->bits.localsock.finished = 0;
|
||
|
+ newfd->bits.localsock.cleanup_needed = 0;
|
||
|
newfd->bits.localsock.pipe_client = NULL;
|
||
|
newfd->bits.localsock.private = NULL;
|
||
|
newfd->bits.localsock.all_success = 1;
|
||
|
@@ -884,7 +886,7 @@ static void main_loop(int local_sock, in
|
||
|
for (thisfd = &local_client_head; thisfd != NULL;
|
||
|
thisfd = thisfd->next) {
|
||
|
|
||
|
- if (thisfd->removeme) {
|
||
|
+ if (thisfd->removeme && !cleanup_zombie(thisfd)) {
|
||
|
struct local_client *free_fd;
|
||
|
lastfd->next = thisfd->next;
|
||
|
free_fd = thisfd;
|
||
|
@@ -911,7 +913,6 @@ static void main_loop(int local_sock, in
|
||
|
|
||
|
/* Got error or EOF: Remove it from the list safely */
|
||
|
if (ret <= 0) {
|
||
|
- struct local_client *free_fd;
|
||
|
int type = thisfd->type;
|
||
|
|
||
|
/* If the cluster socket shuts down, so do we */
|
||
|
@@ -921,12 +922,7 @@ static void main_loop(int local_sock, in
|
||
|
|
||
|
DEBUGLOG("ret == %d, errno = %d. removing client\n",
|
||
|
ret, errno);
|
||
|
- lastfd->next = thisfd->next;
|
||
|
- free_fd = thisfd;
|
||
|
- safe_close(&(free_fd->fd));
|
||
|
-
|
||
|
- /* Queue cleanup, this also frees the client struct */
|
||
|
- add_to_lvmqueue(free_fd, NULL, 0, NULL);
|
||
|
+ thisfd->removeme = 1;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
@@ -1089,6 +1085,95 @@ static void be_daemon(int timeout)
|
||
|
|
||
|
}
|
||
|
|
||
|
+static int cleanup_zombie(struct local_client *thisfd)
|
||
|
+{
|
||
|
+ int *status;
|
||
|
+
|
||
|
+ if (thisfd->type != LOCAL_SOCK)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ if (!thisfd->bits.localsock.cleanup_needed)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ DEBUGLOG("EOF on local socket: inprogress=%d\n",
|
||
|
+ thisfd->bits.localsock.in_progress);
|
||
|
+
|
||
|
+ thisfd->bits.localsock.finished = 1;
|
||
|
+
|
||
|
+ /* If the client went away in mid command then tidy up */
|
||
|
+ if (thisfd->bits.localsock.in_progress) {
|
||
|
+ pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2);
|
||
|
+ if (pthread_mutex_trylock(&thisfd->bits.localsock.mutex))
|
||
|
+ goto bail;
|
||
|
+ thisfd->bits.localsock.state = POST_COMMAND;
|
||
|
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
|
||
|
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
|
||
|
+
|
||
|
+ /* Free any unsent buffers */
|
||
|
+ free_reply(thisfd);
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Kill the subthread & free resources */
|
||
|
+ if (thisfd->bits.localsock.threadid) {
|
||
|
+ DEBUGLOG("Waiting for child thread\n");
|
||
|
+ pthread_mutex_lock(&thisfd->bits.localsock.mutex);
|
||
|
+ thisfd->bits.localsock.state = PRE_COMMAND;
|
||
|
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
|
||
|
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
|
||
|
+
|
||
|
+ if ((errno = pthread_join(thisfd->bits.localsock.threadid,
|
||
|
+ (void **) &status)))
|
||
|
+ log_sys_error("pthread_join", "");
|
||
|
+
|
||
|
+ DEBUGLOG("Joined child thread\n");
|
||
|
+
|
||
|
+ thisfd->bits.localsock.threadid = 0;
|
||
|
+ pthread_cond_destroy(&thisfd->bits.localsock.cond);
|
||
|
+ pthread_mutex_destroy(&thisfd->bits.localsock.mutex);
|
||
|
+
|
||
|
+ /* Remove the pipe client */
|
||
|
+ if (thisfd->bits.localsock.pipe_client != NULL) {
|
||
|
+ struct local_client *newfd;
|
||
|
+ struct local_client *lastfd = NULL;
|
||
|
+ struct local_client *free_fd = NULL;
|
||
|
+
|
||
|
+ (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
|
||
|
+ (void) close(thisfd->bits.localsock.pipe);
|
||
|
+
|
||
|
+ /* Remove pipe client */
|
||
|
+ for (newfd = &local_client_head; newfd != NULL;
|
||
|
+ newfd = newfd->next) {
|
||
|
+ if (thisfd->bits.localsock.
|
||
|
+ pipe_client == newfd) {
|
||
|
+ thisfd->bits.localsock.
|
||
|
+ pipe_client = NULL;
|
||
|
+
|
||
|
+ lastfd->next = newfd->next;
|
||
|
+ free_fd = newfd;
|
||
|
+ newfd->next = lastfd;
|
||
|
+ free(free_fd);
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ lastfd = newfd;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Free the command buffer */
|
||
|
+ free(thisfd->bits.localsock.cmd);
|
||
|
+
|
||
|
+ /* Clear out the cross-link */
|
||
|
+ if (thisfd->bits.localsock.pipe_client != NULL)
|
||
|
+ thisfd->bits.localsock.pipe_client->bits.pipe.client =
|
||
|
+ NULL;
|
||
|
+
|
||
|
+ safe_close(&(thisfd->fd));
|
||
|
+ thisfd->bits.localsock.cleanup_needed = 0;
|
||
|
+ return 0;
|
||
|
+bail:
|
||
|
+ return 1;
|
||
|
+}
|
||
|
+
|
||
|
/* Called when we have a read from the local socket.
|
||
|
was in the main loop but it's grown up and is a big girl now */
|
||
|
static int read_from_local_sock(struct local_client *thisfd)
|
||
|
@@ -1106,80 +1191,8 @@ static int read_from_local_sock(struct l
|
||
|
|
||
|
/* EOF or error on socket */
|
||
|
if (len <= 0) {
|
||
|
- int *status;
|
||
|
-
|
||
|
- DEBUGLOG("EOF on local socket: inprogress=%d\n",
|
||
|
- thisfd->bits.localsock.in_progress);
|
||
|
-
|
||
|
- thisfd->bits.localsock.finished = 1;
|
||
|
-
|
||
|
- /* If the client went away in mid command then tidy up */
|
||
|
- if (thisfd->bits.localsock.in_progress) {
|
||
|
- pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2);
|
||
|
- pthread_mutex_lock(&thisfd->bits.localsock.mutex);
|
||
|
- thisfd->bits.localsock.state = POST_COMMAND;
|
||
|
- pthread_cond_signal(&thisfd->bits.localsock.cond);
|
||
|
- pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
|
||
|
-
|
||
|
- /* Free any unsent buffers */
|
||
|
- free_reply(thisfd);
|
||
|
- }
|
||
|
-
|
||
|
- /* Kill the subthread & free resources */
|
||
|
- if (thisfd->bits.localsock.threadid) {
|
||
|
- DEBUGLOG("Waiting for child thread\n");
|
||
|
- pthread_mutex_lock(&thisfd->bits.localsock.mutex);
|
||
|
- thisfd->bits.localsock.state = PRE_COMMAND;
|
||
|
- pthread_cond_signal(&thisfd->bits.localsock.cond);
|
||
|
- pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
|
||
|
-
|
||
|
- if ((errno = pthread_join(thisfd->bits.localsock.threadid,
|
||
|
- (void **) &status)))
|
||
|
- log_sys_error("pthread_join", "");
|
||
|
-
|
||
|
- DEBUGLOG("Joined child thread\n");
|
||
|
-
|
||
|
- thisfd->bits.localsock.threadid = 0;
|
||
|
- pthread_cond_destroy(&thisfd->bits.localsock.cond);
|
||
|
- pthread_mutex_destroy(&thisfd->bits.localsock.mutex);
|
||
|
-
|
||
|
- /* Remove the pipe client */
|
||
|
- if (thisfd->bits.localsock.pipe_client != NULL) {
|
||
|
- struct local_client *newfd;
|
||
|
- struct local_client *lastfd = NULL;
|
||
|
- struct local_client *free_fd = NULL;
|
||
|
-
|
||
|
- (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
|
||
|
- (void) close(thisfd->bits.localsock.pipe);
|
||
|
-
|
||
|
- /* Remove pipe client */
|
||
|
- for (newfd = &local_client_head; newfd != NULL;
|
||
|
- newfd = newfd->next) {
|
||
|
- if (thisfd->bits.localsock.
|
||
|
- pipe_client == newfd) {
|
||
|
- thisfd->bits.localsock.
|
||
|
- pipe_client = NULL;
|
||
|
-
|
||
|
- lastfd->next = newfd->next;
|
||
|
- free_fd = newfd;
|
||
|
- newfd->next = lastfd;
|
||
|
- free(free_fd);
|
||
|
- break;
|
||
|
- }
|
||
|
- lastfd = newfd;
|
||
|
- }
|
||
|
- }
|
||
|
- }
|
||
|
-
|
||
|
- /* Free the command buffer */
|
||
|
- free(thisfd->bits.localsock.cmd);
|
||
|
-
|
||
|
- /* Clear out the cross-link */
|
||
|
- if (thisfd->bits.localsock.pipe_client != NULL)
|
||
|
- thisfd->bits.localsock.pipe_client->bits.pipe.client =
|
||
|
- NULL;
|
||
|
-
|
||
|
- safe_close(&(thisfd->fd));
|
||
|
+ thisfd->bits.localsock.cleanup_needed = 1;
|
||
|
+ cleanup_zombie(thisfd); /* nonzero return is ignored: cleanup_needed stays set, so main_loop calls cleanup_zombie() again */
|
||
|
return 0;
|
||
|
} else {
|
||
|
int comms_pipe[2];
|
||
|
Index: LVM2.2.02.98/daemons/clvmd/clvmd.h
|
||
|
===================================================================
|
||
|
--- LVM2.2.02.98.orig/daemons/clvmd/clvmd.h 2012-10-15 22:24:58.000000000 +0800
|
||
|
+++ LVM2.2.02.98/daemons/clvmd/clvmd.h 2014-03-18 16:14:07.272654866 +0800
|
||
|
@@ -53,6 +53,7 @@ struct localsock_bits {
|
||
|
int finished; /* Flag to tell subthread to exit */
|
||
|
int all_success; /* Set to 0 if any node (or the pre_command)
|
||
|
failed */
|
||
|
+ int cleanup_needed; /* Set on EOF/error on the socket; cleared once cleanup_zombie() completes */
|
||
|
struct local_client *pipe_client;
|
||
|
pthread_t threadid;
|
||
|
enum { PRE_COMMAND, POST_COMMAND, QUIT } state;
|