63 lines
2.0 KiB
Diff
63 lines
2.0 KiB
Diff
|
From d2e11da4b7fd0453e942f43e4196dc63b3dbd708 Mon Sep 17 00:00:00 2001
|
||
|
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
||
|
Date: Fri, 22 Feb 2019 13:30:27 +0100
|
||
|
Subject: [PATCH] mdmon: wait for previous mdmon to exit during takeover
|
||
|
Git-commit: d2e11da4b7fd0453e942f43e4196dc63b3dbd708
|
||
|
Patch-mainline: mdadm-4.1+
|
||
|
References: jsc#SLE-10078, jsc#SLE-9348
|
||
|
|
||
|
Since the patch c76242c5("mdmon: get safe mode delay file descriptor
|
||
|
early"), safe_mode_delay is set properly by initrd mdmon. But in some
|
||
|
cases with filesystem traffic since the very start of the system, it
|
||
|
might take a while to transition to a clean state. Due to the fact that the new
|
||
|
mdmon does not wait for the old one to exit - it might happen that the
|
||
|
new one switches safe_mode_delay back to seconds, before old one exits.
|
||
|
As a result, two mdmons are running concurrently on the same array.
|
||
|
|
||
|
Wait for the old mdmon to exit by pinging it with SIGUSR1 signal, just
|
||
|
in case it is sleeping.
|
||
|
|
||
|
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
||
|
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
||
|
Signed-off-by: Coly Li <colyli@suse.de>
|
||
|
|
||
|
---
|
||
|
mdmon.c | 14 +++++++++++---
|
||
|
1 file changed, 11 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/mdmon.c b/mdmon.c
|
||
|
index 0955fcc..ff985d2 100644
|
||
|
--- a/mdmon.c
|
||
|
+++ b/mdmon.c
|
||
|
@@ -171,6 +171,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
||
|
int fd;
|
||
|
int n;
|
||
|
long fl;
|
||
|
+ int rv;
|
||
|
|
||
|
/* first rule of survival... don't off yourself */
|
||
|
if (pid == getpid())
|
||
|
@@ -201,9 +202,16 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
||
|
fl &= ~O_NONBLOCK;
|
||
|
fcntl(sock, F_SETFL, fl);
|
||
|
n = read(sock, buf, 100);
|
||
|
- /* Ignore result, it is just the wait that
|
||
|
- * matters
|
||
|
- */
|
||
|
+
|
||
|
+ /* If there is I/O going on it might take some time to get to
|
||
|
+ * clean state. Wait for monitor to exit fully to avoid races.
|
||
|
+ * Ping it with SIGUSR1 in case that it is sleeping */
|
||
|
+ for (n = 0; n < 25; n++) {
|
||
|
+ rv = kill(pid, SIGUSR1);
|
||
|
+ if (rv < 0)
|
||
|
+ break;
|
||
|
+ usleep(200000);
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
void remove_pidfile(char *devname)
|
||
|
--
|
||
|
2.25.0
|
||
|
|