mdadm/0027-mdmon-fix-wrong-array-state-when-disk-fails-during-m.patch

From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Tue, 7 May 2019 16:08:47 +0200
Subject: [PATCH] mdmon: fix wrong array state when disk fails during mdmon
 startup
Git-commit: ae7d61e35ec2ab6361c3e509a8db00698ef3396f
Patch-mainline: mdadm-4.1+
References: jsc#SLE-10078, jsc#SLE-9348

If a member drive disappears and is set faulty by the kernel during
mdmon startup, after ss->load_container() but before manage_new(), mdmon
will try to readd the faulty drive to the array and start rebuilding.
Metadata on the active drive is updated, but the faulty drive is not
removed from the array and is left in a "blocked" state and any write
request to the array will block. If the faulty drive reappears in the
system e.g. after a reboot, the array will not assemble because metadata
on the drives will be incompatible (at least on imsm).

Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This
is an extension for the "GET_DEVS" option and causes all member devices
to be returned, even if the associated block device has been removed.
Use this option in manage_new() to include the faulty device on the
active_array's devices list. Mdmon will then properly remove the faulty
device from the array and update the metadata to reflect the degraded
state.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
Signed-off-by: Coly Li <colyli@suse.de>

---
 managemon.c   |  2 +-
 mdadm.h       |  1 +
 super-intel.c |  2 +-
 sysfs.c       | 23 ++++++++++++++---------
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/managemon.c b/managemon.c
index 29b91ba..200cf83 100644
--- a/managemon.c
+++ b/managemon.c
@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,
 	mdi = sysfs_read(-1, mdstat->devnm,
 			 GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
 			 GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
-			 GET_LAYOUT);
+			 GET_LAYOUT|GET_DEVS_ALL);
 
 	if (!mdi)
 		return;
diff --git a/mdadm.h b/mdadm.h
index 705bd9b..427cc52 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -647,6 +647,7 @@ enum sysfs_read_flags {
 	GET_ERROR	= (1 << 24),
 	GET_ARRAY_STATE = (1 << 25),
 	GET_CONSISTENCY_POLICY	= (1 << 26),
+	GET_DEVS_ALL	= (1 << 27),
 };
 
 /* If fd >= 0, get the array it is open on,
diff --git a/super-intel.c b/super-intel.c
index 2ba045a..4fd5e84 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
 	disk = get_imsm_disk(super, ord_to_idx(ord));
 
 	/* check for new failures */
-	if (state & DS_FAULTY) {
+	if (disk && (state & DS_FAULTY)) {
 		if (mark_failure(super, dev, disk, ord_to_idx(ord)))
 			super->updates_pending++;
 	}
diff --git a/sysfs.c b/sysfs.c
index df6fdda..2dd9ab6 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
 			/* assume this is a stale reference to a hot
 			 * removed device
 			 */
-			free(dev);
-			continue;
+			if (!(options & GET_DEVS_ALL)) {
+				free(dev);
+				continue;
+			}
+		} else {
+			sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
 		}
-		sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
 
-		/* special case check for block devices that can go 'offline' */
-		strcpy(dbase, "block/device/state");
-		if (load_sys(fname, buf, sizeof(buf)) == 0 &&
-		    strncmp(buf, "offline", 7) == 0) {
-			free(dev);
-			continue;
+		if (!(options & GET_DEVS_ALL)) {
+			/* special case check for block devices that can go 'offline' */
+			strcpy(dbase, "block/device/state");
+			if (load_sys(fname, buf, sizeof(buf)) == 0 &&
+			    strncmp(buf, "offline", 7) == 0) {
+				free(dev);
+				continue;
+			}
 		}
 
 		/* finally add this disk to the array */
-- 
2.25.0
Accepting request 781064 from home:colyli:branches:Base:System - Update for latest mdadm-4.1+ patches, this is required by jsc#SLE-10078 and jsc#SLE-9348. Mostly the purpose is for latest Intel IMSM raid support. The following patches also include previous patches with new re-ordered prefix numbers. - Makefile: install mdadm_env.sh to /usr/lib/mdadm (bsc#1111960) 0000-Makefile-install-mdadm_env.sh-to-usr-lib-mdadm.patch - Assemble: keep MD_DISK_FAILFAST and MD_DISK_WRITEMOSTLY flag (jsc#SLE-10078, jsc#SLE-9348) 0001-Assemble-keep-MD_DISK_FAILFAST-and-MD_DISK_WRITEMOST.patch - Document PART-POLICY lines (jsc#SLE-10078, jsc#SLE-9348) 0002-Document-PART-POLICY-lines.patch - policy: support devices with multiple paths. (jsc#SLE-10078, jsc#SLE-9348) 0003-policy-support-devices-with-multiple-paths.patch - mdcheck: add systemd unit files to run mdcheck. (bsc#1115407) 0004-mdcheck-add-systemd-unit-files-to-run-mdcheck.patch - Monitor: add system timer to run --oneshot periodically (bsc#1115407) 0005-Monitor-add-system-timer-to-run-oneshot-periodically.patch - imsm: update metadata correctly while raid10 double (jsc#SLE-10078, jsc#SLE-9348) 0006-imsm-update-metadata-correctly-while-raid10-double-d.patch - Assemble: mask FAILFAST and WRITEMOSTLY flags when finding (jsc#SLE-10078, jsc#SLE-9348) 0007-Assemble-mask-FAILFAST-and-WRITEMOSTLY-flags-when-fi.patch - Grow: avoid overflow in compute_backup_blocks() (jsc#SLE-10078, jsc#SLE-9348) 0008-Grow-avoid-overflow-in-compute_backup_blocks.patch - Grow: report correct new chunk size. (jsc#SLE-10078, jsc#SLE-9348) 0009-Grow-report-correct-new-chunk-size.patch OBS-URL: https://build.opensuse.org/request/show/781064 OBS-URL: https://build.opensuse.org/package/show/Base:System/mdadm?expand=0&rev=181 2020-03-04 05:49:18 +01:00			`From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001`
			`From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>`
			`Date: Tue, 7 May 2019 16:08:47 +0200`
			`Subject: [PATCH] mdmon: fix wrong array state when disk fails during mdmon`
			`startup`
			`Git-commit: ae7d61e35ec2ab6361c3e509a8db00698ef3396f`
			`Patch-mainline: mdadm-4.1+`
			`References: jsc#SLE-10078, jsc#SLE-9348`

			`If a member drive disappears and is set faulty by the kernel during`
			`mdmon startup, after ss->load_container() but before manage_new(), mdmon`
			`will try to readd the faulty drive to the array and start rebuilding.`
			`Metadata on the active drive is updated, but the faulty drive is not`
			`removed from the array and is left in a "blocked" state and any write`
			`request to the array will block. If the faulty drive reappears in the`
			`system e.g. after a reboot, the array will not assemble because metadata`
			`on the drives will be incompatible (at least on imsm).`

			`Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This`
			`is an extension for the "GET_DEVS" option and causes all member devices`
			`to be returned, even if the associated block device has been removed.`
			`Use this option in manage_new() to include the faulty device on the`
			`active_array's devices list. Mdmon will then properly remove the faulty`
			`device from the array and update the metadata to reflect the degraded`
			`state.`

			`Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>`
			`Signed-off-by: Jes Sorensen <jsorensen@fb.com>`
			`Signed-off-by: Coly Li <colyli@suse.de>`

			`---`
			`managemon.c \| 2 +-`
			`mdadm.h \| 1 +`
			`super-intel.c \| 2 +-`
			`sysfs.c \| 23 ++++++++++++++---------`
			`4 files changed, 17 insertions(+), 11 deletions(-)`

			`diff --git a/managemon.c b/managemon.c`
			`index 29b91ba..200cf83 100644`
			`--- a/managemon.c`
			`+++ b/managemon.c`
			`@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,`
			`mdi = sysfs_read(-1, mdstat->devnm,`
			`GET_LEVEL\|GET_CHUNK\|GET_DISKS\|GET_COMPONENT\|`
			`GET_SAFEMODE\|GET_DEVS\|GET_OFFSET\|GET_SIZE\|GET_STATE\|`
			`- GET_LAYOUT);`
			`+ GET_LAYOUT\|GET_DEVS_ALL);`

			`if (!mdi)`
			`return;`
			`diff --git a/mdadm.h b/mdadm.h`
			`index 705bd9b..427cc52 100644`
			`--- a/mdadm.h`
			`+++ b/mdadm.h`
			`@@ -647,6 +647,7 @@ enum sysfs_read_flags {`
			`GET_ERROR = (1 << 24),`
			`GET_ARRAY_STATE = (1 << 25),`
			`GET_CONSISTENCY_POLICY = (1 << 26),`
			`+ GET_DEVS_ALL = (1 << 27),`
			`};`

			`/* If fd >= 0, get the array it is open on,`
			`diff --git a/super-intel.c b/super-intel.c`
			`index 2ba045a..4fd5e84 100644`
			`--- a/super-intel.c`
			`+++ b/super-intel.c`
			`@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)`
			`disk = get_imsm_disk(super, ord_to_idx(ord));`

			`/* check for new failures */`
			`- if (state & DS_FAULTY) {`
			`+ if (disk && (state & DS_FAULTY)) {`
			`if (mark_failure(super, dev, disk, ord_to_idx(ord)))`
			`super->updates_pending++;`
			`}`
			`diff --git a/sysfs.c b/sysfs.c`
			`index df6fdda..2dd9ab6 100644`
			`--- a/sysfs.c`
			`+++ b/sysfs.c`
			`@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)`
			`/* assume this is a stale reference to a hot`
			`* removed device`
			`*/`
			`- free(dev);`
			`- continue;`
			`+ if (!(options & GET_DEVS_ALL)) {`
			`+ free(dev);`
			`+ continue;`
			`+ }`
			`+ } else {`
			`+ sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);`
			`}`
			`- sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);`

			`- /* special case check for block devices that can go 'offline' */`
			`- strcpy(dbase, "block/device/state");`
			`- if (load_sys(fname, buf, sizeof(buf)) == 0 &&`
			`- strncmp(buf, "offline", 7) == 0) {`
			`- free(dev);`
			`- continue;`
			`+ if (!(options & GET_DEVS_ALL)) {`
			`+ /* special case check for block devices that can go 'offline' */`
			`+ strcpy(dbase, "block/device/state");`
			`+ if (load_sys(fname, buf, sizeof(buf)) == 0 &&`
			`+ strncmp(buf, "offline", 7) == 0) {`
			`+ free(dev);`
			`+ continue;`
			`+ }`
			`}`

			`/* finally add this disk to the array */`
			`--`
			`2.25.0`