2e35d7583b
- Update for latest mdadm-4.1+ patches, this is required by jsc#SLE-10078 and jsc#SLE-9348. Mostly the purpose is for latest Intel IMSM raid support. The following patches also include previous patches with new re-ordered prefix numbers. - Makefile: install mdadm_env.sh to /usr/lib/mdadm (bsc#1111960) 0000-Makefile-install-mdadm_env.sh-to-usr-lib-mdadm.patch - Assemble: keep MD_DISK_FAILFAST and MD_DISK_WRITEMOSTLY flag (jsc#SLE-10078, jsc#SLE-9348) 0001-Assemble-keep-MD_DISK_FAILFAST-and-MD_DISK_WRITEMOST.patch - Document PART-POLICY lines (jsc#SLE-10078, jsc#SLE-9348) 0002-Document-PART-POLICY-lines.patch - policy: support devices with multiple paths. (jsc#SLE-10078, jsc#SLE-9348) 0003-policy-support-devices-with-multiple-paths.patch - mdcheck: add systemd unit files to run mdcheck. (bsc#1115407) 0004-mdcheck-add-systemd-unit-files-to-run-mdcheck.patch - Monitor: add system timer to run --oneshot periodically (bsc#1115407) 0005-Monitor-add-system-timer-to-run-oneshot-periodically.patch - imsm: update metadata correctly while raid10 double (jsc#SLE-10078, jsc#SLE-9348) 0006-imsm-update-metadata-correctly-while-raid10-double-d.patch - Assemble: mask FAILFAST and WRITEMOSTLY flags when finding (jsc#SLE-10078, jsc#SLE-9348) 0007-Assemble-mask-FAILFAST-and-WRITEMOSTLY-flags-when-fi.patch - Grow: avoid overflow in compute_backup_blocks() (jsc#SLE-10078, jsc#SLE-9348) 0008-Grow-avoid-overflow-in-compute_backup_blocks.patch - Grow: report correct new chunk size. (jsc#SLE-10078, jsc#SLE-9348) 0009-Grow-report-correct-new-chunk-size.patch OBS-URL: https://build.opensuse.org/request/show/781064 OBS-URL: https://build.opensuse.org/package/show/Base:System/mdadm?expand=0&rev=181
115 lines
3.8 KiB
Diff
115 lines
3.8 KiB
Diff
From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001
|
|
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
|
|
Date: Tue, 7 May 2019 16:08:47 +0200
|
|
Subject: [PATCH] mdmon: fix wrong array state when disk fails during mdmon
|
|
startup
|
|
Git-commit: ae7d61e35ec2ab6361c3e509a8db00698ef3396f
|
|
Patch-mainline: mdadm-4.1+
|
|
References: jsc#SLE-10078, jsc#SLE-9348
|
|
|
|
If a member drive disappears and is set faulty by the kernel during
|
|
mdmon startup, after ss->load_container() but before manage_new(), mdmon
|
|
will try to readd the faulty drive to the array and start rebuilding.
|
|
Metadata on the active drive is updated, but the faulty drive is not
|
|
removed from the array and is left in a "blocked" state and any write
|
|
request to the array will block. If the faulty drive reappears in the
|
|
system e.g. after a reboot, the array will not assemble because metadata
|
|
on the drives will be incompatible (at least on imsm).
|
|
|
|
Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This
|
|
is an extension for the "GET_DEVS" option and causes all member devices
|
|
to be returned, even if the associated block device has been removed.
|
|
Use this option in manage_new() to include the faulty device on the
|
|
active_array's devices list. Mdmon will then properly remove the faulty
|
|
device from the array and update the metadata to reflect the degraded
|
|
state.
|
|
|
|
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
|
|
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
|
Signed-off-by: Coly Li <colyli@suse.de>
|
|
|
|
---
|
|
managemon.c | 2 +-
|
|
mdadm.h | 1 +
|
|
super-intel.c | 2 +-
|
|
sysfs.c | 23 ++++++++++++++---------
|
|
4 files changed, 17 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/managemon.c b/managemon.c
|
|
index 29b91ba..200cf83 100644
|
|
--- a/managemon.c
|
|
+++ b/managemon.c
|
|
@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,
|
|
mdi = sysfs_read(-1, mdstat->devnm,
|
|
GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
|
|
GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
|
|
- GET_LAYOUT);
|
|
+ GET_LAYOUT|GET_DEVS_ALL);
|
|
|
|
if (!mdi)
|
|
return;
|
|
diff --git a/mdadm.h b/mdadm.h
|
|
index 705bd9b..427cc52 100644
|
|
--- a/mdadm.h
|
|
+++ b/mdadm.h
|
|
@@ -647,6 +647,7 @@ enum sysfs_read_flags {
|
|
GET_ERROR = (1 << 24),
|
|
GET_ARRAY_STATE = (1 << 25),
|
|
GET_CONSISTENCY_POLICY = (1 << 26),
|
|
+ GET_DEVS_ALL = (1 << 27),
|
|
};
|
|
|
|
/* If fd >= 0, get the array it is open on,
|
|
diff --git a/super-intel.c b/super-intel.c
|
|
index 2ba045a..4fd5e84 100644
|
|
--- a/super-intel.c
|
|
+++ b/super-intel.c
|
|
@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
|
disk = get_imsm_disk(super, ord_to_idx(ord));
|
|
|
|
/* check for new failures */
|
|
- if (state & DS_FAULTY) {
|
|
+ if (disk && (state & DS_FAULTY)) {
|
|
if (mark_failure(super, dev, disk, ord_to_idx(ord)))
|
|
super->updates_pending++;
|
|
}
|
|
diff --git a/sysfs.c b/sysfs.c
|
|
index df6fdda..2dd9ab6 100644
|
|
--- a/sysfs.c
|
|
+++ b/sysfs.c
|
|
@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
|
|
/* assume this is a stale reference to a hot
|
|
* removed device
|
|
*/
|
|
- free(dev);
|
|
- continue;
|
|
+ if (!(options & GET_DEVS_ALL)) {
|
|
+ free(dev);
|
|
+ continue;
|
|
+ }
|
|
+ } else {
|
|
+ sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
|
|
}
|
|
- sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
|
|
|
|
- /* special case check for block devices that can go 'offline' */
|
|
- strcpy(dbase, "block/device/state");
|
|
- if (load_sys(fname, buf, sizeof(buf)) == 0 &&
|
|
- strncmp(buf, "offline", 7) == 0) {
|
|
- free(dev);
|
|
- continue;
|
|
+ if (!(options & GET_DEVS_ALL)) {
|
|
+ /* special case check for block devices that can go 'offline' */
|
|
+ strcpy(dbase, "block/device/state");
|
|
+ if (load_sys(fname, buf, sizeof(buf)) == 0 &&
|
|
+ strncmp(buf, "offline", 7) == 0) {
|
|
+ free(dev);
|
|
+ continue;
|
|
+ }
|
|
}
|
|
|
|
/* finally add this disk to the array */
|
|
--
|
|
2.25.0
|
|
|