2018-12-10 03:29:03 +01:00
|
|
|
From d7a1fda2769ba272d89de6caeab35d52b73a9c3c Mon Sep 17 00:00:00 2001
|
|
|
|
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
|
|
|
|
Date: Wed, 17 Oct 2018 12:11:41 +0200
|
|
|
|
Subject: [PATCH 5/5] imsm: update metadata correctly while raid10 double
|
|
|
|
degradation
|
2020-03-04 05:49:18 +01:00
|
|
|
Git-commit: d7a1fda2769ba272d89de6caeab35d52b73a9c3c
|
|
|
|
Patch-mainline: mdadm-4.1+
|
|
|
|
References: jsc#SLE-10078, jsc#SLE-9348
|
2018-12-10 03:29:03 +01:00
|
|
|
|
|
|
|
Mdmon calls end_migration() when map state changes from normal to
|
|
|
|
degraded. This is not valid because in the raid10 double degradation case
|
|
|
|
mdmon breaks checkpointing but array is still rebuilding.
|
|
|
|
In this case mdmon has to mark the map as degraded and continue marking
|
|
|
|
recovery checkpoint in metadata. Migration can be finished only if newly
|
|
|
|
failed device is a rebuilding device.
|
|
|
|
|
|
|
|
Also catch the double degraded to degraded transition. Migration is
|
|
|
|
finished but the map state doesn't change; the array is still degraded.
|
|
|
|
|
|
|
|
Update failed_disk_num correctly. If double degradation
|
|
|
|
happens rebuild will start on the lowest slot, but this variable points
|
|
|
|
to the first failed slot. If a second failure happens during rebuild, this
|
|
|
|
variable shouldn't be updated until the rebuild is finished.
|
|
|
|
|
|
|
|
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
|
|
|
|
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
2020-03-04 05:49:18 +01:00
|
|
|
Signed-off-by: Coly Li <colyli@suse.de>
|
|
|
|
|
2018-12-10 03:29:03 +01:00
|
|
|
---
|
|
|
|
super-intel.c | 25 +++++++++++++++++++------
|
|
|
|
1 file changed, 19 insertions(+), 6 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/super-intel.c b/super-intel.c
|
|
|
|
index 6438987b778c..d2035ccd8270 100644
|
|
|
|
--- a/super-intel.c
|
|
|
|
+++ b/super-intel.c
|
|
|
|
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
|
|
|
|
set_imsm_ord_tbl_ent(map2, slot2,
|
|
|
|
idx | IMSM_ORD_REBUILD);
|
|
|
|
}
|
|
|
|
- if (map->failed_disk_num == 0xff)
|
|
|
|
+ if (map->failed_disk_num == 0xff ||
|
|
|
|
+ (!is_rebuilding(dev) && map->failed_disk_num > slot))
|
|
|
|
map->failed_disk_num = slot;
|
|
|
|
|
|
|
|
clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
|
|
|
|
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (is_rebuilding(dev)) {
|
|
|
|
- dprintf_cont("while rebuilding.");
|
|
|
|
+ dprintf_cont("while rebuilding ");
|
|
|
|
if (map->map_state != map_state) {
|
|
|
|
- dprintf_cont(" Map state change");
|
|
|
|
- end_migration(dev, super, map_state);
|
|
|
|
+ dprintf_cont("map state change ");
|
|
|
|
+ if (n == map->failed_disk_num) {
|
|
|
|
+ dprintf_cont("end migration");
|
|
|
|
+ end_migration(dev, super, map_state);
|
|
|
|
+ } else {
|
|
|
|
+ dprintf_cont("raid10 double degradation, map state change");
|
|
|
|
+ map->map_state = map_state;
|
|
|
|
+ }
|
|
|
|
super->updates_pending++;
|
|
|
|
- } else if (!rebuild_done) {
|
|
|
|
+ } else if (!rebuild_done)
|
|
|
|
break;
|
|
|
|
+ else if (n == map->failed_disk_num) {
|
|
|
|
+ /* r10 double degraded to degraded transition */
|
|
|
|
+ dprintf_cont("raid10 double degradation end migration");
|
|
|
|
+ end_migration(dev, super, map_state);
|
|
|
|
+ a->last_checkpoint = 0;
|
|
|
|
+ super->updates_pending++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check if recovery is really finished */
|
|
|
|
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
|
|
|
}
|
|
|
|
if (recovery_not_finished) {
|
|
|
|
dprintf_cont("\n");
|
|
|
|
- dprintf("Rebuild has not finished yet, state not changed");
|
|
|
|
+ dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
|
|
|
|
if (a->last_checkpoint < mdi->recovery_start) {
|
|
|
|
a->last_checkpoint =
|
|
|
|
mdi->recovery_start;
|
|
|
|
--
|
|
|
|
2.14.0.rc0.dirty
|
|
|
|
|