From 5563a724f6392bed1ac2e9b6aefc9ea94a42c921f407d63e3aee9cb32a0f3946 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 9 Dec 2020 21:09:06 +0000 Subject: [PATCH] Accepting request 853269 from home:colyli:branches:Base:System - There are some important fixes merged in mdadm upstream which should go with jsc#SLE-13700. This is the update from upstream mdadm including the important fixes we should have. - Detail: show correct raid level when the array is inactive (jsc#SLE-13700) 0095-Detail-show-correct-raid-level-when-the-array-is-ina.patch - Don't create bitmap for raid5 with journal disk (jsc#SLE-13700) 0096-Don-t-create-bitmap-for-raid5-with-journal-disk.patch - Monitor: refresh mdstat fd after select (jsc#SLE-13700) 0097-Monitor-refresh-mdstat-fd-after-select.patch - Monitor: stop notifing about containers. (jsc#SLE-13700) 0098-Monitor-stop-notifing-about-containers.patch - mdmonitor: set small delay once (jsc#SLE-13700) 0099-mdmonitor-set-small-delay-once.patch - Check if other Monitor instance running before fork. (jsc#SLE-13700) 0100-Check-if-other-Monitor-instance-running-before-fork.patch - Super1: allow RAID0 layout setting to be removed. 
(jsc#SLE-13700) 0101-Super1-allow-RAID0-layout-setting-to-be-removed.patch - Detail: fix segfault during IMSM raid creation (jsc#SLE-13700) 0102-Detail-fix-segfault-during-IMSM-raid-creation.patch - Create.c: close mdfd and generate uevent (jsc#SLE-13700) 0103-Create.c-close-mdfd-and-generate-uevent.patch - imsm: update num_data_stripes according to dev_size (jsc#SLE-13700) 0104-imsm-update-num_data_stripes-according-to-dev_size.patch - imsm: remove redundant calls to imsm_get_map (jsc#SLE-13700) OBS-URL: https://build.opensuse.org/request/show/853269 OBS-URL: https://build.opensuse.org/package/show/Base:System/mdadm?expand=0&rev=191 --- ...ect-raid-level-when-the-array-is-ina.patch | 68 ++++ ...e-bitmap-for-raid5-with-journal-disk.patch | 29 ++ ...nitor-refresh-mdstat-fd-after-select.patch | 70 ++++ ...nitor-stop-notifing-about-containers.patch | 78 ++++ 0099-mdmonitor-set-small-delay-once.patch | 103 +++++ ...Monitor-instance-running-before-fork.patch | 103 +++++ ...w-RAID0-layout-setting-to-be-removed.patch | 136 +++++++ ...x-segfault-during-IMSM-raid-creation.patch | 33 ++ ...ate.c-close-mdfd-and-generate-uevent.patch | 37 ++ ...m_data_stripes-according-to-dev_size.patch | 166 ++++++++ ...move-redundant-calls-to-imsm_get_map.patch | 35 ++ ...e-default-modes-when-creating-a-file.patch | 55 +++ ...imit-support-to-first-NVMe-namespace.patch | 95 +++++ 0108-mdadm-Unify-forks-behaviour.patch | 354 ++++++++++++++++++ ...ow-correct-state-for-clustered-array.patch | 197 ++++++++++ ...Make-target-to-install-binaries-only.patch | 43 +++ ...dev-start-grow-service-automatically.patch | 37 ++ mdadm.changes | 49 +++ mdadm.spec | 34 ++ 19 files changed, 1722 insertions(+) create mode 100644 0095-Detail-show-correct-raid-level-when-the-array-is-ina.patch create mode 100644 0096-Don-t-create-bitmap-for-raid5-with-journal-disk.patch create mode 100644 0097-Monitor-refresh-mdstat-fd-after-select.patch create mode 100644 0098-Monitor-stop-notifing-about-containers.patch create 
mode 100644 0099-mdmonitor-set-small-delay-once.patch create mode 100644 0100-Check-if-other-Monitor-instance-running-before-fork.patch create mode 100644 0101-Super1-allow-RAID0-layout-setting-to-be-removed.patch create mode 100644 0102-Detail-fix-segfault-during-IMSM-raid-creation.patch create mode 100644 0103-Create.c-close-mdfd-and-generate-uevent.patch create mode 100644 0104-imsm-update-num_data_stripes-according-to-dev_size.patch create mode 100644 0105-imsm-remove-redundant-calls-to-imsm_get_map.patch create mode 100644 0106-Monitor-don-t-use-default-modes-when-creating-a-file.patch create mode 100644 0107-imsm-limit-support-to-first-NVMe-namespace.patch create mode 100644 0108-mdadm-Unify-forks-behaviour.patch create mode 100644 0109-mdadm-Detail-show-correct-state-for-clustered-array.patch create mode 100644 0110-Make-target-to-install-binaries-only.patch create mode 100644 0111-udev-start-grow-service-automatically.patch diff --git a/0095-Detail-show-correct-raid-level-when-the-array-is-ina.patch b/0095-Detail-show-correct-raid-level-when-the-array-is-ina.patch new file mode 100644 index 0000000..3820023 --- /dev/null +++ b/0095-Detail-show-correct-raid-level-when-the-array-is-ina.patch @@ -0,0 +1,68 @@ +From 64bf4dff34301a4b44883a8bc03f7835faef121e Mon Sep 17 00:00:00 2001 +From: Lidong Zhong +Date: Mon, 14 Sep 2020 10:52:18 +0800 +Subject: [PATCH 01/17] Detail: show correct raid level when the array is + inactive + +Sometimes the raid level in the output of `mdadm -D /dev/mdX` is +misleading when the array is in inactive state. Here is a testcase for +introduction. +1\ creating a raid1 device with two disks. Specify a different hostname +rather than the real one for later verfication. 
+ +node1:~ # mdadm --create /dev/md0 --homehost TESTARRAY -o -l 1 -n 2 /dev/sdb +/dev/sdc +2\ remove one of the devices and reboot +3\ show the detail of raid1 device + +node1:~ # mdadm -D /dev/md127 +/dev/md127: + Version : 1.2 + Raid Level : raid0 + Total Devices : 1 + Persistence : Superblock is persistent + State : inactive +Working Devices : 1 + +You can see that the "Raid Level" in /dev/md127 is raid0 now. +After step 2\ is done, the degraded raid1 device is recognized +as a "foreign" array in 64-md-raid-assembly.rules. And thus the +timer to activate the raid1 device is not triggered. The array +level returned from GET_ARRAY_INFO ioctl is 0. And the string +shown for "Raid Level" is +str = map_num(pers, array.level); +And the definition of pers is +mapping_t pers[] = { +{ "linear", LEVEL_LINEAR}, +{ "raid0", 0}, +{ "0", 0} +... +So the misleading "raid0" is shown in this testcase. + +Changelog: +v1: don't show "Raid Level" when array is inactive +Signed-off-by: Lidong Zhong +Signed-off-by: Jes Sorensen +--- + Detail.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/Detail.c b/Detail.c +index 24eeba0..b6587c8 100644 +--- a/Detail.c ++++ b/Detail.c +@@ -224,7 +224,10 @@ int Detail(char *dev, struct context *c) + } + + /* Ok, we have some info to print... 
*/ +- str = map_num(pers, array.level); ++ if (inactive) ++ str = map_num(pers, info->array.level); ++ else ++ str = map_num(pers, array.level); + + if (c->export) { + if (array.raid_disks) { +-- +2.26.2 + diff --git a/0096-Don-t-create-bitmap-for-raid5-with-journal-disk.patch b/0096-Don-t-create-bitmap-for-raid5-with-journal-disk.patch new file mode 100644 index 0000000..54103a2 --- /dev/null +++ b/0096-Don-t-create-bitmap-for-raid5-with-journal-disk.patch @@ -0,0 +1,29 @@ +From 2ce091724031e18f522994ffd1e5eb0dc404bcba Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Tue, 15 Sep 2020 15:44:42 +0800 +Subject: [PATCH 02/17] Don't create bitmap for raid5 with journal disk + +Journal disk and bitmap can't exist at the same time. It needs to check if the raid +has a journal disk when creating bitmap. + +Signed-off-by: Xiao Ni +Signed-off-by: Jes Sorensen +--- + Create.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/Create.c b/Create.c +index 6f84e5b..0efa19c 100644 +--- a/Create.c ++++ b/Create.c +@@ -542,6 +542,7 @@ int Create(struct supertype *st, char *mddev, + if (!s->bitmap_file && + s->level >= 1 && + st->ss->add_internal_bitmap && ++ s->journaldisks == 0 && + (s->consistency_policy != CONSISTENCY_POLICY_RESYNC && + s->consistency_policy != CONSISTENCY_POLICY_PPL) && + (s->write_behind || s->size > 100*1024*1024ULL)) { +-- +2.26.2 + diff --git a/0097-Monitor-refresh-mdstat-fd-after-select.patch b/0097-Monitor-refresh-mdstat-fd-after-select.patch new file mode 100644 index 0000000..e6d0df5 --- /dev/null +++ b/0097-Monitor-refresh-mdstat-fd-after-select.patch @@ -0,0 +1,70 @@ +From e2308733910a157b0a4d4e78721f239d44b91a24 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Wed, 9 Sep 2020 10:31:17 +0200 +Subject: [PATCH 03/17] Monitor: refresh mdstat fd after select + +After 52209d6ee118 ("Monitor: release /proc/mdstat fd when no arrays +present") mdstat fd is closed if mdstat is empty or cannot be opened. 
+It causes that monitor is not able to select on mdstat. Select +doesn't fail because it gets valid descriptor to a different resource. +As a result any new event will be unnoticed until timeout (delay). + +Refresh mdstat after wake up, don't poll on wrong resource. + +Signed-off-by: Mariusz Tkaczyk +Signed-off-by: Jes Sorensen +--- + Monitor.c | 6 +++--- + mdstat.c | 4 ++-- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/Monitor.c b/Monitor.c +index 2d6b3b9..80a3200 100644 +--- a/Monitor.c ++++ b/Monitor.c +@@ -216,8 +216,6 @@ int Monitor(struct mddev_dev *devlist, + if (mdstat) + free_mdstat(mdstat); + mdstat = mdstat_read(oneshot ? 0 : 1, 0); +- if (!mdstat) +- mdstat_close(); + + for (st = statelist; st; st = st->next) + if (check_array(st, mdstat, c->test, &info, +@@ -238,8 +236,10 @@ int Monitor(struct mddev_dev *devlist, + if (!new_found) { + if (oneshot) + break; +- else ++ else { + mdstat_wait(c->delay); ++ mdstat_close(); ++ } + } + c->test = 0; + +diff --git a/mdstat.c b/mdstat.c +index 20577a3..48559e6 100644 +--- a/mdstat.c ++++ b/mdstat.c +@@ -135,7 +135,6 @@ struct mdstat_ent *mdstat_read(int hold, int start) + if (hold && mdstat_fd != -1) { + off_t offset = lseek(mdstat_fd, 0L, 0); + if (offset == (off_t)-1) { +- mdstat_close(); + return NULL; + } + fd = dup(mdstat_fd); +@@ -312,7 +311,8 @@ void mdstat_wait(int seconds) + if (mdstat_fd >= 0) { + FD_SET(mdstat_fd, &fds); + maxfd = mdstat_fd; +- } ++ } else ++ return; + tm.tv_sec = seconds; + tm.tv_usec = 0; + select(maxfd + 1, NULL, NULL, &fds, &tm); +-- +2.26.2 + diff --git a/0098-Monitor-stop-notifing-about-containers.patch b/0098-Monitor-stop-notifing-about-containers.patch new file mode 100644 index 0000000..3c72ac4 --- /dev/null +++ b/0098-Monitor-stop-notifing-about-containers.patch @@ -0,0 +1,78 @@ +From 007087d0898a045901e4e120296e6d9b845b20a6 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Wed, 9 Sep 2020 10:31:18 +0200 +Subject: [PATCH 04/17] Monitor: stop notifing 
about containers. + +Stop reporting any events from container but still track them, +it is important for spare migration. +Stop mdmonitor if no redundant array is presented in mdstat. +There is nothing to follow. + +Signed-off-by: Mariusz Tkaczyk +Signed-off-by: Jes Sorensen +--- + Monitor.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/Monitor.c b/Monitor.c +index 80a3200..aed7a69 100644 +--- a/Monitor.c ++++ b/Monitor.c +@@ -212,15 +212,24 @@ int Monitor(struct mddev_dev *devlist, + int new_found = 0; + struct state *st, **stp; + int anydegraded = 0; ++ int anyredundant = 0; + + if (mdstat) + free_mdstat(mdstat); + mdstat = mdstat_read(oneshot ? 0 : 1, 0); + +- for (st = statelist; st; st = st->next) ++ for (st = statelist; st; st = st->next) { + if (check_array(st, mdstat, c->test, &info, + increments, c->prefer)) + anydegraded = 1; ++ /* for external arrays, metadata is filled for ++ * containers only ++ */ ++ if (st->metadata && st->metadata->ss->external) ++ continue; ++ if (st->err == 0 && !anyredundant) ++ anyredundant = 1; ++ } + + /* now check if there are any new devices found in mdstat */ + if (c->scan) +@@ -236,6 +245,9 @@ int Monitor(struct mddev_dev *devlist, + if (!new_found) { + if (oneshot) + break; ++ else if (!anyredundant) { ++ break; ++ } + else { + mdstat_wait(c->delay); + mdstat_close(); +@@ -542,7 +554,8 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, + st->err = 0; + st->percent = RESYNC_NONE; + new_array = 1; +- alert("NewArray", st->devname, NULL, ainfo); ++ if (!is_container) ++ alert("NewArray", st->devname, NULL, ainfo); + } + + if (st->utime == array.utime && st->failed == sra->array.failed_disks && +@@ -676,7 +689,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, + return retval; + + disappeared: +- if (!st->err) ++ if (!st->err && !is_container) + alert("DeviceDisappeared", dev, NULL, ainfo); + st->err++; + goto out; +-- +2.26.2 + diff --git 
a/0099-mdmonitor-set-small-delay-once.patch b/0099-mdmonitor-set-small-delay-once.patch new file mode 100644 index 0000000..345fc5f --- /dev/null +++ b/0099-mdmonitor-set-small-delay-once.patch @@ -0,0 +1,103 @@ +From cab9c67d461c65a1138359f9f6d39636466b90e4 Mon Sep 17 00:00:00 2001 +From: Blazej Kucman +Date: Wed, 9 Sep 2020 10:31:19 +0200 +Subject: [PATCH 05/17] mdmonitor: set small delay once + +If mdmonitor is awakened by event, set small delay once +to deal with udev and mdadm. + +Signed-off-by: Blazej Kucman +Signed-off-by: Mariusz Tkaczyk +Signed-off-by: Jes Sorensen +--- + Monitor.c | 14 +++++++++++++- + mdadm.h | 2 +- + mdstat.c | 18 +++++++++++++++--- + 3 files changed, 29 insertions(+), 5 deletions(-) + +diff --git a/Monitor.c b/Monitor.c +index aed7a69..0fb4f77 100644 +--- a/Monitor.c ++++ b/Monitor.c +@@ -128,6 +128,7 @@ int Monitor(struct mddev_dev *devlist, + char *mailfrom; + struct alert_info info; + struct mddev_ident *mdlist; ++ int delay_for_event = c->delay; + + if (!mailaddr) { + mailaddr = conf_get_mailaddr(); +@@ -249,7 +250,18 @@ int Monitor(struct mddev_dev *devlist, + break; + } + else { +- mdstat_wait(c->delay); ++ int wait_result = mdstat_wait(delay_for_event); ++ ++ /* ++ * If mdmonitor is awaken by event, set small delay once ++ * to deal with udev and mdadm. 
++ */ ++ if (wait_result != 0) { ++ if (c->delay > 5) ++ delay_for_event = 5; ++ } else ++ delay_for_event = c->delay; ++ + mdstat_close(); + } + } +diff --git a/mdadm.h b/mdadm.h +index 399478b..4961c0f 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -628,7 +628,7 @@ struct mdstat_ent { + extern struct mdstat_ent *mdstat_read(int hold, int start); + extern void mdstat_close(void); + extern void free_mdstat(struct mdstat_ent *ms); +-extern void mdstat_wait(int seconds); ++extern int mdstat_wait(int seconds); + extern void mdstat_wait_fd(int fd, const sigset_t *sigmask); + extern int mddev_busy(char *devnm); + extern struct mdstat_ent *mdstat_by_component(char *name); +diff --git a/mdstat.c b/mdstat.c +index 48559e6..dd96cca 100644 +--- a/mdstat.c ++++ b/mdstat.c +@@ -302,7 +302,17 @@ void mdstat_close(void) + mdstat_fd = -1; + } + +-void mdstat_wait(int seconds) ++/* ++ * function: mdstat_wait ++ * Description: Function waits for event on mdstat. ++ * Parameters: ++ * seconds - timeout for waiting ++ * Returns: ++ * > 0 - detected event ++ * 0 - timeout ++ * < 0 - detected error ++ */ ++int mdstat_wait(int seconds) + { + fd_set fds; + struct timeval tm; +@@ -312,10 +322,12 @@ void mdstat_wait(int seconds) + FD_SET(mdstat_fd, &fds); + maxfd = mdstat_fd; + } else +- return; ++ return -1; ++ + tm.tv_sec = seconds; + tm.tv_usec = 0; +- select(maxfd + 1, NULL, NULL, &fds, &tm); ++ ++ return select(maxfd + 1, NULL, NULL, &fds, &tm); + } + + void mdstat_wait_fd(int fd, const sigset_t *sigmask) +-- +2.26.2 + diff --git a/0100-Check-if-other-Monitor-instance-running-before-fork.patch b/0100-Check-if-other-Monitor-instance-running-before-fork.patch new file mode 100644 index 0000000..dc9b03a --- /dev/null +++ b/0100-Check-if-other-Monitor-instance-running-before-fork.patch @@ -0,0 +1,103 @@ +From 7f3b2d1d1621cbdc60b5af4a41445391010fe9e1 Mon Sep 17 00:00:00 2001 +From: Blazej Kucman +Date: Wed, 9 Sep 2020 10:31:20 +0200 +Subject: [PATCH 06/17] Check if other Monitor instance running 
before fork. + +Make error message visible to the user. + +Signed-off-by: Blazej Kucman +Signed-off-by: Mariusz Tkaczyk +Signed-off-by: Jes Sorensen +--- + Monitor.c | 44 ++++++++++++++++++++++++++++---------------- + 1 file changed, 28 insertions(+), 16 deletions(-) + +diff --git a/Monitor.c b/Monitor.c +index 0fb4f77..7fd4808 100644 +--- a/Monitor.c ++++ b/Monitor.c +@@ -63,6 +63,7 @@ struct alert_info { + }; + static int make_daemon(char *pidfile); + static int check_one_sharer(int scan); ++static void write_autorebuild_pid(void); + static void alert(char *event, char *dev, char *disc, struct alert_info *info); + static int check_array(struct state *st, struct mdstat_ent *mdstat, + int test, struct alert_info *info, +@@ -153,6 +154,11 @@ int Monitor(struct mddev_dev *devlist, + info.mailfrom = mailfrom; + info.dosyslog = dosyslog; + ++ if (share){ ++ if (check_one_sharer(c->scan)) ++ return 1; ++ } ++ + if (daemonise) { + int rv = make_daemon(pidfile); + if (rv >= 0) +@@ -160,8 +166,7 @@ int Monitor(struct mddev_dev *devlist, + } + + if (share) +- if (check_one_sharer(c->scan)) +- return 1; ++ write_autorebuild_pid(); + + if (devlist == NULL) { + mdlist = conf_get_ident(NULL); +@@ -328,8 +333,8 @@ static int check_one_sharer(int scan) + int pid; + FILE *comm_fp; + FILE *fp; +- char comm_path[100]; +- char path[100]; ++ char comm_path[PATH_MAX]; ++ char path[PATH_MAX]; + char comm[20]; + + sprintf(path, "%s/autorebuild.pid", MDMON_DIR); +@@ -356,21 +361,28 @@ static int check_one_sharer(int scan) + } + fclose(fp); + } +- if (scan) { +- if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) { ++ return 0; ++} ++ ++static void write_autorebuild_pid() ++{ ++ char path[PATH_MAX]; ++ int pid; ++ FILE *fp; ++ sprintf(path, "%s/autorebuild.pid", MDMON_DIR); ++ ++ if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) { ++ pr_err("Can't create autorebuild.pid file\n"); ++ } else { ++ fp = fopen(path, "w"); ++ if (!fp) + pr_err("Can't create autorebuild.pid file\n"); +- } 
else { +- fp = fopen(path, "w"); +- if (!fp) +- pr_err("Cannot create autorebuild.pidfile\n"); +- else { +- pid = getpid(); +- fprintf(fp, "%d\n", pid); +- fclose(fp); +- } ++ else { ++ pid = getpid(); ++ fprintf(fp, "%d\n", pid); ++ fclose(fp); + } + } +- return 0; + } + + static void alert(char *event, char *dev, char *disc, struct alert_info *info) +-- +2.26.2 + diff --git a/0101-Super1-allow-RAID0-layout-setting-to-be-removed.patch b/0101-Super1-allow-RAID0-layout-setting-to-be-removed.patch new file mode 100644 index 0000000..1a3c3a5 --- /dev/null +++ b/0101-Super1-allow-RAID0-layout-setting-to-be-removed.patch @@ -0,0 +1,136 @@ +From 97b51a2c2d00b79a59f2a8e37134031b0c9e0223 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Wed, 14 Oct 2020 13:12:48 +1100 +Subject: [PATCH 07/17] Super1: allow RAID0 layout setting to be removed. + +Once the RAID0 layout has been set, the RAID0 array cannot be assembled +on an older kernel which doesn't understand layouts. +This is an intentional safety feature, but sometimes people need the +ability to roll-back to a previously working configuration. + +So add "--update=layout-unspecified" to remove RAID0 layout information +from the superblock. +Running "--assemble --update=layout-unspecified" will cause the assembly +to fail when run on a newer kernel, but will allow it to work on +an older kernel. + +Signed-off-by: NeilBrown +Signed-off-by: Jes Sorensen +--- + md.4 | 13 +++++++++++++ + mdadm.8.in | 15 +++++++++++++-- + mdadm.c | 5 +++-- + super1.c | 6 +++++- + 4 files changed, 34 insertions(+), 5 deletions(-) + +diff --git a/md.4 b/md.4 +index aecff38..60fdd27 100644 +--- a/md.4 ++++ b/md.4 +@@ -215,6 +215,19 @@ option or the + .B "--update=layout-alternate" + option. + ++Once you have updated the layout you will not be able to mount the array ++on an older kernel. If you need to revert to an older kernel, the ++layout information can be erased with the ++.B "--update=layout-unspecified" ++option. 
If you use this option to ++.B --assemble ++while running a newer kernel, the array will NOT assemble, but the ++metadata will be updated so that it can be assembled on an older kernel. ++ ++Note that setting the layout to "unspecified" removes protections against ++this bug, and you must be sure that the kernel you use matches the ++layout of the array. ++ + .SS RAID1 + + A RAID1 array is also known as a mirrored set (though mirrors tend to +diff --git a/mdadm.8.in b/mdadm.8.in +index ab832e8..34a93a8 100644 +--- a/mdadm.8.in ++++ b/mdadm.8.in +@@ -1213,6 +1213,7 @@ argument given to this flag can be one of + .BR no\-ppl , + .BR layout\-original , + .BR layout\-alternate , ++.BR layout\-unspecified , + .BR metadata , + or + .BR super\-minor . +@@ -1368,8 +1369,9 @@ The + .B layout\-original + and + .B layout\-alternate +-options are for RAID0 arrays in use before Linux 5.4. If the array was being +-used with Linux 3.13 or earlier, then to assemble the array on a new kernel, ++options are for RAID0 arrays with non-uniform devices size that were in ++use before Linux 5.4. If the array was being used with Linux 3.13 or ++earlier, then to assemble the array on a new kernel, + .B \-\-update=layout\-original + must be given. If the array was created and used with a kernel from Linux 3.14 to + Linux 5.3, then +@@ -1379,6 +1381,15 @@ will happen normally. + For more information, see + .IR md (4). + ++The ++.B layout\-unspecified ++option reverts the effect of ++.B layout\-original ++or ++.B layout\-alternate ++and allows the array to be again used on a kernel prior to Linux 5.3. ++This option should be used with great caution. ++ + .TP + .BR \-\-freeze\-reshape + Option is intended to be used in start-up scripts during initrd boot phase. 
+diff --git a/mdadm.c b/mdadm.c +index 1b3467f..493d70e 100644 +--- a/mdadm.c ++++ b/mdadm.c +@@ -796,7 +796,8 @@ int main(int argc, char *argv[]) + if (strcmp(c.update, "revert-reshape") == 0) + continue; + if (strcmp(c.update, "layout-original") == 0 || +- strcmp(c.update, "layout-alternate") == 0) ++ strcmp(c.update, "layout-alternate") == 0 || ++ strcmp(c.update, "layout-unspecified") == 0) + continue; + if (strcmp(c.update, "byteorder") == 0) { + if (ss) { +@@ -828,7 +829,7 @@ int main(int argc, char *argv[]) + " 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n" + " 'no-bitmap', 'metadata', 'revert-reshape'\n" + " 'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n" +- " 'layout-original', 'layout-alternate'\n" ++ " 'layout-original', 'layout-alternate', 'layout-unspecified'\n" + ); + exit(outf == stdout ? 0 : 2); + +diff --git a/super1.c b/super1.c +index 7664883..8b0d6ff 100644 +--- a/super1.c ++++ b/super1.c +@@ -1551,11 +1551,15 @@ static int update_super1(struct supertype *st, struct mdinfo *info, + else if (strcmp(update, "nofailfast") == 0) + sb->devflags &= ~FailFast1; + else if (strcmp(update, "layout-original") == 0 || +- strcmp(update, "layout-alternate") == 0) { ++ strcmp(update, "layout-alternate") == 0 || ++ strcmp(update, "layout-unspecified") == 0) { + if (__le32_to_cpu(sb->level) != 0) { + pr_err("%s: %s only supported for RAID0\n", + devname?:"", update); + rv = -1; ++ } else if (strcmp(update, "layout-unspecified") == 0) { ++ sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_RAID0_LAYOUT); ++ sb->layout = 0; + } else { + sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT); + sb->layout = __cpu_to_le32(update[7] == 'o' ? 
1 : 2); +-- +2.26.2 + diff --git a/0102-Detail-fix-segfault-during-IMSM-raid-creation.patch b/0102-Detail-fix-segfault-during-IMSM-raid-creation.patch new file mode 100644 index 0000000..d7db830 --- /dev/null +++ b/0102-Detail-fix-segfault-during-IMSM-raid-creation.patch @@ -0,0 +1,33 @@ +From c3129b39a7d467eec063681529f46f84a2a85308 Mon Sep 17 00:00:00 2001 +From: Lidong Zhong +Date: Sun, 22 Nov 2020 23:12:29 +0800 +Subject: [PATCH 08/17] Detail: fix segfault during IMSM raid creation + +It can be reproduced with non IMSM hardware and IMSM_NO_PLATFORM +environmental variable set. The array state is inactive when creating +an IMSM container. And the structure info is NULL because load_super() +always fails since no intel HBA information could be obtained. + +Signed-off-by: Lidong Zhong +Reported-by: Tkaczyk Mariusz +Fixes: 64bf4dff3430 (Detail: show correct raid level when the array is inactive) +--- + Detail.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Detail.c b/Detail.c +index b6587c8..ea86884 100644 +--- a/Detail.c ++++ b/Detail.c +@@ -224,7 +224,7 @@ int Detail(char *dev, struct context *c) + } + + /* Ok, we have some info to print... */ +- if (inactive) ++ if (inactive && info) + str = map_num(pers, info->array.level); + else + str = map_num(pers, array.level); +-- +2.26.2 + diff --git a/0103-Create.c-close-mdfd-and-generate-uevent.patch b/0103-Create.c-close-mdfd-and-generate-uevent.patch new file mode 100644 index 0000000..142469f --- /dev/null +++ b/0103-Create.c-close-mdfd-and-generate-uevent.patch @@ -0,0 +1,37 @@ +From ce559078a5650afb9f635204b31a89a1fa0061e3 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Tue, 24 Nov 2020 13:39:49 +0100 +Subject: [PATCH 09/17] Create.c: close mdfd and generate uevent + +During mdfd closing change event is not generated because open() is +called before start watching mddevice by udev. +Device is ready at this stage. 
Unblock device, close fd and +generate event to give a chance next layers to work. + +Signed-off-by: Mariusz Tkaczyk +--- + Create.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/Create.c b/Create.c +index 0efa19c..51f8826 100644 +--- a/Create.c ++++ b/Create.c +@@ -1083,12 +1083,9 @@ int Create(struct supertype *st, char *mddev, + } else { + pr_err("not starting array - not enough devices.\n"); + } +- close(mdfd); +- /* Give udev a moment to process the Change event caused +- * by the close. +- */ +- usleep(100*1000); + udev_unblock(); ++ close(mdfd); ++ sysfs_uevent(&info, "change"); + return 0; + + abort: +-- +2.26.2 + diff --git a/0104-imsm-update-num_data_stripes-according-to-dev_size.patch b/0104-imsm-update-num_data_stripes-according-to-dev_size.patch new file mode 100644 index 0000000..c2832ef --- /dev/null +++ b/0104-imsm-update-num_data_stripes-according-to-dev_size.patch @@ -0,0 +1,166 @@ +From 895ffd992954069e4ea67efb8a85bb0fd72c3707 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Tue, 24 Nov 2020 14:15:15 +0100 +Subject: [PATCH 10/17] imsm: update num_data_stripes according to dev_size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +If array was created in UEFI there is possibility that +member size is not rounded to 1MB. After any size reconfiguration +it will be rounded down to 1MB per each member but the old +component size will remain in metadata. +During reshape old array size is calculated from component size because +dev_size is not a part of map and is bumped to new value quickly. +It may result in size mismatch if array is assembled during reshape. + +If difference in calculated size and dev_size is observed try to fix it. +num_data_stripes value can be safety updated to smaller value if array +doesn't occuppy whole reserved component space. 
+ +Signed-off-by: Mariusz Tkaczyk +--- + super-intel.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 78 insertions(+), 6 deletions(-) + +diff --git a/super-intel.c b/super-intel.c +index 3a73d2b..9562064 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -3453,7 +3453,6 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, + __u64 blocks_per_unit = blocks_per_migr_unit(super, + dev); + __u64 units = current_migr_unit(migr_rec); +- unsigned long long array_blocks; + int used_disks; + + if (__le32_to_cpu(migr_rec->ascending_migr) && +@@ -3472,12 +3471,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, + + used_disks = imsm_num_data_members(prev_map); + if (used_disks > 0) { +- array_blocks = per_dev_array_size(map) * ++ info->custom_array_size = per_dev_array_size(map) * + used_disks; +- info->custom_array_size = +- round_size_to_mb(array_blocks, +- used_disks); +- + } + } + case MIGR_VERIFY: +@@ -11682,6 +11677,68 @@ int imsm_takeover(struct supertype *st, struct geo_params *geo) + return 0; + } + ++/* Flush size update if size calculated by num_data_stripes is higher than ++ * imsm_dev_size to eliminate differences during reshape. ++ * Mdmon will recalculate them correctly. ++ * If subarray index is not set then check whole container. 
++ * Returns: ++ * 0 - no error occurred ++ * 1 - error detected ++ */ ++static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index) ++{ ++ struct intel_super *super = st->sb; ++ int tmp = super->current_vol; ++ int ret_val = 1; ++ int i; ++ ++ for (i = 0; i < super->anchor->num_raid_devs; i++) { ++ if (subarray_index >= 0 && i != subarray_index) ++ continue; ++ super->current_vol = i; ++ struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); ++ struct imsm_map *map = get_imsm_map(dev, MAP_0); ++ unsigned int disc_count = imsm_num_data_members(map); ++ struct geo_params geo; ++ struct imsm_update_size_change *update; ++ unsigned long long calc_size = per_dev_array_size(map) * disc_count; ++ unsigned long long d_size = imsm_dev_size(dev); ++ int u_size; ++ ++ if (calc_size == d_size || dev->vol.migr_type == MIGR_GEN_MIGR) ++ continue; ++ ++ /* There is a difference, verify that imsm_dev_size is ++ * rounded correctly and push update. ++ */ ++ if (d_size != round_size_to_mb(d_size, disc_count)) { ++ dprintf("imsm: Size of volume %d is not rounded correctly\n", ++ i); ++ goto exit; ++ } ++ memset(&geo, 0, sizeof(struct geo_params)); ++ geo.size = d_size; ++ u_size = imsm_create_metadata_update_for_size_change(st, &geo, ++ &update); ++ if (u_size < 1) { ++ dprintf("imsm: Cannot prepare size change update\n"); ++ goto exit; ++ } ++ imsm_update_metadata_locally(st, update, u_size); ++ if (st->update_tail) { ++ append_metadata_update(st, update, u_size); ++ flush_metadata_updates(st); ++ st->update_tail = &st->updates; ++ } else { ++ imsm_sync_metadata(st); ++ } ++ } ++ ret_val = 0; ++exit: ++ super->current_vol = tmp; ++ return ret_val; ++} ++ + static int imsm_reshape_super(struct supertype *st, unsigned long long size, + int level, + int layout, int chunksize, int raid_disks, +@@ -11718,6 +11775,11 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, + struct imsm_update_reshape *u = NULL; + int len; + ++ if 
(imsm_fix_size_mismatch(st, -1)) { ++ dprintf("imsm: Cannot fix size mismatch\n"); ++ goto exit_imsm_reshape_super; ++ } ++ + len = imsm_create_metadata_update_for_reshape( + st, &geo, old_raid_disks, &u); + +@@ -12020,6 +12082,7 @@ static int imsm_manage_reshape( + unsigned long long start_buf_shift; /* [bytes] */ + int degraded = 0; + int source_layout = 0; ++ int subarray_index = -1; + + if (!sra) + return ret_val; +@@ -12033,6 +12096,7 @@ static int imsm_manage_reshape( + dv->dev->vol.migr_state == 1) { + dev = dv->dev; + migr_vol_qan++; ++ subarray_index = dv->index; + } + } + /* Only one volume can migrate at the same time */ +@@ -12217,6 +12281,14 @@ static int imsm_manage_reshape( + + /* return '1' if done */ + ret_val = 1; ++ ++ /* After the reshape eliminate size mismatch in metadata. ++ * Don't update md/component_size here, volume hasn't ++ * to take whole space. It is allowed by kernel. ++ * md/component_size will be set propoperly after next assembly. ++ */ ++ imsm_fix_size_mismatch(st, subarray_index); ++ + abort: + free(buf); + /* See Grow.c: abort_reshape() for further explanation */ +-- +2.26.2 + diff --git a/0105-imsm-remove-redundant-calls-to-imsm_get_map.patch b/0105-imsm-remove-redundant-calls-to-imsm_get_map.patch new file mode 100644 index 0000000..8f8bdc3 --- /dev/null +++ b/0105-imsm-remove-redundant-calls-to-imsm_get_map.patch @@ -0,0 +1,35 @@ +From b65c1f4a2340e24ae00babc4399fb4030ff99517 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Tue, 24 Nov 2020 15:58:53 +0100 +Subject: [PATCH 11/17] imsm: remove redundant calls to imsm_get_map + +MAP_0 is gotten and the beginning, there is no need to get it again. 
+ +Signed-off-by: Mariusz Tkaczyk +--- + super-intel.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/super-intel.c b/super-intel.c +index 9562064..95f4eaf 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -8598,7 +8598,6 @@ static void imsm_set_disk(struct active_array *a, int n, int state) + break; + } + end_migration(dev, super, map_state); +- map = get_imsm_map(dev, MAP_0); + map->failed_disk_num = ~0; + super->updates_pending++; + a->last_checkpoint = 0; +@@ -8610,7 +8609,6 @@ static void imsm_set_disk(struct active_array *a, int n, int state) + end_migration(dev, super, map_state); + else + map->map_state = map_state; +- map = get_imsm_map(dev, MAP_0); + map->failed_disk_num = ~0; + super->updates_pending++; + break; +-- +2.26.2 + diff --git a/0106-Monitor-don-t-use-default-modes-when-creating-a-file.patch b/0106-Monitor-don-t-use-default-modes-when-creating-a-file.patch new file mode 100644 index 0000000..686e91e --- /dev/null +++ b/0106-Monitor-don-t-use-default-modes-when-creating-a-file.patch @@ -0,0 +1,55 @@ +From ca4b156b2059ee00a9143313267ee4a098967d76 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Tue, 24 Nov 2020 16:41:01 +0100 +Subject: [PATCH 12/17] Monitor: don't use default modes when creating a file + +Replace fopen() calls by open() with creation mode directly specified. +This fixes the potential security issue. Use octal values instead masks. 
+ +Signed-off-by: Mariusz Tkaczyk +--- + Monitor.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/Monitor.c b/Monitor.c +index 7fd4808..a82e99d 100644 +--- a/Monitor.c ++++ b/Monitor.c +@@ -305,8 +305,11 @@ static int make_daemon(char *pidfile) + if (!pidfile) + printf("%d\n", pid); + else { +- FILE *pid_file; +- pid_file=fopen(pidfile, "w"); ++ FILE *pid_file = NULL; ++ int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, ++ 0644); ++ if (fd >= 0) ++ pid_file = fdopen(fd, "w"); + if (!pid_file) + perror("cannot create pid file"); + else { +@@ -368,13 +371,17 @@ static void write_autorebuild_pid() + { + char path[PATH_MAX]; + int pid; +- FILE *fp; ++ FILE *fp = NULL; + sprintf(path, "%s/autorebuild.pid", MDMON_DIR); + +- if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) { ++ if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) { + pr_err("Can't create autorebuild.pid file\n"); + } else { +- fp = fopen(path, "w"); ++ int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0700); ++ ++ if (fd >= 0) ++ fp = fdopen(fd, "w"); ++ + if (!fp) + pr_err("Can't create autorebuild.pid file\n"); + else { +-- +2.26.2 + diff --git a/0107-imsm-limit-support-to-first-NVMe-namespace.patch b/0107-imsm-limit-support-to-first-NVMe-namespace.patch new file mode 100644 index 0000000..837eac0 --- /dev/null +++ b/0107-imsm-limit-support-to-first-NVMe-namespace.patch @@ -0,0 +1,95 @@ +From a8f3cfd54e45c8aabc4a99cdc92b6b9080b26607 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Wed, 4 Nov 2020 10:01:28 +0100 +Subject: [PATCH 13/17] imsm: limit support to first NVMe namespace + +Due to metadata limitations NVMe multinamespace support has to be removed. 
+ +Signed-off-by: Mariusz Tkaczyk +--- + platform-intel.c | 31 +++++++++++++++++++++++++++++++ + platform-intel.h | 1 + + super-intel.c | 11 ++++++++++- + 3 files changed, 42 insertions(+), 1 deletion(-) + +diff --git a/platform-intel.c b/platform-intel.c +index 04bffc5..f1f6d4c 100644 +--- a/platform-intel.c ++++ b/platform-intel.c +@@ -766,3 +766,34 @@ char *vmd_domain_to_controller(struct sys_dev *hba, char *buf) + closedir(dir); + return NULL; + } ++/* Verify that NVMe drive is supported by IMSM ++ * Returns: ++ * 0 - not supported ++ * 1 - supported ++ */ ++int imsm_is_nvme_supported(int disk_fd, int verbose) ++{ ++ char nsid_path[PATH_MAX]; ++ char buf[PATH_MAX]; ++ struct stat stb; ++ ++ if (disk_fd < 0) ++ return 0; ++ ++ if (fstat(disk_fd, &stb)) ++ return 0; ++ ++ snprintf(nsid_path, PATH_MAX-1, "/sys/dev/block/%d:%d/nsid", ++ major(stb.st_rdev), minor(stb.st_rdev)); ++ ++ if (load_sys(nsid_path, buf, sizeof(buf))) { ++ pr_err("Cannot read %s, rejecting drive\n", nsid_path); ++ return 0; ++ } ++ if (strtoll(buf, NULL, 10) != 1) { ++ if (verbose) ++ pr_err("Only first namespace is supported by IMSM, aborting\n"); ++ return 0; ++ } ++ return 1; ++} +diff --git a/platform-intel.h b/platform-intel.h +index 7cb370e..7371478 100644 +--- a/platform-intel.h ++++ b/platform-intel.h +@@ -251,4 +251,5 @@ const struct orom_entry *get_orom_entry_by_device_id(__u16 dev_id); + const struct imsm_orom *get_orom_by_device_id(__u16 device_id); + struct sys_dev *device_by_id(__u16 device_id); + struct sys_dev *device_by_id_and_path(__u16 device_id, const char *path); ++int imsm_is_nvme_supported(int disk_fd, int verbose); + char *vmd_domain_to_controller(struct sys_dev *hba, char *buf); +diff --git a/super-intel.c b/super-intel.c +index 95f4eaf..715febf 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -2364,7 +2364,9 @@ static int print_nvme_info(struct sys_dev *hba) + continue; + if (path_attached_to_hba(rp, hba->path)) { + fd = open_dev(ent->d_name); +- if (fd < 0) { 
++ if (!imsm_is_nvme_supported(fd, 0)) { ++ if (fd >= 0) ++ close(fd); + free(rp); + continue; + } +@@ -5868,6 +5870,13 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, + snprintf(controller_path, PATH_MAX-1, "%s/device", devpath); + free(devpath); + ++ if (!imsm_is_nvme_supported(dd->fd, 1)) { ++ if (dd->devname) ++ free(dd->devname); ++ free(dd); ++ return 1; ++ } ++ + if (devpath_to_vendor(controller_path) == 0x8086) { + /* + * If Intel's NVMe drive has serial ended with +-- +2.26.2 + diff --git a/0108-mdadm-Unify-forks-behaviour.patch b/0108-mdadm-Unify-forks-behaviour.patch new file mode 100644 index 0000000..6768a12 --- /dev/null +++ b/0108-mdadm-Unify-forks-behaviour.patch @@ -0,0 +1,354 @@ +From ff6bb131a46e1bac84a26e5b2c4bf408c0e56926 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Wed, 4 Nov 2020 10:02:36 +0100 +Subject: [PATCH 14/17] mdadm: Unify forks behaviour + +If mdadm is run by udev or systemd, it gets a pipe as each stream. +Forks in the background may run after an event or service has been +processed, when udev has detached from the pipe. As a result the process +fails quietly if any message is written. +To prevent this, each fork has to close all parent streams. Leave +stderr and stdout open only for debug purposes. +Unify this across all forks. Detect the other open descriptors by +scanning the /proc/self/fd directory. Add a generic method for +managing systemd services. 
+ +Signed-off-by: Mariusz Tkaczyk +--- + Grow.c | 52 +++------------------ + Incremental.c | 1 + + Monitor.c | 5 +- + mdadm.h | 10 ++++ + mdmon.c | 9 +--- + util.c | 124 ++++++++++++++++++++++++++++++++------------------ + 6 files changed, 100 insertions(+), 101 deletions(-) + +diff --git a/Grow.c b/Grow.c +index 57db7d4..6b8321c 100644 +--- a/Grow.c ++++ b/Grow.c +@@ -2982,47 +2982,6 @@ static void catch_term(int sig) + sigterm = 1; + } + +-static int continue_via_systemd(char *devnm) +-{ +- int skipped, i, pid, status; +- char pathbuf[1024]; +- /* In a systemd/udev world, it is best to get systemd to +- * run "mdadm --grow --continue" rather than running in the +- * background. +- */ +- switch(fork()) { +- case 0: +- /* FIXME yuk. CLOSE_EXEC?? */ +- skipped = 0; +- for (i = 3; skipped < 20; i++) +- if (close(i) < 0) +- skipped++; +- else +- skipped = 0; +- +- /* Don't want to see error messages from +- * systemctl. If the service doesn't exist, +- * we fork ourselves. +- */ +- close(2); +- open("/dev/null", O_WRONLY); +- snprintf(pathbuf, sizeof(pathbuf), +- "mdadm-grow-continue@%s.service", devnm); +- status = execl("/usr/bin/systemctl", "systemctl", "restart", +- pathbuf, NULL); +- status = execl("/bin/systemctl", "systemctl", "restart", +- pathbuf, NULL); +- exit(1); +- case -1: /* Just do it ourselves. 
*/ +- break; +- default: /* parent - good */ +- pid = wait(&status); +- if (pid >= 0 && status == 0) +- return 1; +- } +- return 0; +-} +- + static int reshape_array(char *container, int fd, char *devname, + struct supertype *st, struct mdinfo *info, + int force, struct mddev_dev *devlist, +@@ -3401,6 +3360,7 @@ static int reshape_array(char *container, int fd, char *devname, + default: /* parent */ + return 0; + case 0: ++ manage_fork_fds(0); + map_fork(); + break; + } +@@ -3509,8 +3469,9 @@ started: + return 1; + } + +- if (!forked && !check_env("MDADM_NO_SYSTEMCTL")) +- if (continue_via_systemd(container ?: sra->sys_name)) { ++ if (!forked) ++ if (continue_via_systemd(container ?: sra->sys_name, ++ GROW_SERVICE)) { + free(fdlist); + free(offsets); + sysfs_free(sra); +@@ -3704,8 +3665,8 @@ int reshape_container(char *container, char *devname, + */ + ping_monitor(container); + +- if (!forked && !freeze_reshape && !check_env("MDADM_NO_SYSTEMCTL")) +- if (continue_via_systemd(container)) ++ if (!forked && !freeze_reshape) ++ if (continue_via_systemd(container, GROW_SERVICE)) + return 0; + + switch (forked ? 
0 : fork()) { +@@ -3718,6 +3679,7 @@ int reshape_container(char *container, char *devname, + printf("%s: multi-array reshape continues in background\n", Name); + return 0; + case 0: /* child */ ++ manage_fork_fds(0); + map_fork(); + break; + } +diff --git a/Incremental.c b/Incremental.c +index 98dbcd9..ad9ec1c 100644 +--- a/Incremental.c ++++ b/Incremental.c +@@ -1679,6 +1679,7 @@ static void run_udisks(char *arg1, char *arg2) + int pid = fork(); + int status; + if (pid == 0) { ++ manage_fork_fds(1); + execl("/usr/bin/udisks", "udisks", arg1, arg2, NULL); + execl("/bin/udisks", "udisks", arg1, arg2, NULL); + exit(1); +diff --git a/Monitor.c b/Monitor.c +index a82e99d..3f3005b 100644 +--- a/Monitor.c ++++ b/Monitor.c +@@ -323,10 +323,7 @@ static int make_daemon(char *pidfile) + perror("daemonise"); + return 1; + } +- close(0); +- open("/dev/null", O_RDWR); +- dup2(0, 1); +- dup2(0, 2); ++ manage_fork_fds(0); + setsid(); + return -1; + } +diff --git a/mdadm.h b/mdadm.h +index 4961c0f..56b1b19 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -129,6 +129,14 @@ struct dlm_lksb { + #define FAILED_SLOTS_DIR "/run/mdadm/failed-slots" + #endif /* FAILED_SLOTS */ + ++#ifndef MDMON_SERVICE ++#define MDMON_SERVICE "mdmon" ++#endif /* MDMON_SERVICE */ ++ ++#ifndef GROW_SERVICE ++#define GROW_SERVICE "mdadm-grow-continue" ++#endif /* GROW_SERVICE */ ++ + #include "md_u.h" + #include "md_p.h" + #include "bitmap.h" +@@ -1497,6 +1505,8 @@ extern int is_standard(char *dev, int *nump); + extern int same_dev(char *one, char *two); + extern int compare_paths (char* path1,char* path2); + extern void enable_fds(int devices); ++extern void manage_fork_fds(int close_all); ++extern int continue_via_systemd(char *devnm, char *service_name); + + extern int parse_auto(char *str, char *msg, int config); + extern struct mddev_ident *conf_get_ident(char *dev); +diff --git a/mdmon.c b/mdmon.c +index ff985d2..c71e62c 100644 +--- a/mdmon.c ++++ b/mdmon.c +@@ -546,14 +546,7 @@ static int mdmon(char *devnm, 
int must_fork, int takeover) + } + + setsid(); +- close(0); +- open("/dev/null", O_RDWR); +- close(1); +- ignore = dup(0); +-#ifndef DEBUG +- close(2); +- ignore = dup(0); +-#endif ++ manage_fork_fds(0); + + /* This silliness is to stop the compiler complaining + * that we ignore 'ignore' +diff --git a/util.c b/util.c +index 579dd42..5879694 100644 +--- a/util.c ++++ b/util.c +@@ -1915,7 +1915,7 @@ int mdmon_running(char *devnm) + + int start_mdmon(char *devnm) + { +- int i, skipped; ++ int i; + int len; + pid_t pid; + int status; +@@ -1929,7 +1929,10 @@ int start_mdmon(char *devnm) + + if (check_env("MDADM_NO_MDMON")) + return 0; ++ if (continue_via_systemd(devnm, MDMON_SERVICE)) ++ return 0; + ++ /* That failed, try running mdmon directly */ + len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1); + if (len > 0) { + char *sl; +@@ -1943,51 +1946,9 @@ int start_mdmon(char *devnm) + } else + pathbuf[0] = '\0'; + +- /* First try to run systemctl */ +- if (!check_env("MDADM_NO_SYSTEMCTL")) +- switch(fork()) { +- case 0: +- /* FIXME yuk. CLOSE_EXEC?? */ +- skipped = 0; +- for (i = 3; skipped < 20; i++) +- if (close(i) < 0) +- skipped++; +- else +- skipped = 0; +- +- /* Don't want to see error messages from +- * systemctl. If the service doesn't exist, +- * we start mdmon ourselves. +- */ +- close(2); +- open("/dev/null", O_WRONLY); +- snprintf(pathbuf, sizeof(pathbuf), "mdmon@%s.service", +- devnm); +- status = execl("/usr/bin/systemctl", "systemctl", +- "start", +- pathbuf, NULL); +- status = execl("/bin/systemctl", "systemctl", "start", +- pathbuf, NULL); +- exit(1); +- case -1: pr_err("cannot run mdmon. Array remains readonly\n"); +- return -1; +- default: /* parent - good */ +- pid = wait(&status); +- if (pid >= 0 && status == 0) +- return 0; +- } +- +- /* That failed, try running mdmon directly */ + switch(fork()) { + case 0: +- /* FIXME yuk. CLOSE_EXEC?? 
*/ +- skipped = 0; +- for (i = 3; skipped < 20; i++) +- if (close(i) < 0) +- skipped++; +- else +- skipped = 0; +- + manage_fork_fds(1); + for (i = 0; paths[i]; i++) + if (paths[i][0]) { + execl(paths[i], paths[i], +@@ -2192,6 +2153,81 @@ void enable_fds(int devices) + setrlimit(RLIMIT_NOFILE, &lim); + } + ++/* Close all opened descriptors if needed and redirect ++ * streams to /dev/null. ++ * For debug purposes, leave STDOUT and STDERR untouched ++ * (only when built with DEBUG defined). ++ * Returns nothing; failure to open /proc/self/fd is ++ * only reported with pr_err(). ++ */ ++void manage_fork_fds(int close_all) ++{ ++ DIR *dir; ++ struct dirent *dirent; ++ ++ close(0); ++ open("/dev/null", O_RDWR); ++ ++#ifndef DEBUG ++ dup2(0, 1); ++ dup2(0, 2); ++#endif ++ ++ if (close_all == 0) ++ return; ++ ++ dir = opendir("/proc/self/fd"); ++ if (!dir) { ++ pr_err("Cannot open /proc/self/fd directory.\n"); ++ return; ++ } ++ for (dirent = readdir(dir); dirent; dirent = readdir(dir)) { ++ int fd = -1; ++ ++ if ((strcmp(dirent->d_name, ".") == 0) || ++ (strcmp(dirent->d_name, "..")) == 0) ++ continue; ++ ++ fd = strtol(dirent->d_name, NULL, 10); ++ if (fd > 2) ++ close(fd); ++ } ++} ++ ++/* In a systemd/udev world, it is best to get systemd to ++ * run daemon rather than running in the background. ++ * Returns: ++ * 1- if systemd service has been started ++ * 0- otherwise ++ */ ++int continue_via_systemd(char *devnm, char *service_name) ++{ ++ int pid, status; ++ char pathbuf[1024]; ++ ++ /* Simply return that service cannot be started */ ++ if (check_env("MDADM_NO_SYSTEMCTL")) ++ return 0; ++ switch (fork()) { ++ case 0: ++ manage_fork_fds(1); ++ snprintf(pathbuf, sizeof(pathbuf), ++ "%s@%s.service", service_name, devnm); ++ status = execl("/usr/bin/systemctl", "systemctl", "restart", ++ pathbuf, NULL); ++ status = execl("/bin/systemctl", "systemctl", "restart", ++ pathbuf, NULL); ++ exit(1); ++ case -1: /* Just do it ourselves. 
*/ ++ break; ++ default: /* parent - good */ ++ pid = wait(&status); ++ if (pid >= 0 && status == 0) ++ return 1; ++ } ++ return 0; ++} ++ + int in_initrd(void) + { + /* This is based on similar function in systemd. */ +-- +2.26.2 + diff --git a/0109-mdadm-Detail-show-correct-state-for-clustered-array.patch b/0109-mdadm-Detail-show-correct-state-for-clustered-array.patch new file mode 100644 index 0000000..b3d13f1 --- /dev/null +++ b/0109-mdadm-Detail-show-correct-state-for-clustered-array.patch @@ -0,0 +1,197 @@ +From 9c030dadba89b90a4e52b6afe0290076c809684c Mon Sep 17 00:00:00 2001 +From: Zhao Heming +Date: Sat, 24 Oct 2020 17:43:12 +0800 +Subject: [PATCH 15/17] mdadm/Detail: show correct state for clustered array + +After kernel md module commit 480523feae581, in clustered env, +mddev->in_sync always zero, it will make array.state never set +up MD_SB_CLEAN. it causes "mdadm -D /dev/mdX" show state 'active' +all the time. + +bitmap.c: add a new API IsBitmapDirty() to support inquiry bitmap +dirty or clean. + +Signed-off-by: Zhao Heming +--- + Detail.c | 20 ++++++++++++++- + bitmap.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++------- + mdadm.h | 1 + + 3 files changed, 86 insertions(+), 10 deletions(-) + +diff --git a/Detail.c b/Detail.c +index ea86884..f8dea6f 100644 +--- a/Detail.c ++++ b/Detail.c +@@ -498,8 +498,26 @@ int Detail(char *dev, struct context *c) + sra->array_state); + else + arrayst = "clean"; +- } else ++ } else { + arrayst = "active"; ++ if (array.state & (1<prefer); ++ if (!dv) ++ continue; ++ arrayst = IsBitmapDirty(dv) ? 
"active" : "clean"; ++ break; ++ } ++ } ++ } ++ } + + printf(" State : %s%s%s%s%s%s%s \n", + arrayst, st, +diff --git a/bitmap.c b/bitmap.c +index e38cb96..9a7ffe3 100644 +--- a/bitmap.c ++++ b/bitmap.c +@@ -180,13 +180,14 @@ out: + } + + static int +-bitmap_file_open(char *filename, struct supertype **stp, int node_num) ++bitmap_file_open(char *filename, struct supertype **stp, int node_num, int fd) + { +- int fd; + struct stat stb; + struct supertype *st = *stp; + +- fd = open(filename, O_RDONLY|O_DIRECT); ++ /* won't re-open filename when (fd >= 0) */ ++ if (fd < 0) ++ fd = open(filename, O_RDONLY|O_DIRECT); + if (fd < 0) { + pr_err("failed to open bitmap file %s: %s\n", + filename, strerror(errno)); +@@ -249,7 +250,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + int fd, i; + __u32 uuid32[4]; + +- fd = bitmap_file_open(filename, &st, 0); ++ fd = bitmap_file_open(filename, &st, 0, -1); + if (fd < 0) + return rv; + +@@ -263,7 +264,6 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + pr_err("Reporting bitmap that would be used if this array were used\n"); + pr_err("as a member of some other array\n"); + } +- close(fd); + printf(" Filename : %s\n", filename); + printf(" Magic : %08x\n", sb->magic); + if (sb->magic != BITMAP_MAGIC) { +@@ -332,15 +332,13 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + for (i = 0; i < (int)sb->nodes; i++) { + st = NULL; + free(info); +- fd = bitmap_file_open(filename, &st, i); ++ fd = bitmap_file_open(filename, &st, i, fd); + if (fd < 0) { + printf(" Unable to open bitmap file on node: %i\n", i); +- + continue; + } + info = bitmap_fd_read(fd, brief); + if (!info) { +- close(fd); + printf(" Unable to read bitmap on node: %i\n", i); + continue; + } +@@ -359,13 +357,72 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n", + info->total_bits, info->dirty_bits, + 100.0 * info->dirty_bits / 
(info->total_bits?:1)); +- close(fd); + } + } + + free_info: ++ close(fd); ++ free(info); ++ return rv; ++} ++ ++int IsBitmapDirty(char *filename) ++{ ++ /* ++ * Read the bitmap file ++ * It will break reading bitmap action immediately when meeting any error. ++ * ++ * Return: 1(dirty), 0 (clean), -1(error) ++ */ ++ ++ int fd = -1, rv = 0, i; ++ struct supertype *st = NULL; ++ bitmap_info_t *info = NULL; ++ bitmap_super_t *sb = NULL; ++ ++ fd = bitmap_file_open(filename, &st, 0, fd); ++ free(st); ++ if (fd < 0) ++ goto out; ++ ++ info = bitmap_fd_read(fd, 0); ++ if (!info) { ++ close(fd); ++ goto out; ++ } ++ ++ sb = &info->sb; ++ for (i = 0; i < (int)sb->nodes; i++) { ++ st = NULL; ++ free(info); ++ info = NULL; ++ ++ fd = bitmap_file_open(filename, &st, i, fd); ++ free(st); ++ if (fd < 0) ++ goto out; ++ ++ info = bitmap_fd_read(fd, 0); ++ if (!info) { ++ close(fd); ++ goto out; ++ } ++ ++ sb = &info->sb; ++ if (sb->magic != BITMAP_MAGIC) { /* invalid bitmap magic */ ++ free(info); ++ close(fd); ++ goto out; ++ } ++ ++ if (info->dirty_bits) ++ rv = 1; ++ } ++ close(fd); + free(info); + return rv; ++out: ++ return -1; + } + + int CreateBitmap(char *filename, int force, char uuid[16], +diff --git a/mdadm.h b/mdadm.h +index 56b1b19..1ee6c92 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -1455,6 +1455,7 @@ extern int CreateBitmap(char *filename, int force, char uuid[16], + unsigned long long array_size, + int major); + extern int ExamineBitmap(char *filename, int brief, struct supertype *st); ++extern int IsBitmapDirty(char *filename); + extern int Write_rules(char *rule_name); + extern int bitmap_update_uuid(int fd, int *uuid, int swap); + +-- +2.26.2 + diff --git a/0110-Make-target-to-install-binaries-only.patch b/0110-Make-target-to-install-binaries-only.patch new file mode 100644 index 0000000..60a0234 --- /dev/null +++ b/0110-Make-target-to-install-binaries-only.patch @@ -0,0 +1,43 @@ +From b4a5ad4958cb3ad87c3c5fa993e7572c38596d09 Mon Sep 17 00:00:00 2001 +From: Kinga 
Tanska +Date: Thu, 22 Oct 2020 14:22:29 +0200 +Subject: [PATCH 16/17] Make target to install binaries only + +Make install causes installation of binaries, udev and man. +This commit contains new target make install-bin, which +results in installation of binaries only. + +Signed-off-by: Kinga Tanska +--- + Makefile | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/Makefile b/Makefile +index 15d05d1..4cd4c9d 100644 +--- a/Makefile ++++ b/Makefile +@@ -245,9 +245,7 @@ $(MON_OBJS) : $(INCL) mdmon.h + sha1.o : sha1.c sha1.h md5.h + $(CC) $(CFLAGS) -DHAVE_STDINT_H -o sha1.o -c sha1.c + +-install : mdadm mdmon install-man install-udev +- $(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm +- $(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon ++install : install-bin install-man install-udev + + install-static : mdadm.static install-man + $(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm +@@ -297,6 +295,10 @@ install-systemd: systemd/mdmon@.service + done + if [ -f /etc/SuSE-release -o -n "$(SUSE)" ] ;then $(INSTALL) -D -m 755 systemd/SUSE-mdadm_env.sh $(DESTDIR)$(LIB_DIR)/mdadm_env.sh ;fi + ++install-bin: mdadm mdmon ++ $(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm ++ $(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon ++ + uninstall: + rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 $(DESTDIR)$(MAN8DIR)/mdmon.8 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm + +-- +2.26.2 + diff --git a/0111-udev-start-grow-service-automatically.patch b/0111-udev-start-grow-service-automatically.patch new file mode 100644 index 0000000..205af9a --- /dev/null +++ b/0111-udev-start-grow-service-automatically.patch @@ -0,0 +1,37 @@ +From a64f1263a56bd8653267c1a9800daa0bc993a743 Mon Sep 17 00:00:00 2001 +From: Tkaczyk Mariusz +Date: Thu, 15 Oct 2020 10:45:29 +0200 +Subject: [PATCH 17/17] udev: start grow service automatically + +Grow continue via service or fork is started 
during raid assembly. +If raid was assembled in initrd it will never be restarted after +switch root. +Add udev support for starting mdadm-grow-continue service. + +Signed-off-by: Mariusz Tkaczyk +--- + udev-md-raid-arrays.rules | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules +index c8fa8e8..13c9076 100644 +--- a/udev-md-raid-arrays.rules ++++ b/udev-md-raid-arrays.rules +@@ -15,6 +15,7 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state" + ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state" + TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end" + ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end" ++ATTR{md/sync_action}=="reshape", ENV{RESHAPE_ACTIVE}="yes" + LABEL="md_ignore_state" + + IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode" +@@ -38,5 +39,6 @@ ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service" + # Tell systemd to run mdmon for our container, if we need it. + ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c" + ENV{MD_MON_THIS}=="?*", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service" ++ENV{RESHAPE_ACTIVE}=="yes", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service" + + LABEL="md_end" +-- +2.26.2 + diff --git a/mdadm.changes b/mdadm.changes index c4b22ac..8043f2e 100644 --- a/mdadm.changes +++ b/mdadm.changes @@ -1,3 +1,52 @@ +------------------------------------------------------------------- +Sat Dec 5 13:53:38 UTC 2020 - Coly Li +- There are some important fixes merged in mdadm upstream which + should go with jsc#SLE-13700. This is the update from upstream + mdadm including the important fixes we should have. 
+ +- Detail: show correct raid level when the array is inactive + (jsc#SLE-13700) + 0095-Detail-show-correct-raid-level-when-the-array-is-ina.patch +- Don't create bitmap for raid5 with journal disk + (jsc#SLE-13700) + 0096-Don-t-create-bitmap-for-raid5-with-journal-disk.patch +- Monitor: refresh mdstat fd after select (jsc#SLE-13700) + 0097-Monitor-refresh-mdstat-fd-after-select.patch +- Monitor: stop notifing about containers. (jsc#SLE-13700) + 0098-Monitor-stop-notifing-about-containers.patch +- mdmonitor: set small delay once (jsc#SLE-13700) + 0099-mdmonitor-set-small-delay-once.patch +- Check if other Monitor instance running before fork. + (jsc#SLE-13700) + 0100-Check-if-other-Monitor-instance-running-before-fork.patch +- Super1: allow RAID0 layout setting to be removed. + (jsc#SLE-13700) + 0101-Super1-allow-RAID0-layout-setting-to-be-removed.patch +- Detail: fix segfault during IMSM raid creation + (jsc#SLE-13700) + 0102-Detail-fix-segfault-during-IMSM-raid-creation.patch +- Create.c: close mdfd and generate uevent (jsc#SLE-13700) + 0103-Create.c-close-mdfd-and-generate-uevent.patch +- imsm: update num_data_stripes according to dev_size + (jsc#SLE-13700) + 0104-imsm-update-num_data_stripes-according-to-dev_size.patch +- imsm: remove redundant calls to imsm_get_map (jsc#SLE-13700) + 0105-imsm-remove-redundant-calls-to-imsm_get_map.patch +- Monitor: don't use default modes when creating a file + (jsc#SLE-13700) + 0106-Monitor-don-t-use-default-modes-when-creating-a-file.patch +- imsm: limit support to first NVMe namespace (jsc#SLE-13700) + 0107-imsm-limit-support-to-first-NVMe-namespace.patch +- mdadm: Unify forks behaviour (jsc#SLE-13700) + 0108-mdadm-Unify-forks-behaviour.patch +- mdadm/Detail: show correct state for clustered array + (jsc#SLE-13700) + 0109-mdadm-Detail-show-correct-state-for-clustered-array.patch +- Make target to install binaries only (jsc#SLE-13700) + 0110-Make-target-to-install-binaries-only.patch +- udev: start grow service 
automatically (jsc#SLE-13700) + 0111-udev-start-grow-service-automatically.patch + ------------------------------------------------------------------- Sun Oct 11 16:14:50 UTC 2020 - Coly Li diff --git a/mdadm.spec b/mdadm.spec index ce8e827..b6f0b0b 100644 --- a/mdadm.spec +++ b/mdadm.spec @@ -131,6 +131,23 @@ Patch91: 0091-mdadm-Grow-prevent-md-s-fd-from-being-occupied-durin.patch Patch92: 0092-Specify-nodes-number-when-updating-cluster-nodes.patch Patch93: 0093-mdadm-md.4-update-path-to-in-kernel-tree-documentati.patch Patch94: 0094-manual-update-examine-badblocks.patch +Patch95: 0095-Detail-show-correct-raid-level-when-the-array-is-ina.patch +Patch96: 0096-Don-t-create-bitmap-for-raid5-with-journal-disk.patch +Patch97: 0097-Monitor-refresh-mdstat-fd-after-select.patch +Patch98: 0098-Monitor-stop-notifing-about-containers.patch +Patch99: 0099-mdmonitor-set-small-delay-once.patch +Patch100: 0100-Check-if-other-Monitor-instance-running-before-fork.patch +Patch101: 0101-Super1-allow-RAID0-layout-setting-to-be-removed.patch +Patch102: 0102-Detail-fix-segfault-during-IMSM-raid-creation.patch +Patch103: 0103-Create.c-close-mdfd-and-generate-uevent.patch +Patch104: 0104-imsm-update-num_data_stripes-according-to-dev_size.patch +Patch105: 0105-imsm-remove-redundant-calls-to-imsm_get_map.patch +Patch106: 0106-Monitor-don-t-use-default-modes-when-creating-a-file.patch +Patch107: 0107-imsm-limit-support-to-first-NVMe-namespace.patch +Patch108: 0108-mdadm-Unify-forks-behaviour.patch +Patch109: 0109-mdadm-Detail-show-correct-state-for-clustered-array.patch +Patch110: 0110-Make-target-to-install-binaries-only.patch +Patch111: 0111-udev-start-grow-service-automatically.patch Patch1001: 1001-display-timeout-status.patch Patch1002: 1002-OnCalendar-format-fix-of-mdcheck_start-timer.patch Patch1003: 1003-mdadm-treat-the-Dell-softraid-array-as-local-array.patch @@ -232,6 +249,23 @@ mdadm is a program that can be used to control Linux md devices. 
%patch92 -p1 %patch93 -p1 %patch94 -p1 +%patch95 -p1 +%patch96 -p1 +%patch97 -p1 +%patch98 -p1 +%patch99 -p1 +%patch100 -p1 +%patch101 -p1 +%patch102 -p1 +%patch103 -p1 +%patch104 -p1 +%patch105 -p1 +%patch106 -p1 +%patch107 -p1 +%patch108 -p1 +%patch109 -p1 +%patch110 -p1 +%patch111 -p1 %patch1001 -p1 %patch1002 -p1 %patch1003 -p1