- New upstream release 4.0

Multiple bugfixes and various enhancements
  including IMSM support for bad blocks and 4K block devices.
  (FATE#321941)
- 0001-Generic-support-for-consistency-policy-and-PPL.patch
- 0002-Detail-show-consistency-policy.patch
- 0003-imsm-PPL-support.patch
- 0004-super1-PPL-support.patch
- 0005-Add-ppl-and-no-ppl-options-for-update.patch
- 0006-Grow-support-consistency-policy-change.patch
   Add support for Partial Parity Logs
   (FATE#321941)
- 0007-udev-md-raid-assembly.rules-Skip-non-ready-devices.patch
   (bsc#956236)
- 0008-Retry-HOT_REMOVE_DISK-a-few-times.patch
   (bsc#808647)
- 0009-Introduce-sys_hot_remove_disk.patch
   (bsc#974154)
- 0010-Add-force-flag-to-hot_remove_disk.patch
   (bsc#808647)
- 0011-Detail-handle-non-existent-arrays-better.patch
   (bsc#966773)

OBS-URL: https://build.opensuse.org/package/show/Base:System/mdadm?expand=0&rev=147
This commit is contained in:
Neil Brown 2017-03-27 03:26:19 +00:00 committed by Git OBS Bridge
parent 8b210c1683
commit 35f025d42c
16 changed files with 2975 additions and 38 deletions

View File

@ -0,0 +1,573 @@
From d179ac821d77ded7a63a0b734e290a42eeeee4b2 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 16 Mar 2017 22:09:43 +0100
Subject: [PATCH] Generic support for --consistency-policy and PPL
Add a new parameter to mdadm: --consistency-policy=. It determines how
the array maintains consistency in case of unexpected shutdown. This
maps to the md sysfs attribute 'consistency_policy'. It can be used to
create a raid5 array using PPL. Add the necessary plumbing to pass this
option to metadata handlers. The write journal and bitmap
functionalities are treated as different policies, which are implicitly
selected when using --write-journal or --bitmap options.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
Create.c | 18 ++++++++++++++----
Kill.c | 2 +-
ReadMe.c | 7 ++++---
maps.c | 10 ++++++++++
mdadm.8.in | 40 +++++++++++++++++++++++++++++++++++++---
mdadm.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
mdadm.h | 21 ++++++++++++++++++---
super-ddf.c | 6 +++---
super-gpt.c | 2 +-
super-intel.c | 16 ++++++++--------
super-mbr.c | 2 +-
super0.c | 8 ++++----
super1.c | 6 +++---
sysfs.c | 11 +++++++++++
14 files changed, 167 insertions(+), 37 deletions(-)
--- a/Create.c
+++ b/Create.c
@@ -259,7 +259,8 @@ int Create(struct supertype *st, char *m
if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
data_offset, NULL,
- &newsize, c->verbose>=0))
+ &newsize, s->consistency_policy,
+ c->verbose>=0))
return 1;
if (s->chunk && s->chunk != UnSet) {
@@ -358,7 +359,8 @@ int Create(struct supertype *st, char *m
st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
dv->data_offset, dname,
- &freesize, c->verbose > 0)) {
+ &freesize, s->consistency_policy,
+ c->verbose > 0)) {
case -1: /* Not valid, message printed, and not
* worth checking any further */
exit(2);
@@ -395,6 +397,7 @@ int Create(struct supertype *st, char *m
&s->chunk, s->size*2,
dv->data_offset,
dname, &freesize,
+ s->consistency_policy,
c->verbose >= 0)) {
pr_err("%s is not suitable for this array.\n",
@@ -501,7 +504,8 @@ int Create(struct supertype *st, char *m
s->raiddisks,
&s->chunk, minsize*2,
data_offset,
- NULL, NULL, 0)) {
+ NULL, NULL,
+ s->consistency_policy, 0)) {
pr_err("devices too large for RAID level %d\n", s->level);
return 1;
}
@@ -528,6 +532,12 @@ int Create(struct supertype *st, char *m
if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
s->bitmap_file = NULL;
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ !st->ss->write_init_ppl) {
+ pr_err("%s metadata does not support PPL\n", st->ss->name);
+ return 1;
+ }
+
if (!have_container && s->level > 0 && ((maxsize-s->size)*100 > maxsize)) {
if (c->runstop != 1 || c->verbose >= 0)
pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
@@ -720,7 +730,7 @@ int Create(struct supertype *st, char *m
name += 2;
}
}
- if (!st->ss->init_super(st, &info.array, s->size, name, c->homehost, uuid,
+ if (!st->ss->init_super(st, &info.array, s, name, c->homehost, uuid,
data_offset))
goto abort_locked;
--- a/Kill.c
+++ b/Kill.c
@@ -63,7 +63,7 @@ int Kill(char *dev, struct supertype *st
rv = st->ss->load_super(st, fd, dev);
if (rv == 0 || (force && rv >= 2)) {
st->ss->free_super(st);
- st->ss->init_super(st, NULL, 0, "", NULL, NULL,
+ st->ss->init_super(st, NULL, NULL, "", NULL, NULL,
INVALID_SECTORS);
if (st->ss->store_super(st, fd)) {
if (verbose >= 0)
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -78,11 +78,11 @@ char Version[] = "mdadm - v" VERSION " -
* found, it is started.
*/
-char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
+char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
char short_bitmap_options[]=
- "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
char short_bitmap_auto_options[]=
- "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:";
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:k:";
struct option long_options[] = {
{"manage", 0, 0, ManageOpt},
@@ -148,6 +148,7 @@ struct option long_options[] = {
{"nodes",1, 0, Nodes}, /* also for --assemble */
{"home-cluster",1, 0, ClusterName},
{"write-journal",1, 0, WriteJournal},
+ {"consistency-policy",1, 0, 'k'},
/* For assemble */
{"uuid", 1, 0, 'u'},
--- a/maps.c
+++ b/maps.c
@@ -129,6 +129,16 @@ mapping_t faultylayout[] = {
{ NULL, 0}
};
+mapping_t consistency_policies[] = {
+ { "unknown", CONSISTENCY_POLICY_UNKNOWN},
+ { "none", CONSISTENCY_POLICY_NONE},
+ { "resync", CONSISTENCY_POLICY_RESYNC},
+ { "bitmap", CONSISTENCY_POLICY_BITMAP},
+ { "journal", CONSISTENCY_POLICY_JOURNAL},
+ { "ppl", CONSISTENCY_POLICY_PPL},
+ { NULL, 0}
+};
+
char *map_num(mapping_t *map, int num)
{
while (map->name) {
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -724,7 +724,9 @@ When creating an array on devices which
.I mdadm
automatically adds an internal bitmap as it will usually be
beneficial. This can be suppressed with
-.B "\-\-bitmap=none".
+.B "\-\-bitmap=none"
+or by selecting a different consistency policy with
+.BR \-\-consistency\-policy .
.TP
.BR \-\-bitmap\-chunk=
@@ -1015,6 +1017,36 @@ simultaneously. If not specified, this d
Specify journal device for the RAID-4/5/6 array. The journal device
should be a SSD with reasonable lifetime.
+.TP
+.BR \-k ", " \-\-consistency\-policy=
+Specify how the array maintains consistency in case of unexpected shutdown.
+Only relevant for RAID levels with redundancy.
+Currently supported options are:
+.RS
+
+.TP
+.B resync
+A full resync is performed and all redundancy is regenerated when the array is
+started after an unclean shutdown.
+
+.TP
+.B bitmap
+Resync assisted by a write-intent bitmap. Implicitly selected when using
+.BR \-\-bitmap .
+
+.TP
+.B journal
+For RAID levels 4/5/6, a journal device is used to log transactions and replay
+them after an unclean shutdown. Implicitly selected when using
+.BR \-\-write\-journal .
+
+.TP
+.B ppl
+For RAID5 only, a Partial Parity Log is used to close the write hole and
+eliminate resync. The PPL is stored in the metadata region of the RAID member
+drives; no additional journal drive is needed.
+.RE
+
.SH For assemble:
@@ -2144,8 +2176,10 @@ in the array exceed 100G is size, an int
will automatically be added unless some other option is explicitly
requested with the
.B \-\-bitmap
-option. In any case space for a bitmap will be reserved so that one
-can be added layer with
+option or a different consistency policy is selected with the
+.B \-\-consistency\-policy
+option. In any case space for a bitmap will be reserved so that one
+can be added later with
.BR "\-\-grow \-\-bitmap=internal" .
If the metadata type supports it (currently only 1.x metadata), space
--- a/mdadm.c
+++ b/mdadm.c
@@ -78,6 +78,7 @@ int main(int argc, char *argv[])
.level = UnSet,
.layout = UnSet,
.bitmap_chunk = UnSet,
+ .consistency_policy = UnSet,
};
char sys_hostname[256];
@@ -1209,6 +1210,16 @@ int main(int argc, char *argv[])
s.journaldisks = 1;
continue;
+ case O(CREATE, 'k'):
+ s.consistency_policy = map_name(consistency_policies,
+ optarg);
+ if (s.consistency_policy == UnSet ||
+ s.consistency_policy < CONSISTENCY_POLICY_RESYNC) {
+ pr_err("Invalid consistency policy: %s\n",
+ optarg);
+ exit(2);
+ }
+ continue;
}
/* We have now processed all the valid options. Anything else is
* an error
@@ -1236,9 +1247,47 @@ int main(int argc, char *argv[])
exit(0);
}
- if (s.journaldisks && (s.level < 4 || s.level > 6)) {
- pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
- exit(2);
+ if (s.journaldisks) {
+ if (s.level < 4 || s.level > 6) {
+ pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
+ exit(2);
+ }
+ if (s.consistency_policy != UnSet &&
+ s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
+ pr_err("--write-journal is not supported with consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ }
+ }
+
+ if (mode == CREATE && s.consistency_policy != UnSet) {
+ if (s.level <= 0) {
+ pr_err("--consistency-policy not meaningful with level %s.\n",
+ map_num(pers, s.level));
+ exit(2);
+ } else if (s.consistency_policy == CONSISTENCY_POLICY_JOURNAL &&
+ !s.journaldisks) {
+ pr_err("--write-journal is required for consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ } else if (s.consistency_policy == CONSISTENCY_POLICY_PPL &&
+ s.level != 5) {
+ pr_err("PPL consistency policy is only supported for RAID level 5.\n");
+ exit(2);
+ } else if (s.consistency_policy == CONSISTENCY_POLICY_BITMAP &&
+ (!s.bitmap_file ||
+ strcmp(s.bitmap_file, "none") == 0)) {
+ pr_err("--bitmap is required for consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ } else if (s.bitmap_file &&
+ strcmp(s.bitmap_file, "none") != 0 &&
+ s.consistency_policy != CONSISTENCY_POLICY_BITMAP &&
+ s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
+ pr_err("--bitmap is not compatible with consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ }
}
if (!mode && devs_found) {
--- a/mdadm.h
+++ b/mdadm.h
@@ -279,6 +279,15 @@ struct mdinfo {
int journal_device_required;
int journal_clean;
+ enum {
+ CONSISTENCY_POLICY_UNKNOWN,
+ CONSISTENCY_POLICY_NONE,
+ CONSISTENCY_POLICY_RESYNC,
+ CONSISTENCY_POLICY_BITMAP,
+ CONSISTENCY_POLICY_JOURNAL,
+ CONSISTENCY_POLICY_PPL,
+ } consistency_policy;
+
/* During reshape we can sometimes change the data_offset to avoid
* over-writing still-valid data. We need to know if there is space.
* So getinfo_super will fill in space_before and space_after in sectors.
@@ -426,6 +435,7 @@ enum special_options {
ClusterName,
ClusterConfirm,
WriteJournal,
+ ConsistencyPolicy,
};
enum prefix_standard {
@@ -527,6 +537,7 @@ struct shape {
int assume_clean;
int write_behind;
unsigned long long size;
+ int consistency_policy;
};
/* List of device names - wildcards expanded */
@@ -618,6 +629,7 @@ enum sysfs_read_flags {
GET_STATE = (1 << 23),
GET_ERROR = (1 << 24),
GET_ARRAY_STATE = (1 << 25),
+ GET_CONSISTENCY_POLICY = (1 << 26),
};
/* If fd >= 0, get the array it is open on,
@@ -701,7 +713,7 @@ extern int restore_stripes(int *dest, un
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
-extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
+extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[], consistency_policies[];
extern char *map_dev_preferred(int major, int minor, int create,
char *prefer);
@@ -863,7 +875,7 @@ extern struct superswitch {
* metadata.
*/
int (*init_super)(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name,
+ struct shape *s, char *name,
char *homehost, int *uuid,
unsigned long long data_offset);
@@ -961,7 +973,7 @@ extern struct superswitch {
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose);
+ int consistency_policy, int verbose);
/* Return a linked list of 'mdinfo' structures for all arrays
* in the container. For non-containers, it is like
@@ -1059,6 +1071,9 @@ extern struct superswitch {
/* validate container after assemble */
int (*validate_container)(struct mdinfo *info);
+ /* write initial empty PPL on device */
+ int (*write_init_ppl)(struct supertype *st, struct mdinfo *info, int fd);
+
/* records new bad block in metadata */
int (*record_bad_block)(struct active_array *a, int n,
unsigned long long sector, int length);
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -2290,7 +2290,7 @@ static unsigned int find_vde_by_guid(con
static int init_super_ddf(struct supertype *st,
mdu_array_info_t *info,
- unsigned long long size, char *name, char *homehost,
+ struct shape *s, char *name, char *homehost,
int *uuid, unsigned long long data_offset)
{
/* This is primarily called by Create when creating a new array.
@@ -2328,7 +2328,7 @@ static int init_super_ddf(struct superty
struct virtual_disk *vd;
if (st->sb)
- return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
+ return init_super_ddf_bvd(st, info, s->size, name, homehost, uuid,
data_offset);
if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
@@ -3347,7 +3347,7 @@ static int validate_geometry_ddf(struct
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *dev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
int fd;
struct mdinfo *sra;
--- a/super-gpt.c
+++ b/super-gpt.c
@@ -205,7 +205,7 @@ static int validate_geometry(struct supe
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
pr_err("gpt metadata cannot be used this way\n");
return 0;
--- a/super-intel.c
+++ b/super-intel.c
@@ -5154,7 +5154,7 @@ static int check_name(struct intel_super
}
static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name,
+ struct shape *s, char *name,
char *homehost, int *uuid,
long long data_offset)
{
@@ -5249,7 +5249,7 @@ static int init_super_imsm_volume(struct
strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
array_blocks = calc_array_size(info->level, info->raid_disks,
info->layout, info->chunk_size,
- size * 2);
+ s->size * 2);
/* round array size down to closest MB */
array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
@@ -5263,7 +5263,7 @@ static int init_super_imsm_volume(struct
vol->curr_migr_unit = 0;
map = get_imsm_map(dev, MAP_0);
set_pba_of_lba0(map, super->create_offset);
- set_blocks_per_member(map, info_to_blocks_per_member(info, size));
+ set_blocks_per_member(map, info_to_blocks_per_member(info, s->size));
map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
map->failed_disk_num = ~0;
if (info->level > 0)
@@ -5291,7 +5291,7 @@ static int init_super_imsm_volume(struct
map->num_domains = 1;
/* info->size is only int so use the 'size' parameter instead */
- num_data_stripes = (size * 2) / info_to_blocks_per_strip(info);
+ num_data_stripes = (s->size * 2) / info_to_blocks_per_strip(info);
num_data_stripes /= map->num_domains;
set_num_data_stripes(map, num_data_stripes);
@@ -5313,7 +5313,7 @@ static int init_super_imsm_volume(struct
}
static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name,
+ struct shape *s, char *name,
char *homehost, int *uuid,
unsigned long long data_offset)
{
@@ -5336,7 +5336,7 @@ static int init_super_imsm(struct supert
}
if (st->sb)
- return init_super_imsm_volume(st, info, size, name, homehost, uuid,
+ return init_super_imsm_volume(st, info, s, name, homehost, uuid,
data_offset);
if (info)
@@ -6913,7 +6913,7 @@ static int validate_geometry_imsm(struct
int raiddisks, int *chunk, unsigned long long size,
unsigned long long data_offset,
char *dev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
int fd, cfd;
struct mdinfo *sra;
@@ -10950,7 +10950,7 @@ enum imsm_reshape_type imsm_analyze_chan
geo->raid_disks + devNumChange,
&chunk,
geo->size, INVALID_SECTORS,
- 0, 0, 1))
+ 0, 0, info.consistency_policy, 1))
change = -1;
if (check_devs) {
--- a/super-mbr.c
+++ b/super-mbr.c
@@ -193,7 +193,7 @@ static int validate_geometry(struct supe
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
pr_err("mbr metadata cannot be used this way\n");
return 0;
--- a/super0.c
+++ b/super0.c
@@ -725,7 +725,7 @@ static int update_super0(struct supertyp
* We use the first 8 bytes (64bits) of the sha1 of the host name
*/
static int init_super0(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *ignored_name,
+ struct shape *s, char *ignored_name,
char *homehost, int *uuid,
unsigned long long data_offset)
{
@@ -764,8 +764,8 @@ static int init_super0(struct supertype
sb->gvalid_words = 0; /* ignored */
sb->ctime = time(0);
sb->level = info->level;
- sb->size = size;
- if (size != (unsigned long long)sb->size)
+ sb->size = s->size;
+ if (s->size != (unsigned long long)sb->size)
return 0;
sb->nr_disks = info->nr_disks;
sb->raid_disks = info->raid_disks;
@@ -1267,7 +1267,7 @@ static int validate_geometry0(struct sup
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
unsigned long long ldsize;
int fd;
--- a/super1.c
+++ b/super1.c
@@ -1397,7 +1397,7 @@ static int update_super1(struct supertyp
}
static int init_super1(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name, char *homehost,
+ struct shape *s, char *name, char *homehost,
int *uuid, unsigned long long data_offset)
{
struct mdp_superblock_1 *sb;
@@ -1450,7 +1450,7 @@ static int init_super1(struct supertype
sb->ctime = __cpu_to_le64((unsigned long long)time(0));
sb->level = __cpu_to_le32(info->level);
sb->layout = __cpu_to_le32(info->layout);
- sb->size = __cpu_to_le64(size*2ULL);
+ sb->size = __cpu_to_le64(s->size*2ULL);
sb->chunksize = __cpu_to_le32(info->chunk_size>>9);
sb->raid_disks = __cpu_to_le32(info->raid_disks);
@@ -2492,7 +2492,7 @@ static int validate_geometry1(struct sup
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
unsigned long long ldsize, devsize;
int bmspace;
--- a/sysfs.c
+++ b/sysfs.c
@@ -242,6 +242,17 @@ struct mdinfo *sysfs_read(int fd, char *
} else
sra->sysfs_array_state[0] = 0;
+ if (options & GET_CONSISTENCY_POLICY) {
+ strcpy(base, "consistency_policy");
+ if (load_sys(fname, buf, sizeof(buf))) {
+ sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
+ } else {
+ sra->consistency_policy = map_name(consistency_policies, buf);
+ if (sra->consistency_policy == UnSet)
+ sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
+ }
+ }
+
if (! (options & GET_DEVS))
return sra;

View File

@ -1,31 +0,0 @@
From 1dcee1c9cbcf9592275914706b76b1931490092c Mon Sep 17 00:00:00 2001
From: Jes Sorensen <Jes.Sorensen@redhat.com>
Date: Wed, 6 Apr 2016 16:13:59 -0400
Subject: [PATCH] super1: Clear memory allocated for superblock + bitmap before
use
load_super1() did not clear memory allocated for the superblock +
bitmap. This causes issues if the superblock does not contain a bitmap
as later checks of bitmap features would rely on the bits being
cleared.
This bug has been around for a long time, but was only exposed in
mdadm-3.4 with the introduction of the clustering code.
Reported-by: Jan Stodola <jstodola@redhat.com>
Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
---
super1.c | 2 ++
1 file changed, 2 insertions(+)
--- a/super1.c
+++ b/super1.c
@@ -2016,6 +2016,8 @@ static int load_super1(struct supertype
return 1;
}
+ memset(super, 0, SUPER1_SIZE);
+
if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) {
if (devname)
pr_err("Cannot read superblock on %s\n",

View File

@ -0,0 +1,330 @@
From 9a224a0f09175cde8ccecae445568bdb4512e2a8 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 16 Mar 2017 22:09:44 +0100
Subject: [PATCH] Detail: show consistency policy
Show the currently enabled consistency policy in the output from
--detail. Add 3 spaces to all existing items in Detail output to align
with "Consistency Policy : ".
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
Detail.c | 90 +++++++++++++++++++++++++++++++++++------------------------
super-ddf.c | 6 ++--
super-intel.c | 2 +-
super0.c | 4 +--
super1.c | 9 +++---
5 files changed, 65 insertions(+), 46 deletions(-)
diff --git a/Detail.c b/Detail.c
index 509b0d418768..3067fb6965b7 100644
--- a/Detail.c
+++ b/Detail.c
@@ -394,24 +394,25 @@ int Detail(char *dev, struct context *c)
printf("%s:\n", dev);
if (container)
- printf(" Container : %s, member %s\n", container, member);
+ printf(" Container : %s, member %s\n", container,
+ member);
else {
if (sra && sra->array.major_version < 0)
- printf(" Version : %s\n", sra->text_version);
+ printf(" Version : %s\n", sra->text_version);
else
- printf(" Version : %d.%d\n",
+ printf(" Version : %d.%d\n",
array.major_version, array.minor_version);
}
atime = array.ctime;
if (atime)
- printf(" Creation Time : %.24s\n", ctime(&atime));
+ printf(" Creation Time : %.24s\n", ctime(&atime));
if (array.raid_disks == 0 && external)
str = "container";
if (str)
- printf(" Raid Level : %s\n", str);
+ printf(" Raid Level : %s\n", str);
if (larray_size)
- printf(" Array Size : %llu%s\n", (larray_size>>10),
+ printf(" Array Size : %llu%s\n", (larray_size>>10),
human_size(larray_size));
if (array.level >= 1) {
if (sra)
@@ -420,38 +421,38 @@ int Detail(char *dev, struct context *c)
(larray_size >= 0xFFFFFFFFULL|| array.size == 0)) {
unsigned long long dsize = get_component_size(fd);
if (dsize > 0)
- printf(" Used Dev Size : %llu%s\n",
+ printf(" Used Dev Size : %llu%s\n",
dsize/2,
human_size((long long)dsize<<9));
else
- printf(" Used Dev Size : unknown\n");
+ printf(" Used Dev Size : unknown\n");
} else
- printf(" Used Dev Size : %lu%s\n",
+ printf(" Used Dev Size : %lu%s\n",
(unsigned long)array.size,
human_size((unsigned long long)array.size<<10));
}
if (array.raid_disks)
- printf(" Raid Devices : %d\n", array.raid_disks);
- printf(" Total Devices : %d\n", array.nr_disks);
+ printf(" Raid Devices : %d\n", array.raid_disks);
+ printf(" Total Devices : %d\n", array.nr_disks);
if (!container &&
((sra == NULL && array.major_version == 0) ||
(sra && sra->array.major_version == 0)))
- printf("Preferred Minor : %d\n", array.md_minor);
+ printf(" Preferred Minor : %d\n", array.md_minor);
if (sra == NULL || sra->array.major_version >= 0)
- printf(" Persistence : Superblock is %spersistent\n",
+ printf(" Persistence : Superblock is %spersistent\n",
array.not_persistent?"not ":"");
printf("\n");
/* Only try GET_BITMAP_FILE for 0.90.01 and later */
if (vers >= 9001 &&
ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 &&
bmf.pathname[0]) {
- printf(" Intent Bitmap : %s\n", bmf.pathname);
+ printf(" Intent Bitmap : %s\n", bmf.pathname);
printf("\n");
} else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
- printf(" Intent Bitmap : Internal\n\n");
+ printf(" Intent Bitmap : Internal\n\n");
atime = array.utime;
if (atime)
- printf(" Update Time : %.24s\n", ctime(&atime));
+ printf(" Update Time : %.24s\n", ctime(&atime));
if (array.raid_disks) {
static char *sync_action[] = {
", recovering", ", resyncing",
@@ -465,7 +466,7 @@ int Detail(char *dev, struct context *c)
else
st = ", degraded";
- printf(" State : %s%s%s%s%s%s \n",
+ printf(" State : %s%s%s%s%s%s \n",
(array.state&(1<<MD_SB_CLEAN))?"clean":"active", st,
(!e || (e->percent < 0 && e->percent != RESYNC_PENDING &&
e->percent != RESYNC_DELAYED)) ? "" : sync_action[e->resync],
@@ -473,27 +474,27 @@ int Detail(char *dev, struct context *c)
(e && e->percent == RESYNC_DELAYED) ? " (DELAYED)": "",
(e && e->percent == RESYNC_PENDING) ? " (PENDING)": "");
} else if (inactive) {
- printf(" State : inactive\n");
+ printf(" State : inactive\n");
}
if (array.raid_disks)
- printf(" Active Devices : %d\n", array.active_disks);
+ printf(" Active Devices : %d\n", array.active_disks);
if (array.working_disks > 0)
- printf("Working Devices : %d\n", array.working_disks);
+ printf(" Working Devices : %d\n", array.working_disks);
if (array.raid_disks) {
- printf(" Failed Devices : %d\n", array.failed_disks);
- printf(" Spare Devices : %d\n", array.spare_disks);
+ printf(" Failed Devices : %d\n", array.failed_disks);
+ printf(" Spare Devices : %d\n", array.spare_disks);
}
printf("\n");
if (array.level == 5) {
str = map_num(r5layout, array.layout);
- printf(" Layout : %s\n", str?str:"-unknown-");
+ printf(" Layout : %s\n", str?str:"-unknown-");
}
if (array.level == 6) {
str = map_num(r6layout, array.layout);
- printf(" Layout : %s\n", str?str:"-unknown-");
+ printf(" Layout : %s\n", str?str:"-unknown-");
}
if (array.level == 10) {
- printf(" Layout :");
+ printf(" Layout :");
print_r10_layout(array.layout);
printf("\n");
}
@@ -504,20 +505,35 @@ int Detail(char *dev, struct context *c)
case 10:
case 6:
if (array.chunk_size)
- printf(" Chunk Size : %dK\n\n",
+ printf(" Chunk Size : %dK\n\n",
array.chunk_size/1024);
break;
case -1:
- printf(" Rounding : %dK\n\n", array.chunk_size/1024);
+ printf(" Rounding : %dK\n\n",
+ array.chunk_size/1024);
break;
default: break;
}
+ if (array.raid_disks) {
+ struct mdinfo *mdi = sysfs_read(fd, NULL,
+ GET_CONSISTENCY_POLICY);
+ if (mdi) {
+ char *policy = map_num(consistency_policies,
+ mdi->consistency_policy);
+ sysfs_free(mdi);
+ if (policy)
+ printf("Consistency Policy : %s\n\n",
+ policy);
+ }
+ }
+
if (e && e->percent >= 0) {
static char *sync_action[] = {
"Rebuild", "Resync",
"Reshape", "Check"};
- printf(" %7s Status : %d%% complete\n", sync_action[e->resync], e->percent);
+ printf(" %7s Status : %d%% complete\n",
+ sync_action[e->resync], e->percent);
is_rebuilding = 1;
}
free_mdstat(ms);
@@ -525,39 +541,41 @@ int Detail(char *dev, struct context *c)
if ((st && st->sb) && (info && info->reshape_active)) {
#if 0
This is pretty boring
- printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info->reshape_progress<<9,
+ printf(" Reshape pos'n : %llu%s\n",
+ (unsigned long long) info->reshape_progress<<9,
human_size((unsigned long long)info->reshape_progress<<9));
#endif
if (info->delta_disks != 0)
- printf(" Delta Devices : %d, (%d->%d)\n",
+ printf(" Delta Devices : %d, (%d->%d)\n",
info->delta_disks,
array.raid_disks - info->delta_disks,
array.raid_disks);
if (info->new_level != array.level) {
str = map_num(pers, info->new_level);
- printf(" New Level : %s\n", str?str:"-unknown-");
+ printf(" New Level : %s\n", str?str:"-unknown-");
}
if (info->new_level != array.level ||
info->new_layout != array.layout) {
if (info->new_level == 5) {
str = map_num(r5layout, info->new_layout);
- printf(" New Layout : %s\n",
+ printf(" New Layout : %s\n",
str?str:"-unknown-");
}
if (info->new_level == 6) {
str = map_num(r6layout, info->new_layout);
- printf(" New Layout : %s\n",
+ printf(" New Layout : %s\n",
str?str:"-unknown-");
}
if (info->new_level == 10) {
- printf(" New Layout : near=%d, %s=%d\n",
+ printf(" New Layout : near=%d, %s=%d\n",
info->new_layout&255,
(info->new_layout&0x10000)?"offset":"far",
(info->new_layout>>8)&255);
}
}
if (info->new_chunk != array.chunk_size)
- printf(" New Chunksize : %dK\n", info->new_chunk/1024);
+ printf(" New Chunksize : %dK\n",
+ info->new_chunk/1024);
printf("\n");
} else if (e && e->percent >= 0)
printf("\n");
@@ -572,7 +590,7 @@ This is pretty boring
DIR *dir = opendir("/sys/block");
struct dirent *de;
- printf(" Member Arrays :");
+ printf(" Member Arrays :");
while (dir && (de = readdir(dir)) != NULL) {
char path[200];
diff --git a/super-ddf.c b/super-ddf.c
index cdd16a47aab3..c6037c1cca40 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1742,10 +1742,10 @@ static void detail_super_ddf(struct supertype *st, char *homehost)
struct ddf_super *sb = st->sb;
int cnt = be16_to_cpu(sb->virt->populated_vdes);
- printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
+ printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
printf("\n");
- printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
- printf(" Virtual Disks : %d\n", cnt);
+ printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
+ printf(" Virtual Disks : %d\n", cnt);
printf("\n");
}
#endif
diff --git a/super-intel.c b/super-intel.c
index 6d16a1919444..120ce77c0d3d 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1986,7 +1986,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost)
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
- printf("\n UUID : %s\n", nbuf + 5);
+ printf("\n UUID : %s\n", nbuf + 5);
}
static void brief_detail_super_imsm(struct supertype *st)
diff --git a/super0.c b/super0.c
index 0fec96b7cd81..b2cdaec5fc90 100644
--- a/super0.c
+++ b/super0.c
@@ -353,7 +353,7 @@ err:
static void detail_super0(struct supertype *st, char *homehost)
{
mdp_super_t *sb = st->sb;
- printf(" UUID : ");
+ printf(" UUID : ");
if (sb->minor_version >= 90)
printf("%08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
sb->set_uuid2, sb->set_uuid3);
@@ -367,7 +367,7 @@ static void detail_super0(struct supertype *st, char *homehost)
if (memcmp(&sb->set_uuid2, hash, 8)==0)
printf(" (local to host %s)", homehost);
}
- printf("\n Events : %d.%d\n\n", sb->events_hi, sb->events_lo);
+ printf("\n Events : %d.%d\n\n", sb->events_hi, sb->events_lo);
}
static void brief_detail_super0(struct supertype *st)
diff --git a/super1.c b/super1.c
index fa2383295bd4..672cdde690b4 100644
--- a/super1.c
+++ b/super1.c
@@ -780,19 +780,20 @@ static void detail_super1(struct supertype *st, char *homehost)
int i;
int l = homehost ? strlen(homehost) : 0;
- printf(" Name : %.32s", sb->set_name);
+ printf(" Name : %.32s", sb->set_name);
if (l > 0 && l < 32 &&
sb->set_name[l] == ':' &&
strncmp(sb->set_name, homehost, l) == 0)
printf(" (local to host %s)", homehost);
if (bms->nodes > 0 && (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET))
- printf("\n Cluster Name : %-64s", bms->cluster_name);
- printf("\n UUID : ");
+ printf("\n Cluster Name : %-64s", bms->cluster_name);
+ printf("\n UUID : ");
for (i=0; i<16; i++) {
if ((i&3)==0 && i != 0) printf(":");
printf("%02x", sb->set_uuid[i]);
}
- printf("\n Events : %llu\n\n", (unsigned long long)__le64_to_cpu(sb->events));
+ printf("\n Events : %llu\n\n",
+ (unsigned long long)__le64_to_cpu(sb->events));
}
static void brief_detail_super1(struct supertype *st)
--
2.12.0

646
0003-imsm-PPL-support.patch Normal file
View File

@ -0,0 +1,646 @@
From dd3ce3b14b171ad049193053f7d6d2d126687fdc Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 16 Mar 2017 22:09:45 +0100
Subject: [PATCH] imsm: PPL support
Enable creating and assembling IMSM raid5 arrays with PPL. Update the
IMSM metadata format to include new fields used for PPL.
Add structures for PPL metadata. They are used also by super1 and shared
with the kernel, so put them in md_p.h.
Write the initial empty PPL header when creating an array. When
assembling an array with PPL, validate the PPL header and, if it is not
correct, allow it to be overwritten when --force is provided.
Write the PPL location and size for a device to the new rdev sysfs
attributes 'ppl_sector' and 'ppl_size'. Enable PPL in the kernel by
writing to 'consistency_policy' before the array is activated.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
Assemble.c | 49 +++++++++++
Makefile | 5 +-
md_p.h | 25 ++++++
mdadm.h | 6 ++
super-intel.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
sysfs.c | 14 +++
6 files changed, 349 insertions(+), 24 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 3da090330019..8e55b49fa406 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1942,6 +1942,55 @@ int assemble_container_content(struct supertype *st, int mdfd,
map_update(NULL, fd2devnm(mdfd), content->text_version,
content->uuid, chosen_name);
+ if (content->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ st->ss->validate_ppl) {
+ content->array.state |= 1;
+ err = 0;
+
+ for (dev = content->devs; dev; dev = dev->next) {
+ int dfd;
+ char *devpath;
+ int ret;
+
+ ret = st->ss->validate_ppl(st, content, dev);
+ if (ret == 0)
+ continue;
+
+ if (ret < 0) {
+ err = 1;
+ break;
+ }
+
+ if (!c->force) {
+ pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n",
+ chosen_name);
+ content->array.state &= ~1;
+ avail[dev->disk.raid_disk] = 0;
+ break;
+ }
+
+ /* have --force - overwrite the invalid ppl */
+ devpath = map_dev(dev->disk.major, dev->disk.minor, 0);
+ dfd = dev_open(devpath, O_RDWR);
+ if (dfd < 0) {
+ pr_err("Failed to open %s\n", devpath);
+ err = 1;
+ break;
+ }
+
+ err = st->ss->write_init_ppl(st, content, dfd);
+ close(dfd);
+
+ if (err)
+ break;
+ }
+
+ if (err) {
+ free(avail);
+ return err;
+ }
+ }
+
if (enough(content->array.level, content->array.raid_disks,
content->array.layout, content->array.state & 1, avail) == 0) {
if (c->export && result)
diff --git a/Makefile b/Makefile
index a6f464c31626..0d796d5c63c4 100644
--- a/Makefile
+++ b/Makefile
@@ -146,7 +146,7 @@ MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \
super-mbr.o super-gpt.o \
super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \
- platform-intel.o probe_roms.o
+ platform-intel.o probe_roms.o crc32c.o
MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
@@ -156,7 +156,8 @@ STATICOBJS = pwgr.o
ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
maps.c lib.c xmalloc.c \
super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
- platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c mapfile.c
+ platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c mapfile.c \
+ crc32c.c
ASSEMBLE_AUTO_SRCS := mdopen.c
ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
ifdef MDASSEMBLE_AUTO
diff --git a/md_p.h b/md_p.h
index dc9fec165cb6..358a28ce3fcf 100644
--- a/md_p.h
+++ b/md_p.h
@@ -267,4 +267,29 @@ struct r5l_meta_block {
#define R5LOG_VERSION 0x1
#define R5LOG_MAGIC 0x6433c509
+struct ppl_header_entry {
+ __u64 data_sector; /* raid sector of the new data */
+ __u32 pp_size; /* length of partial parity */
+ __u32 data_size; /* length of data */
+ __u32 parity_disk; /* member disk containing parity */
+ __u32 checksum; /* checksum of this entry's partial parity */
+} __attribute__ ((__packed__));
+
+#define PPL_HEADER_SIZE 4096
+#define PPL_HDR_RESERVED 512
+#define PPL_HDR_ENTRY_SPACE \
+ (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(__u32) - sizeof(__u64))
+#define PPL_HDR_MAX_ENTRIES \
+ (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry))
+
+struct ppl_header {
+ __u8 reserved[PPL_HDR_RESERVED];/* reserved space, fill with 0xff */
+ __u32 signature; /* signature (family number of volume) */
+ __u32 padding; /* zero pad */
+ __u64 generation; /* generation number of the header */
+ __u32 entries_count; /* number of entries in entry array */
+ __u32 checksum; /* checksum of the header */
+ struct ppl_header_entry entries[PPL_HDR_MAX_ENTRIES];
+} __attribute__ ((__packed__));
+
#endif
diff --git a/mdadm.h b/mdadm.h
index ed4d7e4e65ae..10c204160351 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -300,6 +300,8 @@ struct mdinfo {
#define MaxSector (~0ULL) /* resync/recovery complete position */
};
long bitmap_offset; /* 0 == none, 1 == a file */
+ unsigned int ppl_size;
+ unsigned long long ppl_sector;
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
@@ -1074,6 +1076,10 @@ extern struct superswitch {
/* write initial empty PPL on device */
int (*write_init_ppl)(struct supertype *st, struct mdinfo *info, int fd);
+ /* validate ppl before assemble */
+ int (*validate_ppl)(struct supertype *st, struct mdinfo *info,
+ struct mdinfo *disk);
+
/* records new bad block in metadata */
int (*record_bad_block)(struct active_array *a, int n,
unsigned long long sector, int length);
diff --git a/super-intel.c b/super-intel.c
index 120ce77c0d3d..ad3a45369534 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -102,6 +102,7 @@ struct imsm_disk {
#define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
#define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
+#define JOURNAL_DISK __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */
__u32 status; /* 0xF0 - 0xF3 */
__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
__u32 total_blocks_hi; /* 0xF4 - 0xF5 total blocks hi */
@@ -155,6 +156,9 @@ struct imsm_vol {
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
__u8 migr_type; /* Initializing, Rebuilding, ... */
+#define RAIDVOL_CLEAN 0
+#define RAIDVOL_DIRTY 1
+#define RAIDVOL_DSRECORD_VALID 2
__u8 dirty;
__u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */
__u16 verify_errors; /* number of mismatches */
@@ -190,7 +194,24 @@ struct imsm_dev {
__u16 cache_policy;
__u8 cng_state;
__u8 cng_sub_state;
-#define IMSM_DEV_FILLERS 10
+ __u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */
+
+ /* NVM_EN */
+ __u8 nv_cache_mode;
+ __u8 nv_cache_flags;
+
+ /* Unique Volume Id of the NvCache Volume associated with this volume */
+ __u32 nvc_vol_orig_family_num;
+ __u16 nvc_vol_raid_dev_num;
+
+#define RWH_OFF 0
+#define RWH_DISTRIBUTED 1
+#define RWH_JOURNALING_DRIVE 2
+ __u8 rwh_policy; /* Raid Write Hole Policy */
+ __u8 jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
+ __u8 filler1;
+
+#define IMSM_DEV_FILLERS 3
__u32 filler[IMSM_DEV_FILLERS];
struct imsm_vol vol;
} __attribute__ ((packed));
@@ -257,6 +278,9 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
#define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
* already been migrated and must
* be recovered from checkpoint area */
+
+#define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */
+
struct migr_record {
__u32 rec_status; /* Status used to determine how to restart
* migration in case it aborts
@@ -1288,6 +1312,11 @@ static int is_failed(struct imsm_disk *disk)
return (disk->status & FAILED_DISK) == FAILED_DISK;
}
+static int is_journal(struct imsm_disk *disk)
+{
+ return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
+}
+
/* try to determine how much space is reserved for metadata from
* the last get_extents() entry on the smallest active disk,
* otherwise fallback to the default
@@ -1477,7 +1506,17 @@ static void print_imsm_dev(struct intel_super *super,
blocks_per_migr_unit(super, dev));
}
printf("\n");
- printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
+ printf(" Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
+ "dirty" : "clean");
+ printf(" RWH Policy : ");
+ if (dev->rwh_policy == RWH_OFF)
+ printf("off\n");
+ else if (dev->rwh_policy == RWH_DISTRIBUTED)
+ printf("PPL distributed\n");
+ else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
+ printf("PPL journaling drive\n");
+ else
+ printf("<unknown:%d>\n", dev->rwh_policy);
}
static void print_imsm_disk(struct imsm_disk *disk,
@@ -1496,9 +1535,10 @@ static void print_imsm_disk(struct imsm_disk *disk,
printf(" Disk%02d Serial : %s\n", index, str);
else
printf(" Disk Serial : %s\n", str);
- printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
- is_configured(disk) ? " active" : "",
- is_failed(disk) ? " failed" : "");
+ printf(" State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
+ is_configured(disk) ? " active" : "",
+ is_failed(disk) ? " failed" : "",
+ is_journal(disk) ? " journal" : "");
printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
sz = total_blocks(disk) - reserved;
printf(" Usable Size : %llu%s\n",
@@ -3113,6 +3153,15 @@ static unsigned long long imsm_component_size_aligment_check(int level,
return component_size;
}
+static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
+{
+ struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+ return pba_of_lba0(map) +
+ (num_data_stripes(map) * map->blocks_per_strip);
+}
+
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
struct intel_super *super = st->sb;
@@ -3139,7 +3188,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
info->array.utime = 0;
info->array.chunk_size =
__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
- info->array.state = !dev->vol.dirty;
+ info->array.state = !(dev->vol.dirty & RAIDVOL_DIRTY);
info->custom_array_size = __le32_to_cpu(dev->size_high);
info->custom_array_size <<= 32;
info->custom_array_size |= __le32_to_cpu(dev->size_low);
@@ -3220,10 +3269,20 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
memset(info->uuid, 0, sizeof(info->uuid));
info->recovery_start = MaxSector;
+ if (info->array.level == 5 && dev->rwh_policy == RWH_DISTRIBUTED) {
+ info->consistency_policy = CONSISTENCY_POLICY_PPL;
+ info->ppl_sector = get_ppl_sector(super, super->current_vol);
+ info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+ } else if (info->array.level <= 0) {
+ info->consistency_policy = CONSISTENCY_POLICY_NONE;
+ } else {
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
+
info->reshape_progress = 0;
info->resync_start = MaxSector;
if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
- dev->vol.dirty) &&
+ !(info->array.state & 1)) &&
imsm_reshape_blocks_arrays_changes(super) == 0) {
info->resync_start = 0;
}
@@ -3450,7 +3509,8 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
* found the 'most fresh' version of the metadata
*/
info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
- info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
+ info->disk.state |= (is_spare(disk) || is_journal(disk)) ?
+ 0 : (1 << MD_DISK_SYNC);
}
/* only call uuid_from_super_imsm when this disk is part of a populated container,
@@ -3905,7 +3965,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
*/
if (is_failed(&dl->disk))
dl->index = -2;
- else if (is_spare(&dl->disk))
+ else if (is_spare(&dl->disk) || is_journal(&dl->disk))
dl->index = -1;
}
@@ -5302,6 +5362,20 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
}
mpb->num_raid_devs++;
+ if (s->consistency_policy == UnSet ||
+ s->consistency_policy == CONSISTENCY_POLICY_RESYNC ||
+ s->consistency_policy == CONSISTENCY_POLICY_NONE) {
+ dev->rwh_policy = RWH_OFF;
+ } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ dev->rwh_policy = RWH_DISTRIBUTED;
+ } else {
+ free(dev);
+ free(dv);
+ pr_err("imsm does not support consistency policy %s\n",
+ map_num(consistency_policies, s->consistency_policy));
+ return 0;
+ }
+
dv->dev = dev;
dv->index = super->current_vol;
dv->next = super->devlist;
@@ -5926,11 +6000,146 @@ static int mgmt_disk(struct supertype *st)
return 0;
}
+#endif
+
+__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
+
+static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd)
+{
+ struct intel_super *super = st->sb;
+ void *buf;
+ struct ppl_header *ppl_hdr;
+ int ret;
+
+ ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+ if (ret) {
+ pr_err("Failed to allocate PPL header buffer\n");
+ return ret;
+ }
+
+ memset(buf, 0, PPL_HEADER_SIZE);
+ ppl_hdr = buf;
+ memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
+ ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
+ ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+ if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+ ret = errno;
+ perror("Failed to seek to PPL header location");
+ }
+
+ if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ ret = errno;
+ perror("Write PPL header failed");
+ }
+
+ if (!ret)
+ fsync(fd);
+
+ free(buf);
+ return ret;
+}
+
+static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
+ struct mdinfo *disk)
+{
+ struct intel_super *super = st->sb;
+ struct dl *d;
+ void *buf;
+ int ret = 0;
+ struct ppl_header *ppl_hdr;
+ __u32 crc;
+ struct imsm_dev *dev;
+ struct imsm_map *map;
+ __u32 idx;
+
+ if (disk->disk.raid_disk < 0)
+ return 0;
+
+ if (posix_memalign(&buf, 4096, PPL_HEADER_SIZE)) {
+ pr_err("Failed to allocate PPL header buffer\n");
+ return -1;
+ }
+
+ dev = get_imsm_dev(super, info->container_member);
+ map = get_imsm_map(dev, MAP_X);
+ idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_X);
+ d = get_imsm_dl_disk(super, idx);
+
+ if (!d || d->index < 0 || is_failed(&d->disk))
+ goto out;
+
+ if (lseek64(d->fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+ perror("Failed to seek to PPL header location");
+ ret = -1;
+ goto out;
+ }
+
+ if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ perror("Read PPL header failed");
+ ret = -1;
+ goto out;
+ }
+
+ ppl_hdr = buf;
+
+ crc = __le32_to_cpu(ppl_hdr->checksum);
+ ppl_hdr->checksum = 0;
+
+ if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
+ dprintf("Wrong PPL header checksum on %s\n",
+ d->devname);
+ ret = 1;
+ }
+
+ if (!ret && (__le32_to_cpu(ppl_hdr->signature) !=
+ super->anchor->orig_family_num)) {
+ dprintf("Wrong PPL header signature on %s\n",
+ d->devname);
+ ret = 1;
+ }
+
+out:
+ free(buf);
+
+ if (ret == 1 && map->map_state == IMSM_T_STATE_UNINITIALIZED)
+ return st->ss->write_init_ppl(st, info, d->fd);
+
+ return ret;
+}
+
+#ifndef MDASSEMBLE
+
+static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info)
+{
+ struct intel_super *super = st->sb;
+ struct dl *d;
+ int ret = 0;
+
+ if (info->consistency_policy != CONSISTENCY_POLICY_PPL ||
+ info->array.level != 5)
+ return 0;
+
+ for (d = super->disks; d ; d = d->next) {
+ if (d->index < 0 || is_failed(&d->disk))
+ continue;
+
+ ret = st->ss->write_init_ppl(st, info, d->fd);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
static int write_init_super_imsm(struct supertype *st)
{
struct intel_super *super = st->sb;
int current_vol = super->current_vol;
+ int rv = 0;
+ struct mdinfo info;
+
+ getinfo_super_imsm(st, &info, NULL);
/* we are done with current_vol reset it to point st at the container */
super->current_vol = -1;
@@ -5938,24 +6147,29 @@ static int write_init_super_imsm(struct supertype *st)
if (st->update_tail) {
/* queue the recently created array / added disk
* as a metadata update */
- int rv;
/* determine if we are creating a volume or adding a disk */
if (current_vol < 0) {
/* in the mgmt (add/remove) disk case we are running
* in mdmon context, so don't close fd's
*/
- return mgmt_disk(st);
- } else
- rv = create_array(st, current_vol);
-
- return rv;
+ rv = mgmt_disk(st);
+ } else {
+ rv = write_init_ppl_imsm_all(st, &info);
+ if (!rv)
+ rv = create_array(st, current_vol);
+ }
} else {
struct dl *d;
for (d = super->disks; d; d = d->next)
Kill(d->devname, NULL, 0, -1, 1);
- return write_super_imsm(st, 1);
+ if (current_vol >= 0)
+ rv = write_init_ppl_imsm_all(st, &info);
+ if (!rv)
+ rv = write_super_imsm(st, 1);
}
+
+ return rv;
}
#endif
@@ -7372,7 +7586,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
*
* FIXME handle dirty degraded
*/
- if ((skip || recovery_start == 0) && !dev->vol.dirty)
+ if ((skip || recovery_start == 0) &&
+ !(dev->vol.dirty & RAIDVOL_DIRTY))
this->resync_start = MaxSector;
if (skip)
continue;
@@ -7407,9 +7622,12 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
info_d->component_size =
num_data_stripes(map) *
map->blocks_per_strip;
+ info_d->ppl_sector = this->ppl_sector;
+ info_d->ppl_size = this->ppl_size;
} else {
info_d->component_size = blocks_per_member(map);
}
+ info_d->consistency_policy = this->consistency_policy;
info_d->bb.supported = 1;
get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
@@ -7925,12 +8143,16 @@ mark_checkpoint:
skip_mark_checkpoint:
/* mark dirty / clean */
- if (dev->vol.dirty != !consistent) {
+ if (((dev->vol.dirty & RAIDVOL_DIRTY) && consistent) ||
+ (!(dev->vol.dirty & RAIDVOL_DIRTY) && !consistent)) {
dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
- if (consistent)
- dev->vol.dirty = 0;
- else
- dev->vol.dirty = 1;
+ if (consistent) {
+ dev->vol.dirty = RAIDVOL_CLEAN;
+ } else {
+ dev->vol.dirty = RAIDVOL_DIRTY;
+ if (dev->rwh_policy == RWH_DISTRIBUTED)
+ dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
+ }
super->updates_pending++;
}
@@ -8442,6 +8664,11 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
di->component_size = a->info.component_size;
di->container_member = inst;
di->bb.supported = 1;
+ if (dev->rwh_policy == RWH_DISTRIBUTED) {
+ di->consistency_policy = CONSISTENCY_POLICY_PPL;
+ di->ppl_sector = get_ppl_sector(super, inst);
+ di->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+ }
super->random = random32();
di->next = rv;
rv = di;
@@ -11597,6 +11824,9 @@ struct superswitch super_imsm = {
.container_content = container_content_imsm,
.validate_container = validate_container_imsm,
+ .write_init_ppl = write_init_ppl_imsm,
+ .validate_ppl = validate_ppl_imsm,
+
.external = 1,
.name = "imsm",
diff --git a/sysfs.c b/sysfs.c
index 53589a76f094..2a91ba0a90cf 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -689,6 +689,16 @@ int sysfs_set_array(struct mdinfo *info, int vers)
* once the reshape completes.
*/
}
+
+ if (info->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ if (sysfs_set_str(info, NULL, "consistency_policy",
+ map_num(consistency_policies,
+ info->consistency_policy))) {
+ pr_err("This kernel does not support PPL\n");
+ return 1;
+ }
+ }
+
return rv;
}
@@ -720,6 +730,10 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
if (sra->array.level != LEVEL_CONTAINER) {
+ if (sd->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ rv |= sysfs_set_num(sra, sd, "ppl_sector", sd->ppl_sector);
+ rv |= sysfs_set_num(sra, sd, "ppl_size", sd->ppl_size);
+ }
if (sd->recovery_start == MaxSector)
/* This can correctly fail if array isn't started,
* yet, so just ignore status for now.
--
2.12.0

View File

@ -0,0 +1,439 @@
From 35753b5ec92fd1d80c22b91aee4b61ed69691986 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 16 Mar 2017 22:09:46 +0100
Subject: [PATCH] super1: PPL support
Enable creating and assembling raid5 arrays with PPL for 1.x metadata.
When creating, reserve enough space for PPL, store its size and
location in the superblock, and set the MD_FEATURE_PPL bit. Write an
initial empty header in the PPL area on each device. PPL is stored in
the metadata region reserved for the internal write-intent bitmap, so
don't allow using bitmap and PPL together.
While at it, fix two endianness issues in write_empty_r5l_meta_block()
and write_init_super1().
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
Assemble.c | 3 ++
Create.c | 2 +
Grow.c | 15 +++++-
Incremental.c | 3 ++
mdadm.h | 1 +
super1.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
6 files changed, 155 insertions(+), 19 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 8e55b49fa406..c09842016c0a 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -962,6 +962,9 @@ static int start_array(int mdfd,
c->readonly = 1;
}
+ if (content->consistency_policy == CONSISTENCY_POLICY_PPL)
+ clean = 1;
+
rv = set_array_info(mdfd, st, content);
if (rv && !err_ok) {
pr_err("failed to set array info for %s: %s\n",
diff --git a/Create.c b/Create.c
index 4080bf69f05b..10e7d108956d 100644
--- a/Create.c
+++ b/Create.c
@@ -524,6 +524,8 @@ int Create(struct supertype *st, char *mddev,
if (!s->bitmap_file &&
s->level >= 1 &&
st->ss->add_internal_bitmap &&
+ (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ s->consistency_policy != CONSISTENCY_POLICY_PPL) &&
(s->write_behind || s->size > 100*1024*1024ULL)) {
if (c->verbose > 0)
pr_err("automatically enabling write-intent bitmap on large array\n");
diff --git a/Grow.c b/Grow.c
index 455c5f90bf58..e4351d7f952a 100755
--- a/Grow.c
+++ b/Grow.c
@@ -290,6 +290,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
int major = BITMAP_MAJOR_HI;
int vers = md_get_version(fd);
unsigned long long bitmapsize, array_size;
+ struct mdinfo *mdi;
if (vers < 9003) {
major = BITMAP_MAJOR_HOSTENDIAN;
@@ -389,12 +390,23 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
free(st);
return 1;
}
+
+ mdi = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY);
+ if (mdi) {
+ if (mdi->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ pr_err("Cannot add bitmap to array with PPL\n");
+ free(mdi);
+ free(st);
+ return 1;
+ }
+ free(mdi);
+ }
+
if (strcmp(s->bitmap_file, "internal") == 0 ||
strcmp(s->bitmap_file, "clustered") == 0) {
int rv;
int d;
int offset_setable = 0;
- struct mdinfo *mdi;
if (st->ss->add_internal_bitmap == NULL) {
pr_err("Internal bitmaps not supported with %s metadata\n", st->ss->name);
return 1;
@@ -446,6 +458,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
sysfs_init(mdi, fd, NULL);
rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
mdi->bitmap_offset);
+ free(mdi);
} else {
if (strcmp(s->bitmap_file, "clustered") == 0)
array.state |= (1 << MD_SB_CLUSTERED);
diff --git a/Incremental.c b/Incremental.c
index 0f507bb32c9e..81afc7ec36ae 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -528,6 +528,9 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
journal_device_missing = (info.journal_device_required) && (info.journal_clean == 0);
+ if (info.consistency_policy == CONSISTENCY_POLICY_PPL)
+ info.array.state |= 1;
+
if (enough(info.array.level, info.array.raid_disks,
info.array.layout, info.array.state & 1,
avail) == 0) {
diff --git a/mdadm.h b/mdadm.h
index 10c204160351..ab1b7fc66e7c 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -302,6 +302,7 @@ struct mdinfo {
long bitmap_offset; /* 0 == none, 1 == a file */
unsigned int ppl_size;
unsigned long long ppl_sector;
+ int ppl_offset;
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
diff --git a/super1.c b/super1.c
index 672cdde690b4..76eeca111821 100644
--- a/super1.c
+++ b/super1.c
@@ -48,10 +48,18 @@ struct mdp_superblock_1 {
__u32 chunksize; /* in 512byte sectors */
__u32 raid_disks;
- __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
- * NOTE: signed, so bitmap can be before superblock
- * only meaningful of feature_map[0] is set.
- */
+ union {
+ __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
+ * NOTE: signed, so bitmap can be before superblock
+ * only meaningful of feature_map[0] is set.
+ */
+
+ /* only meaningful when feature_map[MD_FEATURE_PPL] is set */
+ struct {
+ __s16 offset; /* sectors from start of superblock that ppl starts */
+ __u16 size; /* ppl size in sectors */
+ } ppl;
+ };
/* These are only valid with feature bit '4' */
__u32 new_level; /* new level we are reshaping to */
@@ -131,6 +139,7 @@ struct misc_dev_info {
#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
#define MD_FEATURE_BITMAP_VERSIONED 256 /* bitmap version number checked properly */
#define MD_FEATURE_JOURNAL 512 /* support write journal */
+#define MD_FEATURE_PPL 1024 /* support PPL */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -140,6 +149,7 @@ struct misc_dev_info {
|MD_FEATURE_NEW_OFFSET \
|MD_FEATURE_BITMAP_VERSIONED \
|MD_FEATURE_JOURNAL \
+ |MD_FEATURE_PPL \
)
#ifndef MDASSEMBLE
@@ -289,6 +299,11 @@ static int awrite(struct align_fd *afd, void *buf, int len)
return len;
}
+static inline unsigned int choose_ppl_space(int chunk)
+{
+ return (PPL_HEADER_SIZE >> 9) + (chunk > 128*2 ? chunk : 128*2);
+}
+
#ifndef MDASSEMBLE
static void examine_super1(struct supertype *st, char *homehost)
{
@@ -392,6 +407,10 @@ static void examine_super1(struct supertype *st, char *homehost)
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
printf("Internal Bitmap : %ld sectors from superblock\n",
(long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ printf(" PPL : %u sectors at offset %d sectors from superblock\n",
+ __le16_to_cpu(sb->ppl.size),
+ __le16_to_cpu(sb->ppl.offset));
}
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
printf(" Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
@@ -934,10 +953,16 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
if (__le32_to_cpu(bsb->nodes) > 1)
info->array.state |= (1 << MD_SB_CLUSTERED);
+ super_offset = __le64_to_cpu(sb->super_offset);
info->data_offset = __le64_to_cpu(sb->data_offset);
info->component_size = __le64_to_cpu(sb->size);
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET))
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
info->bitmap_offset = (int32_t)__le32_to_cpu(sb->bitmap_offset);
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_PPL)) {
+ info->ppl_offset = __le16_to_cpu(sb->ppl.offset);
+ info->ppl_size = __le16_to_cpu(sb->ppl.size);
+ info->ppl_sector = super_offset + info->ppl_offset;
+ }
info->disk.major = 0;
info->disk.minor = 0;
@@ -948,7 +973,6 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
else
role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]);
- super_offset = __le64_to_cpu(sb->super_offset);
if (info->array.level <= 0)
data_size = __le64_to_cpu(sb->data_size);
else
@@ -965,8 +989,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
end = bboffset;
}
- if (super_offset + info->bitmap_offset < end)
- end = super_offset + info->bitmap_offset;
+ if (super_offset + info->bitmap_offset + info->ppl_offset < end)
+ end = super_offset + info->bitmap_offset + info->ppl_offset;
if (info->data_offset + data_size < end)
info->space_after = end - data_size - info->data_offset;
@@ -982,6 +1006,11 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
bmend += size;
if (bmend > earliest)
earliest = bmend;
+ } else if (info->ppl_offset > 0) {
+ unsigned long long pplend = info->ppl_offset +
+ info->ppl_size;
+ if (pplend > earliest)
+ earliest = pplend;
}
if (sb->bblog_offset && sb->bblog_size) {
unsigned long long bbend = super_offset;
@@ -1075,8 +1104,20 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
}
info->array.working_disks = working;
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL))
+
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL)) {
info->journal_device_required = 1;
+ info->consistency_policy = CONSISTENCY_POLICY_JOURNAL;
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_PPL)) {
+ info->consistency_policy = CONSISTENCY_POLICY_PPL;
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
+ info->consistency_policy = CONSISTENCY_POLICY_BITMAP;
+ } else if (info->array.level <= 0) {
+ info->consistency_policy = CONSISTENCY_POLICY_NONE;
+ } else {
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
+
info->journal_clean = 0;
}
@@ -1239,6 +1280,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
bitmap_offset = (long)__le32_to_cpu(sb->bitmap_offset);
bm_sectors = calc_bitmap_size(bms, 4096) >> 9;
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ bitmap_offset = (long)__le16_to_cpu(sb->ppl.offset);
+ bm_sectors = (long)__le16_to_cpu(sb->ppl.size);
}
#endif
if (sb_offset < data_offset) {
@@ -1472,6 +1516,9 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
memset(sb->dev_roles, 0xff, MAX_SB_SIZE - sizeof(struct mdp_superblock_1));
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
+
return 1;
}
@@ -1643,10 +1690,49 @@ static unsigned long choose_bm_space(unsigned long devsize)
static void free_super1(struct supertype *st);
-#define META_BLOCK_SIZE 4096
+#ifndef MDASSEMBLE
+
__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
-#ifndef MDASSEMBLE
+static int write_init_ppl1(struct supertype *st, struct mdinfo *info, int fd)
+{
+ struct mdp_superblock_1 *sb = st->sb;
+ void *buf;
+ struct ppl_header *ppl_hdr;
+ int ret;
+
+ ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+ if (ret) {
+ pr_err("Failed to allocate PPL header buffer\n");
+ return ret;
+ }
+
+ memset(buf, 0, PPL_HEADER_SIZE);
+ ppl_hdr = buf;
+ memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
+ ppl_hdr->signature = __cpu_to_le32(~crc32c_le(~0, sb->set_uuid,
+ sizeof(sb->set_uuid)));
+ ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+ if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+ ret = errno;
+ perror("Failed to seek to PPL header location");
+ }
+
+ if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ ret = errno;
+ perror("Write PPL header failed");
+ }
+
+ if (!ret)
+ fsync(fd);
+
+ free(buf);
+ return ret;
+}
+
+#define META_BLOCK_SIZE 4096
+
static int write_empty_r5l_meta_block(struct supertype *st, int fd)
{
struct r5l_meta_block *mb;
@@ -1673,7 +1759,7 @@ static int write_empty_r5l_meta_block(struct supertype *st, int fd)
crc = crc32c_le(crc, (void *)mb, META_BLOCK_SIZE);
mb->checksum = crc;
- if (lseek64(fd, (sb->data_offset) * 512, 0) < 0LL) {
+ if (lseek64(fd, __le64_to_cpu(sb->data_offset) * 512, 0) < 0LL) {
pr_err("cannot seek to offset of the meta block\n");
goto fail_to_write;
}
@@ -1706,7 +1792,7 @@ static int write_init_super1(struct supertype *st)
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
- sb->feature_map |= MD_FEATURE_JOURNAL;
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
}
for (di = st->info; di; di = di->next) {
@@ -1781,6 +1867,21 @@ static int write_init_super1(struct supertype *st)
(((char *)sb) + MAX_SB_SIZE);
bm_space = calc_bitmap_size(bms, 4096) >> 9;
bm_offset = (long)__le32_to_cpu(sb->bitmap_offset);
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ bm_space = choose_ppl_space(__le32_to_cpu(sb->chunksize));
+ if (bm_space > UINT16_MAX)
+ bm_space = UINT16_MAX;
+ if (st->minor_version == 0) {
+ bm_offset = -bm_space - 8;
+ if (bm_offset < INT16_MIN) {
+ bm_offset = INT16_MIN;
+ bm_space = -bm_offset - 8;
+ }
+ } else {
+ bm_offset = 8;
+ }
+ sb->ppl.offset = __cpu_to_le16(bm_offset);
+ sb->ppl.size = __cpu_to_le16(bm_space);
} else {
bm_space = choose_bm_space(array_size);
bm_offset = 8;
@@ -1852,8 +1953,17 @@ static int write_init_super1(struct supertype *st)
goto error_out;
}
- if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
+ if (rv == 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
rv = st->ss->write_bitmap(st, di->fd, NodeNumUpdate);
+ } else if (rv == 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL)) {
+ struct mdinfo info;
+
+ st->ss->getinfo_super(st, &info, NULL);
+ rv = st->ss->write_init_ppl(st, &info, di->fd);
+ }
+
close(di->fd);
di->fd = -1;
if (rv)
@@ -2121,11 +2231,13 @@ static __u64 avail_size1(struct supertype *st, __u64 devsize,
return 0;
#ifndef MDASSEMBLE
- if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
+ if (__le32_to_cpu(super->feature_map) & MD_FEATURE_BITMAP_OFFSET) {
/* hot-add. allow for actual size of bitmap */
struct bitmap_super_s *bsb;
bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
bmspace = calc_bitmap_size(bsb, 4096) >> 9;
+ } else if (__le32_to_cpu(super->feature_map) & MD_FEATURE_PPL) {
+ bmspace = __le16_to_cpu(super->ppl.size);
}
#endif
/* Allow space for bad block log */
@@ -2528,8 +2640,9 @@ static int validate_geometry1(struct supertype *st, int level,
return 0;
}
- /* creating: allow suitable space for bitmap */
- bmspace = choose_bm_space(devsize);
+ /* creating: allow suitable space for bitmap or PPL */
+ bmspace = consistency_policy == CONSISTENCY_POLICY_PPL ?
+ choose_ppl_space((*chunk)*2) : choose_bm_space(devsize);
if (data_offset == INVALID_SECTORS)
data_offset = st->data_offset;
@@ -2564,7 +2677,7 @@ static int validate_geometry1(struct supertype *st, int level,
switch(st->minor_version) {
case 0: /* metadata at end. Round down and subtract space to reserve */
devsize = (devsize & ~(4ULL*2-1));
- /* space for metadata, bblog, bitmap */
+ /* space for metadata, bblog, bitmap/ppl */
devsize -= 8*2 + 8 + bmspace;
break;
case 1:
@@ -2640,6 +2753,7 @@ struct superswitch super1 = {
.add_to_super = add_to_super1,
.examine_badblocks = examine_badblocks_super1,
.copy_metadata = copy_metadata1,
+ .write_init_ppl = write_init_ppl1,
#endif
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,
--
2.12.0

View File

@ -0,0 +1,274 @@
From 9b26e37ca8943a91e10e758bfeeaf040eef3393c Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 16 Mar 2017 22:09:47 +0100
Subject: [PATCH] Add 'ppl' and 'no-ppl' options for --update=
This can be used with --assemble for super1 and with --update-subarray
for imsm to enable or disable PPL in the metadata.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
Assemble.c | 6 ++++++
mdadm.8.in | 27 ++++++++++++++++++++++++---
mdadm.c | 6 +++++-
super-intel.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
super1.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 139 insertions(+), 4 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index c09842016c0a..6a6a56bfb8b9 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -602,6 +602,12 @@ static int load_devices(struct devs *devices, char *devmap,
if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set)
random_uuid((__u8 *)ident->uuid);
+ if (strcmp(c->update, "ppl") == 0 &&
+ ident->bitmap_fd >= 0) {
+ pr_err("PPL is not compatible with bitmap\n");
+ return -1;
+ }
+
dfd = dev_open(devname,
tmpdev->disposition == 'I'
? O_RDWR : (O_RDWR|O_EXCL));
diff --git a/mdadm.8.in b/mdadm.8.in
index cad5db533fa4..1178ed9ba320 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1176,6 +1176,8 @@ argument given to this flag can be one of
.BR no\-bitmap ,
.BR bbl ,
.BR no\-bbl ,
+.BR ppl ,
+.BR no\-ppl ,
.BR metadata ,
or
.BR super\-minor .
@@ -1316,6 +1318,16 @@ option will cause any reservation of space for a bad block list to be
removed. If the bad block list contains entries, this will fail, as
removing the list could cause data corruption.
+The
+.B ppl
+option will enable PPL for a RAID5 array and reserve space for PPL on each
+device. There must be enough free space between the data and superblock and a
+write-intent bitmap or journal must not be used.
+
+The
+.B no\-ppl
+option will disable PPL in the superblock.
+
.TP
.BR \-\-freeze\-reshape
Option is intended to be used in start-up scripts during initrd boot phase.
@@ -2327,9 +2339,11 @@ superblock field in the subarray. Similar to updating an array in
.B \-U
or
.B \-\-update=
-option. Currently only
-.B name
-is supported.
+option. The supported options are
+.BR name ,
+.B ppl
+and
+.BR no\-ppl .
The
.B name
@@ -2340,6 +2354,13 @@ re\-assembled. If updating
would change the UUID of an active subarray this operation is blocked,
and the command will end in an error.
+The
+.B ppl
+and
+.B no\-ppl
+options enable and disable PPL in the metadata. Currently supported only for
+IMSM subarrays.
+
.TP
.B \-\-examine
The device should be a component of an md array.
diff --git a/mdadm.c b/mdadm.c
index 65431b76cf15..3d0da1eca8d2 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -769,6 +769,10 @@ int main(int argc, char *argv[])
continue;
if (strcmp(c.update, "force-no-bbl") == 0)
continue;
+ if (strcmp(c.update, "ppl") == 0)
+ continue;
+ if (strcmp(c.update, "no-ppl") == 0)
+ continue;
if (strcmp(c.update, "metadata") == 0)
continue;
if (strcmp(c.update, "revert-reshape") == 0)
@@ -802,7 +806,7 @@ int main(int argc, char *argv[])
" 'sparc2.2', 'super-minor', 'uuid', 'name', 'nodes', 'resync',\n"
" 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
" 'no-bitmap', 'metadata', 'revert-reshape'\n"
- " 'bbl', 'no-bbl', 'force-no-bbl'\n"
+ " 'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n"
);
exit(outf == stdout ? 0 : 2);
diff --git a/super-intel.c b/super-intel.c
index ad3a45369534..53fab8a34a52 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -451,6 +451,7 @@ enum imsm_update_type {
update_general_migration_checkpoint,
update_size_change,
update_prealloc_badblocks_mem,
+ update_rwh_policy,
};
struct imsm_update_activate_spare {
@@ -543,6 +544,12 @@ struct imsm_update_prealloc_bb_mem {
enum imsm_update_type type;
};
+struct imsm_update_rwh_policy {
+ enum imsm_update_type type;
+ int new_policy;
+ int dev_idx;
+};
+
static const char *_sys_dev_type[] = {
[SYS_DEV_UNKNOWN] = "Unknown",
[SYS_DEV_SAS] = "SAS",
@@ -7370,6 +7377,34 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
}
super->updates_pending++;
}
+ } else if (strcmp(update, "ppl") == 0 ||
+ strcmp(update, "no-ppl") == 0) {
+ int new_policy;
+ char *ep;
+ int vol = strtoul(subarray, &ep, 10);
+
+ if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
+ return 2;
+
+ if (strcmp(update, "ppl") == 0)
+ new_policy = RWH_DISTRIBUTED;
+ else
+ new_policy = RWH_OFF;
+
+ if (st->update_tail) {
+ struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
+
+ u->type = update_rwh_policy;
+ u->dev_idx = vol;
+ u->new_policy = new_policy;
+ append_metadata_update(st, u, sizeof(*u));
+ } else {
+ struct imsm_dev *dev;
+
+ dev = get_imsm_dev(super, vol);
+ dev->rwh_policy = new_policy;
+ super->updates_pending++;
+ }
} else
return 2;
@@ -9596,6 +9631,21 @@ static void imsm_process_update(struct supertype *st,
}
case update_prealloc_badblocks_mem:
break;
+ case update_rwh_policy: {
+ struct imsm_update_rwh_policy *u = (void *)update->buf;
+ int target = u->dev_idx;
+ struct imsm_dev *dev = get_imsm_dev(super, target);
+ if (!dev) {
+ dprintf("could not find subarray-%d\n", target);
+ break;
+ }
+
+ if (dev->rwh_policy != u->new_policy) {
+ dev->rwh_policy = u->new_policy;
+ super->updates_pending++;
+ }
+ break;
+ }
default:
pr_err("error: unsuported process update type:(type: %d)\n", type);
}
@@ -9841,6 +9891,11 @@ static int imsm_prepare_update(struct supertype *st,
super->extra_space += sizeof(struct bbm_log) -
get_imsm_bbm_log_size(super->bbm_log);
break;
+ case update_rwh_policy: {
+ if (update->len < (int)sizeof(struct imsm_update_rwh_policy))
+ return 0;
+ break;
+ }
default:
return 0;
}
diff --git a/super1.c b/super1.c
index 76eeca111821..541f31eec16f 100644
--- a/super1.c
+++ b/super1.c
@@ -1325,6 +1325,55 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
sb->bblog_size = 0;
sb->bblog_shift = 0;
sb->bblog_offset = 0;
+ } else if (strcmp(update, "ppl") == 0) {
+ unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
+ unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
+ unsigned long long data_size = __le64_to_cpu(sb->data_size);
+ long bb_offset = __le32_to_cpu(sb->bblog_offset);
+ int space;
+ int optimal_space;
+ int offset;
+
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+ pr_err("Cannot add PPL to array with bitmap\n");
+ return -2;
+ }
+
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_JOURNAL)) {
+ pr_err("Cannot add PPL to array with journal\n");
+ return -2;
+ }
+
+ if (sb_offset < data_offset) {
+ if (bb_offset)
+ space = bb_offset - 8;
+ else
+ space = data_offset - sb_offset - 8;
+ offset = 8;
+ } else {
+ offset = -(sb_offset - data_offset - data_size);
+ if (offset < INT16_MIN)
+ offset = INT16_MIN;
+ space = -(offset - bb_offset);
+ }
+
+ if (space < (PPL_HEADER_SIZE >> 9) + 8) {
+ pr_err("Not enough space to add ppl\n");
+ return -2;
+ }
+
+ optimal_space = choose_ppl_space(__le32_to_cpu(sb->chunksize));
+
+ if (space > optimal_space)
+ space = optimal_space;
+ if (space > UINT16_MAX)
+ space = UINT16_MAX;
+
+ sb->ppl.offset = __cpu_to_le16(offset);
+ sb->ppl.size = __cpu_to_le16(space);
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
+ } else if (strcmp(update, "no-ppl") == 0) {
+ sb->feature_map &= ~ __cpu_to_le32(MD_FEATURE_PPL);
} else if (strcmp(update, "name") == 0) {
if (info->name[0] == 0)
sprintf(info->name, "%d", info->array.md_minor);
--
2.12.0

View File

@ -0,0 +1,289 @@
From 78ddd8e9ac18c2d01d79ac8a5a6fa924f6315ffd Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Thu, 16 Mar 2017 22:09:48 +0100
Subject: [PATCH] Grow: support consistency policy change
Extend the --consistency-policy parameter to work also in Grow mode.
Using it changes the currently active consistency policy in the kernel
driver and updates the metadata to make this change permanent. Currently
this supports only changing between "ppl" and "resync" policies, that is
enabling or disabling PPL at runtime.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
Grow.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
mdadm.8.in | 18 ++++++-
mdadm.c | 3 ++
mdadm.h | 2 +
4 files changed, 194 insertions(+), 1 deletion(-)
diff --git a/Grow.c b/Grow.c
index e4351d7f952a..c01d0945e8f5 100755
--- a/Grow.c
+++ b/Grow.c
@@ -528,6 +528,178 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
return 0;
}
+int Grow_consistency_policy(char *devname, int fd, struct context *c, struct shape *s)
+{
+ struct supertype *st;
+ struct mdinfo *sra;
+ struct mdinfo *sd;
+ char *subarray = NULL;
+ int ret = 0;
+ char container_dev[PATH_MAX];
+
+ if (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ s->consistency_policy != CONSISTENCY_POLICY_PPL) {
+ pr_err("Operation not supported for consistency policy %s\n",
+ map_num(consistency_policies, s->consistency_policy));
+ return 1;
+ }
+
+ st = super_by_fd(fd, &subarray);
+ if (!st)
+ return 1;
+
+ sra = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY|GET_LEVEL|
+ GET_DEVS|GET_STATE);
+ if (!sra) {
+ ret = 1;
+ goto free_st;
+ }
+
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ !st->ss->write_init_ppl) {
+ pr_err("%s metadata does not support PPL\n", st->ss->name);
+ ret = 1;
+ goto free_info;
+ }
+
+ if (sra->array.level != 5) {
+ pr_err("Operation not supported for array level %d\n",
+ sra->array.level);
+ ret = 1;
+ goto free_info;
+ }
+
+ if (sra->consistency_policy == (unsigned)s->consistency_policy) {
+ pr_err("Consistency policy is already %s\n",
+ map_num(consistency_policies, s->consistency_policy));
+ ret = 1;
+ goto free_info;
+ } else if (sra->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ sra->consistency_policy != CONSISTENCY_POLICY_PPL) {
+ pr_err("Current consistency policy is %s, cannot change to %s\n",
+ map_num(consistency_policies, sra->consistency_policy),
+ map_num(consistency_policies, s->consistency_policy));
+ ret = 1;
+ goto free_info;
+ }
+
+ if (subarray) {
+ char *update;
+
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
+ update = "ppl";
+ else
+ update = "no-ppl";
+
+ sprintf(container_dev, "/dev/%s", st->container_devnm);
+
+ ret = Update_subarray(container_dev, subarray, update, NULL,
+ c->verbose);
+ if (ret)
+ goto free_info;
+ }
+
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ struct mdinfo info;
+
+ if (subarray) {
+ struct mdinfo *mdi;
+ int cfd;
+
+ cfd = open(container_dev, O_RDWR|O_EXCL);
+ if (cfd < 0) {
+ pr_err("Failed to open %s\n", container_dev);
+ ret = 1;
+ goto free_info;
+ }
+
+ ret = st->ss->load_container(st, cfd, st->container_devnm);
+ close(cfd);
+
+ if (ret) {
+ pr_err("Cannot read superblock for %s\n",
+ container_dev);
+ goto free_info;
+ }
+
+ mdi = st->ss->container_content(st, subarray);
+ info = *mdi;
+ free(mdi);
+ }
+
+ for (sd = sra->devs; sd; sd = sd->next) {
+ int dfd;
+ char *devpath;
+
+ if ((sd->disk.state & (1 << MD_DISK_SYNC)) == 0)
+ continue;
+
+ devpath = map_dev(sd->disk.major, sd->disk.minor, 0);
+ dfd = dev_open(devpath, O_RDWR);
+ if (dfd < 0) {
+ pr_err("Failed to open %s\n", devpath);
+ ret = 1;
+ goto free_info;
+ }
+
+ if (!subarray) {
+ ret = st->ss->load_super(st, dfd, NULL);
+ if (ret) {
+ pr_err("Failed to load super-block.\n");
+ close(dfd);
+ goto free_info;
+ }
+
+ ret = st->ss->update_super(st, sra, "ppl", devname,
+ c->verbose, 0, NULL);
+ if (ret) {
+ close(dfd);
+ st->ss->free_super(st);
+ goto free_info;
+ }
+ st->ss->getinfo_super(st, &info, NULL);
+ }
+
+ ret |= sysfs_set_num(sra, sd, "ppl_sector", info.ppl_sector);
+ ret |= sysfs_set_num(sra, sd, "ppl_size", info.ppl_size);
+
+ if (ret) {
+ pr_err("Failed to set PPL attributes for %s\n",
+ sd->sys_name);
+ close(dfd);
+ st->ss->free_super(st);
+ goto free_info;
+ }
+
+ ret = st->ss->write_init_ppl(st, &info, dfd);
+ if (ret)
+ pr_err("Failed to write PPL\n");
+
+ close(dfd);
+
+ if (!subarray)
+ st->ss->free_super(st);
+
+ if (ret)
+ goto free_info;
+ }
+ }
+
+ ret = sysfs_set_str(sra, NULL, "consistency_policy",
+ map_num(consistency_policies,
+ s->consistency_policy));
+ if (ret)
+ pr_err("Failed to change array consistency policy\n");
+
+free_info:
+ sysfs_free(sra);
+free_st:
+ free(st);
+ free(subarray);
+
+ return ret;
+}
+
/*
* When reshaping an array we might need to backup some data.
* This is written to all spares with a 'super_block' describing it.
diff --git a/mdadm.8.in b/mdadm.8.in
index 1178ed9ba320..744c12b534bf 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -126,7 +126,7 @@ of component devices and changing the number of active devices in
Linear and RAID levels 0/1/4/5/6,
changing the RAID level between 0, 1, 5, and 6, and between 0 and 10,
changing the chunk size and layout for RAID 0,4,5,6,10 as well as adding or
-removing a write-intent bitmap.
+removing a write-intent bitmap and changing the array's consistency policy.
.TP
.B "Incremental Assembly"
@@ -1050,6 +1050,10 @@ after unclean shutdown. Implicitly selected when using
For RAID5 only, Partial Parity Log is used to close the write hole and
eliminate resync. PPL is stored in the metadata region of RAID member drives,
no additional journal drive is needed.
+
+.PP
+Can be used with \-\-grow to change the consistency policy of an active array
+in some cases. See CONSISTENCY POLICY CHANGES below.
.RE
@@ -2694,6 +2698,8 @@ RAID0, RAID4, and RAID5, and between RAID0 and RAID10 (in the near-2 mode).
.IP \(bu 4
add a write-intent bitmap to any array which supports these bitmaps, or
remove a write-intent bitmap from such an array.
+.IP \(bu 4
+change the array's consistency policy.
.PP
Using GROW on containers is currently supported only for Intel's IMSM
@@ -2850,6 +2856,16 @@ can be added. Note that if you add a bitmap stored in a file which is
in a filesystem that is on the RAID array being affected, the system
will deadlock. The bitmap must be on a separate filesystem.
+.SS CONSISTENCY POLICY CHANGES
+
+The consistency policy of an active array can be changed by using the
+.B \-\-consistency\-policy
+option in Grow mode. Currently this works only for the
+.B ppl
+and
+.B resync
+policies and allows to enable or disable the RAID5 Partial Parity Log (PPL).
+
.SH INCREMENTAL MODE
.HP 12
diff --git a/mdadm.c b/mdadm.c
index 3d0da1eca8d2..0db4cb33caa4 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1217,6 +1217,7 @@ int main(int argc, char *argv[])
s.journaldisks = 1;
continue;
case O(CREATE, 'k'):
+ case O(GROW, 'k'):
s.consistency_policy = map_name(consistency_policies,
optarg);
if (s.consistency_policy == UnSet ||
@@ -1675,6 +1676,8 @@ int main(int argc, char *argv[])
rv = Grow_reshape(devlist->devname, mdfd,
devlist->next,
data_offset, &c, &s);
+ } else if (s.consistency_policy != UnSet) {
+ rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s);
} else if (array_size == 0)
pr_err("no changes to --grow\n");
break;
diff --git a/mdadm.h b/mdadm.h
index ab1b7fc66e7c..7173b2589655 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1331,6 +1331,8 @@ extern int Grow_restart(struct supertype *st, struct mdinfo *info,
extern int Grow_continue(int mdfd, struct supertype *st,
struct mdinfo *info, char *backup_file,
int forked, int freeze_reshape);
+extern int Grow_consistency_policy(char *devname, int fd,
+ struct context *c, struct shape *s);
extern int restore_backup(struct supertype *st,
struct mdinfo *content,
--
2.12.0

View File

@ -0,0 +1,32 @@
From 13298bf138e65f094a9368173c0cfba6c12ce875 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 2 Dec 2015 13:00:15 +0100
Subject: [PATCH] udev-md-raid-assembly.rules: Skip non-ready devices
If a device isn't fully initialized (e.g if it should be
handled by multipathing) it should not be considered for
md/RAID auto-assembly. Doing so can cause incorrect results
such as causing multipath to fail during startup.
There is a convention that the udev environment variable
SYSTEMD_READY be set to zero for such devices. So change
the mdadm rules to ignore devices with SYSTEMD_READY==0.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: NeilBrown <neilb@suse.com>
---
udev-md-raid-assembly.rules | 3 +++
1 file changed, 3 insertions(+)
--- a/udev-md-raid-assembly.rules
+++ b/udev-md-raid-assembly.rules
@@ -7,6 +7,9 @@ ENV{ANACONDA}=="?*", GOTO="md_inc_end"
SUBSYSTEM!="block", GOTO="md_inc_end"
+# skip non-initialized devices
+ENV{SYSTEMD_READY}=="0", GOTO="md_inc_end"
+
# handle potential components of arrays (the ones supported by md)
ENV{ID_FS_TYPE}=="linux_raid_member", GOTO="md_inc"

View File

@ -0,0 +1,105 @@
From 3494e6b41659393c7cb97c48b45d2b1a05c6faf0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 27 Mar 2017 12:48:06 +1100
Subject: [PATCH] Retry HOT_REMOVE_DISK a few times.
HOT_REMOVE_DISK can fail with EBUSY if there are outstanding
IO requests that have not completed yet. It can sometimes
be helpful to wait a little while for these to complete.
We already do this in impose_level() when reshaping a device,
but not in Manage.c in response to an explicit --remove request.
So create hot_remove_disk() to centralize this code, and call it
wherever it makes sense to wait for a HOT_REMOVE_DISK to succeed.
Signed-off-by: NeilBrown <neilb@suse.com>
---
Grow.c | 9 +--------
Manage.c | 4 ++--
mdadm.h | 1 +
util.c | 18 ++++++++++++++++++
4 files changed, 22 insertions(+), 10 deletions(-)
--- a/Grow.c
+++ b/Grow.c
@@ -2921,7 +2921,6 @@ static int impose_level(int fd, int leve
for (d = 0, found = 0;
d < MAX_DISKS && found < array.nr_disks;
d++) {
- int cnt;
mdu_disk_info_t disk;
disk.number = d;
if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
@@ -2935,13 +2934,7 @@ static int impose_level(int fd, int leve
continue;
ioctl(fd, SET_DISK_FAULTY,
makedev(disk.major, disk.minor));
- cnt = 5;
- while (ioctl(fd, HOT_REMOVE_DISK,
- makedev(disk.major, disk.minor)) < 0
- && errno == EBUSY
- && cnt--) {
- usleep(10000);
- }
+ hot_remove_disk(fd, makedev(disk.major, disk.minor));
}
}
c = map_num(pers, level);
--- a/Manage.c
+++ b/Manage.c
@@ -1183,7 +1183,7 @@ int Manage_remove(struct supertype *tst,
else
err = 0;
} else {
- err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+ err = hot_remove_disk(fd, rdev);
if (err && errno == ENODEV) {
/* Old kernels rejected this if no personality
* is registered */
@@ -1607,7 +1607,7 @@ int Manage_subdevs(char *devname, int fd
if (dv->disposition == 'F')
/* Need to remove first */
- ioctl(fd, HOT_REMOVE_DISK, rdev);
+ hot_remove_disk(fd, rdev);
/* Make sure it isn't in use (in 2.6 or later) */
tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
if (tfd >= 0) {
--- a/mdadm.h
+++ b/mdadm.h
@@ -1500,6 +1500,7 @@ extern int add_disk(int mdfd, struct sup
struct mdinfo *sra, struct mdinfo *info);
extern int remove_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
+extern int hot_remove_disk(int mdfd, unsigned long dev);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
unsigned long long min_recovery_start(struct mdinfo *array);
--- a/util.c
+++ b/util.c
@@ -1795,6 +1795,24 @@ int remove_disk(int mdfd, struct superty
return rv;
}
+int hot_remove_disk(int mdfd, unsigned long dev)
+{
+ int cnt = 5;
+ int ret;
+
+ /* HOT_REMOVE_DISK can fail with EBUSY if there are
+ * outstanding IO requests to the device.
+ * In this case, it can be helpful to wait a little while,
+ * up to half a second, for that IO to flush.
+ */
+ while ((ret = ioctl(mdfd, HOT_REMOVE_DISK, dev)) == -1 &&
+ errno == EBUSY &&
+ cnt-- > 0)
+ usleep(10000);
+
+ return ret;
+}
+
int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
{
/* Initialise kernel's knowledge of array.

View File

@ -0,0 +1,64 @@
From 201c9389dd0560544a46d21d1300f174d94a7e60 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 27 Mar 2017 13:59:41 +1100
Subject: [PATCH] Introduce sys_hot_remove_disk()
The new hot_remove_disk() will retry HOT_REMOVE_DISK
several times in the face of EBUSY.
However we sometimes remove a device by writing "remove" to the
"state" attribute. This should be retried as well.
So introduce sys_hot_remove_disk() to repeat this action a few times.
Signed-off-by: NeilBrown <neilb@suse.com>
---
Manage.c | 6 +-----
mdadm.h | 1 +
util.c | 12 ++++++++++++
3 files changed, 14 insertions(+), 5 deletions(-)
--- a/Manage.c
+++ b/Manage.c
@@ -1177,11 +1177,7 @@ int Manage_remove(struct supertype *tst,
/* device has been removed and we don't know
* the major:minor number
*/
- int n = write(sysfd, "remove", 6);
- if (n != 6)
- err = -1;
- else
- err = 0;
+ err = sys_hot_remove_disk(sysfd);
} else {
err = hot_remove_disk(fd, rdev);
if (err && errno == ENODEV) {
--- a/mdadm.h
+++ b/mdadm.h
@@ -1501,6 +1501,7 @@ extern int add_disk(int mdfd, struct sup
extern int remove_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
extern int hot_remove_disk(int mdfd, unsigned long dev);
+extern int sys_hot_remove_disk(int statefd);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
unsigned long long min_recovery_start(struct mdinfo *array);
--- a/util.c
+++ b/util.c
@@ -1813,6 +1813,18 @@ int hot_remove_disk(int mdfd, unsigned l
return ret;
}
+int sys_hot_remove_disk(int statefd)
+{
+ int cnt = 5;
+ int ret;
+
+ while ((ret = write(statefd, "remove", 6)) == -1 &&
+ errno == EBUSY &&
+ cnt-- > 0)
+ usleep(10000);
+ return ret == 6 ? 0 : -1;
+}
+
int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
{
/* Initialise kernel's knowledge of array.

View File

@ -0,0 +1,124 @@
From be098ff0d8ef141b309a85265dfed3035da26a18 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 27 Mar 2017 14:07:37 +1100
Subject: [PATCH] Add 'force' flag to *hot_remove_disk().
In rare circumstances, the short period that *hot_remove_disk()
waits isn't long enough for IO to complete. This particularly happens
when a device is failing and many retries are still happening.
We don't want to increase the normal wait time for "mdadm --remove"
as that might be used just to test if a device is active or not, and a
delay would be problematic.
So allow "--force" to mean that mdadm should try extra hard for a
--remove to complete, waiting up to 5 seconds.
Note that this patch fixes a comment which claimed the previous
wait time was half a second, where it was really 50msec.
Signed-off-by: NeilBrown <neilb@suse.com>
---
Grow.c | 2 +-
Manage.c | 10 +++++-----
mdadm.h | 4 ++--
util.c | 10 +++++-----
4 files changed, 13 insertions(+), 13 deletions(-)
--- a/Grow.c
+++ b/Grow.c
@@ -2934,7 +2934,7 @@ static int impose_level(int fd, int leve
continue;
ioctl(fd, SET_DISK_FAULTY,
makedev(disk.major, disk.minor));
- hot_remove_disk(fd, makedev(disk.major, disk.minor));
+ hot_remove_disk(fd, makedev(disk.major, disk.minor), 1);
}
}
c = map_num(pers, level);
--- a/Manage.c
+++ b/Manage.c
@@ -1110,7 +1110,7 @@ int Manage_add(int fd, int tfd, struct m
}
int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
- int sysfd, unsigned long rdev, int verbose, char *devname)
+ int sysfd, unsigned long rdev, int force, int verbose, char *devname)
{
int lfd = -1;
int err;
@@ -1177,9 +1177,9 @@ int Manage_remove(struct supertype *tst,
/* device has been removed and we don't know
* the major:minor number
*/
- err = sys_hot_remove_disk(sysfd);
+ err = sys_hot_remove_disk(sysfd, force);
} else {
- err = hot_remove_disk(fd, rdev);
+ err = hot_remove_disk(fd, rdev, force);
if (err && errno == ENODEV) {
/* Old kernels rejected this if no personality
* is registered */
@@ -1603,7 +1603,7 @@ int Manage_subdevs(char *devname, int fd
if (dv->disposition == 'F')
/* Need to remove first */
- hot_remove_disk(fd, rdev);
+ hot_remove_disk(fd, rdev, force);
/* Make sure it isn't in use (in 2.6 or later) */
tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
if (tfd >= 0) {
@@ -1645,7 +1645,7 @@ int Manage_subdevs(char *devname, int fd
rv = -1;
} else
rv = Manage_remove(tst, fd, dv, sysfd,
- rdev, verbose,
+ rdev, force, verbose,
devname);
if (sysfd >= 0)
close(sysfd);
--- a/mdadm.h
+++ b/mdadm.h
@@ -1500,8 +1500,8 @@ extern int add_disk(int mdfd, struct sup
struct mdinfo *sra, struct mdinfo *info);
extern int remove_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
-extern int hot_remove_disk(int mdfd, unsigned long dev);
-extern int sys_hot_remove_disk(int statefd);
+extern int hot_remove_disk(int mdfd, unsigned long dev, int force);
+extern int sys_hot_remove_disk(int statefd, int force);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
unsigned long long min_recovery_start(struct mdinfo *array);
--- a/util.c
+++ b/util.c
@@ -1795,15 +1795,15 @@ int remove_disk(int mdfd, struct superty
return rv;
}
-int hot_remove_disk(int mdfd, unsigned long dev)
+int hot_remove_disk(int mdfd, unsigned long dev, int force)
{
- int cnt = 5;
+ int cnt = force ? 500 : 5;
int ret;
/* HOT_REMOVE_DISK can fail with EBUSY if there are
* outstanding IO requests to the device.
* In this case, it can be helpful to wait a little while,
- * up to half a second, for that IO to flush.
+ * up to 5 seconds if 'force' is set, or 50 msec if not.
*/
while ((ret = ioctl(mdfd, HOT_REMOVE_DISK, dev)) == -1 &&
errno == EBUSY &&
@@ -1813,9 +1813,9 @@ int hot_remove_disk(int mdfd, unsigned l
return ret;
}
-int sys_hot_remove_disk(int statefd)
+int sys_hot_remove_disk(int statefd, int force)
{
- int cnt = 5;
+ int cnt = force ? 500 : 5;
int ret;
while ((ret = write(statefd, "remove", 6)) == -1 &&

View File

@ -0,0 +1,43 @@
From f58545b578e839df2f1682fd68f36079d4c39134 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 27 Mar 2017 14:21:11 +1100
Subject: [PATCH] Detail: handle non-existent arrays better.
If you call "mdadm --detail" with a device file for an array which
doesn't exist, such as by
mknod /dev/md57 b 9 57
mdadm --detail /dev/md57
you get an unhelpful message about an inactive RAID0, and return
status is '0'. This is confusing.
So catch this possibility and print a more useful message, and
return a non-zero status.
Signed-off-by: NeilBrown <neilb@suse.com>
---
Detail.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/Detail.c b/Detail.c
index 509b0d418768..d9d1b7092167 100644
--- a/Detail.c
+++ b/Detail.c
@@ -110,6 +110,14 @@ int Detail(char *dev, struct context *c)
if (ioctl(fd, GET_ARRAY_INFO, &array) == 0) {
inactive = 0;
} else if (errno == ENODEV && sra) {
+ if (sra->array.major_version == -1 &&
+ sra->array.minor_version == -1 &&
+ sra->devs == NULL) {
+ pr_err("Array associated with md device %s does not exist.\n", dev);
+ close(fd);
+ sysfs_free(sra);
+ return rv;
+ }
array = sra->array;
inactive = 1;
} else {
--
2.12.0

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:27d0be4627d38a12ddcd1c1c3721d649d4e89e1093914497e22b57245cda8808
size 422704

3
mdadm-4.0.tar.xz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d6ae7f24ced3a0fa7b5613b32f4a589bb4881e3946a5a2c3724056254ada3a9
size 430780

View File

@ -1,3 +1,32 @@
-------------------------------------------------------------------
Fri Mar 24 04:10:22 UTC 2017 - nfbrown@suse.com
- New upstream release 4.0
Multiple bugfixes and various enhancements
including IMSM support for bad blocks and 4K block devices.
(FATE#321941)
- 0001-Generic-support-for-consistency-policy-and-PPL.patch
- 0002-Detail-show-consistency-policy.patch
- 0003-imsm-PPL-support.patch
- 0004-super1-PPL-support.patch
- 0005-Add-ppl-and-no-ppl-options-for-update.patch
- 0006-Grow-support-consistency-policy-change.patch
Add support for Partial Parity Logs
(FATE#321941)
- 0007-udev-md-raid-assembly.rules-Skip-non-ready-devices.patch
(bsc#956236)
- 0008-Retry-HOT_REMOVE_DISK-a-few-times.patch
(bsc#808647)
- 0009-Introduce-sys_hot_remove_disk.patch
(bsc#974154)
- 0010-Add-force-flag-to-hot_remove_disk.patch
(bsc#808647)
- 0011-Detail-handle-non-existent-arrays-better.patch
(bsc#966773)
-------------------------------------------------------------------
Sun May 22 15:01:13 UTC 2016 - bill@merriam.net

View File

@ -1,7 +1,7 @@
#
# spec file for package mdadm
#
# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany.
# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@ -17,7 +17,7 @@
Name: mdadm
Version: 3.4
Version: 4.0
Release: 0
BuildRequires: binutils-devel
BuildRequires: groff
@ -39,8 +39,18 @@ Source1: Software-RAID.HOWTO.tar.bz2
Source2: sysconfig.mdadm
Source3: cron.d.mdadm
Source7: mdadm.cron
Patch1: 0001-super1-Clear-memory-allocated-for-superblock-bitmap-.patch
Patch2: 0002-The-mdcheck-script-now-adds-messages-to-the-system.patch
Patch0: 0002-The-mdcheck-script-now-adds-messages-to-the-system.patch
Patch1: 0001-Generic-support-for-consistency-policy-and-PPL.patch
Patch2: 0002-Detail-show-consistency-policy.patch
Patch3: 0003-imsm-PPL-support.patch
Patch4: 0004-super1-PPL-support.patch
Patch5: 0005-Add-ppl-and-no-ppl-options-for-update.patch
Patch6: 0006-Grow-support-consistency-policy-change.patch
Patch7: 0007-udev-md-raid-assembly.rules-Skip-non-ready-devices.patch
Patch8: 0008-Retry-HOT_REMOVE_DISK-a-few-times.patch
Patch9: 0009-Introduce-sys_hot_remove_disk.patch
Patch10: 0010-Add-force-flag-to-hot_remove_disk.patch
Patch11: 0011-Detail-handle-non-existent-arrays-better.patch
%define _udevdir %(pkg-config --variable=udevdir udev)
%define _systemdshutdowndir %{_unitdir}/../system-shutdown
@ -52,8 +62,18 @@ programs but with a very different interface.
%prep
%setup -q -a1
%patch0 -p1
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%build
make %{?_smp_mflags} CC="%__cc" CXFLAGS="$RPM_OPT_FLAGS -Wno-error" SUSE=yes