Accepting request 729324 from home:ganghe:branches:Base:System

Avoid creation of mixed-blocksize PV on LVM volume groups (bsc#1149408)

OBS-URL: https://build.opensuse.org/request/show/729324
OBS-URL: https://build.opensuse.org/package/show/Base:System/lvm2?expand=0&rev=251
This commit is contained in:
Gang He 2019-09-09 06:55:46 +00:00 committed by Git OBS Bridge
parent 4af0ba23c0
commit 9dc484637c
5 changed files with 545 additions and 0 deletions

View File

@ -0,0 +1,303 @@
From 7f347698e3d09b15b4f9aed9c61239fda7b9e8c8 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Fri, 26 Jul 2019 14:21:08 -0500
Subject: [PATCH] Fix rounding writes up to sector size
Do this at two levels, although one would be enough to
fix the problem seen recently:
- Ignore any reported sector size other than 512 of 4096.
If either sector size (physical or logical) is reported
as 512, then use 512. If neither are reported as 512,
and one or the other is reported as 4096, then use 4096.
If neither is reported as either 512 or 4096, then use 512.
- When rounding up a limited write in bcache to be a multiple
of the sector size, check that the resulting write size is
not larger than the bcache block itself. (This shouldn't
happen if the sector size is 512 or 4096.)
---
lib/device/bcache.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++--
lib/device/dev-io.c | 52 +++++++++++++++++++++++++++++++
lib/device/device.h | 8 +++--
lib/label/label.c | 30 ++++++++++++++----
4 files changed, 169 insertions(+), 10 deletions(-)
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
index 7b0935352..04fbf3521 100644
--- a/lib/device/bcache.c
+++ b/lib/device/bcache.c
@@ -169,6 +169,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
sector_t offset;
sector_t nbytes;
sector_t limit_nbytes;
+ sector_t orig_nbytes;
sector_t extra_nbytes = 0;
if (((uintptr_t) data) & e->page_mask) {
@@ -191,11 +192,41 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
return false;
}
+ /*
+ * If the bcache block offset+len goes beyond where lvm is
+ * intending to write, then reduce the len being written
+ * (which is the bcache block size) so we don't write past
+ * the limit set by lvm. If after applying the limit, the
+ * resulting size is not a multiple of the sector size (512
+ * or 4096) then extend the reduced size to be a multiple of
+ * the sector size (we don't want to write partial sectors.)
+ */
if (offset + nbytes > _last_byte_offset) {
limit_nbytes = _last_byte_offset - offset;
- if (limit_nbytes % _last_byte_sector_size)
+
+ if (limit_nbytes % _last_byte_sector_size) {
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
+ /*
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
+ * should make the final write size a multiple of the
+ * sector size. This should never result in a final size
+ * larger than the bcache block size (as long as the bcache
+ * block size is a multiple of the sector size).
+ */
+ if (limit_nbytes + extra_nbytes > nbytes) {
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
+ (unsigned long long)offset,
+ (unsigned long long)nbytes,
+ (unsigned long long)limit_nbytes,
+ (unsigned long long)extra_nbytes,
+ (unsigned long long)_last_byte_sector_size);
+ extra_nbytes = 0;
+ }
+ }
+
+ orig_nbytes = nbytes;
+
if (extra_nbytes) {
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
(unsigned long long)offset,
@@ -210,6 +241,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
(unsigned long long)limit_nbytes);
nbytes = limit_nbytes;
}
+
+ /*
+ * This shouldn't happen, the reduced+extended
+ * nbytes value should never be larger than the
+ * bcache block size.
+ */
+ if (nbytes > orig_nbytes) {
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
+ (unsigned long long)offset,
+ (unsigned long long)orig_nbytes,
+ (unsigned long long)nbytes,
+ (unsigned long long)limit_nbytes,
+ (unsigned long long)extra_nbytes,
+ (unsigned long long)_last_byte_sector_size);
+ return false;
+ }
}
}
@@ -403,6 +450,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
uint64_t nbytes = len;
sector_t limit_nbytes = 0;
sector_t extra_nbytes = 0;
+ sector_t orig_nbytes = 0;
if (offset > _last_byte_offset) {
log_error("Limit write at %llu len %llu beyond last byte %llu",
@@ -415,9 +463,30 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
if (offset + nbytes > _last_byte_offset) {
limit_nbytes = _last_byte_offset - offset;
- if (limit_nbytes % _last_byte_sector_size)
+
+ if (limit_nbytes % _last_byte_sector_size) {
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
+ /*
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
+ * should make the final write size a multiple of the
+ * sector size. This should never result in a final size
+ * larger than the bcache block size (as long as the bcache
+ * block size is a multiple of the sector size).
+ */
+ if (limit_nbytes + extra_nbytes > nbytes) {
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
+ (unsigned long long)offset,
+ (unsigned long long)nbytes,
+ (unsigned long long)limit_nbytes,
+ (unsigned long long)extra_nbytes,
+ (unsigned long long)_last_byte_sector_size);
+ extra_nbytes = 0;
+ }
+ }
+
+ orig_nbytes = nbytes;
+
if (extra_nbytes) {
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
(unsigned long long)offset,
@@ -432,6 +501,22 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
(unsigned long long)limit_nbytes);
nbytes = limit_nbytes;
}
+
+ /*
+ * This shouldn't happen, the reduced+extended
+ * nbytes value should never be larger than the
+ * bcache block size.
+ */
+ if (nbytes > orig_nbytes) {
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
+ (unsigned long long)offset,
+ (unsigned long long)orig_nbytes,
+ (unsigned long long)nbytes,
+ (unsigned long long)limit_nbytes,
+ (unsigned long long)extra_nbytes,
+ (unsigned long long)_last_byte_sector_size);
+ return false;
+ }
}
where = offset;
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
index 3fe264755..5fa0b7a9e 100644
--- a/lib/device/dev-io.c
+++ b/lib/device/dev-io.c
@@ -250,6 +250,58 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
return 1;
}
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
+ unsigned int *logical_block_size)
+{
+ int fd = dev->bcache_fd;
+ int do_close = 0;
+ unsigned int pbs = 0;
+ unsigned int lbs = 0;
+
+ if (dev->physical_block_size || dev->logical_block_size) {
+ *physical_block_size = dev->physical_block_size;
+ *logical_block_size = dev->logical_block_size;
+ return 1;
+ }
+
+ if (fd <= 0) {
+ if (!dev_open_readonly(dev))
+ return 0;
+ fd = dev_fd(dev);
+ do_close = 1;
+ }
+
+ /*
+ * BLKPBSZGET from kernel comment for blk_queue_physical_block_size:
+ * "the lowest possible sector size that the hardware can operate on
+ * without reverting to read-modify-write operations"
+ */
+ if (ioctl(fd, BLKPBSZGET, &pbs)) {
+ stack;
+ pbs = 0;
+ }
+
+ /*
+ * BLKSSZGET from kernel comment for blk_queue_logical_block_size:
+ * "the lowest possible block size that the storage device can address."
+ */
+ if (ioctl(fd, BLKSSZGET, &lbs)) {
+ stack;
+ lbs = 0;
+ }
+
+ dev->physical_block_size = pbs;
+ dev->logical_block_size = lbs;
+
+ *physical_block_size = pbs;
+ *logical_block_size = lbs;
+
+ if (do_close && !dev_close_immediate(dev))
+ stack;
+
+ return 1;
+}
+
/*-----------------------------------------------------------------
* Public functions
*---------------------------------------------------------------*/
diff --git a/lib/device/device.h b/lib/device/device.h
index 30e1e79b3..bb65f841d 100644
--- a/lib/device/device.h
+++ b/lib/device/device.h
@@ -67,8 +67,10 @@ struct device {
/* private */
int fd;
int open_count;
- int phys_block_size;
- int block_size;
+ int phys_block_size; /* From either BLKPBSZGET or BLKSSZGET, don't use */
+ int block_size; /* From BLKBSZGET, returns bdev->bd_block_size, likely set by fs, probably don't use */
+ int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */
+ int logical_block_size; /* From BLKSSZGET: lowest possible block size that the storage device can address */
int read_ahead;
int bcache_fd;
uint32_t flags;
@@ -132,6 +134,8 @@ void dev_size_seqno_inc(void);
* All io should use these routines.
*/
int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size);
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
+ unsigned int *logical_block_size);
int dev_get_size(struct device *dev, uint64_t *size);
int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
diff --git a/lib/label/label.c b/lib/label/label.c
index fb7ad1d56..5d8a0f51b 100644
--- a/lib/label/label.c
+++ b/lib/label/label.c
@@ -1495,16 +1495,34 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
void dev_set_last_byte(struct device *dev, uint64_t offset)
{
- unsigned int phys_block_size = 0;
- unsigned int block_size = 0;
+ unsigned int physical_block_size = 0;
+ unsigned int logical_block_size = 0;
+ unsigned int bs;
- if (!dev_get_block_size(dev, &phys_block_size, &block_size)) {
+ if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) {
stack;
- /* FIXME ASSERT or regular error testing is missing */
- return;
+ return; /* FIXME: error path ? */
+ }
+
+ if ((physical_block_size == 512) && (logical_block_size == 512))
+ bs = 512;
+ else if ((physical_block_size == 4096) && (logical_block_size == 4096))
+ bs = 4096;
+ else if ((physical_block_size == 512) || (logical_block_size == 512)) {
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
+ physical_block_size, logical_block_size);
+ bs = 512;
+ } else if ((physical_block_size == 4096) || (logical_block_size == 4096)) {
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 4096",
+ physical_block_size, logical_block_size);
+ bs = 4096;
+ } else {
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
+ physical_block_size, logical_block_size);
+ bs = 512;
}
- bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size);
+ bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs);
}
void dev_unset_last_byte(struct device *dev)
--
2.12.3

View File

@ -0,0 +1,223 @@
From 0404539edb25e4a9d3456bb3e6b402aa2767af6b Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 1 Aug 2019 10:06:47 -0500
Subject: [PATCH] vgcreate/vgextend: restrict PVs with mixed block sizes
Avoid having PVs with different logical block sizes in the same VG.
This prevents LVs from having mixed block sizes, which can produce
file system errors.
The new config setting devices/allow_mixed_block_sizes (default 0)
can be changed to 1 to return to the unrestricted mode.
---
lib/commands/toolcontext.h | 1 +
lib/config/config_settings.h | 5 ++++
lib/metadata/metadata-exported.h | 1 +
lib/metadata/metadata.c | 44 ++++++++++++++++++++++++++++++
tools/lvmcmdline.c | 2 ++
tools/toollib.c | 47 ++++++++++++++++++++++++++++++++
tools/vgcreate.c | 2 ++
7 files changed, 102 insertions(+)
diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h
index 488752c8f..655d9f297 100644
--- a/lib/commands/toolcontext.h
+++ b/lib/commands/toolcontext.h
@@ -153,6 +153,7 @@ struct cmd_context {
unsigned include_shared_vgs:1; /* report/display cmds can reveal lockd VGs */
unsigned include_active_foreign_vgs:1; /* cmd should process foreign VGs with active LVs */
unsigned vg_read_print_access_error:1; /* print access errors from vg_read */
+ unsigned allow_mixed_block_sizes:1;
unsigned force_access_clustered:1;
unsigned lockd_gl_disable:1;
unsigned lockd_vg_disable:1;
diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h
index 527d5bd07..edfe4a31a 100644
--- a/lib/config/config_settings.h
+++ b/lib/config/config_settings.h
@@ -502,6 +502,11 @@ cfg(devices_allow_changes_with_duplicate_pvs_CFG, "allow_changes_with_duplicate_
"Enabling this setting allows the VG to be used as usual even with\n"
"uncertain devices.\n")
+cfg(devices_allow_mixed_block_sizes_CFG, "allow_mixed_block_sizes", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(2, 3, 6), NULL, 0, NULL,
+ "Allow PVs in the same VG with different logical block sizes.\n"
+ "When allowed, the user is responsible to ensure that an LV is\n"
+ "using PVs with matching block sizes when necessary.\n")
+
cfg_array(allocation_cling_tag_list_CFG, "cling_tag_list", allocation_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 77), NULL, 0, NULL,
"Advise LVM which PVs to use when searching for new space.\n"
"When searching for free space to extend an LV, the 'cling' allocation\n"
diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h
index f18587a73..e1767b78d 100644
--- a/lib/metadata/metadata-exported.h
+++ b/lib/metadata/metadata-exported.h
@@ -623,6 +623,7 @@ struct pvcreate_params {
unsigned is_remove : 1; /* is removing PVs, not creating */
unsigned preserve_existing : 1;
unsigned check_failed : 1;
+ unsigned check_consistent_block_size : 1;
};
struct lvresize_params {
diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c
index f19df3d1d..e55adc212 100644
--- a/lib/metadata/metadata.c
+++ b/lib/metadata/metadata.c
@@ -751,12 +751,40 @@ int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp)
{
struct pv_list *pvl;
unsigned int max_phys_block_size = 0;
+ unsigned int physical_block_size, logical_block_size;
+ unsigned int prev_lbs = 0;
+ int inconsistent_existing_lbs = 0;
log_debug_metadata("Adding PVs to VG %s.", vg->name);
if (vg_bad_status_bits(vg, RESIZEABLE_VG))
return_0;
+ /*
+ * Check if existing PVs have inconsistent block sizes.
+ * If so, do not enforce new devices to be consistent.
+ */
+ dm_list_iterate_items(pvl, &vg->pvs) {
+ logical_block_size = 0;
+ physical_block_size = 0;
+
+ if (!dev_get_direct_block_sizes(pvl->pv->dev, &physical_block_size, &logical_block_size))
+ continue;
+
+ if (!logical_block_size)
+ continue;
+
+ if (!prev_lbs) {
+ prev_lbs = logical_block_size;
+ continue;
+ }
+
+ if (prev_lbs != logical_block_size) {
+ inconsistent_existing_lbs = 1;
+ break;
+ }
+ }
+
dm_list_iterate_items(pvl, &pp->pvs) {
log_debug_metadata("Adding PV %s to VG %s.", pv_dev_name(pvl->pv), vg->name);
@@ -767,6 +795,22 @@ int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp)
return 0;
}
+ logical_block_size = 0;
+ physical_block_size = 0;
+
+ if (!dev_get_direct_block_sizes(pvl->pv->dev, &physical_block_size, &logical_block_size))
+ log_warn("WARNING: PV %s has unknown block size.", pv_dev_name(pvl->pv));
+
+ else if (prev_lbs && logical_block_size && (logical_block_size != prev_lbs)) {
+ if (vg->cmd->allow_mixed_block_sizes || inconsistent_existing_lbs)
+ log_debug("Devices have inconsistent block sizes (%u and %u)", prev_lbs, logical_block_size);
+ else {
+ log_error("Devices have inconsistent logical block sizes (%u and %u).",
+ prev_lbs, logical_block_size);
+ return 0;
+ }
+ }
+
if (!add_pv_to_vg(vg, pv_dev_name(pvl->pv), pvl->pv, 0)) {
log_error("PV %s cannot be added to VG %s.",
pv_dev_name(pvl->pv), vg->name);
diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c
index 30f9d8133..7d29b6fab 100644
--- a/tools/lvmcmdline.c
+++ b/tools/lvmcmdline.c
@@ -2319,6 +2319,8 @@ static int _get_current_settings(struct cmd_context *cmd)
cmd->scan_lvs = find_config_tree_bool(cmd, devices_scan_lvs_CFG, NULL);
+ cmd->allow_mixed_block_sizes = find_config_tree_bool(cmd, devices_allow_mixed_block_sizes_CFG, NULL);
+
/*
* enable_hints is set to 1 if any commands are using hints.
* use_hints is set to 1 if this command doesn't use the hints.
diff --git a/tools/toollib.c b/tools/toollib.c
index b2313f8ff..155528c4e 100644
--- a/tools/toollib.c
+++ b/tools/toollib.c
@@ -5355,6 +5355,8 @@ int pvcreate_each_device(struct cmd_context *cmd,
struct pv_list *vgpvl;
struct device_list *devl;
const char *pv_name;
+ unsigned int physical_block_size, logical_block_size;
+ unsigned int prev_pbs = 0, prev_lbs = 0;
int must_use_all = (cmd->cname->flags & MUST_USE_ALL_ARGS);
int found;
unsigned i;
@@ -5394,6 +5396,51 @@ int pvcreate_each_device(struct cmd_context *cmd,
dm_list_iterate_items(pd, &pp->arg_devices)
pd->dev = dev_cache_get(cmd, pd->name, cmd->filter);
+ /*
+ * Check for consistent block sizes.
+ */
+ if (pp->check_consistent_block_size) {
+ dm_list_iterate_items(pd, &pp->arg_devices) {
+ if (!pd->dev)
+ continue;
+
+ logical_block_size = 0;
+ physical_block_size = 0;
+
+ if (!dev_get_direct_block_sizes(pd->dev, &physical_block_size, &logical_block_size)) {
+ log_warn("WARNING: Unknown block size for device %s.", dev_name(pd->dev));
+ continue;
+ }
+
+ if (!logical_block_size) {
+ log_warn("WARNING: Unknown logical_block_size for device %s.", dev_name(pd->dev));
+ continue;
+ }
+
+ if (!prev_lbs) {
+ prev_lbs = logical_block_size;
+ prev_pbs = physical_block_size;
+ continue;
+ }
+
+ if (prev_lbs == logical_block_size) {
+ /* Require lbs to match, just warn about unmatching pbs. */
+ if (!cmd->allow_mixed_block_sizes && prev_pbs && physical_block_size &&
+ (prev_pbs != physical_block_size))
+ log_warn("WARNING: Devices have inconsistent physical block sizes (%u and %u).",
+ prev_pbs, physical_block_size);
+ continue;
+ }
+
+ if (!cmd->allow_mixed_block_sizes) {
+ log_error("Devices have inconsistent logical block sizes (%u and %u).",
+ prev_lbs, logical_block_size);
+ log_print("See lvm.conf allow_mixed_block_sizes.");
+ return 0;
+ }
+ }
+ }
+
/*
* Use process_each_pv to search all existing PVs and devices.
*
diff --git a/tools/vgcreate.c b/tools/vgcreate.c
index d594ec110..09b6a6c95 100644
--- a/tools/vgcreate.c
+++ b/tools/vgcreate.c
@@ -47,6 +47,8 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
/* Don't create a new PV on top of an existing PV like pvcreate does. */
pp.preserve_existing = 1;
+ pp.check_consistent_block_size = 1;
+
if (!vgcreate_params_set_defaults(cmd, &vp_def, NULL))
return EINVALID_CMD_LINE;
vp_def.vg_name = vg_name;
--
2.21.0

View File

@ -292,6 +292,12 @@ devices {
# Enabling this setting allows the VG to be used as usual even with
# uncertain devices.
allow_changes_with_duplicate_pvs = 0
# Configuration option devices/allow_mixed_block_sizes.
# Allow PVs in the same VG with different logical block sizes.
# When allowed, the user is responsible to ensure that an LV is
# using PVs with matching block sizes when necessary.
allow_mixed_block_sizes = 0
}
# Configuration section allocation.

View File

@ -1,3 +1,12 @@
-------------------------------------------------------------------
Mon Sep 9 11:00:25 UTC 2019 - ghe@suse.com
- Avoid creation of mixed-blocksize PV on LVM volume groups (bsc#1149408)
+ bug-1149408_Fix-rounding-writes-up-to-sector-size.patch
+ bug-1149408_vgcreate-vgextend-restrict-PVs-with-mixed-block-size.patch
- Update lvm.conf files
- add devices/allow_mixed_block_sizes item
-------------------------------------------------------------------
Mon Sep 02 11:21:03 UTC 2019 - heming.zhao@suse.com

View File

@ -58,6 +58,8 @@ Source99: baselibs.conf
%endif
# Upstream patches
Patch0001: bug-1122666_devices-drop-open-error-message.patch
Patch0002: bug-1149408_Fix-rounding-writes-up-to-sector-size.patch
Patch0003: bug-1149408_vgcreate-vgextend-restrict-PVs-with-mixed-block-size.patch
# SUSE patches: 1000+ for LVM
# Never upstream
Patch1001: cmirrord_remove_date_time_from_compilation.patch
@ -113,6 +115,8 @@ Volume Manager.
%prep
%setup -q -n LVM2.%{version}
%patch0001 -p1
%patch0002 -p1
%patch0003 -p1
%patch1001 -p1
%patch1002 -p1
%patch1003 -p1