304 lines
11 KiB
Diff
304 lines
11 KiB
Diff
|
From 7f347698e3d09b15b4f9aed9c61239fda7b9e8c8 Mon Sep 17 00:00:00 2001
|
||
|
From: David Teigland <teigland@redhat.com>
|
||
|
Date: Fri, 26 Jul 2019 14:21:08 -0500
|
||
|
Subject: [PATCH] Fix rounding writes up to sector size
|
||
|
|
||
|
Do this at two levels, although one would be enough to
|
||
|
fix the problem seen recently:
|
||
|
|
||
|
- Ignore any reported sector size other than 512 of 4096.
|
||
|
If either sector size (physical or logical) is reported
|
||
|
as 512, then use 512. If neither are reported as 512,
|
||
|
and one or the other is reported as 4096, then use 4096.
|
||
|
If neither is reported as either 512 or 4096, then use 512.
|
||
|
|
||
|
- When rounding up a limited write in bcache to be a multiple
|
||
|
of the sector size, check that the resulting write size is
|
||
|
not larger than the bcache block itself. (This shouldn't
|
||
|
happen if the sector size is 512 or 4096.)
|
||
|
---
|
||
|
lib/device/bcache.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++--
|
||
|
lib/device/dev-io.c | 52 +++++++++++++++++++++++++++++++
|
||
|
lib/device/device.h | 8 +++--
|
||
|
lib/label/label.c | 30 ++++++++++++++----
|
||
|
4 files changed, 169 insertions(+), 10 deletions(-)
|
||
|
|
||
|
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
|
||
|
index 7b0935352..04fbf3521 100644
|
||
|
--- a/lib/device/bcache.c
|
||
|
+++ b/lib/device/bcache.c
|
||
|
@@ -169,6 +169,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
||
|
sector_t offset;
|
||
|
sector_t nbytes;
|
||
|
sector_t limit_nbytes;
|
||
|
+ sector_t orig_nbytes;
|
||
|
sector_t extra_nbytes = 0;
|
||
|
|
||
|
if (((uintptr_t) data) & e->page_mask) {
|
||
|
@@ -191,11 +192,41 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
+ /*
|
||
|
+ * If the bcache block offset+len goes beyond where lvm is
|
||
|
+ * intending to write, then reduce the len being written
|
||
|
+ * (which is the bcache block size) so we don't write past
|
||
|
+ * the limit set by lvm. If after applying the limit, the
|
||
|
+ * resulting size is not a multiple of the sector size (512
|
||
|
+ * or 4096) then extend the reduced size to be a multiple of
|
||
|
+ * the sector size (we don't want to write partial sectors.)
|
||
|
+ */
|
||
|
if (offset + nbytes > _last_byte_offset) {
|
||
|
limit_nbytes = _last_byte_offset - offset;
|
||
|
- if (limit_nbytes % _last_byte_sector_size)
|
||
|
+
|
||
|
+ if (limit_nbytes % _last_byte_sector_size) {
|
||
|
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
|
||
|
|
||
|
+ /*
|
||
|
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
|
||
|
+ * should make the final write size a multiple of the
|
||
|
+ * sector size. This should never result in a final size
|
||
|
+ * larger than the bcache block size (as long as the bcache
|
||
|
+ * block size is a multiple of the sector size).
|
||
|
+ */
|
||
|
+ if (limit_nbytes + extra_nbytes > nbytes) {
|
||
|
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
|
||
|
+ (unsigned long long)offset,
|
||
|
+ (unsigned long long)nbytes,
|
||
|
+ (unsigned long long)limit_nbytes,
|
||
|
+ (unsigned long long)extra_nbytes,
|
||
|
+ (unsigned long long)_last_byte_sector_size);
|
||
|
+ extra_nbytes = 0;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ orig_nbytes = nbytes;
|
||
|
+
|
||
|
if (extra_nbytes) {
|
||
|
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
|
||
|
(unsigned long long)offset,
|
||
|
@@ -210,6 +241,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
|
||
|
(unsigned long long)limit_nbytes);
|
||
|
nbytes = limit_nbytes;
|
||
|
}
|
||
|
+
|
||
|
+ /*
|
||
|
+ * This shouldn't happen, the reduced+extended
|
||
|
+ * nbytes value should never be larger than the
|
||
|
+ * bcache block size.
|
||
|
+ */
|
||
|
+ if (nbytes > orig_nbytes) {
|
||
|
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
|
||
|
+ (unsigned long long)offset,
|
||
|
+ (unsigned long long)orig_nbytes,
|
||
|
+ (unsigned long long)nbytes,
|
||
|
+ (unsigned long long)limit_nbytes,
|
||
|
+ (unsigned long long)extra_nbytes,
|
||
|
+ (unsigned long long)_last_byte_sector_size);
|
||
|
+ return false;
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -403,6 +450,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
||
|
uint64_t nbytes = len;
|
||
|
sector_t limit_nbytes = 0;
|
||
|
sector_t extra_nbytes = 0;
|
||
|
+ sector_t orig_nbytes = 0;
|
||
|
|
||
|
if (offset > _last_byte_offset) {
|
||
|
log_error("Limit write at %llu len %llu beyond last byte %llu",
|
||
|
@@ -415,9 +463,30 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
||
|
|
||
|
if (offset + nbytes > _last_byte_offset) {
|
||
|
limit_nbytes = _last_byte_offset - offset;
|
||
|
- if (limit_nbytes % _last_byte_sector_size)
|
||
|
+
|
||
|
+ if (limit_nbytes % _last_byte_sector_size) {
|
||
|
extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
|
||
|
|
||
|
+ /*
|
||
|
+ * adding extra_nbytes to the reduced nbytes (limit_nbytes)
|
||
|
+ * should make the final write size a multiple of the
|
||
|
+ * sector size. This should never result in a final size
|
||
|
+ * larger than the bcache block size (as long as the bcache
|
||
|
+ * block size is a multiple of the sector size).
|
||
|
+ */
|
||
|
+ if (limit_nbytes + extra_nbytes > nbytes) {
|
||
|
+ log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
|
||
|
+ (unsigned long long)offset,
|
||
|
+ (unsigned long long)nbytes,
|
||
|
+ (unsigned long long)limit_nbytes,
|
||
|
+ (unsigned long long)extra_nbytes,
|
||
|
+ (unsigned long long)_last_byte_sector_size);
|
||
|
+ extra_nbytes = 0;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ orig_nbytes = nbytes;
|
||
|
+
|
||
|
if (extra_nbytes) {
|
||
|
log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
|
||
|
(unsigned long long)offset,
|
||
|
@@ -432,6 +501,22 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
|
||
|
(unsigned long long)limit_nbytes);
|
||
|
nbytes = limit_nbytes;
|
||
|
}
|
||
|
+
|
||
|
+ /*
|
||
|
+ * This shouldn't happen, the reduced+extended
|
||
|
+ * nbytes value should never be larger than the
|
||
|
+ * bcache block size.
|
||
|
+ */
|
||
|
+ if (nbytes > orig_nbytes) {
|
||
|
+ log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
|
||
|
+ (unsigned long long)offset,
|
||
|
+ (unsigned long long)orig_nbytes,
|
||
|
+ (unsigned long long)nbytes,
|
||
|
+ (unsigned long long)limit_nbytes,
|
||
|
+ (unsigned long long)extra_nbytes,
|
||
|
+ (unsigned long long)_last_byte_sector_size);
|
||
|
+ return false;
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
where = offset;
|
||
|
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
|
||
|
index 3fe264755..5fa0b7a9e 100644
|
||
|
--- a/lib/device/dev-io.c
|
||
|
+++ b/lib/device/dev-io.c
|
||
|
@@ -250,6 +250,58 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
|
||
|
+ unsigned int *logical_block_size)
|
||
|
+{
|
||
|
+ int fd = dev->bcache_fd;
|
||
|
+ int do_close = 0;
|
||
|
+ unsigned int pbs = 0;
|
||
|
+ unsigned int lbs = 0;
|
||
|
+
|
||
|
+ if (dev->physical_block_size || dev->logical_block_size) {
|
||
|
+ *physical_block_size = dev->physical_block_size;
|
||
|
+ *logical_block_size = dev->logical_block_size;
|
||
|
+ return 1;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (fd <= 0) {
|
||
|
+ if (!dev_open_readonly(dev))
|
||
|
+ return 0;
|
||
|
+ fd = dev_fd(dev);
|
||
|
+ do_close = 1;
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ * BLKPBSZGET from kernel comment for blk_queue_physical_block_size:
|
||
|
+ * "the lowest possible sector size that the hardware can operate on
|
||
|
+ * without reverting to read-modify-write operations"
|
||
|
+ */
|
||
|
+ if (ioctl(fd, BLKPBSZGET, &pbs)) {
|
||
|
+ stack;
|
||
|
+ pbs = 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ * BLKSSZGET from kernel comment for blk_queue_logical_block_size:
|
||
|
+ * "the lowest possible block size that the storage device can address."
|
||
|
+ */
|
||
|
+ if (ioctl(fd, BLKSSZGET, &lbs)) {
|
||
|
+ stack;
|
||
|
+ lbs = 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ dev->physical_block_size = pbs;
|
||
|
+ dev->logical_block_size = lbs;
|
||
|
+
|
||
|
+ *physical_block_size = pbs;
|
||
|
+ *logical_block_size = lbs;
|
||
|
+
|
||
|
+ if (do_close && !dev_close_immediate(dev))
|
||
|
+ stack;
|
||
|
+
|
||
|
+ return 1;
|
||
|
+}
|
||
|
+
|
||
|
/*-----------------------------------------------------------------
|
||
|
* Public functions
|
||
|
*---------------------------------------------------------------*/
|
||
|
diff --git a/lib/device/device.h b/lib/device/device.h
|
||
|
index 30e1e79b3..bb65f841d 100644
|
||
|
--- a/lib/device/device.h
|
||
|
+++ b/lib/device/device.h
|
||
|
@@ -67,8 +67,10 @@ struct device {
|
||
|
/* private */
|
||
|
int fd;
|
||
|
int open_count;
|
||
|
- int phys_block_size;
|
||
|
- int block_size;
|
||
|
+ int phys_block_size; /* From either BLKPBSZGET or BLKSSZGET, don't use */
|
||
|
+ int block_size; /* From BLKBSZGET, returns bdev->bd_block_size, likely set by fs, probably don't use */
|
||
|
+ int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */
|
||
|
+ int logical_block_size; /* From BLKSSZGET: lowest possible block size that the storage device can address */
|
||
|
int read_ahead;
|
||
|
int bcache_fd;
|
||
|
uint32_t flags;
|
||
|
@@ -132,6 +134,8 @@ void dev_size_seqno_inc(void);
|
||
|
* All io should use these routines.
|
||
|
*/
|
||
|
int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size);
|
||
|
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
|
||
|
+ unsigned int *logical_block_size);
|
||
|
int dev_get_size(struct device *dev, uint64_t *size);
|
||
|
int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
|
||
|
int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
|
||
|
diff --git a/lib/label/label.c b/lib/label/label.c
|
||
|
index fb7ad1d56..5d8a0f51b 100644
|
||
|
--- a/lib/label/label.c
|
||
|
+++ b/lib/label/label.c
|
||
|
@@ -1495,16 +1495,34 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
|
||
|
|
||
|
void dev_set_last_byte(struct device *dev, uint64_t offset)
|
||
|
{
|
||
|
- unsigned int phys_block_size = 0;
|
||
|
- unsigned int block_size = 0;
|
||
|
+ unsigned int physical_block_size = 0;
|
||
|
+ unsigned int logical_block_size = 0;
|
||
|
+ unsigned int bs;
|
||
|
|
||
|
- if (!dev_get_block_size(dev, &phys_block_size, &block_size)) {
|
||
|
+ if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) {
|
||
|
stack;
|
||
|
- /* FIXME ASSERT or regular error testing is missing */
|
||
|
- return;
|
||
|
+ return; /* FIXME: error path ? */
|
||
|
+ }
|
||
|
+
|
||
|
+ if ((physical_block_size == 512) && (logical_block_size == 512))
|
||
|
+ bs = 512;
|
||
|
+ else if ((physical_block_size == 4096) && (logical_block_size == 4096))
|
||
|
+ bs = 4096;
|
||
|
+ else if ((physical_block_size == 512) || (logical_block_size == 512)) {
|
||
|
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
|
||
|
+ physical_block_size, logical_block_size);
|
||
|
+ bs = 512;
|
||
|
+ } else if ((physical_block_size == 4096) || (logical_block_size == 4096)) {
|
||
|
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 4096",
|
||
|
+ physical_block_size, logical_block_size);
|
||
|
+ bs = 4096;
|
||
|
+ } else {
|
||
|
+ log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
|
||
|
+ physical_block_size, logical_block_size);
|
||
|
+ bs = 512;
|
||
|
}
|
||
|
|
||
|
- bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size);
|
||
|
+ bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs);
|
||
|
}
|
||
|
|
||
|
void dev_unset_last_byte(struct device *dev)
|
||
|
--
|
||
|
2.12.3
|
||
|
|