29b173ad99
Include latest upstream disk corruption fix - hopefully that's all of them! OBS-URL: https://build.opensuse.org/request/show/746103 OBS-URL: https://build.opensuse.org/package/show/Virtualization/qemu?expand=0&rev=505
479 lines
16 KiB
Diff
479 lines
16 KiB
Diff
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
|
Date: Tue, 4 Jun 2019 19:15:05 +0300
|
|
Subject: block/io: refactor padding
|
|
|
|
Git-commit: 7a3f542fbdfd799be4fa6f8b96dc8c1e6933fce4
|
|
|
|
We have similar padding code in bdrv_co_pwritev,
|
|
bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify
|
|
it.
|
|
|
|
[Squashed in Vladimir's qemu-iotests 077 fix
|
|
--Stefan]
|
|
|
|
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
|
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com
|
|
Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com>
|
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
Signed-off-by: Bruce Rogers <brogers@suse.com>
|
|
---
|
|
block/io.c | 365 +++++++++++++++++++++++++++++------------------------
|
|
1 file changed, 200 insertions(+), 165 deletions(-)
|
|
|
|
diff --git a/block/io.c b/block/io.c
|
|
index 06305c6ea62efabf1efb43933bf6..bfb2653d8ee853e99bd4d55a1a87 100644
|
|
--- a/block/io.c
|
|
+++ b/block/io.c
|
|
@@ -1408,28 +1408,177 @@ out:
|
|
}
|
|
|
|
/*
|
|
- * Handle a read request in coroutine context
|
|
+ * Request padding
|
|
+ *
|
|
+ * |<---- align ----->| |<----- align ---->|
|
|
+ * |<- head ->|<------------- bytes ------------->|<-- tail -->|
|
|
+ * | | | | | |
|
|
+ * -*----------$-------*-------- ... --------*-----$------------*---
|
|
+ * | | | | | |
|
|
+ * | offset | | end |
|
|
+ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end)
|
|
+ * [buf ... ) [tail_buf )
|
|
+ *
|
|
+ * @buf is an aligned allocation needed to store @head and @tail paddings. @head
|
|
+ * is placed at the beginning of @buf and @tail at the @end.
|
|
+ *
|
|
+ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk
|
|
+ * around tail, if tail exists.
|
|
+ *
|
|
+ * @merge_reads is true for small requests,
|
|
+ * if @buf_len == @head + bytes + @tail. In this case it is possible that both
|
|
+ * head and tail exist but @buf_len == align and @tail_buf == @buf.
|
|
+ */
|
|
+typedef struct BdrvRequestPadding {
|
|
+ uint8_t *buf;
|
|
+ size_t buf_len;
|
|
+ uint8_t *tail_buf;
|
|
+ size_t head;
|
|
+ size_t tail;
|
|
+ bool merge_reads;
|
|
+ QEMUIOVector local_qiov;
|
|
+} BdrvRequestPadding;
|
|
+
|
|
+static bool bdrv_init_padding(BlockDriverState *bs,
|
|
+ int64_t offset, int64_t bytes,
|
|
+ BdrvRequestPadding *pad)
|
|
+{
|
|
+ uint64_t align = bs->bl.request_alignment;
|
|
+ size_t sum;
|
|
+
|
|
+ memset(pad, 0, sizeof(*pad));
|
|
+
|
|
+ pad->head = offset & (align - 1);
|
|
+ pad->tail = ((offset + bytes) & (align - 1));
|
|
+ if (pad->tail) {
|
|
+ pad->tail = align - pad->tail;
|
|
+ }
|
|
+
|
|
+ if ((!pad->head && !pad->tail) || !bytes) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ sum = pad->head + bytes + pad->tail;
|
|
+ pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
|
|
+ pad->buf = qemu_blockalign(bs, pad->buf_len);
|
|
+ pad->merge_reads = sum == pad->buf_len;
|
|
+ if (pad->tail) {
|
|
+ pad->tail_buf = pad->buf + pad->buf_len - align;
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+static int bdrv_padding_rmw_read(BdrvChild *child,
|
|
+ BdrvTrackedRequest *req,
|
|
+ BdrvRequestPadding *pad,
|
|
+ bool zero_middle)
|
|
+{
|
|
+ QEMUIOVector local_qiov;
|
|
+ BlockDriverState *bs = child->bs;
|
|
+ uint64_t align = bs->bl.request_alignment;
|
|
+ int ret;
|
|
+
|
|
+ assert(req->serialising && pad->buf);
|
|
+
|
|
+ if (pad->head || pad->merge_reads) {
|
|
+ uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
|
|
+
|
|
+ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
|
|
+
|
|
+ if (pad->head) {
|
|
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
|
|
+ }
|
|
+ if (pad->merge_reads && pad->tail) {
|
|
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
|
+ }
|
|
+ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
|
|
+ align, &local_qiov, 0);
|
|
+ if (ret < 0) {
|
|
+ return ret;
|
|
+ }
|
|
+ if (pad->head) {
|
|
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
|
|
+ }
|
|
+ if (pad->merge_reads && pad->tail) {
|
|
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
|
+ }
|
|
+
|
|
+ if (pad->merge_reads) {
|
|
+ goto zero_mem;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (pad->tail) {
|
|
+ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
|
|
+
|
|
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
|
+ ret = bdrv_aligned_preadv(
|
|
+ child, req,
|
|
+ req->overlap_offset + req->overlap_bytes - align,
|
|
+ align, align, &local_qiov, 0);
|
|
+ if (ret < 0) {
|
|
+ return ret;
|
|
+ }
|
|
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
|
+ }
|
|
+
|
|
+zero_mem:
|
|
+ if (zero_middle) {
|
|
+ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
|
+{
|
|
+ if (pad->buf) {
|
|
+ qemu_vfree(pad->buf);
|
|
+ qemu_iovec_destroy(&pad->local_qiov);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * bdrv_pad_request
|
|
+ *
|
|
+ * Exchange request parameters with padded request if needed. Don't include RMW
|
|
+ * read of padding, bdrv_padding_rmw_read() should be called separately if
|
|
+ * needed.
|
|
+ *
|
|
+ * All parameters except @bs are in-out: they represent original request at
|
|
+ * function call and padded (if padding needed) at function finish.
|
|
+ *
|
|
+ * Function always succeeds.
|
|
*/
|
|
+static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
|
|
+ int64_t *offset, unsigned int *bytes,
|
|
+ BdrvRequestPadding *pad)
|
|
+{
|
|
+ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
|
|
+ *qiov, 0, *bytes,
|
|
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
|
|
+ *bytes += pad->head + pad->tail;
|
|
+ *offset -= pad->head;
|
|
+ *qiov = &pad->local_qiov;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
|
|
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
|
|
BdrvRequestFlags flags)
|
|
{
|
|
BlockDriverState *bs = child->bs;
|
|
- BlockDriver *drv = bs->drv;
|
|
BdrvTrackedRequest req;
|
|
-
|
|
- uint64_t align = bs->bl.request_alignment;
|
|
- uint8_t *head_buf = NULL;
|
|
- uint8_t *tail_buf = NULL;
|
|
- QEMUIOVector local_qiov;
|
|
- bool use_local_qiov = false;
|
|
+ BdrvRequestPadding pad;
|
|
int ret;
|
|
|
|
- trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
|
|
-
|
|
- if (!drv) {
|
|
- return -ENOMEDIUM;
|
|
- }
|
|
+ trace_bdrv_co_preadv(bs, offset, bytes, flags);
|
|
|
|
ret = bdrv_check_byte_request(bs, offset, bytes);
|
|
if (ret < 0) {
|
|
@@ -1443,43 +1592,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
|
|
flags |= BDRV_REQ_COPY_ON_READ;
|
|
}
|
|
|
|
- /* Align read if necessary by padding qiov */
|
|
- if (offset & (align - 1)) {
|
|
- head_buf = qemu_blockalign(bs, align);
|
|
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
|
|
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
|
|
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
- use_local_qiov = true;
|
|
-
|
|
- bytes += offset & (align - 1);
|
|
- offset = offset & ~(align - 1);
|
|
- }
|
|
-
|
|
- if ((offset + bytes) & (align - 1)) {
|
|
- if (!use_local_qiov) {
|
|
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
|
|
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
- use_local_qiov = true;
|
|
- }
|
|
- tail_buf = qemu_blockalign(bs, align);
|
|
- qemu_iovec_add(&local_qiov, tail_buf,
|
|
- align - ((offset + bytes) & (align - 1)));
|
|
-
|
|
- bytes = ROUND_UP(bytes, align);
|
|
- }
|
|
+ bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);
|
|
|
|
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
|
|
- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
|
|
- use_local_qiov ? &local_qiov : qiov,
|
|
- flags);
|
|
+ ret = bdrv_aligned_preadv(child, &req, offset, bytes,
|
|
+ bs->bl.request_alignment,
|
|
+ qiov, flags);
|
|
tracked_request_end(&req);
|
|
bdrv_dec_in_flight(bs);
|
|
|
|
- if (use_local_qiov) {
|
|
- qemu_iovec_destroy(&local_qiov);
|
|
- qemu_vfree(head_buf);
|
|
- qemu_vfree(tail_buf);
|
|
- }
|
|
+ bdrv_padding_destroy(&pad);
|
|
|
|
return ret;
|
|
}
|
|
@@ -1775,44 +1897,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
|
BdrvTrackedRequest *req)
|
|
{
|
|
BlockDriverState *bs = child->bs;
|
|
- uint8_t *buf = NULL;
|
|
QEMUIOVector local_qiov;
|
|
uint64_t align = bs->bl.request_alignment;
|
|
- unsigned int head_padding_bytes, tail_padding_bytes;
|
|
int ret = 0;
|
|
+ bool padding;
|
|
+ BdrvRequestPadding pad;
|
|
|
|
- head_padding_bytes = offset & (align - 1);
|
|
- tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
|
|
-
|
|
-
|
|
- assert(flags & BDRV_REQ_ZERO_WRITE);
|
|
- if (head_padding_bytes || tail_padding_bytes) {
|
|
- buf = qemu_blockalign(bs, align);
|
|
- qemu_iovec_init_buf(&local_qiov, buf, align);
|
|
- }
|
|
- if (head_padding_bytes) {
|
|
- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
|
|
-
|
|
- /* RMW the unaligned part before head. */
|
|
+ padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
|
+ if (padding) {
|
|
mark_request_serialising(req, align);
|
|
wait_serialising_requests(req);
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
|
|
- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
|
|
- align, &local_qiov, 0);
|
|
- if (ret < 0) {
|
|
- goto fail;
|
|
- }
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
|
|
|
|
- memset(buf + head_padding_bytes, 0, zero_bytes);
|
|
- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
|
|
- align, &local_qiov,
|
|
- flags & ~BDRV_REQ_ZERO_WRITE);
|
|
- if (ret < 0) {
|
|
- goto fail;
|
|
+ bdrv_padding_rmw_read(child, req, &pad, true);
|
|
+
|
|
+ if (pad.head || pad.merge_reads) {
|
|
+ int64_t aligned_offset = offset & ~(align - 1);
|
|
+ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
|
|
+
|
|
+ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
|
|
+ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
|
|
+ align, &local_qiov,
|
|
+ flags & ~BDRV_REQ_ZERO_WRITE);
|
|
+ if (ret < 0 || pad.merge_reads) {
|
|
+ /* Error or all work is done */
|
|
+ goto out;
|
|
+ }
|
|
+ offset += write_bytes - pad.head;
|
|
+ bytes -= write_bytes - pad.head;
|
|
}
|
|
- offset += zero_bytes;
|
|
- bytes -= zero_bytes;
|
|
}
|
|
|
|
assert(!bytes || (offset & (align - 1)) == 0);
|
|
@@ -1822,7 +1934,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
|
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
|
|
NULL, flags);
|
|
if (ret < 0) {
|
|
- goto fail;
|
|
+ goto out;
|
|
}
|
|
bytes -= aligned_bytes;
|
|
offset += aligned_bytes;
|
|
@@ -1830,26 +1942,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
|
|
|
assert(!bytes || (offset & (align - 1)) == 0);
|
|
if (bytes) {
|
|
- assert(align == tail_padding_bytes + bytes);
|
|
- /* RMW the unaligned part after tail. */
|
|
- mark_request_serialising(req, align);
|
|
- wait_serialising_requests(req);
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
|
- ret = bdrv_aligned_preadv(child, req, offset, align,
|
|
- align, &local_qiov, 0);
|
|
- if (ret < 0) {
|
|
- goto fail;
|
|
- }
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
|
+ assert(align == pad.tail + bytes);
|
|
|
|
- memset(buf, 0, bytes);
|
|
+ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
|
|
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
|
|
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
|
|
}
|
|
-fail:
|
|
- qemu_vfree(buf);
|
|
- return ret;
|
|
|
|
+out:
|
|
+ bdrv_padding_destroy(&pad);
|
|
+
|
|
+ return ret;
|
|
}
|
|
|
|
/*
|
|
@@ -1862,10 +1965,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
|
|
BlockDriverState *bs = child->bs;
|
|
BdrvTrackedRequest req;
|
|
uint64_t align = bs->bl.request_alignment;
|
|
- uint8_t *head_buf = NULL;
|
|
- uint8_t *tail_buf = NULL;
|
|
- QEMUIOVector local_qiov;
|
|
- bool use_local_qiov = false;
|
|
+ BdrvRequestPadding pad;
|
|
int ret;
|
|
|
|
trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
|
|
@@ -1892,86 +1992,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
|
|
goto out;
|
|
}
|
|
|
|
- if (offset & (align - 1)) {
|
|
- QEMUIOVector head_qiov;
|
|
-
|
|
+ if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
|
|
mark_request_serialising(&req, align);
|
|
wait_serialising_requests(&req);
|
|
-
|
|
- head_buf = qemu_blockalign(bs, align);
|
|
- qemu_iovec_init_buf(&head_qiov, head_buf, align);
|
|
-
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
|
|
- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
|
|
- align, &head_qiov, 0);
|
|
- if (ret < 0) {
|
|
- goto fail;
|
|
- }
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
|
|
-
|
|
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
|
|
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
|
|
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
- use_local_qiov = true;
|
|
-
|
|
- bytes += offset & (align - 1);
|
|
- offset = offset & ~(align - 1);
|
|
-
|
|
- /* We have read the tail already if the request is smaller
|
|
- * than one aligned block.
|
|
- */
|
|
- if (bytes < align) {
|
|
- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
|
|
- bytes = align;
|
|
- }
|
|
- }
|
|
-
|
|
- if ((offset + bytes) & (align - 1)) {
|
|
- QEMUIOVector tail_qiov;
|
|
- size_t tail_bytes;
|
|
- bool waited;
|
|
-
|
|
- mark_request_serialising(&req, align);
|
|
- waited = wait_serialising_requests(&req);
|
|
- assert(!waited || !use_local_qiov);
|
|
-
|
|
- tail_buf = qemu_blockalign(bs, align);
|
|
- qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
|
|
-
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
|
- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
|
|
- align, align, &tail_qiov, 0);
|
|
- if (ret < 0) {
|
|
- goto fail;
|
|
- }
|
|
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
|
-
|
|
- if (!use_local_qiov) {
|
|
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
|
|
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
- use_local_qiov = true;
|
|
- }
|
|
-
|
|
- tail_bytes = (offset + bytes) & (align - 1);
|
|
- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
|
|
-
|
|
- bytes = ROUND_UP(bytes, align);
|
|
+ bdrv_padding_rmw_read(child, &req, &pad, false);
|
|
}
|
|
|
|
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
|
|
- use_local_qiov ? &local_qiov : qiov,
|
|
- flags);
|
|
+ qiov, flags);
|
|
|
|
-fail:
|
|
+ bdrv_padding_destroy(&pad);
|
|
|
|
- if (use_local_qiov) {
|
|
- qemu_iovec_destroy(&local_qiov);
|
|
- }
|
|
- qemu_vfree(head_buf);
|
|
- qemu_vfree(tail_buf);
|
|
out:
|
|
tracked_request_end(&req);
|
|
bdrv_dec_in_flight(bs);
|
|
+
|
|
return ret;
|
|
}
|
|
|