SHA256
1
0
forked from pool/qemu

Accepting request 746103 from home:bfrogers:branches:Virtualization

Include latest upstream disk corruption fix - hopefully that's all of them!

OBS-URL: https://build.opensuse.org/request/show/746103
OBS-URL: https://build.opensuse.org/package/show/Virtualization/qemu?expand=0&rev=505
This commit is contained in:
Bruce Rogers 2019-11-06 23:27:48 +00:00 committed by Git OBS Bridge
parent c76fbac963
commit a47d6566ab
9 changed files with 989 additions and 43 deletions

View File

@ -0,0 +1,57 @@
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 1 Nov 2019 16:25:09 +0100
Subject: block: Add bdrv_co_get_self_request()
Git-commit: c28107e9e55b11cd35cf3dc2505e3e69d10dcf13
Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20191101152510.11719-3-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
block/io.c | 18 ++++++++++++++++++
include/block/block_int.h | 1 +
2 files changed, 19 insertions(+)
diff --git a/block/io.c b/block/io.c
index d9f632f450b744515d3f91d2aa26..0366daf27f4a2133148716135d63 100644
--- a/block/io.c
+++ b/block/io.c
@@ -721,6 +721,24 @@ static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
(req->bytes == req->overlap_bytes);
}
+/**
+ * Return the tracked request on @bs for the current coroutine, or
+ * NULL if there is none.
+ */
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
+{
+ BdrvTrackedRequest *req;
+ Coroutine *self = qemu_coroutine_self();
+
+ QLIST_FOREACH(req, &bs->tracked_requests, list) {
+ if (req->co == self) {
+ return req;
+ }
+ }
+
+ return NULL;
+}
+
/**
* Round a region to cluster boundaries
*/
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 4465b022424c23aea82942547cc3..05ee6b4866f84a9ab9ba0dcda5da 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -964,6 +964,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
int get_tmp_filename(char *filename, int size);
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,

View File

@ -0,0 +1,126 @@
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 1 Nov 2019 16:25:08 +0100
Subject: block: Make wait/mark serialising requests public
Git-commit: 304d9d7f034ff7f5e1e66a65b7f720f63a72c57e
Make both bdrv_mark_request_serialising() and
bdrv_wait_serialising_requests() public so they can be used from block
drivers.
Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20191101152510.11719-2-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
block/io.c | 24 ++++++++++++------------
include/block/block_int.h | 3 +++
2 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/block/io.c b/block/io.c
index bfb2653d8ee853e99bd4d55a1a87..d9f632f450b744515d3f91d2aa26 100644
--- a/block/io.c
+++ b/block/io.c
@@ -694,7 +694,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
qemu_co_mutex_unlock(&bs->reqs_lock);
}
-static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
int64_t overlap_offset = req->offset & ~(align - 1);
uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
@@ -784,7 +784,7 @@ void bdrv_dec_in_flight(BlockDriverState *bs)
bdrv_wakeup(bs);
}
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
{
BlockDriverState *bs = self->bs;
BdrvTrackedRequest *req;
@@ -1340,14 +1340,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
* with each other for the same cluster. For example, in copy-on-read
* it ensures that the CoR read and write operations are atomic and
* guest writes cannot interleave between them. */
- mark_request_serialising(req, bdrv_get_cluster_size(bs));
+ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
}
/* BDRV_REQ_SERIALISING is only for write operation */
assert(!(flags & BDRV_REQ_SERIALISING));
if (!(flags & BDRV_REQ_NO_SERIALISING)) {
- wait_serialising_requests(req);
+ bdrv_wait_serialising_requests(req);
}
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -1736,10 +1736,10 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
assert(!(flags & ~BDRV_REQ_MASK));
if (flags & BDRV_REQ_SERIALISING) {
- mark_request_serialising(req, bdrv_get_cluster_size(bs));
+ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
}
- waited = wait_serialising_requests(req);
+ waited = bdrv_wait_serialising_requests(req);
assert(!waited || !req->serialising ||
is_request_serialising_and_aligned(req));
@@ -1905,8 +1905,8 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
padding = bdrv_init_padding(bs, offset, bytes, &pad);
if (padding) {
- mark_request_serialising(req, align);
- wait_serialising_requests(req);
+ bdrv_mark_request_serialising(req, align);
+ bdrv_wait_serialising_requests(req);
bdrv_padding_rmw_read(child, req, &pad, true);
@@ -1993,8 +1993,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
}
if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
- mark_request_serialising(&req, align);
- wait_serialising_requests(&req);
+ bdrv_mark_request_serialising(&req, align);
+ bdrv_wait_serialising_requests(&req);
bdrv_padding_rmw_read(child, &req, &pad, false);
}
@@ -3078,7 +3078,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
/* BDRV_REQ_SERIALISING is only for write operation */
assert(!(read_flags & BDRV_REQ_SERIALISING));
if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
- wait_serialising_requests(&req);
+ bdrv_wait_serialising_requests(&req);
}
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
@@ -3205,7 +3205,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
* new area, we need to make sure that no write requests are made to it
* concurrently or they might be overwritten by preallocation. */
if (new_bytes) {
- mark_request_serialising(&req, 1);
+ bdrv_mark_request_serialising(&req, 1);
}
if (bs->read_only) {
error_setg(errp, "Image is read-only");
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 3aa1e832a8fdf32bf3f33d1e1508..4465b022424c23aea82942547cc3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -962,6 +962,9 @@ extern unsigned int bdrv_drain_all_count;
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
+bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
+void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
+
int get_tmp_filename(char *filename, int size);
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
const char *filename);

View File

@ -0,0 +1,67 @@
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 1 Nov 2019 16:25:10 +0100
Subject: block/file-posix: Let post-EOF fallocate serialize
Git-commit: 292d06b925b2787ee6f2430996b95651cae42fce
The XFS kernel driver has a bug that may cause data corruption for qcow2
images as of qemu commit c8bb23cbdbe32f. We can work around it by
treating post-EOF fallocates as serializing up until infinity (INT64_MAX
in practice).
Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20191101152510.11719-4-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
block/file-posix.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/block/file-posix.c b/block/file-posix.c
index 992eb4a798b99fe02e93103028c6..c5df61b47735ee7e5201cebec46c 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2623,6 +2623,42 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
RawPosixAIOData acb;
ThreadPoolFunc *handler;
+#ifdef CONFIG_FALLOCATE
+ if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
+ BdrvTrackedRequest *req;
+ uint64_t end;
+
+ /*
+ * This is a workaround for a bug in the Linux XFS driver,
+ * where writes submitted through the AIO interface will be
+ * discarded if they happen beyond a concurrently running
+ * fallocate() that increases the file length (i.e., both the
+ * write and the fallocate() happen beyond the EOF).
+ *
+ * To work around it, we extend the tracked request for this
+ * zero write until INT64_MAX (effectively infinity), and mark
+ * it as serializing.
+ *
+ * We have to enable this workaround for all filesystems and
+ * AIO modes (not just XFS with aio=native), because for
+ * remote filesystems we do not know the host configuration.
+ */
+
+ req = bdrv_co_get_self_request(bs);
+ assert(req);
+ assert(req->type == BDRV_TRACKED_WRITE);
+ assert(req->offset <= offset);
+ assert(req->offset + req->bytes >= offset + bytes);
+
+ end = INT64_MAX & -(uint64_t)bs->bl.request_alignment;
+ req->bytes = end - req->offset;
+ req->overlap_bytes = req->bytes;
+
+ bdrv_mark_request_serialising(req, bs->bl.request_alignment);
+ bdrv_wait_serialising_requests(req);
+ }
+#endif
+
acb = (RawPosixAIOData) {
.bs = bs,
.aio_fildes = s->fd,

View File

@ -0,0 +1,478 @@
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Tue, 4 Jun 2019 19:15:05 +0300
Subject: block/io: refactor padding
Git-commit: 7a3f542fbdfd799be4fa6f8b96dc8c1e6933fce4
We have similar padding code in bdrv_co_pwritev,
bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify
it.
[Squashed in Vladimir's qemu-iotests 077 fix
--Stefan]
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com
Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
block/io.c | 365 +++++++++++++++++++++++++++++------------------------
1 file changed, 200 insertions(+), 165 deletions(-)
diff --git a/block/io.c b/block/io.c
index 06305c6ea62efabf1efb43933bf6..bfb2653d8ee853e99bd4d55a1a87 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1408,28 +1408,177 @@ out:
}
/*
- * Handle a read request in coroutine context
+ * Request padding
+ *
+ * |<---- align ----->| |<----- align ---->|
+ * |<- head ->|<------------- bytes ------------->|<-- tail -->|
+ * | | | | | |
+ * -*----------$-------*-------- ... --------*-----$------------*---
+ * | | | | | |
+ * | offset | | end |
+ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end)
+ * [buf ... ) [tail_buf )
+ *
+ * @buf is an aligned allocation needed to store @head and @tail paddings. @head
+ * is placed at the beginning of @buf and @tail at the @end.
+ *
+ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk
+ * around tail, if tail exists.
+ *
+ * @merge_reads is true for small requests,
+ * if @buf_len == @head + bytes + @tail. In this case it is possible that both
+ * head and tail exist but @buf_len == align and @tail_buf == @buf.
+ */
+typedef struct BdrvRequestPadding {
+ uint8_t *buf;
+ size_t buf_len;
+ uint8_t *tail_buf;
+ size_t head;
+ size_t tail;
+ bool merge_reads;
+ QEMUIOVector local_qiov;
+} BdrvRequestPadding;
+
+static bool bdrv_init_padding(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ BdrvRequestPadding *pad)
+{
+ uint64_t align = bs->bl.request_alignment;
+ size_t sum;
+
+ memset(pad, 0, sizeof(*pad));
+
+ pad->head = offset & (align - 1);
+ pad->tail = ((offset + bytes) & (align - 1));
+ if (pad->tail) {
+ pad->tail = align - pad->tail;
+ }
+
+ if ((!pad->head && !pad->tail) || !bytes) {
+ return false;
+ }
+
+ sum = pad->head + bytes + pad->tail;
+ pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
+ pad->buf = qemu_blockalign(bs, pad->buf_len);
+ pad->merge_reads = sum == pad->buf_len;
+ if (pad->tail) {
+ pad->tail_buf = pad->buf + pad->buf_len - align;
+ }
+
+ return true;
+}
+
+static int bdrv_padding_rmw_read(BdrvChild *child,
+ BdrvTrackedRequest *req,
+ BdrvRequestPadding *pad,
+ bool zero_middle)
+{
+ QEMUIOVector local_qiov;
+ BlockDriverState *bs = child->bs;
+ uint64_t align = bs->bl.request_alignment;
+ int ret;
+
+ assert(req->serialising && pad->buf);
+
+ if (pad->head || pad->merge_reads) {
+ uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
+
+ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
+
+ if (pad->head) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ }
+ if (pad->merge_reads && pad->tail) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ }
+ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
+ align, &local_qiov, 0);
+ if (ret < 0) {
+ return ret;
+ }
+ if (pad->head) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+ }
+ if (pad->merge_reads && pad->tail) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ }
+
+ if (pad->merge_reads) {
+ goto zero_mem;
+ }
+ }
+
+ if (pad->tail) {
+ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
+
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ ret = bdrv_aligned_preadv(
+ child, req,
+ req->overlap_offset + req->overlap_bytes - align,
+ align, align, &local_qiov, 0);
+ if (ret < 0) {
+ return ret;
+ }
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ }
+
+zero_mem:
+ if (zero_middle) {
+ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
+ }
+
+ return 0;
+}
+
+static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+{
+ if (pad->buf) {
+ qemu_vfree(pad->buf);
+ qemu_iovec_destroy(&pad->local_qiov);
+ }
+}
+
+/*
+ * bdrv_pad_request
+ *
+ * Exchange request parameters with padded request if needed. Don't include RMW
+ * read of padding, bdrv_padding_rmw_read() should be called separately if
+ * needed.
+ *
+ * All parameters except @bs are in-out: they represent original request at
+ * function call and padded (if padding needed) at function finish.
+ *
+ * Function always succeeds.
*/
+static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
+ int64_t *offset, unsigned int *bytes,
+ BdrvRequestPadding *pad)
+{
+ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
+ return false;
+ }
+
+ qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
+ *qiov, 0, *bytes,
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
+ *bytes += pad->head + pad->tail;
+ *offset -= pad->head;
+ *qiov = &pad->local_qiov;
+
+ return true;
+}
+
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
BlockDriverState *bs = child->bs;
- BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
-
- uint64_t align = bs->bl.request_alignment;
- uint8_t *head_buf = NULL;
- uint8_t *tail_buf = NULL;
- QEMUIOVector local_qiov;
- bool use_local_qiov = false;
+ BdrvRequestPadding pad;
int ret;
- trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
-
- if (!drv) {
- return -ENOMEDIUM;
- }
+ trace_bdrv_co_preadv(bs, offset, bytes, flags);
ret = bdrv_check_byte_request(bs, offset, bytes);
if (ret < 0) {
@@ -1443,43 +1592,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
flags |= BDRV_REQ_COPY_ON_READ;
}
- /* Align read if necessary by padding qiov */
- if (offset & (align - 1)) {
- head_buf = qemu_blockalign(bs, align);
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
-
- bytes += offset & (align - 1);
- offset = offset & ~(align - 1);
- }
-
- if ((offset + bytes) & (align - 1)) {
- if (!use_local_qiov) {
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
- }
- tail_buf = qemu_blockalign(bs, align);
- qemu_iovec_add(&local_qiov, tail_buf,
- align - ((offset + bytes) & (align - 1)));
-
- bytes = ROUND_UP(bytes, align);
- }
+ bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
- use_local_qiov ? &local_qiov : qiov,
- flags);
+ ret = bdrv_aligned_preadv(child, &req, offset, bytes,
+ bs->bl.request_alignment,
+ qiov, flags);
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
- if (use_local_qiov) {
- qemu_iovec_destroy(&local_qiov);
- qemu_vfree(head_buf);
- qemu_vfree(tail_buf);
- }
+ bdrv_padding_destroy(&pad);
return ret;
}
@@ -1775,44 +1897,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
- uint8_t *buf = NULL;
QEMUIOVector local_qiov;
uint64_t align = bs->bl.request_alignment;
- unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
+ bool padding;
+ BdrvRequestPadding pad;
- head_padding_bytes = offset & (align - 1);
- tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
-
-
- assert(flags & BDRV_REQ_ZERO_WRITE);
- if (head_padding_bytes || tail_padding_bytes) {
- buf = qemu_blockalign(bs, align);
- qemu_iovec_init_buf(&local_qiov, buf, align);
- }
- if (head_padding_bytes) {
- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
-
- /* RMW the unaligned part before head. */
+ padding = bdrv_init_padding(bs, offset, bytes, &pad);
+ if (padding) {
mark_request_serialising(req, align);
wait_serialising_requests(req);
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
- align, &local_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
- memset(buf + head_padding_bytes, 0, zero_bytes);
- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
- align, &local_qiov,
- flags & ~BDRV_REQ_ZERO_WRITE);
- if (ret < 0) {
- goto fail;
+ bdrv_padding_rmw_read(child, req, &pad, true);
+
+ if (pad.head || pad.merge_reads) {
+ int64_t aligned_offset = offset & ~(align - 1);
+ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
+
+ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
+ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
+ align, &local_qiov,
+ flags & ~BDRV_REQ_ZERO_WRITE);
+ if (ret < 0 || pad.merge_reads) {
+ /* Error or all work is done */
+ goto out;
+ }
+ offset += write_bytes - pad.head;
+ bytes -= write_bytes - pad.head;
}
- offset += zero_bytes;
- bytes -= zero_bytes;
}
assert(!bytes || (offset & (align - 1)) == 0);
@@ -1822,7 +1934,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
- goto fail;
+ goto out;
}
bytes -= aligned_bytes;
offset += aligned_bytes;
@@ -1830,26 +1942,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
assert(!bytes || (offset & (align - 1)) == 0);
if (bytes) {
- assert(align == tail_padding_bytes + bytes);
- /* RMW the unaligned part after tail. */
- mark_request_serialising(req, align);
- wait_serialising_requests(req);
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(child, req, offset, align,
- align, &local_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ assert(align == pad.tail + bytes);
- memset(buf, 0, bytes);
+ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
-fail:
- qemu_vfree(buf);
- return ret;
+out:
+ bdrv_padding_destroy(&pad);
+
+ return ret;
}
/*
@@ -1862,10 +1965,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
BlockDriverState *bs = child->bs;
BdrvTrackedRequest req;
uint64_t align = bs->bl.request_alignment;
- uint8_t *head_buf = NULL;
- uint8_t *tail_buf = NULL;
- QEMUIOVector local_qiov;
- bool use_local_qiov = false;
+ BdrvRequestPadding pad;
int ret;
trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
@@ -1892,86 +1992,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
goto out;
}
- if (offset & (align - 1)) {
- QEMUIOVector head_qiov;
-
+ if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
mark_request_serialising(&req, align);
wait_serialising_requests(&req);
-
- head_buf = qemu_blockalign(bs, align);
- qemu_iovec_init_buf(&head_qiov, head_buf, align);
-
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
- align, &head_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
-
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
-
- bytes += offset & (align - 1);
- offset = offset & ~(align - 1);
-
- /* We have read the tail already if the request is smaller
- * than one aligned block.
- */
- if (bytes < align) {
- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
- bytes = align;
- }
- }
-
- if ((offset + bytes) & (align - 1)) {
- QEMUIOVector tail_qiov;
- size_t tail_bytes;
- bool waited;
-
- mark_request_serialising(&req, align);
- waited = wait_serialising_requests(&req);
- assert(!waited || !use_local_qiov);
-
- tail_buf = qemu_blockalign(bs, align);
- qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
-
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
- align, align, &tail_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
-
- if (!use_local_qiov) {
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
- }
-
- tail_bytes = (offset + bytes) & (align - 1);
- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
-
- bytes = ROUND_UP(bytes, align);
+ bdrv_padding_rmw_read(child, &req, &pad, false);
}
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
- use_local_qiov ? &local_qiov : qiov,
- flags);
+ qiov, flags);
-fail:
+ bdrv_padding_destroy(&pad);
- if (use_local_qiov) {
- qemu_iovec_destroy(&local_qiov);
- }
- qemu_vfree(head_buf);
- qemu_vfree(tail_buf);
out:
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
+
return ret;
}

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd9adb317ef86b16cecc43d9a223db3bc3c75b1697ea85beeb3f0f3dda24be17
size 54480
oid sha256:ab74c500798292a89444c80690bf83f6aad325186f5f98047951dd78c52c3bd0
size 59948

View File

@ -1,3 +1,15 @@
-------------------------------------------------------------------
Wed Nov 6 20:43:48 UTC 2019 - Bruce Rogers <brogers@suse.com>
- Work around a host kernel xfs bug which can result in qcow2 image
corruption
block-io-refactor-padding.patch
util-iov-introduce-qemu_iovec_init_exten.patch
block-Make-wait-mark-serialising-request.patch
block-Add-bdrv_co_get_self_request.patch
block-file-posix-Let-post-EOF-fallocate-.patch
- Patch queue updated from git://github.com/openSUSE/qemu.git opensuse-4.1
-------------------------------------------------------------------
Mon Nov 4 13:47:02 UTC 2019 - Stefan Brüns <stefan.bruens@rwth-aachen.de>

View File

@ -149,46 +149,51 @@ Patch00023: make-release-pull-in-edk2-submodules-so-.patch
Patch00024: roms-Makefile.edk2-don-t-pull-in-submodu.patch
Patch00025: coroutine-Add-qemu_co_mutex_assert_locke.patch
Patch00026: qcow2-Fix-corruption-bug-in-qcow2_detect.patch
Patch00027: XXX-dont-dump-core-on-sigabort.patch
Patch00028: qemu-binfmt-conf-Modify-default-path.patch
Patch00029: qemu-cvs-gettimeofday.patch
Patch00030: qemu-cvs-ioctl_debug.patch
Patch00031: qemu-cvs-ioctl_nodirection.patch
Patch00032: linux-user-add-binfmt-wrapper-for-argv-0.patch
Patch00033: PPC-KVM-Disable-mmu-notifier-check.patch
Patch00034: linux-user-binfmt-support-host-binaries.patch
Patch00035: linux-user-Fake-proc-cpuinfo.patch
Patch00036: linux-user-use-target_ulong.patch
Patch00037: Make-char-muxer-more-robust-wrt-small-FI.patch
Patch00038: linux-user-lseek-explicitly-cast-non-set.patch
Patch00039: AIO-Reduce-number-of-threads-for-32bit-h.patch
Patch00040: xen_disk-Add-suse-specific-flush-disable.patch
Patch00041: qemu-bridge-helper-reduce-security-profi.patch
Patch00042: qemu-binfmt-conf-use-qemu-ARCH-binfmt.patch
Patch00043: linux-user-properly-test-for-infinite-ti.patch
Patch00044: roms-Makefile-pass-a-packaging-timestamp.patch
Patch00045: Raise-soft-address-space-limit-to-hard-l.patch
Patch00046: increase-x86_64-physical-bits-to-42.patch
Patch00047: vga-Raise-VRAM-to-16-MiB-for-pc-0.15-and.patch
Patch00048: i8254-Fix-migration-from-SLE11-SP2.patch
Patch00049: acpi_piix4-Fix-migration-from-SLE11-SP2.patch
Patch00050: Switch-order-of-libraries-for-mpath-supp.patch
Patch00051: Make-installed-scripts-explicitly-python.patch
Patch00052: hw-smbios-handle-both-file-formats-regar.patch
Patch00053: xen-add-block-resize-support-for-xen-dis.patch
Patch00054: tests-qemu-iotests-Triple-timeout-of-i-o.patch
Patch00055: tests-Fix-block-tests-to-be-compatible-w.patch
Patch00056: xen-ignore-live-parameter-from-xen-save-.patch
Patch00057: Conditionalize-ui-bitmap-installation-be.patch
Patch00058: tests-change-error-message-in-test-162.patch
Patch00059: hw-usb-hcd-xhci-Fix-GCC-9-build-warning.patch
Patch00060: hw-usb-dev-mtp-Fix-GCC-9-build-warning.patch
Patch00061: hw-intc-exynos4210_gic-provide-more-room.patch
Patch00062: configure-only-populate-roms-if-softmmu.patch
Patch00063: pc-bios-s390-ccw-net-avoid-warning-about.patch
Patch00064: roms-change-cross-compiler-naming-to-be-.patch
Patch00065: tests-Disable-some-block-tests-for-now.patch
Patch00066: test-add-mapping-from-arch-of-i686-to-qe.patch
Patch00027: block-io-refactor-padding.patch
Patch00028: util-iov-introduce-qemu_iovec_init_exten.patch
Patch00029: block-Make-wait-mark-serialising-request.patch
Patch00030: block-Add-bdrv_co_get_self_request.patch
Patch00031: block-file-posix-Let-post-EOF-fallocate-.patch
Patch00032: XXX-dont-dump-core-on-sigabort.patch
Patch00033: qemu-binfmt-conf-Modify-default-path.patch
Patch00034: qemu-cvs-gettimeofday.patch
Patch00035: qemu-cvs-ioctl_debug.patch
Patch00036: qemu-cvs-ioctl_nodirection.patch
Patch00037: linux-user-add-binfmt-wrapper-for-argv-0.patch
Patch00038: PPC-KVM-Disable-mmu-notifier-check.patch
Patch00039: linux-user-binfmt-support-host-binaries.patch
Patch00040: linux-user-Fake-proc-cpuinfo.patch
Patch00041: linux-user-use-target_ulong.patch
Patch00042: Make-char-muxer-more-robust-wrt-small-FI.patch
Patch00043: linux-user-lseek-explicitly-cast-non-set.patch
Patch00044: AIO-Reduce-number-of-threads-for-32bit-h.patch
Patch00045: xen_disk-Add-suse-specific-flush-disable.patch
Patch00046: qemu-bridge-helper-reduce-security-profi.patch
Patch00047: qemu-binfmt-conf-use-qemu-ARCH-binfmt.patch
Patch00048: linux-user-properly-test-for-infinite-ti.patch
Patch00049: roms-Makefile-pass-a-packaging-timestamp.patch
Patch00050: Raise-soft-address-space-limit-to-hard-l.patch
Patch00051: increase-x86_64-physical-bits-to-42.patch
Patch00052: vga-Raise-VRAM-to-16-MiB-for-pc-0.15-and.patch
Patch00053: i8254-Fix-migration-from-SLE11-SP2.patch
Patch00054: acpi_piix4-Fix-migration-from-SLE11-SP2.patch
Patch00055: Switch-order-of-libraries-for-mpath-supp.patch
Patch00056: Make-installed-scripts-explicitly-python.patch
Patch00057: hw-smbios-handle-both-file-formats-regar.patch
Patch00058: xen-add-block-resize-support-for-xen-dis.patch
Patch00059: tests-qemu-iotests-Triple-timeout-of-i-o.patch
Patch00060: tests-Fix-block-tests-to-be-compatible-w.patch
Patch00061: xen-ignore-live-parameter-from-xen-save-.patch
Patch00062: Conditionalize-ui-bitmap-installation-be.patch
Patch00063: tests-change-error-message-in-test-162.patch
Patch00064: hw-usb-hcd-xhci-Fix-GCC-9-build-warning.patch
Patch00065: hw-usb-dev-mtp-Fix-GCC-9-build-warning.patch
Patch00066: hw-intc-exynos4210_gic-provide-more-room.patch
Patch00067: configure-only-populate-roms-if-softmmu.patch
Patch00068: pc-bios-s390-ccw-net-avoid-warning-about.patch
Patch00069: roms-change-cross-compiler-naming-to-be-.patch
Patch00070: tests-Disable-some-block-tests-for-now.patch
Patch00071: test-add-mapping-from-arch-of-i686-to-qe.patch
# Patches applied in roms/seabios/:
Patch01000: seabios-use-python2-explicitly-as-needed.patch
Patch01001: seabios-switch-to-python3-as-needed.patch
@ -945,6 +950,11 @@ This package provides a service file for starting and stopping KSM.
%patch00064 -p1
%patch00065 -p1
%patch00066 -p1
%patch00067 -p1
%patch00068 -p1
%patch00069 -p1
%patch00070 -p1
%patch00071 -p1
%patch01000 -p1
%patch01001 -p1
%patch01002 -p1

View File

@ -8,6 +8,26 @@
#
# (default is git2pkg)
# TODO NOTES:
# after ensuring current status of local repo is clean, incl submodules, we checkout master+submodules, then also ensure they're clean, then checkout the commit or tag corresponding to latest / stable-release + submodules (but don't bother to verift that's clean) - so this is a detached HEAD for stable-release and IS master (almost certainly) for LATEST. WOW - is that what we need to be doing!?!?! At least it seems to be working for the cases I've seen!!!!
# initbundle operates from the current checked out state of the local superproject, to get the submodule ids - CORRECT!!!!
# the LATEST processing of cloning the local repo, clones master - but perhaps it doesn't matter? because it adds upstream as a remote and probably gets most things from there? INVESTIGATE!!!!!!!!!!!!!!!!!!!1
# bundle2local checks out master in local repo to ensure we're off the frombundle branch (doesn't seem needed the way the script currently is). It fetches the bundle's head (FETCH_HEAD) REQUIRING that the base commit be present (which it seems to be. Then it creates the frombundle branch, with the current FETCH_HEAD (SAME AS IN BUNDLE, RIGHT?)
# The LATEST's rebase loop checks out the frombundle branch (with force), so we are now OFF of the "correct checkout" that happened at the beginning, it DELETES the GIT_BRANCH (so in this case it DIDN'T MATTER WHAT WAS THERE WHEN SCRIPT STARTED !!!!!!! WARNING !!!!!, branches off of the frombundle branch (w/checkout), then rebases that (which came from bundle) onto the current superproject, or submodule commit id.
# At this point, if the GIT_BRANCH rebased ok, it's ready for making a patchqueue, out of, if the rebase failed, it's time to manually fix that.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
# LATEST processing implies updated upstream/master IS the right thing
# HOW do we protect against a bad bundle being created (we do have the build service's tracking of previous files - is that sufficient?
# initbundle - what does it need? Currently we take the default branch in local superproject for the superproject and submodule commit ids, so it needs to be right! (ie which ever branch it is (DECIDE!), make it the RIGHT ONE for this release. It takes the commits from the $GIT_BUNDLE branch of each of superproject and submodule repos, which are beyond above found commits. IT CERTIANLY SEEMS REASONABLE THAT WE WOULD HAVE THE $$GIT_BRANCH BRANCH BE THE DEFAULT AND CORRECT BUT HOW DO WE GUARANTEE THATS OK? GIVEN OUR REQS HERE QEMU_VERSION CAN BE GRABBED HERE IN LOCAL SUPERPROJECT RIGHT AWAY!
# bundle2local - what does it need? This checks out local master just to get off of frombundle (could have been $GIT_BRANCH as well) No other req's
# bundle2spec - what does it need? THIS SHOULD HAVE LATEST SPLIT OUT!!!! We allow this alone, so see what setup it alone needs - for this one particularly, we don't want to REQUIRE local repo. FOR NOW I ASSUME THE RIGHT THING IS CHECKED OUT!
#
# SEEMS WE SHOULD NOT,NOT,NOT require user to have previously updated, or set local repo a certain way, but for us to enforce it or actually do it
# TODO: confirm local repo present, correct remotes, correct local branches, somehow validate local branch content against remote, ...
#
# In both cases we DO require the $GIT_BRANCH to exist, and should confirm that the appropriate upstream basis commit is indeed part of that. In the LATEST case, we can treat master as a source for initial current upstream.
set -e
source ./config.sh
@ -345,7 +365,9 @@ rm -rf $BUNDLE_DIR
sed -E -i 's/(^index [a-f0-9]{28})[a-f0-9]{12}([.][.][a-f0-9]{28})[a-f0-9]{12}( [0-9]{6}$)/\1\2\3/' $i
BASENAME=$(basename $i)
if [ "$FIVE_DIGIT_POTENTIAL" = "1" ]; then
if [[ $BASENAME =~ [[:digit:]]{4}-.* ]]; then
if [[ $BASENAME =~ [[:digit:]]{5}.* ]]; then
:
else
BASENAME=0$BASENAME
fi
fi

View File

@ -0,0 +1,174 @@
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Tue, 4 Jun 2019 19:15:03 +0300
Subject: util/iov: introduce qemu_iovec_init_extended
Git-commit: d953169d4840f312d3b9a54952f4a7ccfcb3b311
Introduce new initialization API, to create requests with padding. Will
be used in the following patch. New API uses qemu_iovec_init_buf if
resulting io vector has only one element, to avoid extra allocations.
So, we need to update qemu_iovec_destroy to support destroying such
QIOVs.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20190604161514.262241-2-vsementsov@virtuozzo.com
Message-Id: <20190604161514.262241-2-vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
include/qemu/iov.h | 7 +++
util/iov.c | 112 +++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 114 insertions(+), 5 deletions(-)
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
index 48b45987b70ea28879af7989c31f..f3787a0cf768bd0ea1031913a038 100644
--- a/include/qemu/iov.h
+++ b/include/qemu/iov.h
@@ -199,6 +199,13 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
+void qemu_iovec_init_extended(
+ QEMUIOVector *qiov,
+ void *head_buf, size_t head_len,
+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
+ void *tail_buf, size_t tail_len);
+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
+ size_t offset, size_t len);
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
void qemu_iovec_concat(QEMUIOVector *dst,
QEMUIOVector *src, size_t soffset, size_t sbytes);
diff --git a/util/iov.c b/util/iov.c
index 74e6ca8ed7298c833e52257923c1..366ff9cdd1dee06c5d62712cb5ef 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -353,6 +353,103 @@ void qemu_iovec_concat(QEMUIOVector *dst,
qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
}
+/*
+ * qiov_find_iov
+ *
+ * Return pointer to iovec structure, where byte at @offset in original vector
+ * @iov exactly is.
+ * Set @remaining_offset to be offset inside that iovec to the same byte.
+ */
+static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
+ size_t *remaining_offset)
+{
+ while (offset > 0 && offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ iov++;
+ }
+ *remaining_offset = offset;
+
+ return iov;
+}
+
+/*
+ * qiov_slice
+ *
+ * Find subarray of iovec's, containing requested range. @head would
+ * be offset in first iov (returned by the function), @tail would be
+ * count of extra bytes in last iovec (returned iov + @niov - 1).
+ */
+static struct iovec *qiov_slice(QEMUIOVector *qiov,
+ size_t offset, size_t len,
+ size_t *head, size_t *tail, int *niov)
+{
+ struct iovec *iov, *end_iov;
+
+ assert(offset + len <= qiov->size);
+
+ iov = iov_skip_offset(qiov->iov, offset, head);
+ end_iov = iov_skip_offset(iov, *head + len, tail);
+
+ if (*tail > 0) {
+ assert(*tail < end_iov->iov_len);
+ *tail = end_iov->iov_len - *tail;
+ end_iov++;
+ }
+
+ *niov = end_iov - iov;
+
+ return iov;
+}
+
+/*
+ * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
+ * and @tail_buf buffer into new qiov.
+ */
+void qemu_iovec_init_extended(
+ QEMUIOVector *qiov,
+ void *head_buf, size_t head_len,
+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
+ void *tail_buf, size_t tail_len)
+{
+ size_t mid_head, mid_tail;
+ int total_niov, mid_niov = 0;
+ struct iovec *p, *mid_iov;
+
+ if (mid_len) {
+ mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
+ &mid_head, &mid_tail, &mid_niov);
+ }
+
+ total_niov = !!head_len + mid_niov + !!tail_len;
+ if (total_niov == 1) {
+ qemu_iovec_init_buf(qiov, NULL, 0);
+ p = &qiov->local_iov;
+ } else {
+ qiov->niov = qiov->nalloc = total_niov;
+ qiov->size = head_len + mid_len + tail_len;
+ p = qiov->iov = g_new(struct iovec, qiov->niov);
+ }
+
+ if (head_len) {
+ p->iov_base = head_buf;
+ p->iov_len = head_len;
+ p++;
+ }
+
+ if (mid_len) {
+ memcpy(p, mid_iov, mid_niov * sizeof(*p));
+ p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
+ p[0].iov_len -= mid_head;
+ p[mid_niov - 1].iov_len -= mid_tail;
+ p += mid_niov;
+ }
+
+ if (tail_len) {
+ p->iov_base = tail_buf;
+ p->iov_len = tail_len;
+ }
+}
+
/*
* Check if the contents of the iovecs are all zero
*/
@@ -374,14 +471,19 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov)
return true;
}
+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
+ size_t offset, size_t len)
+{
+ qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
+}
+
void qemu_iovec_destroy(QEMUIOVector *qiov)
{
- assert(qiov->nalloc != -1);
+ if (qiov->nalloc != -1) {
+ g_free(qiov->iov);
+ }
- qemu_iovec_reset(qiov);
- g_free(qiov->iov);
- qiov->nalloc = 0;
- qiov->iov = NULL;
+ memset(qiov, 0, sizeof(*qiov));
}
void qemu_iovec_reset(QEMUIOVector *qiov)