qemu/block-Fix-deadlock-in-bdrv_co_yield_to_d.patch

From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 3 Dec 2020 18:23:11 +0100
Subject: block: Fix deadlock in bdrv_co_yield_to_drain()

Git-commit: 960d5fb3e8ee09bc5f1a5c84f66dce42a6cef920

If bdrv_co_yield_to_drain() is called for draining a block node that
runs in a different AioContext, it keeps that AioContext locked while it
yields and schedules a BH in the AioContext to do the actual drain.

As long as executing the BH is the very next thing that the event loop
of the node's AioContext does, this actually happens to work, but when
it tries to execute something else that wants to take the AioContext
lock, it will deadlock. (In the bug report, this other thing is a
virtio-scsi device running virtio_scsi_data_plane_handle_cmd().)

Instead, always drop the AioContext lock across the yield and reacquire
it only when the coroutine is reentered. The BH needs to unconditionally
take the lock for itself now.

This fixes the 'block_resize' QMP command on a block node that runs in
an iothread.

Cc: qemu-stable@nongnu.org
Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1903511
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20201203172311.68232-4-kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
 block/io.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/block/io.c b/block/io.c
index ec5e152bb70f62371b608e95d514..a9f56a9ab1c56a3ca83833bfb0fa 100644
--- a/block/io.c
+++ b/block/io.c
@@ -306,17 +306,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 
     if (bs) {
         AioContext *ctx = bdrv_get_aio_context(bs);
-        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
-
-        /*
-         * When the coroutine yielded, the lock for its home context was
-         * released, so we need to re-acquire it here. If it explicitly
-         * acquired a different context, the lock is still held and we don't
-         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
-         */
-        if (ctx == co_ctx) {
-            aio_context_acquire(ctx);
-        }
+        aio_context_acquire(ctx);
         bdrv_dec_in_flight(bs);
         if (data->begin) {
             assert(!data->drained_end_counter);
@@ -328,9 +318,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
                                 data->ignore_bds_parents,
                                 data->drained_end_counter);
         }
-        if (ctx == co_ctx) {
-            aio_context_release(ctx);
-        }
+        aio_context_release(ctx);
     } else {
         assert(data->begin);
         bdrv_drain_all_begin();
@@ -348,13 +336,16 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                 int *drained_end_counter)
 {
     BdrvCoDrainData data;
+    Coroutine *self = qemu_coroutine_self();
+    AioContext *ctx = bdrv_get_aio_context(bs);
+    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
 
     /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
      * other coroutines run if they were queued by aio_co_enter(). */
 
     assert(qemu_in_coroutine());
     data = (BdrvCoDrainData) {
-        .co = qemu_coroutine_self(),
+        .co = self,
         .bs = bs,
         .done = false,
         .begin = begin,
@@ -368,13 +359,29 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     if (bs) {
         bdrv_inc_in_flight(bs);
     }
-    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),
-                                     bdrv_co_drain_bh_cb, &data);
+
+    /*
+     * Temporarily drop the lock across yield or we would get deadlocks.
+     * bdrv_co_drain_bh_cb() reacquires the lock as needed.
+     *
+     * When we yield below, the lock for the current context will be
+     * released, so if this is actually the lock that protects bs, don't drop
+     * it a second time.
+     */
+    if (ctx != co_ctx) {
+        aio_context_release(ctx);
+    }
+    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
 
     qemu_coroutine_yield();
     /* If we are resumed from some other event (such as an aio completion or a
      * timer callback), it is a bug in the caller that should be fixed. */
     assert(data.done);
+
+    /* Reacquire the AioContext of bs if we dropped it */
+    if (ctx != co_ctx) {
+        aio_context_acquire(ctx);
+    }
 }
 
 void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
Accepting request 860516 from home:bfrogers:branches:Virtualization - Fix crash when spice used and the qemu-audio-spice package isn't installed (boo#1180210) audio-add-sanity-check.patch - Add some stable patches from upstream block-Fix-deadlock-in-bdrv_co_yield_to_d.patch block-Fix-locking-in-qmp_block_resize.patch block-nfs-fix-int-overflow-in-nfs_client.patch block-Simplify-qmp_block_resize-error-pa.patch build-no-pie-is-no-functional-linker-fla.patch OBS-URL: https://build.opensuse.org/request/show/860516 OBS-URL: https://build.opensuse.org/package/show/Virtualization/qemu?expand=0&rev=599 2021-01-05 18:40:44 +01:00			`From: Kevin Wolf <kwolf@redhat.com>`
			`Date: Thu, 3 Dec 2020 18:23:11 +0100`
			`Subject: block: Fix deadlock in bdrv_co_yield_to_drain()`

Accepting request 882222 from home:bfrogers:branches:Virtualization - Switch method of splitting off hw-s390x-virtio-gpu-ccw.so as a module to what was accepted upstream (bsc#1181103) * Patches dropped: hw-s390x-modularize-virtio-gpu-ccw.patch * Patches added: s390x-add-have_virtio_ccw.patch s390x-modularize-virtio-gpu-ccw.patch s390x-move-S390_ADAPTER_SUPPRESSIBLE.patch - Fix OOB access in sdhci interface (CVE-2020-17380, bsc#1175144, CVE-2020-25085, bsc#1176681, CVE-2021-3409, bsc#1182282) hw-sd-sd-Actually-perform-the-erase-oper.patch hw-sd-sd-Fix-build-error-when-DEBUG_SD-i.patch hw-sd-sdhci-Correctly-set-the-controller.patch hw-sd-sdhci-Don-t-transfer-any-data-when.patch hw-sd-sdhci-Don-t-write-to-SDHC_SYSAD-re.patch hw-sd-sdhci-Limit-block-size-only-when-S.patch hw-sd-sdhci-Reset-the-data-pointer-of-s-.patch hw-sd-sd-Move-the-sd_block_-read-write-a.patch hw-sd-sd-Skip-write-protect-groups-check.patch - Fix potential privilege escalation in virtiofsd tool (CVE-2021-20263, bsc#1183373) tools-virtiofsd-Replace-the-word-whiteli.patch viriofsd-Add-support-for-FUSE_HANDLE_KIL.patch virtiofsd-extract-lo_do_open-from-lo_ope.patch virtiofsd-optionally-return-inode-pointe.patch virtiofsd-prevent-opening-of-special-fil.patch virtiofs-drop-remapped-security.capabili.patch virtiofsd-Save-error-code-early-at-the-f.patch - Fix OOB access (stack overflow) in rtl8139 NIC emulation (CVE-2021-3416, bsc#1182968) net-introduce-qemu_receive_packet.patch rtl8139-switch-to-use-qemu_receive_packe.patch - Fix OOB access (stack overflow) in other NIC emulations (CVE-2021-3416) cadence_gem-switch-to-use-qemu_receive_p.patch dp8393x-switch-to-use-qemu_receive_packe.patch e1000-switch-to-use-qemu_receive_packet-.patch lan9118-switch-to-use-qemu_receive_packe.patch msf2-mac-switch-to-use-qemu_receive_pack.patch pcnet-switch-to-use-qemu_receive_packet-.patch sungem-switch-to-use-qemu_receive_packet.patch tx_pkt-switch-to-use-qemu_receive_packet.patch - Fix heap overflow in MSIx emulation 
(CVE-2020-27821, bsc#1179686) memory-clamp-cached-translation-in-case-.patch - Include upstream patches designated as stable material and reviewed for applicability to include here hw-arm-virt-Disable-pl011-clock-migratio.patch xen-block-Fix-removal-of-backend-instanc.patch - Fix package scripts to not use hard coded paths for temporary working directories and log files (bsc#1182425) OBS-URL: https://build.opensuse.org/request/show/882222 OBS-URL: https://build.opensuse.org/package/show/Virtualization/qemu?expand=0&rev=632 2021-03-30 22:27:28 +02:00			`Git-commit: 960d5fb3e8ee09bc5f1a5c84f66dce42a6cef920`
Accepting request 860516 from home:bfrogers:branches:Virtualization - Fix crash when spice used and the qemu-audio-spice package isn't installed (boo#1180210) audio-add-sanity-check.patch - Add some stable patches from upstream block-Fix-deadlock-in-bdrv_co_yield_to_d.patch block-Fix-locking-in-qmp_block_resize.patch block-nfs-fix-int-overflow-in-nfs_client.patch block-Simplify-qmp_block_resize-error-pa.patch build-no-pie-is-no-functional-linker-fla.patch OBS-URL: https://build.opensuse.org/request/show/860516 OBS-URL: https://build.opensuse.org/package/show/Virtualization/qemu?expand=0&rev=599 2021-01-05 18:40:44 +01:00
			`If bdrv_co_yield_to_drain() is called for draining a block node that`
			`runs in a different AioContext, it keeps that AioContext locked while it`
			`yields and schedules a BH in the AioContext to do the actual drain.`

			`As long as executing the BH is the very next thing that the event loop`
			`of the node's AioContext does, this actually happens to work, but when`
			`it tries to execute something else that wants to take the AioContext`
			`lock, it will deadlock. (In the bug report, this other thing is a`
			`virtio-scsi device running virtio_scsi_data_plane_handle_cmd().)`

			`Instead, always drop the AioContext lock across the yield and reacquire`
			`it only when the coroutine is reentered. The BH needs to unconditionally`
			`take the lock for itself now.`

			`This fixes the 'block_resize' QMP command on a block node that runs in`
			`an iothread.`

			`Cc: qemu-stable@nongnu.org`
			`Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6`
			`Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1903511`
			`Signed-off-by: Kevin Wolf <kwolf@redhat.com>`
			`Message-Id: <20201203172311.68232-4-kwolf@redhat.com>`
			`Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>`
			`Signed-off-by: Kevin Wolf <kwolf@redhat.com>`
			`Signed-off-by: Bruce Rogers <brogers@suse.com>`
			`---`
			`block/io.c \| 41 ++++++++++++++++++++++++-----------------`
			`1 file changed, 24 insertions(+), 17 deletions(-)`

			`diff --git a/block/io.c b/block/io.c`
			`index ec5e152bb70f62371b608e95d514..a9f56a9ab1c56a3ca83833bfb0fa 100644`
			`--- a/block/io.c`
			`+++ b/block/io.c`
			`@@ -306,17 +306,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)`

			`if (bs) {`
			`AioContext *ctx = bdrv_get_aio_context(bs);`
			`- AioContext *co_ctx = qemu_coroutine_get_aio_context(co);`
			`-`
			`- /*`
			`- * When the coroutine yielded, the lock for its home context was`
			`- * released, so we need to re-acquire it here. If it explicitly`
			`- * acquired a different context, the lock is still held and we don't`
			`- * want to lock it a second time (or AIO_WAIT_WHILE() would hang).`
			`- */`
			`- if (ctx == co_ctx) {`
			`- aio_context_acquire(ctx);`
			`- }`
			`+ aio_context_acquire(ctx);`
			`bdrv_dec_in_flight(bs);`
			`if (data->begin) {`
			`assert(!data->drained_end_counter);`
			`@@ -328,9 +318,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)`
			`data->ignore_bds_parents,`
			`data->drained_end_counter);`
			`}`
			`- if (ctx == co_ctx) {`
			`- aio_context_release(ctx);`
			`- }`
			`+ aio_context_release(ctx);`
			`} else {`
			`assert(data->begin);`
			`bdrv_drain_all_begin();`
			`@@ -348,13 +336,16 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,`
			`int *drained_end_counter)`
			`{`
			`BdrvCoDrainData data;`
			`+ Coroutine *self = qemu_coroutine_self();`
			`+ AioContext *ctx = bdrv_get_aio_context(bs);`
			`+ AioContext *co_ctx = qemu_coroutine_get_aio_context(self);`

			`/* Calling bdrv_drain() from a BH ensures the current coroutine yields and`
			`* other coroutines run if they were queued by aio_co_enter(). */`

			`assert(qemu_in_coroutine());`
			`data = (BdrvCoDrainData) {`
			`- .co = qemu_coroutine_self(),`
			`+ .co = self,`
			`.bs = bs,`
			`.done = false,`
			`.begin = begin,`
			`@@ -368,13 +359,29 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,`
			`if (bs) {`
			`bdrv_inc_in_flight(bs);`
			`}`
			`- replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),`
			`- bdrv_co_drain_bh_cb, &data);`
			`+`
			`+ /*`
			`+ * Temporarily drop the lock across yield or we would get deadlocks.`
			`+ * bdrv_co_drain_bh_cb() reacquires the lock as needed.`
			`+ *`
			`+ * When we yield below, the lock for the current context will be`
			`+ * released, so if this is actually the lock that protects bs, don't drop`
			`+ * it a second time.`
			`+ */`
			`+ if (ctx != co_ctx) {`
			`+ aio_context_release(ctx);`
			`+ }`
			`+ replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);`

			`qemu_coroutine_yield();`
			`/* If we are resumed from some other event (such as an aio completion or a`
			`* timer callback), it is a bug in the caller that should be fixed. */`
			`assert(data.done);`
			`+`
			`+ /* Reacquire the AioContext of bs if we dropped it */`
			`+ if (ctx != co_ctx) {`
			`+ aio_context_acquire(ctx);`
			`+ }`
			`}`

			`void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,`