dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() race
dma_blk_cb() only takes the AioContext lock around ->io_func(). That means the rest of dma_blk_cb() is not protected. In particular, the DMAAIOCB field accesses happen outside the lock.

There is a race when the main loop thread holds the AioContext lock and invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb field determines how cancellation proceeds. If dma_aio_cancel() sees dbs->acb == NULL while dma_blk_cb() is still running, the request can be completed twice (-ECANCELED and the actual return value).

The following assertion can occur with virtio-scsi when an IOThread is used:

  ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.

Fix the race by holding the AioContext lock across dma_blk_cb(). Now dma_aio_cancel() under the AioContext lock will not see inconsistent/intermediate states.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
committed by Kevin Wolf

parent 7b7fc3d010
commit abfcd2760b
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
@@ -354,13 +354,12 @@ done:
     scsi_req_unref(&r->req);
 }
 
+/* Called with AioContext lock held */
 static void scsi_dma_complete(void *opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret)
         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
     }
     scsi_dma_complete_noio(r, ret);
-    aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 }
 
 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
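The hunk above is the other half of the fix: dma_blk_cb() now invokes the completion callback with the AioContext lock already held, so scsi_dma_complete() no longer needs to acquire it itself, and the new comment records that contract. A minimal sketch of the convention, again with a pthread mutex standing in for the AioContext lock and with invented names:

    #include <pthread.h>

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Called with ctx_lock held -- mirrors the new scsi_dma_complete()
     * contract: the callback touches request state without locking. */
    static void completion_cb(void *opaque, int ret)
    {
        /* Safe: the caller serializes us against cancellation via ctx_lock. */
        (void)opaque;
        (void)ret;
    }

    /* The caller (the dma_blk_cb() analogue) wraps the whole invocation,
     * so a canceller serialized on the same lock can never interleave. */
    static void invoke_completion(void *opaque, int ret)
    {
        pthread_mutex_lock(&ctx_lock);
        completion_cb(opaque, ret);
        pthread_mutex_unlock(&ctx_lock);
    }

    int main(void)
    {
        invoke_completion(0, 0);
        return 0;
    }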
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
 static void dma_blk_cb(void *opaque, int ret)
 {
     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+    AioContext *ctx = dbs->ctx;
     dma_addr_t cur_addr, cur_len;
     void *mem;
 
     trace_dma_blk_cb(dbs, ret);
 
+    aio_context_acquire(ctx);
     dbs->acb = NULL;
     dbs->offset += dbs->iov.size;
 
     if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
         dma_complete(dbs, ret);
-        return;
+        goto out;
     }
     dma_blk_unmap(dbs);
 
@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret)
 
     if (dbs->iov.size == 0) {
         trace_dma_map_wait(dbs);
-        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
+        dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
         cpu_register_map_client(dbs->bh);
-        return;
+        goto out;
     }
 
     if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret)
                                 QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
     }
 
-    aio_context_acquire(dbs->ctx);
     dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                             dma_blk_cb, dbs, dbs->io_func_opaque);
-    aio_context_release(dbs->ctx);
     assert(dbs->acb);
+out:
+    aio_context_release(ctx);
 }
 
 static void dma_aio_cancel(BlockAIOCB *acb)
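A design note on the dma-helpers.c hunks: the lock is now taken once at function entry, and each early return becomes goto out, so the single release at the bottom covers every path, including the early exits for completion and for waiting on a bounce-buffer mapping. A condensed sketch of that fixed control-flow shape, again using a pthread mutex and invented helper names rather than the verbatim QEMU code:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

    static bool transfer_finished(int ret) { return ret < 0; }
    static bool must_wait_for_mapping(void) { return false; }

    static void blk_cb_shape(void *opaque, int ret)
    {
        (void)opaque;
        pthread_mutex_lock(&ctx_lock);    /* covers the whole callback now */

        if (transfer_finished(ret)) {
            /* ... complete the request ... */
            goto out;                     /* was: return */
        }
        if (must_wait_for_mapping()) {
            /* ... schedule a bottom half and wait ... */
            goto out;                     /* was: return */
        }
        /* ... submit the next chunk of I/O ... */
    out:
        pthread_mutex_unlock(&ctx_lock);  /* single release point */
    }

    int main(void)
    {
        blk_cb_shape(0, 0);
        return 0;
    }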