References: FATE#316071 Subject: [RFC] qemu-upstream: add discard support for xen_disk Implement discard support for xen_disk. It makes use of the existing discard code in qemu. The discard support is enabled unconditionally. But it would be worth having a knob to disable it in case the backing file was intentionally created non-sparse to avoid fragmentation. How could this knob be passed from domU.cfg:disk=[] to the actual qemu process? Perhaps introduce an option discard=on|off|ignore|unmap. Signed-off-by: Olaf Hering --- tools/qemu-xen-dir-remote/hw/block/xen_blkif.h | 12 ++++++++ tools/qemu-xen-dir-remote/hw/block/xen_disk.c | 36 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+) Index: xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_blkif.h =================================================================== --- xen-4.4.0-testing.orig/tools/qemu-xen-dir-remote/hw/block/xen_blkif.h +++ xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_blkif.h @@ -79,6 +79,12 @@ static inline void blkif_get_x86_32_req( dst->handle = src->handle; dst->id = src->id; dst->sector_number = src->sector_number; + if (src->operation == BLKIF_OP_DISCARD) { + struct blkif_request_discard *s = (void *)src; + struct blkif_request_discard *d = (void *)dst; + d->nr_sectors = s->nr_sectors; + return; + } if (n > src->nr_segments) n = src->nr_segments; for (i = 0; i < n; i++) @@ -94,6 +100,12 @@ static inline void blkif_get_x86_64_req( dst->handle = src->handle; dst->id = src->id; dst->sector_number = src->sector_number; + if (src->operation == BLKIF_OP_DISCARD) { + struct blkif_request_discard *s = (void *)src; + struct blkif_request_discard *d = (void *)dst; + d->nr_sectors = s->nr_sectors; + return; + } if (n > src->nr_segments) n = src->nr_segments; for (i = 0; i < n; i++) Index: xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c =================================================================== --- 
xen-4.4.0-testing.orig/tools/qemu-xen-dir-remote/hw/block/xen_disk.c +++ xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c @@ -68,6 +68,8 @@ struct ioreq { int presync; int postsync; uint8_t mapped; + int64_t sector_num; + int nb_sectors; /* grant mapping */ uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -114,6 +116,7 @@ struct XenBlkDev { int requests_finished; /* Persistent grants extension */ + gboolean feature_discard; gboolean feature_persistent; GTree *persistent_gnts; unsigned int persistent_gnt_count; @@ -232,6 +235,7 @@ static void ioreq_release(struct ioreq * static int ioreq_parse(struct ioreq *ioreq) { struct XenBlkDev *blkdev = ioreq->blkdev; + struct blkif_request_discard *discard_req = (void *)&ioreq->req; uintptr_t mem; size_t len; int i; @@ -244,6 +248,10 @@ static int ioreq_parse(struct ioreq *ior case BLKIF_OP_READ: ioreq->prot = PROT_WRITE; /* to memory */ break; + case BLKIF_OP_DISCARD: + ioreq->sector_num = discard_req->sector_number; + ioreq->nb_sectors = discard_req->nr_sectors; + return 0; case BLKIF_OP_FLUSH_DISKCACHE: ioreq->presync = 1; if (!ioreq->req.nr_segments) { @@ -521,6 +529,13 @@ static int ioreq_runio_qemu_aio(struct i &ioreq->v, ioreq->v.size / BLOCK_SIZE, qemu_aio_complete, ioreq); break; + case BLKIF_OP_DISCARD: + bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->nb_sectors * BLOCK_SIZE, BDRV_ACCT_WRITE); + ioreq->aio_inflight++; + bdrv_aio_discard(blkdev->bs, + ioreq->sector_num, ioreq->nb_sectors, + qemu_aio_complete, ioreq); + break; default: /* unknown operation (shouldn't happen -- parse catches this) */ goto err; @@ -699,6 +714,23 @@ static void blk_alloc(struct XenDevice * } } +static void blk_parse_discard(struct XenBlkDev *blkdev) +{ + int flags = BDRV_O_UNMAP; +#if 0 + char *s = xenstore_read_be_str(&blkdev->xendev, "toolstack-options-discard"); + + if (s && bdrv_parse_discard_flags(s, &flags) < 0) { + xen_be_printf(xendev, 0, "property toolstack-discard-options failed to parse: %s\n", s); + 
return; + } +#endif + blkdev->feature_discard = !!(flags & BDRV_O_UNMAP); + if (!blkdev->feature_discard) + return; + xenstore_write_be_int(&blkdev->xendev, "feature-discard", 1); +} + static int blk_init(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); @@ -766,6 +798,8 @@ static int blk_init(struct XenDevice *xe xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); xenstore_write_be_int(&blkdev->xendev, "info", info); + blk_parse_discard(blkdev); + g_free(directiosafe); return 0; @@ -801,6 +835,8 @@ static int blk_connect(struct XenDevice qflags |= BDRV_O_RDWR; readonly = false; } + if (blkdev->feature_discard) + qflags |= BDRV_O_UNMAP; /* init qemu block driver */ index = (blkdev->xendev.dev - 202 * 256) / 16;