From: Olaf Hering
Date: Thu, 29 Oct 2020 16:13:10 +0100
Subject: tools: restore: write data directly into guest

Read the incoming migration stream directly into guest memory: the
iovecs passed to readv() point at the mapped guest pages, so PAGE_DATA
records no longer need to be buffered and copied afterwards. This
avoids the intermediate memory allocation and copying, and the
resulting performance penalty.

Signed-off-by: Olaf Hering
---
 tools/libs/guest/xg_sr_common.h  |   3 +
 tools/libs/guest/xg_sr_restore.c | 155 ++++++++++++++++++++++++++++++-
 2 files changed, 153 insertions(+), 5 deletions(-)

--- a/tools/libs/guest/xg_sr_common.h
+++ b/tools/libs/guest/xg_sr_common.h
@@ -263,6 +263,8 @@ struct xc_sr_context
             xen_pfn_t *pp_pfns;
             xen_pfn_t *pp_mfns;
             void **guest_data;
+            struct iovec *iov;
+            struct xc_sr_rec_page_data_header *pages;
 
             void *guest_mapping;
             uint32_t nr_mapped_pages;
@@ -311,6 +313,7 @@ struct xc_sr_context
 
             /* Sender has invoked verify mode on the stream. */
             bool verify;
+            void *verify_buf;
         } restore;
     };
 
--- a/tools/libs/guest/xg_sr_restore.c
+++ b/tools/libs/guest/xg_sr_restore.c
@@ -382,6 +382,129 @@ err:
 }
 
 /*
+ * Handle PAGE_DATA record from the stream.
+ * Given a list of pfns, their types, and a block of page data from the
+ * stream, populate and record their types, map the relevant subset and
+ * read the data directly into the guest.
+ */
+static int handle_incoming_page_data(struct xc_sr_context *ctx,
+                                     struct xc_sr_rhdr *rhdr)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_page_data_header *pages = ctx->restore.pages;
+    uint64_t *pfn_nums = &pages->pfn[0];
+    uint32_t i;
+    int rc, iov_idx;
+
+    rc = handle_static_data_end_v2(ctx);
+    if ( rc )
+        goto err;
+
+    /* First read and verify the header. */
+    rc = read_exact(ctx->fd, pages, sizeof(*pages));
+    if ( rc )
+    {
+        PERROR("Could not read rec_pfn header");
+        goto err;
+    }
+
+    if ( !verify_rec_page_hdr(ctx, rhdr->length, pages) )
+    {
+        rc = -1;
+        goto err;
+    }
+
+    /* Then read and verify the incoming pfn numbers. */
+    rc = read_exact(ctx->fd, pfn_nums, sizeof(*pfn_nums) * pages->count);
+    if ( rc )
+    {
+        PERROR("Could not read rec_pfn data");
+        goto err;
+    }
+
+    if ( !verify_rec_page_pfns(ctx, rhdr->length, pages) )
+    {
+        rc = -1;
+        goto err;
+    }
+
+    /* Finally read and verify the incoming page data. */
+    rc = map_guest_pages(ctx, pages);
+    if ( rc )
+        goto err;
+
+    /* Prepare read buffers: either guest memory or the verify buffer. */
+    for ( i = 0, iov_idx = 0; i < pages->count; i++ )
+    {
+        struct iovec *iov;
+
+        if ( !ctx->restore.guest_data[i] )
+            continue;
+
+        iov = &ctx->restore.iov[iov_idx];
+        iov->iov_len = PAGE_SIZE;
+        if ( ctx->restore.verify )
+            iov->iov_base = ctx->restore.verify_buf + (i * PAGE_SIZE);
+        else
+            iov->iov_base = ctx->restore.guest_data[i];
+        iov_idx++;
+    }
+
+    if ( !iov_idx )
+        goto done;
+
+    rc = readv_exact(ctx->fd, ctx->restore.iov, iov_idx);
+    if ( rc )
+    {
+        PERROR("read of %d pages failed", iov_idx);
+        goto err;
+    }
+
+    /* Post-processing of pfn data. */
+    for ( i = 0, iov_idx = 0; i < pages->count; i++ )
+    {
+        void *addr;
+
+        if ( !ctx->restore.guest_data[i] )
+            continue;
+
+        addr = ctx->restore.iov[iov_idx].iov_base;
+        rc = ctx->restore.ops.localise_page(ctx, ctx->restore.types[i], addr);
+        if ( rc )
+        {
+            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
+                  ctx->restore.pfns[i],
+                  ctx->restore.types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+            goto err;
+        }
+
+        if ( ctx->restore.verify )
+        {
+            if ( memcmp(ctx->restore.guest_data[i], addr, PAGE_SIZE) )
+            {
+                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
+                      ctx->restore.pfns[i],
+                      ctx->restore.types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+            }
+        }
+
+        iov_idx++;
+    }
+
+done:
+    rc = 0;
+
+err:
+    if ( ctx->restore.guest_mapping )
+    {
+        xenforeignmemory_unmap(xch->fmem, ctx->restore.guest_mapping, ctx->restore.nr_mapped_pages);
+        ctx->restore.guest_mapping = NULL;
+    }
+    return rc;
+}
+
+/*
  * Handle PAGE_DATA record from an existing buffer
  * Given a list of pfns, their types, and a block of page data from the
  * stream, populate and record their types, map the relevant subset and copy
  * the data into the guest.
@@ -713,6 +836,15 @@ static int process_buffered_record(struc
     case REC_TYPE_VERIFY:
         DPRINTF("Verify mode enabled");
         ctx->restore.verify = true;
+        if ( !ctx->restore.verify_buf )
+        {
+            ctx->restore.verify_buf = malloc(MAX_BATCH_SIZE * PAGE_SIZE);
+            if ( !ctx->restore.verify_buf )
+            {
+                PERROR("Unable to allocate verify_buf");
+                rc = -1;
+            }
+        }
         break;
 
     case REC_TYPE_CHECKPOINT:
@@ -739,11 +871,19 @@ static int process_incoming_record_heade
     struct xc_sr_record rec;
     int rc;
 
-    rc = read_record_data(ctx, ctx->fd, rhdr, &rec);
-    if ( rc )
-        return rc;
+    switch ( rhdr->type )
+    {
+    case REC_TYPE_PAGE_DATA:
+        rc = handle_incoming_page_data(ctx, rhdr);
+        break;
+    default:
+        rc = read_record_data(ctx, ctx->fd, rhdr, &rec);
+        if ( rc == 0 )
+            rc = process_buffered_record(ctx, &rec);
+        break;
+    }
 
-    return process_buffered_record(ctx, &rec);
+    return rc;
 }
 
@@ -788,9 +928,12 @@ static int setup(struct xc_sr_context *c
     ctx->restore.pp_pfns = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.pp_pfns));
     ctx->restore.pp_mfns = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.pp_mfns));
     ctx->restore.guest_data = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.guest_data));
+    ctx->restore.iov = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.iov));
+    ctx->restore.pages = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.pages->pfn) + sizeof(*ctx->restore.pages));
     if ( !ctx->restore.pfns || !ctx->restore.types || !ctx->restore.mfns ||
          !ctx->restore.map_errs || !ctx->restore.pp_pfns ||
-         !ctx->restore.pp_mfns || !ctx->restore.guest_data )
+         !ctx->restore.pp_mfns || !ctx->restore.guest_data ||
+         !ctx->restore.iov || !ctx->restore.pages )
     {
         ERROR("Unable to allocate memory");
         rc = -1;
@@ -827,6 +970,8 @@ static void cleanup(struct xc_sr_context
     free(ctx->restore.buffered_records);
     free(ctx->restore.populated_pfns);
+    free(ctx->restore.pages);
+    free(ctx->restore.iov);
     free(ctx->restore.guest_data);
     free(ctx->restore.pp_mfns);
     free(ctx->restore.pp_pfns);
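
A note on the scatter-read pattern used by handle_incoming_page_data()
above: because every iovec entry points at a final destination (a
mapped guest page, or a slot in verify_buf), a single readv() call
delivers a whole batch of PAGE_DATA pages with no bounce buffer and no
memcpy(). The standalone sketch below mirrors that shape, including the
short-read handling that readv_exact() performs inside the Xen tree.
read_pages_direct() and its signature are invented for this
illustration only; nothing beyond POSIX readv() semantics is taken from
libxenguest.

#include <errno.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

#define PAGE_SIZE 4096

/*
 * Read one PAGE_SIZE block per destination straight from fd, with no
 * intermediate copy.  dest[] plays the role of the mapped guest pages
 * (ctx->restore.guest_data[] in the patch).  readv() may return short,
 * so fully consumed iovecs are skipped and a partially filled one is
 * shrunk before retrying.  A real caller would also cap count, as the
 * patch does by batching at most MAX_BATCH_SIZE pages per record.
 */
static int read_pages_direct(int fd, void **dest, size_t count)
{
    struct iovec *iov, *cur;
    size_t i, left = count;
    int rc = -1;

    iov = calloc(count, sizeof(*iov));
    if ( !iov )
        return -1;

    for ( i = 0; i < count; i++ )
    {
        iov[i].iov_base = dest[i];
        iov[i].iov_len = PAGE_SIZE;
    }

    for ( cur = iov; left; )
    {
        ssize_t len = readv(fd, cur, (int)left);

        if ( len < 0 )
        {
            if ( errno == EINTR )
                continue;
            goto out;
        }
        if ( len == 0 )          /* unexpected end of stream */
            goto out;

        /* Skip the iovecs this readv() filled completely ... */
        while ( left && (size_t)len >= cur->iov_len )
        {
            len -= cur->iov_len;
            cur++;
            left--;
        }

        /* ... and shrink a partially filled one before retrying. */
        if ( len )
        {
            cur->iov_base = (char *)cur->iov_base + len;
            cur->iov_len -= len;
        }
    }

    rc = 0;
 out:
    free(iov);
    return rc;
}

Verify mode falls out of the same structure: point dest[] at a scratch
area instead of guest memory and memcmp() against the live pages
afterwards, which is exactly the switch handle_incoming_page_data()
makes on ctx->restore.verify.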