Subject: xenpaging: handle paged-out pages in XENMEM_* commands

Fix these two warnings:

(XEN) Assertion '__mfn_valid(mfn_x(omfn))' failed at p2m.c:2200
(XEN) memory.c:171:d1 Domain 1 page number 37ff0 invalid

Handle paged-out pages in xc_memory_op, guest_physmap_add_entry and
guest_remove_page. Use the new do_xenmem_op_retry helper function.
In addition, export xen/errno.h to hvmloader to get the ENOENT define.

XENMEM_populate_physmap
  populate_physmap
    -> guest_physmap_add_entry
XENMEM_exchange
  memory_exchange
    -> guest_physmap_add_entry
XENMEM_add_to_physmap
  guest_physmap_add_page
    -> guest_physmap_add_entry
__gnttab_map_grant_ref
  create_grant_host_mapping
    create_grant_p2m_mapping
      -> guest_physmap_add_entry

XENMEM_decrease_reservation
  decrease_reservation
    -> guest_remove_page
XENMEM_add_to_physmap
  -> guest_remove_page

XENMEM_add_to_physmap
  -> XENMAPSPACE_gmfn

Signed-off-by: Olaf Hering

---
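
Every XENMEM_* caller now has to be prepared for the hypercall failing with
ENOENT while the target gfn is paged out: p2m_mem_paging_populate() has been
triggered on its behalf, so the caller only needs to back off briefly and
retry. As a rough illustration of that caller-side pattern (not part of the
patch; the helper name add_to_physmap_retry is made up, and it assumes the
xen-4.0 libxc xc_memory_op() interface also used in the vga.c hunk below):

    #include <errno.h>
    #include <unistd.h>
    #include <xenctrl.h>
    #include <xen/memory.h>

    /* Retry an add-to-physmap request while the target gfn is paged out.
     * ENOENT means xenpaging is paging the gfn back in, so back off for a
     * moment and issue the call again.  Illustrative sketch only. */
    static int add_to_physmap_retry(int xc_handle, struct xen_add_to_physmap *xatp)
    {
        int rc;

        do {
            rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, xatp);
            if ( rc && errno == ENOENT )
                usleep(1000);
        } while ( rc && errno == ENOENT );

        return rc;
    }

Inside the hypervisor the same condition shows up as -ENOENT from
guest_physmap_add_entry() and guest_remove_page(); the hunks below turn it
into GNTST_eagain for grant mappings and propagate it to the caller otherwise.
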
 tools/firmware/hvmloader/hvmloader.c |    9 +++-
 tools/firmware/hvmloader/util.c      |   26 +++++++++++-
 tools/include/Makefile               |    1 
 tools/ioemu-qemu-xen/hw/vga.c        |    5 +-
 tools/libxc/xc_domain.c              |   71 +++++++++++++++++++++--------------
 xen/arch/x86/mm.c                    |   26 ++++++++++--
 xen/arch/x86/mm/p2m.c                |    7 +++
 xen/common/memory.c                  |   25 +++++++++++-
 8 files changed, 131 insertions(+), 39 deletions(-)

--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/hvmloader.c
+++ xen-4.0.1-testing/tools/firmware/hvmloader/hvmloader.c
@@ -29,6 +29,7 @@
 #include "pci_regs.h"
 #include "e820.h"
 #include "option_rom.h"
+#include <xen/errno.h>
 #include
 #include
 #include
@@ -306,13 +307,19 @@ static void pci_setup(void)
     while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
     {
         struct xen_add_to_physmap xatp;
+        int rc;
         if ( hvm_info->high_mem_pgend == 0 )
             hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
         xatp.domid = DOMID_SELF;
         xatp.space = XENMAPSPACE_gmfn;
         xatp.idx = --hvm_info->low_mem_pgend;
         xatp.gpfn = hvm_info->high_mem_pgend++;
-        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+        do {
+            rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+            if ( rc == -ENOENT )
+                cpu_relax();
+        } while ( rc == -ENOENT );
+        if ( rc != 0 )
             BUG();
     }

--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/util.c
+++ xen-4.0.1-testing/tools/firmware/hvmloader/util.c
@@ -23,6 +23,7 @@
 #include "e820.h"
 #include "hypercall.h"
 #include
+#include <xen/errno.h>
 #include
 #include
@@ -323,19 +324,27 @@ void *mem_alloc(uint32_t size, uint32_t
     while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) )
     {
+        int rc;
         reserve += PAGE_SIZE;
         mfn = reserve >> PAGE_SHIFT;

         /* Try to allocate a brand new page in the reserved area. */
         if ( !over_allocated )
         {
+            uint8_t delay = 0;
             xmr.domid = DOMID_SELF;
             xmr.mem_flags = 0;
             xmr.extent_order = 0;
             xmr.nr_extents = 1;
             set_xen_guest_handle(xmr.extent_start, &mfn);
-            if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 )
+            do {
+                rc = hypercall_memory_op(XENMEM_populate_physmap, &xmr);
+                if ( rc == 0 )
+                    cpu_relax();
+            } while ( rc == 0 && ++delay );
+            if ( rc == 1 )
                 continue;
+            printf("%s: over_allocated\n", __func__);
             over_allocated = 1;
         }

@@ -353,7 +362,12 @@ void *mem_alloc(uint32_t size, uint32_t
         xatp.domid = DOMID_SELF;
         xatp.space = XENMAPSPACE_gmfn;
         xatp.gpfn = mfn;
-        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+        do {
+            rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+            if ( rc == -ENOENT )
+                cpu_relax();
+        } while ( rc == -ENOENT );
+        if ( rc != 0 )
             BUG();
     }

@@ -595,6 +609,7 @@ uint16_t get_cpu_mhz(void)
     uint64_t cpu_khz;
     uint32_t tsc_to_nsec_mul, version;
     int8_t tsc_shift;
+    int rc;
     static uint16_t cpu_mhz;

     if ( cpu_mhz != 0 )
@@ -605,7 +620,12 @@ uint16_t get_cpu_mhz(void)
     xatp.space = XENMAPSPACE_shared_info;
     xatp.idx = 0;
     xatp.gpfn = (unsigned long)shared_info >> 12;
-    if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+    do {
+        rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+        if ( rc == -ENOENT )
+            cpu_relax();
+    } while ( rc == -ENOENT );
+    if ( rc != 0 )
         BUG();

     /* Get a consistent snapshot of scale factor (multiplier and shift). */
--- xen-4.0.1-testing.orig/tools/include/Makefile
+++ xen-4.0.1-testing/tools/include/Makefile
@@ -12,6 +12,7 @@ xen/.dir:
 	@rm -rf xen
 	mkdir -p xen/libelf
 	ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
+	ln -sf ../$(XEN_ROOT)/xen/include/xen/errno.h xen
 	ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
 	ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
 	ln -sf ../xen-sys/$(XEN_OS) xen/sys
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/vga.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/vga.c
@@ -2157,9 +2157,10 @@ void set_vram_mapping(void *opaque, unsi
     for (i = 0; i < (end - begin) >> TARGET_PAGE_BITS; i++) {
         xatp.idx = (s->vram_gmfn >> TARGET_PAGE_BITS) + i;
         xatp.gpfn = (begin >> TARGET_PAGE_BITS) + i;
-        rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp);
+        while ((rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp)) && errno == ENOENT)
+            usleep(1000);
         if (rc) {
-            fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, rc);
+            fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, errno);
             return;
         }
     }
--- xen-4.0.1-testing.orig/tools/libxc/xc_domain.c
+++ xen-4.0.1-testing/tools/libxc/xc_domain.c
@@ -536,6 +536,44 @@ int xc_domain_get_tsc_info(int xc_handle
     return rc;
 }

+static int do_xenmem_op_retry(int xc_handle, int cmd, struct xen_memory_reservation *reservation, unsigned long nr_extents, xen_pfn_t *extent_start)
+{
+    int err = 0;
+    unsigned long count = nr_extents;
+    unsigned long delay = 0;
+    unsigned long start = 0;
+
+    fprintf(stderr, "%s: cmd %d count %lx\n",__func__,cmd,count);
+    while ( count && start < nr_extents )
+    {
+        set_xen_guest_handle(reservation->extent_start, extent_start + start);
+        reservation->nr_extents = count;
+
+        err = xc_memory_op(xc_handle, cmd, reservation);
+        if ( err == count )
+        {
+            err = 0;
+            break;
+        }
+
+        if ( err > count || err < 0 || delay > 1000 * 1000)
+        {
+            fprintf(stderr, "%s: %d err %x count %lx start %lx delay %lu/%lu\n",__func__,cmd,err,count,start,delay,delay/666);
+            err = -1;
+            break;
+        }
+
+        if ( err )
+            delay = 0;
+
+        start += err;
+        count -= err;
+        usleep(delay);
+        delay += 666; /* 1500 iterations, 12 seconds */
+    }
+
+    return err;
+}

 int xc_domain_memory_increase_reservation(int xc_handle,
                                           uint32_t domid,
@@ -546,26 +584,18 @@
 {
     int err;
     struct xen_memory_reservation reservation = {
-        .nr_extents = nr_extents,
         .extent_order = extent_order,
         .mem_flags = mem_flags,
         .domid = domid
     };

-    /* may be NULL */
-    set_xen_guest_handle(reservation.extent_start, extent_start);
-
-    err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation);
-    if ( err == nr_extents )
-        return 0;
-
-    if ( err >= 0 )
+    err = do_xenmem_op_retry(xc_handle, XENMEM_increase_reservation, &reservation, nr_extents, extent_start);
+    if ( err < 0 )
     {
         DPRINTF("Failed allocation for dom %d: "
                 "%ld extents of order %d, mem_flags %x\n",
                 domid, nr_extents, extent_order, mem_flags);
         errno = ENOMEM;
-        err = -1;
     }

     return err;
@@ -579,14 +609,11 @@ int xc_domain_memory_decrease_reservatio
 {
     int err;
     struct xen_memory_reservation reservation = {
-        .nr_extents = nr_extents,
         .extent_order = extent_order,
         .mem_flags = 0,
         .domid = domid
     };

-    set_xen_guest_handle(reservation.extent_start, extent_start);
-
     if ( extent_start == NULL )
     {
         DPRINTF("decrease_reservation extent_start is NULL!\n");
@@ -594,16 +621,12 @@ int xc_domain_memory_decrease_reservatio
         return -1;
     }

-    err = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation);
-    if ( err == nr_extents )
-        return 0;
-
-    if ( err >= 0 )
+    err = do_xenmem_op_retry(xc_handle, XENMEM_decrease_reservation, &reservation, nr_extents, extent_start);
+    if ( err < 0 )
     {
         DPRINTF("Failed deallocation for dom %d: %ld extents of order %d\n",
                 domid, nr_extents, extent_order);
         errno = EINVAL;
-        err = -1;
     }

     return err;
@@ -618,23 +641,17 @@
 {
     int err;
     struct xen_memory_reservation reservation = {
-        .nr_extents = nr_extents,
         .extent_order = extent_order,
         .mem_flags = mem_flags,
         .domid = domid
     };

-    set_xen_guest_handle(reservation.extent_start, extent_start);
-
-    err = xc_memory_op(xc_handle, XENMEM_populate_physmap, &reservation);
-    if ( err == nr_extents )
-        return 0;
-    if ( err >= 0 )
+    err = do_xenmem_op_retry(xc_handle, XENMEM_populate_physmap, &reservation, nr_extents, extent_start);
+    if ( err < 0 )
    {
         DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n",
                 domid, nr_extents, extent_order);
         errno = EBUSY;
-        err = -1;
     }

     return err;
--- xen-4.0.1-testing.orig/xen/arch/x86/mm.c
+++ xen-4.0.1-testing/xen/arch/x86/mm.c
@@ -3660,6 +3660,8 @@ static int create_grant_p2m_mapping(uint
         p2mt = p2m_grant_map_rw;
     rc = guest_physmap_add_entry(current->domain, addr >> PAGE_SHIFT,
                                  frame, 0, p2mt);
+    if ( rc == -ENOENT )
+        return GNTST_eagain;
     if ( rc )
         return GNTST_general_error;
     else
@@ -4315,17 +4317,25 @@ long arch_memory_op(int op, XEN_GUEST_HA
         case XENMAPSPACE_gmfn:
         {
             p2m_type_t p2mt;
+            unsigned long tmp_mfn;

-            xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0));
+            tmp_mfn = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0));
+            if ( unlikely(p2m_is_paging(p2mt)) )
+            {
+                if ( p2m_is_paged(p2mt) )
+                    p2m_mem_paging_populate(d, xatp.idx);
+                rcu_unlock_domain(d);
+                return -ENOENT;
+            }
             /* If the page is still shared, exit early */
             if ( p2m_is_shared(p2mt) )
             {
                 rcu_unlock_domain(d);
                 return -ENOMEM;
             }
-            if ( !get_page_from_pagenr(xatp.idx, d) )
+            if ( !get_page_from_pagenr(tmp_mfn, d) )
                 break;
-            mfn = xatp.idx;
+            mfn = tmp_mfn;
             page = mfn_to_page(mfn);
             break;
         }
@@ -4354,8 +4364,16 @@ long arch_memory_op(int op, XEN_GUEST_HA
             /* Xen heap frames are simply unhooked from this phys slot. */
             guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
         else
+        {
             /* Normal domain memory is freed, to avoid leaking memory. */
-            guest_remove_page(d, xatp.gpfn);
+            rc = guest_remove_page(d, xatp.gpfn);
+            if ( rc == -ENOENT )
+            {
+                domain_unlock(d);
+                rcu_unlock_domain(d);
+                return rc;
+            }
+        }
     }

     /* Unmap from old location, if any. */
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c
@@ -2186,6 +2186,13 @@ guest_physmap_add_entry(struct domain *d
             P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                       mfn + i, ogfn, gfn + i);
             omfn = gfn_to_mfn_query(d, ogfn, &ot);
+            if ( unlikely(p2m_is_paging(ot)) )
+            {
+                p2m_unlock(d->arch.p2m);
+                if ( p2m_is_paged(ot) )
+                    p2m_mem_paging_populate(d, ogfn);
+                return -ENOENT;
+            }
             /* If we get here, we know the local domain owns the page,
                so it can't have been grant mapped in. */
             BUG_ON( p2m_is_grant(ot) );
--- xen-4.0.1-testing.orig/xen/common/memory.c
+++ xen-4.0.1-testing/xen/common/memory.c
@@ -95,6 +95,7 @@ static void populate_physmap(struct memo
     unsigned long i, j;
     xen_pfn_t gpfn, mfn;
     struct domain *d = a->domain;
+    int rc;

     if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                      a->nr_extents-1) )
@@ -134,7 +135,12 @@ static void populate_physmap(struct memo
         }

         mfn = page_to_mfn(page);
-        guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+        rc = guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+        if ( rc != 0 )
+        {
+            free_domheap_pages(page, a->extent_order);
+            goto out;
+        }

         if ( !paging_mode_translate(d) )
         {
@@ -162,6 +168,12 @@ int guest_remove_page(struct domain *d,

 #ifdef CONFIG_X86
     mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    if ( unlikely(p2m_is_paging(p2mt)) )
+    {
+        if ( p2m_is_paged(p2mt) )
+            p2m_mem_paging_populate(d, gmfn);
+        return -ENOENT;
+    }
 #else
     mfn = gmfn_to_mfn(d, gmfn);
 #endif
@@ -360,6 +372,13 @@ static long memory_exchange(XEN_GUEST_HA
             /* Shared pages cannot be exchanged */
             mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt, 0));
+            if ( p2m_is_paging(p2mt) )
+            {
+                if ( p2m_is_paged(p2mt) )
+                    p2m_mem_paging_populate(d, gmfn);
+                rc = -ENOENT;
+                goto fail;
+            }
             if ( p2m_is_shared(p2mt) )
             {
                 rc = -ENOMEM;
                 goto fail;
             }
@@ -456,7 +475,9 @@ static long memory_exchange(XEN_GUEST_HA
                                 &gpfn, exch.out.extent_start, (i<