# HG changeset patch # User Keir Fraser # Date 1225708322 0 # Node ID 540483d2a98f3fbabf06961cc0cc52e3c59c245b # Parent 303b1014f91e5fa0783a5d7095626a47e82db9d0 x86: simplify page reference handling for partially (in-)validated pages Simplify general page reference management for preempted (partially [in-]validated) pages: Reserve on reference that can be acquired without the risk of overflowing the reference count, thus allowing to have a simplified get_page() equivalent that cannot fail (but must be used with care). Doing this conversion pointed out a latent issue in the changes done previously in this area: The extra reference must be acquired before the 'normal' reference gets dropped, so the patch fixes this at once in both the alloc_page_type() and free_page_type() paths (it's really only the latter that failed to work with the change described above). Signed-off-by: Jan Beulich --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -1856,7 +1856,8 @@ int get_page(struct page_info *page, str nx = x + 1; d = nd; if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */ - unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */ + /* Keep one spare reference to be acquired by get_page_light(). */ + unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */ unlikely(d != _domain) ) /* Wrong owner? */ { if ( !_shadow_mode_refcounts(domain) && !domain->is_dying ) @@ -1878,6 +1879,28 @@ int get_page(struct page_info *page, str return 1; } +/* + * Special version of get_page() to be used exclusively when + * - a page is known to already have a non-zero reference count + * - the page does not need its owner to be checked + * - it will not be called more than once without dropping the thus + * acquired reference again. + * Due to get_page() reserving one reference, this call cannot fail. + */ +static void get_page_light(struct page_info *page) +{ + u32 x, nx, y = page->count_info; + + do { + x = y; + nx = x + 1; + BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */ + BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */ + y = cmpxchg(&page->count_info, x, nx); + } + while ( unlikely(y != x) ); +} + static int alloc_page_type(struct page_info *page, unsigned long type, int preemptible) @@ -1885,10 +1908,6 @@ static int alloc_page_type(struct page_i struct domain *owner = page_get_owner(page); int rc; - /* Obtain an extra reference to retain if we set PGT_partial. */ - if ( preemptible && !get_page(page, owner) ) - return -EINVAL; - /* A page table is dirtied when its type count becomes non-zero. */ if ( likely(owner != NULL) ) paging_mark_dirty(owner, page_to_mfn(page)); @@ -1922,14 +1941,10 @@ static int alloc_page_type(struct page_i wmb(); if ( rc == -EAGAIN ) { + get_page_light(page); page->u.inuse.type_info |= PGT_partial; - return -EAGAIN; } - - if ( preemptible ) - put_page(page); - - if ( rc == -EINTR ) + else if ( rc == -EINTR ) { ASSERT((page->u.inuse.type_info & (PGT_count_mask|PGT_validated|PGT_partial)) == 1); @@ -2044,8 +2059,8 @@ static int __put_final_page_type( } else if ( rc == -EINTR ) { - ASSERT(!(page->u.inuse.type_info & - (PGT_count_mask|PGT_validated|PGT_partial))); + ASSERT((page->u.inuse.type_info & + (PGT_count_mask|PGT_validated|PGT_partial)) == 1); if ( !(shadow_mode_enabled(page_get_owner(page)) && (page->count_info & PGC_page_table)) ) page->tlbflush_timestamp = tlbflush_current_time(); @@ -2056,14 +2071,10 @@ static int __put_final_page_type( { BUG_ON(rc != -EAGAIN); wmb(); + get_page_light(page); page->u.inuse.type_info |= PGT_partial; - /* Must skip put_page() below. */ - preemptible = 0; } - if ( preemptible ) - put_page(page); - return rc; } @@ -2072,10 +2083,7 @@ static int __put_page_type(struct page_i int preemptible) { unsigned long nx, x, y = page->u.inuse.type_info; - - /* Obtain an extra reference to retain if we set PGT_partial. */ - if ( preemptible && !get_page(page, page_get_owner(page)) ) - return -EINVAL; + int rc = 0; for ( ; ; ) { @@ -2098,10 +2106,11 @@ static int __put_page_type(struct page_i if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ) continue; + /* We cleared the 'valid bit' so we do the clean up. */ + rc = __put_final_page_type(page, x, preemptible); if ( x & PGT_partial ) put_page(page); - /* We cleared the 'valid bit' so we do the clean up. */ - return __put_final_page_type(page, x, preemptible); + break; } /* @@ -2120,17 +2129,10 @@ static int __put_page_type(struct page_i break; if ( preemptible && hypercall_preempt_check() ) - { - if ( preemptible ) - put_page(page); return -EINTR; - } } - if ( preemptible ) - put_page(page); - - return 0; + return rc; } @@ -2138,6 +2140,7 @@ static int __get_page_type(struct page_i int preemptible) { unsigned long nx, x, y = page->u.inuse.type_info; + int rc = 0; ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); @@ -2233,11 +2236,7 @@ static int __get_page_type(struct page_i } if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) - { - if ( (x & PGT_partial) && !(nx & PGT_partial) ) - put_page(page); break; - } if ( preemptible && hypercall_preempt_check() ) return -EINTR; @@ -2264,10 +2263,13 @@ static int __get_page_type(struct page_i page->nr_validated_ptes = 0; page->partial_pte = 0; } - return alloc_page_type(page, type, preemptible); + rc = alloc_page_type(page, type, preemptible); } - return 0; + if ( (x & PGT_partial) && !(nx & PGT_partial) ) + put_page(page); + + return rc; } void put_page_type(struct page_info *page)