# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1329134942 -3600
# Node ID e953d536d3c6e344cf310f63ead9feda87cc67b0
# Parent 9ad1e42c341bc78463b6f6610a6300f75b535fbb
x86/paging: use clear_guest() for zero-filling guest buffers

While static arrays of all zeros may be tolerable (though they are
simply inefficient now that we have the necessary infrastructure),
using on-stack arrays for this purpose is asking for trouble,
particularly when their size has no enforced upper limit: a large
enough request can overflow the hypervisor stack, even if the code
can be reached via administrative interfaces only.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>

(Include necessary prerequisite bits from 24543:d6cdbc4fe078
"Introduce clear_user and clear_guest".)
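For illustration only (not part of the diff below): the calling pattern
being replaced, and its replacement, look roughly like this. The
function names and the bitmap handle are made-up stand-ins;
copy_to_guest_offset() and clear_guest() are the real helpers involved.

    /* Before: zero-fill by bouncing through an all-zero scratch array.
     * The on-stack variant sizes the array from caller-controlled
     * input, so a large enough request overruns the hypervisor stack. */
    static int zero_fill_old(XEN_GUEST_HANDLE(uint8) bitmap,
                             unsigned int size)
    {
        unsigned long zeroes[size];   /* unbounded VLA: the hazard */

        memset(zeroes, 0x00, size * BYTES_PER_LONG);
        return copy_to_guest_offset(bitmap, 0, (uint8_t *)zeroes,
                                    size * BYTES_PER_LONG) ? -EFAULT : 0;
    }

    /* After: no scratch buffer at all; clear_guest() zeroes the guest
     * buffer in place and returns the number of bytes not cleared. */
    static int zero_fill_new(XEN_GUEST_HANDLE(uint8) bitmap,
                             unsigned int size)
    {
        return clear_guest(bitmap, size * BYTES_PER_LONG) ? -EFAULT : 0;
    }
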
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2163,6 +2163,86 @@ static enum hvm_copy_result __hvm_copy(
     return HVMCOPY_okay;
 }
 
+static enum hvm_copy_result __hvm_clear(paddr_t addr, int size)
+{
+    struct vcpu *curr = current;
+    struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
+    unsigned long gfn, mfn;
+    p2m_type_t p2mt;
+    char *p;
+    int count, todo = size;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+
+    /*
+     * XXX Disable for 4.1.0: PV-on-HVM drivers will do grant-table ops
+     * such as query_size. Grant-table code currently does copy_to/from_guest
+     * accesses under the big per-domain lock, which this test would disallow.
+     * The test is not needed until we implement sleeping-on-waitqueue when
+     * we access a paged-out frame, and that's post 4.1.0 now.
+     */
+#if 0
+    /*
+     * If the required guest memory is paged out, this function may sleep.
+     * Hence we bail immediately if called from atomic context.
+     */
+    if ( in_atomic() )
+        return HVMCOPY_unhandleable;
+#endif
+
+    while ( todo > 0 )
+    {
+        count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
+
+        gfn = paging_gva_to_gfn(curr, addr, &pfec);
+        if ( gfn == INVALID_GFN )
+        {
+            if ( pfec == PFEC_page_paged )
+                return HVMCOPY_gfn_paged_out;
+            if ( pfec == PFEC_page_shared )
+                return HVMCOPY_gfn_shared;
+            return HVMCOPY_bad_gva_to_gfn;
+        }
+
+        mfn = mfn_x(gfn_to_mfn_unshare(p2m, gfn, &p2mt, 0));
+
+        if ( p2m_is_paging(p2mt) )
+        {
+            p2m_mem_paging_populate(p2m, gfn);
+            return HVMCOPY_gfn_paged_out;
+        }
+        if ( p2m_is_shared(p2mt) )
+            return HVMCOPY_gfn_shared;
+        if ( p2m_is_grant(p2mt) )
+            return HVMCOPY_unhandleable;
+        if ( !p2m_is_ram(p2mt) )
+            return HVMCOPY_bad_gfn_to_mfn;
+        ASSERT(mfn_valid(mfn));
+
+        p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
+
+        if ( p2mt == p2m_ram_ro )
+        {
+            static unsigned long lastpage;
+            if ( xchg(&lastpage, gfn) != gfn )
+                gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
+                         " memory page. gfn=%#lx, mfn=%#lx\n",
+                         gfn, mfn);
+        }
+        else
+        {
+            memset(p, 0x00, count);
+            paging_mark_dirty(curr->domain, mfn);
+        }
+
+        unmap_domain_page(p);
+
+        addr += count;
+        todo -= count;
+    }
+
+    return HVMCOPY_okay;
+}
+
 enum hvm_copy_result hvm_copy_to_guest_phys(
     paddr_t paddr, void *buf, int size)
 {
@@ -2249,6 +2329,23 @@ unsigned long copy_to_user_hvm(void *to,
     return rc ? len : 0; /* fake a copy_to_user() return code */
 }
 
+unsigned long clear_user_hvm(void *to, unsigned int len)
+{
+    int rc;
+
+#ifdef __x86_64__
+    if ( !current->arch.hvm_vcpu.hcall_64bit &&
+         is_compat_arg_xlat_range(to, len) )
+    {
+        memset(to, 0x00, len);
+        return 0;
+    }
+#endif
+
+    rc = __hvm_clear((unsigned long)to, len);
+    return rc ? len : 0; /* fake a copy_to_user() return code */
+}
+
 unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
 {
     int rc;
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -21,11 +21,11 @@
  */
 
 #include <xen/init.h>
+#include <xen/guest_access.h>
 #include <asm/paging.h>
 #include <asm/shadow.h>
 #include <asm/p2m.h>
 #include <asm/hap.h>
-#include <asm/guest_access.h>
 #include <asm/hvm/nestedhvm.h>
 #include <xen/numa.h>
 
@@ -450,26 +450,30 @@ int paging_log_dirty_op(struct domain *d
                   (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
                   i2++ )
             {
-                static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG];
                 unsigned int bytes = PAGE_SIZE;
                 l1 = ((l2 && mfn_valid(l2[i2])) ?
-                      map_domain_page(mfn_x(l2[i2])) : zeroes);
+                      map_domain_page(mfn_x(l2[i2])) : NULL);
                 if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
                     bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
                 if ( likely(peek) )
                 {
-                    if ( copy_to_guest_offset(sc->dirty_bitmap, pages >> 3,
-                                              (uint8_t *)l1, bytes) != 0 )
+                    if ( (l1 ? copy_to_guest_offset(sc->dirty_bitmap,
+                                                    pages >> 3, (uint8_t *)l1,
+                                                    bytes)
+                             : clear_guest_offset(sc->dirty_bitmap,
+                                                  pages >> 3, bytes)) != 0 )
                     {
                         rv = -EFAULT;
                         goto out;
                     }
                 }
-                if ( clean && l1 != zeroes )
-                    clear_page(l1);
                 pages += bytes << 3;
-                if ( l1 != zeroes )
+                if ( l1 )
+                {
+                    if ( clean )
+                        clear_page(l1);
                     unmap_domain_page(l1);
+                }
             }
             if ( l2 )
                 unmap_domain_page(l2);
@@ -529,12 +533,9 @@ int paging_log_dirty_range(struct domain
     if ( !d->arch.paging.log_dirty.fault_count &&
          !d->arch.paging.log_dirty.dirty_count )
     {
-        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
-        unsigned long zeroes[size];
-        memset(zeroes, 0x00, size * BYTES_PER_LONG);
-        rv = 0;
-        if ( copy_to_guest_offset(dirty_bitmap, 0, (uint8_t *) zeroes,
-                                  size * BYTES_PER_LONG) != 0 )
+        unsigned int size = BITS_TO_LONGS(nr);
+
+        if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
             rv = -EFAULT;
         goto out;
     }
@@ -562,11 +563,10 @@ int paging_log_dirty_range(struct domain
                   (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
                   i2++ )
             {
-                static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG];
                 unsigned int bytes = PAGE_SIZE;
                 uint8_t *s;
                 l1 = ((l2 && mfn_valid(l2[i2])) ?
-                      map_domain_page(mfn_x(l2[i2])) : zeroes);
+                      map_domain_page(mfn_x(l2[i2])) : NULL);
 
                 s = ((uint8_t*)l1) + (b1 >> 3);
                 bytes -= b1 >> 3;
@@ -574,9 +574,18 @@ int paging_log_dirty_range(struct domain
                 if ( likely(((nr - pages + 7) >> 3) < bytes) )
                     bytes = (unsigned int)((nr - pages + 7) >> 3);
 
+                if ( !l1 )
+                {
+                    if ( clear_guest_offset(dirty_bitmap, pages >> 3,
+                                            bytes) != 0 )
+                    {
+                        rv = -EFAULT;
+                        goto out;
+                    }
+                }
                 /* begin_pfn is not 32K aligned, hence we have to bit
                  * shift the bitmap */
-                if ( b1 & 0x7 )
+                else if ( b1 & 0x7 )
                 {
                     int i, j;
                     uint32_t *l = (uint32_t*) s;
@@ -620,11 +629,12 @@ int paging_log_dirty_range(struct domain
                     }
                 }
 
-                if ( l1 != zeroes )
-                    clear_page(l1);
                 pages += bytes << 3;
-                if ( l1 != zeroes )
+                if ( l1 )
+                {
+                    clear_page(l1);
                     unmap_domain_page(l1);
+                }
                 b1 = b1 & 0x7;
             }
             b2 = 0;
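(Illustrative note, not part of the diff: in the paging_log_dirty_range()
hunk above, BITS_TO_LONGS() is exactly the rounding the removed code
open-coded. For a hypothetical nr = 100 bitmap bits with
BITS_PER_LONG = 64: old (100 + 64 - 1) / 64 = 2 longs, new
BITS_TO_LONGS(100) = 2 longs, so clear_guest() is asked to zero
2 * BYTES_PER_LONG = 16 bytes.)
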
--- a/xen/arch/x86/usercopy.c
+++ b/xen/arch/x86/usercopy.c
@@ -110,6 +110,42 @@ copy_to_user(void __user *to, const void
     return n;
 }
 
+#define __do_clear_user(addr,size)                                     \
+do {                                                                   \
+    long __d0;                                                         \
+    __asm__ __volatile__(                                              \
+        "0:     rep; stosl\n"                                          \
+        "       movl %2,%0\n"                                          \
+        "1:     rep; stosb\n"                                          \
+        "2:\n"                                                         \
+        ".section .fixup,\"ax\"\n"                                     \
+        "3:     lea 0(%2,%0,4),%0\n"                                   \
+        "       jmp 2b\n"                                              \
+        ".previous\n"                                                  \
+        _ASM_EXTABLE(0b,3b)                                            \
+        _ASM_EXTABLE(1b,2b)                                            \
+        : "=&c"(size), "=&D" (__d0)                                    \
+        : "r"(size & 3), "0"(size / 4), "1"((long)addr), "a"(0));      \
+} while (0)
+
+/**
+ * clear_user: - Zero a block of memory in user space.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+unsigned long
+clear_user(void __user *to, unsigned n)
+{
+    if ( access_ok(to, n) )
+        __do_clear_user(to, n);
+    return n;
+}
+
 /**
  * copy_from_user: - Copy a block of data from user space.
  * @to:   Destination address, in kernel space.
--- a/xen/include/asm-x86/guest_access.h
+++ b/xen/include/asm-x86/guest_access.h
@@ -21,6 +21,10 @@
     (is_hvm_vcpu(current) ?                     \
      copy_from_user_hvm((dst), (src), (len)) : \
      copy_from_user((dst), (src), (len)))
+#define raw_clear_guest(dst, len)               \
+    (is_hvm_vcpu(current) ?                     \
+     clear_user_hvm((dst), (len)) :             \
+     clear_user((dst), (len)))
 #define __raw_copy_to_guest(dst, src, len)      \
     (is_hvm_vcpu(current) ?                     \
      copy_to_user_hvm((dst), (src), (len)) :   \
@@ -29,6 +33,10 @@
     (is_hvm_vcpu(current) ?                     \
      copy_from_user_hvm((dst), (src), (len)) : \
      __copy_from_user((dst), (src), (len)))
+#define __raw_clear_guest(dst, len)             \
+    (is_hvm_vcpu(current) ?                     \
+     clear_user_hvm((dst), (len)) :             \
+     clear_user((dst), (len)))
 
 /* Is the guest handle a NULL reference? */
 #define guest_handle_is_null(hnd) ((hnd).p == NULL)
@@ -69,6 +77,11 @@
     raw_copy_from_guest(_d, _s+(off), sizeof(*_d)*(nr));\
 })
 
+#define clear_guest_offset(hnd, off, nr) ({     \
+    void *_d = (hnd).p;                         \
+    raw_clear_guest(_d+(off), nr);              \
+})
+
 /* Copy sub-field of a structure to guest context via a guest handle. */
 #define copy_field_to_guest(hnd, ptr, field) ({ \
     const typeof(&(ptr)->field) _s = &(ptr)->field; \
@@ -110,6 +123,11 @@
     __raw_copy_from_guest(_d, _s+(off), sizeof(*_d)*(nr));\
 })
 
+#define __clear_guest_offset(hnd, off, nr) ({   \
+    void *_d = (hnd).p;                         \
+    __raw_clear_guest(_d+(off), nr);            \
+})
+
 #define __copy_field_to_guest(hnd, ptr, field) ({      \
     const typeof(&(ptr)->field) _s = &(ptr)->field;    \
     void *_d = &(hnd).p->field;                        \
--- a/xen/include/asm-x86/hvm/guest_access.h
+++ b/xen/include/asm-x86/hvm/guest_access.h
@@ -2,6 +2,7 @@
 #define __ASM_X86_HVM_GUEST_ACCESS_H__
 
 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len);
+unsigned long clear_user_hvm(void *to, unsigned int len);
 unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len);
 
 #endif /* __ASM_X86_HVM_GUEST_ACCESS_H__ */
--- a/xen/include/asm-x86/uaccess.h
+++ b/xen/include/asm-x86/uaccess.h
@@ -16,6 +16,7 @@
 #endif
 
 unsigned long copy_to_user(void *to, const void *from, unsigned len);
+unsigned long clear_user(void *to, unsigned len);
 unsigned long copy_from_user(void *to, const void *from, unsigned len);
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 unsigned long __copy_to_user_ll(void *to, const void *from, unsigned n);
--- a/xen/include/xen/guest_access.h
+++ b/xen/include/xen/guest_access.h
@@ -15,10 +15,16 @@
 #define copy_from_guest(ptr, hnd, nr)           \
     copy_from_guest_offset(ptr, hnd, 0, nr)
 
+#define clear_guest(hnd, nr)                    \
+    clear_guest_offset(hnd, 0, nr)
+
 #define __copy_to_guest(hnd, ptr, nr)           \
     __copy_to_guest_offset(hnd, 0, ptr, nr)
 #define __copy_from_guest(ptr, hnd, nr)         \
     __copy_from_guest_offset(ptr, hnd, 0, nr)
 
+#define __clear_guest(hnd, nr)                  \
+    __clear_guest_offset(hnd, 0, nr)
+
 #endif /* __XEN_GUEST_ACCESS_H__ */
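
For illustration only (hypothetical stand-in, not code from the tree):
the clear_user() added above follows the copy_to_user() convention of
returning the number of bytes that could NOT be cleared, zero meaning
success, and __do_clear_user() splits the work into size/4 "rep stosl"
words plus a size&3 "rep stosb" tail. The stand-alone sketch below
mimics that split in plain C; in the real primitive a faulting store
makes the fixup code return the non-zero remainder instead.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical user-space stand-in for __do_clear_user(): zero n/4
     * 32-bit words (the "rep stosl" part), then the n & 3 trailing
     * bytes (the "rep stosb" part).  Returns the number of bytes not
     * cleared; no fault can occur here, so it always returns 0. */
    static unsigned long clear_block(void *to, unsigned int n)
    {
        uint32_t *p = to;
        unsigned int words = n / 4;

        while ( words-- )
            *p++ = 0;            /* rep stosl */
        memset(p, 0, n & 3);     /* rep stosb */
        return 0;
    }

    int main(void)
    {
        char buf[11];

        memset(buf, 0xff, sizeof(buf));
        /* 11 bytes -> two 4-byte words plus a 3-byte tail. */
        printf("uncleared: %lu\n", clear_block(buf, sizeof(buf)));
        return buf[10] ? 1 : 0;  /* last tail byte must be zero */
    }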