# HG changeset patch # User Keir Fraser # Date 1273583128 -3600 # Node ID d77a88f938c635c3ccfedaa00f946e4d9ed26098 # Parent 2b5e14e4c5e57b0064d1c0415d9c4629bd0aac9b x86: Replace our own specialised versions of memset and memcpy with direct use of gcc's built-in versions. This dramatically simplifies our code while also avoiding compile warnings with certain intermediate versions of gcc. This patch is based on an initial version by Jan Beulich. Signed-off-by: Keir Fraser Index: xen-4.0.0-testing/xen/arch/x86/string.c =================================================================== --- xen-4.0.0-testing.orig/xen/arch/x86/string.c +++ xen-4.0.0-testing/xen/arch/x86/string.c @@ -14,25 +14,12 @@ void *memcpy(void *dest, const void *src long d0, d1, d2; asm volatile ( -#ifdef __i386__ - " rep movsl ; " -#else - " rep movsq ; " - " testb $4,%b4 ; " - " je 0f ; " - " movsl ; " - "0: ; " -#endif - " testb $2,%b4 ; " - " je 1f ; " - " movsw ; " - "1: testb $1,%b4 ; " - " je 2f ; " - " movsb ; " - "2: " + " rep ; movs"__OS" ; " + " mov %4,%3 ; " + " rep ; movsb " : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n/sizeof(long)), "q" (n), "1" (dest), "2" (src) - : "memory"); + : "0" (n/BYTES_PER_LONG), "r" (n%BYTES_PER_LONG), "1" (dest), "2" (src) + : "memory" ); return dest; } @@ -55,7 +42,7 @@ void *memset(void *s, int c, size_t n) void *memmove(void *dest, const void *src, size_t n) { long d0, d1, d2; - + if ( dest < src ) return memcpy(dest, src, n); Index: xen-4.0.0-testing/xen/include/asm-x86/string.h =================================================================== --- xen-4.0.0-testing.orig/xen/include/asm-x86/string.h +++ xen-4.0.0-testing/xen/include/asm-x86/string.h @@ -3,246 +3,14 @@ #include -static inline void *__variable_memcpy(void *to, const void *from, size_t n) -{ - long d0, d1, d2; - __asm__ __volatile__ ( - " rep ; movs"__OS"\n" - " mov %4,%3 \n" - " rep ; movsb \n" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n/BYTES_PER_LONG), "r" (n%BYTES_PER_LONG), "1" (to), "2" (from) - : "memory" ); - return to; -} - -/* - * This looks horribly ugly, but the compiler can optimize it totally, - * as the count is constant. - */ -static always_inline void * __constant_memcpy( - void * to, const void * from, size_t n) -{ - switch ( n ) - { - case 0: - return to; - case 1: - *(u8 *)to = *(const u8 *)from; - return to; - case 2: - *(u16 *)to = *(const u16 *)from; - return to; - case 3: - *(u16 *)to = *(const u16 *)from; - *(2+(u8 *)to) = *(2+(const u8 *)from); - return to; - case 4: - *(u32 *)to = *(const u32 *)from; - return to; - case 5: - *(u32 *)to = *(const u32 *)from; - *(4+(u8 *)to) = *(4+(const u8 *)from); - return to; - case 6: - *(u32 *)to = *(const u32 *)from; - *(2+(u16 *)to) = *(2+(const u16 *)from); - return to; - case 7: - *(u32 *)to = *(const u32 *)from; - *(2+(u16 *)to) = *(2+(const u16 *)from); - *(6+(u8 *)to) = *(6+(const u8 *)from); - return to; - case 8: - *(u64 *)to = *(const u64 *)from; - return to; - case 12: - *(u64 *)to = *(const u64 *)from; - *(2+(u32 *)to) = *(2+(const u32 *)from); - return to; - case 16: - *(u64 *)to = *(const u64 *)from; - *(1+(u64 *)to) = *(1+(const u64 *)from); - return to; - case 20: - *(u64 *)to = *(const u64 *)from; - *(1+(u64 *)to) = *(1+(const u64 *)from); - *(4+(u32 *)to) = *(4+(const u32 *)from); - return to; - } -#define COMMON(x) \ - __asm__ __volatile__ ( \ - "rep ; movs"__OS \ - x \ - : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ - : "0" (n/BYTES_PER_LONG), "1" (to), "2" (from) \ - : "memory" ); - { - long d0, d1, d2; - switch ( n % BYTES_PER_LONG ) - { - case 0: COMMON(""); return to; - case 1: COMMON("\n\tmovsb"); return to; - case 2: COMMON("\n\tmovsw"); return to; - case 3: COMMON("\n\tmovsw\n\tmovsb"); return to; - case 4: COMMON("\n\tmovsl"); return to; - case 5: COMMON("\n\tmovsl\n\tmovsb"); return to; - case 6: COMMON("\n\tmovsl\n\tmovsw"); return to; - case 7: COMMON("\n\tmovsl\n\tmovsw\n\tmovsb"); return to; - } - } -#undef COMMON - return to; -} - #define __HAVE_ARCH_MEMCPY -/* align source to a 64-bit boundary */ -static always_inline -void *__var_memcpy(void *t, const void *f, size_t n) -{ - int off = (unsigned long)f & 0x7; - /* just do alignment if needed and if size is worth */ - if ( (n > 32) && off ) { - size_t n1 = 8 - off; - __variable_memcpy(t, f, n1); - __variable_memcpy(t + n1, f + n1, n - n1); - return t; - } else { - return (__variable_memcpy(t, f, n)); - } -} - -#define memcpy(t,f,n) (__memcpy((t),(f),(n))) -static always_inline -void *__memcpy(void *t, const void *f, size_t n) -{ - return (__builtin_constant_p(n) ? - __constant_memcpy((t),(f),(n)) : - __var_memcpy((t),(f),(n))); -} +#define memcpy(t,f,n) (__builtin_memcpy((t),(f),(n))) -/* Some version of gcc don't have this builtin. It's non-critical anyway. */ +/* Some versions of gcc don't have this builtin. It's non-critical anyway. */ #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *dest, const void *src, size_t n); -static inline void *__memset_generic(void *s, char c, size_t count) -{ - long d0, d1; - __asm__ __volatile__ ( - "rep ; stosb" - : "=&c" (d0), "=&D" (d1) : "a" (c), "1" (s), "0" (count) : "memory" ); - return s; -} - -/* we might want to write optimized versions of these later */ -#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) - -/* - * memset(x,0,y) is a reasonably common thing to do, so we want to fill - * things 32 bits at a time even when we don't know the size of the - * area at compile-time.. - */ -static inline void *__constant_c_memset(void *s, unsigned long c, size_t count) -{ - long d0, d1; - __asm__ __volatile__( - " rep ; stos"__OS"\n" - " mov %3,%4 \n" - " rep ; stosb \n" - : "=&c" (d0), "=&D" (d1) - : "a" (c), "r" (count%BYTES_PER_LONG), - "0" (count/BYTES_PER_LONG), "1" (s) - : "memory" ); - return s; -} - -/* - * This looks horribly ugly, but the compiler can optimize it totally, - * as we by now know that both pattern and count is constant.. - */ -static always_inline void *__constant_c_and_count_memset( - void *s, unsigned long pattern, size_t count) -{ - switch ( count ) - { - case 0: - return s; - case 1: - *(u8 *)s = pattern; - return s; - case 2: - *(u16 *)s = pattern; - return s; - case 3: - *(u16 *)s = pattern; - *(2+(u8 *)s) = pattern; - return s; - case 4: - *(u32 *)s = pattern; - return s; - case 5: - *(u32 *)s = pattern; - *(4+(u8 *)s) = pattern; - return s; - case 6: - *(u32 *)s = pattern; - *(2+(u16 *)s) = pattern; - return s; - case 7: - *(u32 *)s = pattern; - *(2+(u16 *)s) = pattern; - *(6+(u8 *)s) = pattern; - return s; - case 8: - *(u64 *)s = pattern; - return s; - } -#define COMMON(x) \ - __asm__ __volatile__ ( \ - "rep ; stos"__OS \ - x \ - : "=&c" (d0), "=&D" (d1) \ - : "a" (pattern), "0" (count/BYTES_PER_LONG), "1" (s) \ - : "memory" ) - { - long d0, d1; - switch ( count % BYTES_PER_LONG ) - { - case 0: COMMON(""); return s; - case 1: COMMON("\n\tstosb"); return s; - case 2: COMMON("\n\tstosw"); return s; - case 3: COMMON("\n\tstosw\n\tstosb"); return s; - case 4: COMMON("\n\tstosl"); return s; - case 5: COMMON("\n\tstosl\n\tstosb"); return s; - case 6: COMMON("\n\tstosl\n\tstosw"); return s; - case 7: COMMON("\n\tstosl\n\tstosw\n\tstosb"); return s; - } - } -#undef COMMON - return s; -} - -#define __constant_c_x_memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_c_and_count_memset((s),(c),(count)) : \ - __constant_c_memset((s),(c),(count))) - -#define __var_x_memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_count_memset((s),(c),(count)) : \ - __memset_generic((s),(c),(count))) - -#ifdef CONFIG_X86_64 -#define MEMSET_PATTERN_MUL 0x0101010101010101UL -#else -#define MEMSET_PATTERN_MUL 0x01010101UL -#endif - #define __HAVE_ARCH_MEMSET -#define memset(s, c, count) (__memset((s),(c),(count))) -#define __memset(s, c, count) \ -(__builtin_constant_p(c) ? \ - __constant_c_x_memset((s),(MEMSET_PATTERN_MUL*(unsigned char)(c)),(count)) : \ - __var_x_memset((s),(c),(count))) +#define memset(s,c,n) (__builtin_memset((s),(c),(n))) #endif /* __X86_STRING_H__ */