Following a change in Linux 2.6.33, make x86-32 always use
__builtin_mem{cpy,set}() on gcc 4.0+. In particular, this works around
certain intermediate gcc revisions generating out-of-range-array-index
warnings with the current inline implementation.

It may be worth considering making this the case for x86-64 too.

At the same time, eliminate the redundant inline assembly in the C
file and instead use the inline functions coming from the header.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
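
For reference, a minimal standalone sketch of the dispatch pattern the header
change adopts (this is not the Xen header itself; my_memcpy and the fallback
body are made up for illustration): on 32-bit builds with gcc 4.0 or newer the
function maps straight to the compiler builtin, which gcc expands inline for
small constant sizes and lowers to an ordinary memcpy call otherwise, while
older compilers keep a hand-written inline implementation.

#include <stddef.h>

#if defined(__i386__) && __GNUC__ >= 4
/* Let gcc choose the expansion: inline moves for small constant n,
 * a plain memcpy call otherwise. */
#define my_memcpy(d, s, n) __builtin_memcpy(d, s, n)
#else
/* Stand-in for the header's existing inline implementation
 * (__variable_memcpy()/__constant_memcpy() in asm-x86/string.h). */
static inline void *my_memcpy(void *d, const void *s, size_t n)
{
    char *dp = d;
    const char *sp = s;
    while ( n-- )
        *dp++ = *sp++;
    return d;
}
#endif
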
Index: xen-4.0.0-testing/xen/arch/x86/string.c
===================================================================
--- xen-4.0.0-testing.orig/xen/arch/x86/string.c
+++ xen-4.0.0-testing/xen/arch/x86/string.c
@@ -11,44 +11,13 @@
 #undef memcpy
 void *memcpy(void *dest, const void *src, size_t n)
 {
-    long d0, d1, d2;
-
-    asm volatile (
-#ifdef __i386__
-        " rep movsl ; "
-#else
-        " rep movsq ; "
-        " testb $4,%b4 ; "
-        " je 0f ; "
-        " movsl ; "
-        "0: ; "
-#endif
-        " testb $2,%b4 ; "
-        " je 1f ; "
-        " movsw ; "
-        "1: testb $1,%b4 ; "
-        " je 2f ; "
-        " movsb ; "
-        "2: "
-        : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-        : "0" (n/sizeof(long)), "q" (n), "1" (dest), "2" (src)
-        : "memory");
-
-    return dest;
+    return __variable_memcpy(dest, src, n);
 }
 
 #undef memset
 void *memset(void *s, int c, size_t n)
 {
-    long d0, d1;
-
-    asm volatile (
-        "rep stosb"
-        : "=&c" (d0), "=&D" (d1)
-        : "a" (c), "1" (s), "0" (n)
-        : "memory");
-
-    return s;
+    return __memset_generic(s, c, n);
 }
 
 #undef memmove
Index: xen-4.0.0-testing/xen/include/asm-x86/string.h
===================================================================
--- xen-4.0.0-testing.orig/xen/include/asm-x86/string.h
+++ xen-4.0.0-testing/xen/include/asm-x86/string.h
@@ -16,6 +16,11 @@ static inline void *__variable_memcpy(vo
     return to;
 }
 
+#define __HAVE_ARCH_MEMCPY
+#if defined(__i386__) && __GNUC__ >= 4
+#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
+#else
+
 /*
  * This looks horribly ugly, but the compiler can optimize it totally,
  * as the count is constant.
@@ -95,7 +100,6 @@ static always_inline void * __constant_m
     return to;
 }
 
-#define __HAVE_ARCH_MEMCPY
 /* align source to a 64-bit boundary */
 static always_inline
 void *__var_memcpy(void *t, const void *f, size_t n)
@@ -121,11 +125,13 @@ void *__memcpy(void *t, const void *f, s
             __var_memcpy((t),(f),(n)));
 }
 
+#endif /* !__i386__ || __GNUC__ < 4 */
+
 /* Some version of gcc don't have this builtin. It's non-critical anyway. */
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *dest, const void *src, size_t n);
 
-static inline void *__memset_generic(void *s, char c, size_t count)
+static inline void *__memset_generic(void *s, int c, size_t count)
 {
     long d0, d1;
     __asm__ __volatile__ (
@@ -134,6 +140,11 @@ static inline void *__memset_generic(voi
     return s;
 }
 
+#define __HAVE_ARCH_MEMSET
+#if defined(__i386__) && __GNUC__ >= 4
+#define memset(s, c, n) __builtin_memset(s, c, n)
+#else
+
 /* we might want to write optimized versions of these later */
 #define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
 
@@ -238,11 +249,12 @@ static always_inline void *__constant_c_
 #define MEMSET_PATTERN_MUL 0x01010101UL
 #endif
 
-#define __HAVE_ARCH_MEMSET
 #define memset(s, c, count) (__memset((s),(c),(count)))
 #define __memset(s, c, count) \
     (__builtin_constant_p(c) ? \
      __constant_c_x_memset((s),(MEMSET_PATTERN_MUL*(unsigned char)(c)),(count)) : \
      __var_x_memset((s),(c),(count)))
 
+#endif /* !__i386__ || __GNUC__ < 4 */
+
 #endif /* __X86_STRING_H__ */