2012-01-31 14:41:44 +01:00
|
|
|
commit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880
|
|
|
|
Author: Ulrich Drepper <drepper@gmail.com>
|
|
|
|
Date: Thu Jan 26 07:45:14 2012 -0500
|
|
|
|
|
|
|
|
Reset bit_AVX in __cpu_features if OS support is missing
|
|
|
|
|
|
|
|
2012-01-26 Ulrich Drepper <drepper@gmail.com>
|
|
|
|
|
|
|
|
[BZ #13583]
|
|
|
|
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE.
|
|
|
|
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If
|
|
|
|
bit_AVX is set, also check OSXSAVE/XCR0 and reset bit_AVX if necessary.
|
|
|
|
|
|
|
|
2012-01-25 Joseph Myers <joseph@codesourcery.com>
|
|
|
|
|
|
|
|
* elf/tst-unique3.cc (gets): Remove declaration.
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/multiarch/init-arch.c
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c
|
2012-01-31 14:41:44 +01:00
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
/* Initialize CPU feature data.
|
|
|
|
This file is part of the GNU C Library.
|
|
|
|
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
|
|
|
+ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
|
|
|
Contributed by Ulrich Drepper <drepper@redhat.com>.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
@@ -144,6 +144,18 @@ __init_cpu_features (void)
|
|
|
|
else
|
|
|
|
kind = arch_kind_other;
|
|
|
|
|
|
|
|
+ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
|
|
|
|
+ {
|
|
|
|
+ /* Mark YMM as usable only if OSXSAVE is set and XCR0 has bits 1 and 2 set. */
|
2012-04-13 11:37:44 +02:00
|
|
|
+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
|
|
|
|
+ && ({ unsigned int xcrlow;
|
|
|
|
+ unsigned int xcrhigh;
|
|
|
|
+ asm ("xgetbv"
|
|
|
|
+ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
|
|
|
|
+ (xcrlow & 6) == 6; }))
|
|
|
|
+ __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
|
2012-01-31 14:41:44 +01:00
|
|
|
+ }
|
|
|
|
+
|
|
|
|
__cpu_features.family = family;
|
|
|
|
__cpu_features.model = model;
|
|
|
|
atomic_write_barrier ();
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/multiarch/init-arch.h
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h
|
2012-01-31 14:41:44 +01:00
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
/* This file is part of the GNU C Library.
|
|
|
|
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
|
|
|
+ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
2012-04-13 11:37:44 +02:00
|
|
|
@@ -22,11 +22,13 @@
|
2012-01-31 14:41:44 +01:00
|
|
|
#define bit_Prefer_SSE_for_memop (1 << 3)
|
|
|
|
#define bit_Fast_Unaligned_Load (1 << 4)
|
|
|
|
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
|
|
|
|
+#define bit_YMM_Usable (1 << 6)
|
|
|
|
|
|
|
|
#define bit_SSE2 (1 << 26)
|
|
|
|
#define bit_SSSE3 (1 << 9)
|
2012-04-13 11:37:44 +02:00
|
|
|
#define bit_SSE4_1 (1 << 19)
|
|
|
|
#define bit_SSE4_2 (1 << 20)
|
|
|
|
+#define bit_OSXSAVE (1 << 27)
|
|
|
|
#define bit_AVX (1 << 28)
|
|
|
|
#define bit_POPCOUNT (1 << 23)
|
|
|
|
#define bit_FMA (1 << 12)
|
|
|
|
@@ -48,6 +50,7 @@
|
2012-01-31 14:41:44 +01:00
|
|
|
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
+# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
|
|
|
|
|
|
|
|
#else /* __ASSEMBLER__ */
|
|
|
|
|
2012-04-13 11:37:44 +02:00
|
|
|
@@ -92,7 +95,7 @@ extern struct cpu_features
|
2012-01-31 14:41:44 +01:00
|
|
|
|
|
|
|
|
|
|
|
extern void __init_cpu_features (void) attribute_hidden;
|
|
|
|
-#define INIT_ARCH()\
|
|
|
|
+# define INIT_ARCH() \
|
|
|
|
do \
|
|
|
|
if (__cpu_features.kind == arch_kind_unknown) \
|
|
|
|
__init_cpu_features (); \
|
2012-04-13 11:37:44 +02:00
|
|
|
@@ -125,23 +128,21 @@ extern const struct cpu_features *__get_
|
2012-01-31 14:41:44 +01:00
|
|
|
# define index_Slow_BSF FEATURE_INDEX_1
|
|
|
|
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
|
|
|
|
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
|
|
|
|
+# define index_YMM_Usable FEATURE_INDEX_1
|
|
|
|
|
|
|
|
-#define HAS_ARCH_FEATURE(idx, bit) \
|
|
|
|
- ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
|
|
|
|
+# define HAS_ARCH_FEATURE(name) \
|
|
|
|
+ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
|
|
|
|
|
|
|
|
-#define HAS_FAST_REP_STRING \
|
|
|
|
- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
|
|
|
|
+# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
|
|
|
|
|
|
|
|
-#define HAS_FAST_COPY_BACKWARD \
|
|
|
|
- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
|
|
|
|
+# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
|
|
|
|
|
|
|
|
-#define HAS_SLOW_BSF \
|
|
|
|
- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
|
|
|
|
+# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
|
|
|
|
|
|
|
|
-#define HAS_PREFER_SSE_FOR_MEMOP \
|
|
|
|
- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
|
|
|
|
+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
|
|
|
|
|
|
|
|
-#define HAS_FAST_UNALIGNED_LOAD \
|
|
|
|
- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
|
|
|
|
+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
|
|
|
|
+
|
|
|
|
+# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
|
|
|
|
|
|
|
|
#endif /* __ASSEMBLER__ */
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/e_log.c
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/e_log.c
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/e_log.c
|
|
|
|
@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double
|
|
|
|
|
|
|
|
libm_ifunc (__ieee754_log,
|
|
|
|
HAS_FMA4 ? __ieee754_log_fma4
|
|
|
|
- : (HAS_AVX ? __ieee754_log_avx
|
|
|
|
- : __ieee754_log_sse2));
|
|
|
|
+ : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
|
|
|
|
strong_alias (__ieee754_log, __log_finite)
|
|
|
|
|
|
|
|
# define __ieee754_log __ieee754_log_sse2
|
|
|
|
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_atan.c
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_atan.c
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_atan.c
|
|
|
|
@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
|
|
|
|
# define __atan_fma4 ((void *) 0)
|
|
|
|
# endif
|
|
|
|
|
|
|
|
-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
|
|
|
|
+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
|
|
|
|
+ HAS_AVX ? __atan_avx : __atan_sse2));
|
|
|
|
|
|
|
|
# define atan __atan_sse2
|
|
|
|
#endif
|
|
|
|
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_sin.c
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_sin.c
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_sin.c
|
|
|
|
@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
|
|
|
|
# define __sin_fma4 ((void *) 0)
|
|
|
|
# endif
|
|
|
|
|
|
|
|
-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
|
|
|
|
+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
|
|
|
|
+ HAS_AVX ? __cos_avx : __cos_sse2));
|
|
|
|
weak_alias (__cos, cos)
|
|
|
|
|
|
|
|
-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
|
|
|
|
+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
|
|
|
|
+ HAS_AVX ? __sin_avx : __sin_sse2));
|
|
|
|
weak_alias (__sin, sin)
|
|
|
|
|
|
|
|
# define __cos __cos_sse2
|
|
|
|
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_tan.c
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_tan.c
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_tan.c
|
|
|
|
@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
|
|
|
|
# define __tan_fma4 ((void *) 0)
|
|
|
|
# endif
|
|
|
|
|
|
|
|
-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
|
|
|
|
+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
|
|
|
|
+ HAS_AVX ? __tan_avx : __tan_sse2));
|
2012-01-31 14:41:44 +01:00
|
|
|
|
2012-04-13 11:37:44 +02:00
|
|
|
# define tan __tan_sse2
|
|
|
|
#endif
|
|
|
|
Index: glibc-2.15/sysdeps/x86_64/fpu/math_private.h
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/fpu/math_private.h
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/fpu/math_private.h
|
|
|
|
@@ -19,10 +19,14 @@
|
|
|
|
|
|
|
|
/* We can do a few things better on x86-64. */
|
|
|
|
|
|
|
|
-#ifdef __AVX__
|
|
|
|
+#if defined __AVX__ || defined SSE2AVX
|
2012-01-31 14:41:44 +01:00
|
|
|
# define MOVD "vmovd"
|
|
|
|
+# define STMXCSR "vstmxcsr"
|
|
|
|
+# define LDMXCSR "vldmxcsr"
|
|
|
|
#else
|
|
|
|
# define MOVD "movd"
|
|
|
|
+# define STMXCSR "stmxcsr"
|
|
|
|
+# define LDMXCSR "ldmxcsr"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Direct movement of float into integer register. */
|
2012-04-13 11:37:44 +02:00
|
|
|
@@ -86,7 +90,7 @@
|
|
|
|
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
|
|
|
|
(__di & 0x7fffffff) < 0x7f800000; })
|
|
|
|
|
|
|
|
-#ifdef __AVX__
|
|
|
|
+#if defined __AVX__ || defined SSE2AVX
|
|
|
|
# define __ieee754_sqrt(d) \
|
|
|
|
({ double __res; \
|
|
|
|
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
|
|
|
@@ -112,7 +116,7 @@
|
|
|
|
|
|
|
|
#ifdef __SSE4_1__
|
|
|
|
# ifndef __rint
|
|
|
|
-# ifdef __AVX__
|
|
|
|
+# if defined __AVX__ || defined SSE2AVX
|
|
|
|
# define __rint(d) \
|
|
|
|
({ double __res; \
|
|
|
|
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
|
|
|
@@ -125,7 +129,7 @@
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
# ifndef __rintf
|
|
|
|
-# ifdef __AVX__
|
|
|
|
+# if defined __AVX__ || defined SSE2AVX
|
|
|
|
# define __rintf(d) \
|
|
|
|
({ float __res; \
|
|
|
|
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
|
|
|
@@ -139,7 +143,7 @@
|
|
|
|
# endif
|
|
|
|
|
|
|
|
# ifndef __floor
|
|
|
|
-# ifdef __AVX__
|
|
|
|
+# if defined __AVX__ || defined SSE2AVX
|
|
|
|
# define __floor(d) \
|
|
|
|
({ double __res; \
|
|
|
|
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
|
|
|
@@ -152,7 +156,7 @@
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
# ifndef __floorf
|
|
|
|
-# ifdef __AVX__
|
|
|
|
+# if defined __AVX__ || defined SSE2AVX
|
|
|
|
# define __floorf(d) \
|
|
|
|
({ float __res; \
|
|
|
|
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
|
|
|
@@ -169,121 +173,56 @@
|
|
|
|
|
|
|
|
/* Specialized variants of the <fenv.h> interfaces which only handle
|
|
|
|
either the FPU or the SSE unit. */
|
|
|
|
-#undef libc_fegetround
|
|
|
|
-#define libc_fegetround() \
|
|
|
|
- ({ \
|
|
|
|
- unsigned int mxcsr; \
|
2012-01-31 14:41:44 +01:00
|
|
|
- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
|
2012-04-13 11:37:44 +02:00
|
|
|
- (mxcsr & 0x6000) >> 3; \
|
|
|
|
- })
|
|
|
|
-#undef libc_fegetroundf
|
|
|
|
-#define libc_fegetroundf() libc_fegetround ()
|
|
|
|
-// #define libc_fegetroundl() fegetround ()
|
|
|
|
-
|
|
|
|
-#undef libc_fesetround
|
|
|
|
-#define libc_fesetround(r) \
|
|
|
|
- do { \
|
|
|
|
- unsigned int mxcsr; \
|
2012-01-31 14:41:44 +01:00
|
|
|
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
|
2012-04-13 11:37:44 +02:00
|
|
|
- mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
|
2012-01-31 14:41:44 +01:00
|
|
|
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
|
2012-04-13 11:37:44 +02:00
|
|
|
- } while (0)
|
|
|
|
-#undef libc_fesetroundf
|
|
|
|
-#define libc_fesetroundf(r) libc_fesetround (r)
|
|
|
|
-// #define libc_fesetroundl(r) (void) fesetround (r)
|
|
|
|
-
|
2012-01-31 14:41:44 +01:00
|
|
|
#undef libc_feholdexcept
|
|
|
|
-#ifdef __AVX__
|
|
|
|
-# define libc_feholdexcept(e) \
|
|
|
|
- do { \
|
|
|
|
- unsigned int mxcsr; \
|
|
|
|
- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \
|
|
|
|
- (e)->__mxcsr = mxcsr; \
|
|
|
|
- mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
|
|
|
|
- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \
|
|
|
|
- } while (0)
|
|
|
|
-#else
|
|
|
|
-# define libc_feholdexcept(e) \
|
|
|
|
+#define libc_feholdexcept(e) \
|
|
|
|
do { \
|
|
|
|
unsigned int mxcsr; \
|
|
|
|
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
|
|
|
|
+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
|
|
|
(e)->__mxcsr = mxcsr; \
|
|
|
|
mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
|
|
|
|
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
|
|
|
|
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
|
|
|
|
} while (0)
|
|
|
|
-#endif
|
|
|
|
#undef libc_feholdexceptf
|
|
|
|
#define libc_feholdexceptf(e) libc_feholdexcept (e)
|
|
|
|
// #define libc_feholdexceptl(e) (void) feholdexcept (e)
|
|
|
|
|
|
|
|
#undef libc_feholdexcept_setround
|
|
|
|
-#ifdef __AVX__
|
|
|
|
-# define libc_feholdexcept_setround(e, r) \
|
|
|
|
+#define libc_feholdexcept_setround(e, r) \
|
|
|
|
do { \
|
|
|
|
unsigned int mxcsr; \
|
2012-04-13 11:37:44 +02:00
|
|
|
- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \
|
2012-01-31 14:41:44 +01:00
|
|
|
+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
|
|
|
(e)->__mxcsr = mxcsr; \
|
|
|
|
mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \
|
2012-04-13 11:37:44 +02:00
|
|
|
- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \
|
2012-01-31 14:41:44 +01:00
|
|
|
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
|
|
|
|
} while (0)
|
2012-04-13 11:37:44 +02:00
|
|
|
-#else
|
|
|
|
-# define libc_feholdexcept_setround(e, r) \
|
|
|
|
- do { \
|
|
|
|
- unsigned int mxcsr; \
|
|
|
|
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
|
|
|
|
- (e)->__mxcsr = mxcsr; \
|
|
|
|
- mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \
|
|
|
|
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
|
|
|
|
- } while (0)
|
2012-01-31 14:41:44 +01:00
|
|
|
-#endif
|
|
|
|
#undef libc_feholdexcept_setroundf
|
|
|
|
#define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r)
|
|
|
|
// #define libc_feholdexcept_setroundl(e, r) ...
|
|
|
|
|
|
|
|
#undef libc_fetestexcept
|
|
|
|
-#ifdef __AVX__
|
|
|
|
-# define libc_fetestexcept(e) \
|
|
|
|
- ({ unsigned int mxcsr; asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \
|
2012-04-13 11:37:44 +02:00
|
|
|
+#define libc_fetestexcept(e) \
|
|
|
|
+ ({ unsigned int mxcsr; \
|
|
|
|
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
|
|
|
mxcsr & (e) & FE_ALL_EXCEPT; })
|
2012-01-31 14:41:44 +01:00
|
|
|
-#else
|
|
|
|
-# define libc_fetestexcept(e) \
|
|
|
|
- ({ unsigned int mxcsr; asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
|
2012-04-13 11:37:44 +02:00
|
|
|
- mxcsr & (e) & FE_ALL_EXCEPT; })
|
2012-01-31 14:41:44 +01:00
|
|
|
-#endif
|
|
|
|
#undef libc_fetestexceptf
|
|
|
|
#define libc_fetestexceptf(e) libc_fetestexcept (e)
|
|
|
|
// #define libc_fetestexceptl(e) fetestexcept (e)
|
|
|
|
|
|
|
|
#undef libc_fesetenv
|
|
|
|
-#ifdef __AVX__
|
|
|
|
-# define libc_fesetenv(e) \
|
|
|
|
- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr))
|
|
|
|
-#else
|
|
|
|
-# define libc_fesetenv(e) \
|
|
|
|
- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr))
|
|
|
|
-#endif
|
|
|
|
+#define libc_fesetenv(e) \
|
|
|
|
+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr))
|
|
|
|
#undef libc_fesetenvf
|
|
|
|
#define libc_fesetenvf(e) libc_fesetenv (e)
|
|
|
|
// #define libc_fesetenvl(e) (void) fesetenv (e)
|
|
|
|
|
|
|
|
#undef libc_feupdateenv
|
|
|
|
-#ifdef __AVX__
|
|
|
|
-# define libc_feupdateenv(e) \
|
|
|
|
+#define libc_feupdateenv(e) \
|
|
|
|
do { \
|
|
|
|
unsigned int mxcsr; \
|
|
|
|
- asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \
|
|
|
|
- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr)); \
|
|
|
|
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
|
|
|
+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)); \
|
|
|
|
__feraiseexcept (mxcsr & FE_ALL_EXCEPT); \
|
|
|
|
} while (0)
|
|
|
|
-#else
|
|
|
|
-# define libc_feupdateenv(e) \
|
|
|
|
- do { \
|
|
|
|
- unsigned int mxcsr; \
|
|
|
|
- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
|
|
|
|
- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)); \
|
|
|
|
- __feraiseexcept (mxcsr & FE_ALL_EXCEPT); \
|
|
|
|
- } while (0)
|
|
|
|
-#endif
|
|
|
|
#undef libc_feupdateenvf
|
|
|
|
#define libc_feupdateenvf(e) libc_feupdateenv (e)
|
|
|
|
// #define libc_feupdateenvl(e) (void) feupdateenv (e)
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/config.h.in
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/config.h.in
|
|
|
|
+++ glibc-2.15/config.h.in
|
|
|
|
@@ -102,7 +102,7 @@
|
2012-01-31 14:41:44 +01:00
|
|
|
certain registers (CR0, MQ, CTR, LR) in asm statements. */
|
|
|
|
#undef BROKEN_PPC_ASM_CR0
|
|
|
|
|
|
|
|
-/* Defined on SPARC if ld doesn't handle R_SPARC_WDISP22 against .hidden
|
|
|
|
+/* Defined on SPARC if ld does not handle R_SPARC_WDISP22 against .hidden
|
|
|
|
symbol. sysdeps/sparc/sparc32/elf/configure. */
|
|
|
|
#undef BROKEN_SPARC_WDISP22
|
|
|
|
|
2012-04-13 11:37:44 +02:00
|
|
|
@@ -118,17 +118,20 @@
|
2012-01-31 14:41:44 +01:00
|
|
|
/* Define if gcc supports AVX. */
|
|
|
|
#undef HAVE_AVX_SUPPORT
|
|
|
|
|
|
|
|
+/* Define if gcc supports VEX encoding. */
|
|
|
|
+#undef HAVE_SSE2AVX_SUPPORT
|
|
|
|
+
|
|
|
|
/* Define if gcc supports FMA4. */
|
|
|
|
#undef HAVE_FMA4_SUPPORT
|
|
|
|
|
|
|
|
-/* Define if the compiler's exception support is based on libunwind. */
|
|
|
|
+/* Define if the compiler\'s exception support is based on libunwind. */
|
|
|
|
#undef HAVE_CC_WITH_LIBUNWIND
|
|
|
|
|
|
|
|
/* Define if the access to static and hidden variables is position independent
|
|
|
|
and does not need relocations. */
|
|
|
|
#undef PI_STATIC_AND_HIDDEN
|
|
|
|
|
|
|
|
-/* Define this to disable the `hidden_proto' et al macros in
|
|
|
|
+/* Define this to disable the 'hidden_proto' et al macros in
|
|
|
|
include/libc-symbols.h that avoid PLT slots in the shared objects. */
|
|
|
|
#undef NO_HIDDEN
|
|
|
|
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/math/math_private.h
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/math/math_private.h
|
|
|
|
+++ glibc-2.15/math/math_private.h
|
|
|
|
@@ -365,14 +365,6 @@ extern void __docos (double __x, double
|
2012-01-31 14:41:44 +01:00
|
|
|
know what operations are going to be performed. Therefore we
|
|
|
|
define additional interfaces. By default they refer to the normal
|
|
|
|
interfaces. */
|
|
|
|
-#define libc_fegetround() fegetround ()
|
|
|
|
-#define libc_fegetroundf() fegetround ()
|
|
|
|
-#define libc_fegetroundl() fegetround ()
|
|
|
|
-
|
|
|
|
-#define libc_fesetround(r) (void) fesetround (r)
|
|
|
|
-#define libc_fesetroundf(r) (void) fesetround (r)
|
|
|
|
-#define libc_fesetroundl(r) (void) fesetround (r)
|
|
|
|
-
|
|
|
|
#define libc_feholdexcept(e) (void) feholdexcept (e)
|
|
|
|
#define libc_feholdexceptf(e) (void) feholdexcept (e)
|
|
|
|
#define libc_feholdexceptl(e) (void) feholdexcept (e)
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/sysdeps/i386/configure
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/i386/configure
|
|
|
|
+++ glibc-2.15/sysdeps/i386/configure
|
2012-01-31 14:41:44 +01:00
|
|
|
@@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then
|
|
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
|
|
|
|
+$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
|
|
|
|
+if ${libc_cv_cc_sse2avx+:} false; then :
|
|
|
|
+ $as_echo_n "(cached) " >&6
|
|
|
|
+else
|
|
|
|
+ if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
|
|
|
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
|
|
|
+ (eval $ac_try) 2>&5
|
|
|
|
+ ac_status=$?
|
|
|
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
|
|
|
+ test $ac_status = 0; }; }; then
|
|
|
|
+ libc_cv_cc_sse2avx=yes
|
|
|
|
+else
|
|
|
|
+ libc_cv_cc_sse2avx=no
|
|
|
|
+fi
|
|
|
|
+fi
|
|
|
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
|
|
|
|
+$as_echo "$libc_cv_cc_sse2avx" >&6; }
|
|
|
|
+if test $libc_cv_cc_sse2avx = yes; then
|
|
|
|
+ $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
|
|
|
|
+
|
|
|
|
+fi
|
|
|
|
+
|
|
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
|
|
|
|
$as_echo_n "checking for FMA4 support... " >&6; }
|
|
|
|
if ${libc_cv_cc_fma4+:} false; then :
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/sysdeps/i386/configure.in
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/i386/configure.in
|
|
|
|
+++ glibc-2.15/sysdeps/i386/configure.in
|
2012-01-31 14:41:44 +01:00
|
|
|
@@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
|
|
|
|
AC_DEFINE(HAVE_AVX_SUPPORT)
|
|
|
|
fi
|
|
|
|
|
|
|
|
+dnl Check if -msse2avx works.
|
|
|
|
+AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
|
|
|
|
+if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then
|
|
|
|
+ libc_cv_cc_sse2avx=yes
|
|
|
|
+else
|
|
|
|
+ libc_cv_cc_sse2avx=no
|
|
|
|
+fi])
|
|
|
|
+if test $libc_cv_cc_sse2avx = yes; then
|
|
|
|
+ AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
|
|
|
|
+fi
|
|
|
|
+
|
|
|
|
dnl Check if -mfma4 works.
|
|
|
|
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
|
|
|
|
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/Makefile
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/Makefile
|
|
|
|
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/Makefile
|
2012-01-31 14:41:44 +01:00
|
|
|
@@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4
|
|
|
|
CFLAGS-s_tan-fma4.c = -mfma4
|
|
|
|
endif
|
|
|
|
|
|
|
|
-ifeq ($(config-cflags-avx),yes)
|
|
|
|
+ifeq ($(config-cflags-sse2avx),yes)
|
|
|
|
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
|
|
|
|
e_atan2-avx s_sin-avx s_tan-avx \
|
|
|
|
mplog-avx mpa-avx slowexp-avx \
|
|
|
|
mpexp-avx
|
|
|
|
|
|
|
|
-CFLAGS-e_atan2-avx.c = -mavx
|
|
|
|
-CFLAGS-e_exp-avx.c = -mavx
|
|
|
|
-CFLAGS-e_log-avx.c = -mavx
|
|
|
|
-CFLAGS-mpa-avx.c = -mavx
|
|
|
|
-CFLAGS-mpexp-avx.c = -mavx
|
|
|
|
-CFLAGS-mplog-avx.c = -mavx
|
|
|
|
-CFLAGS-s_atan-avx.c = -mavx
|
|
|
|
-CFLAGS-s_sin-avx.c = -mavx
|
|
|
|
-CFLAGS-slowexp-avx.c = -mavx
|
|
|
|
-CFLAGS-s_tan-avx.c = -mavx
|
|
|
|
+CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
+CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
+CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
+CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
+CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
+CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
+CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
+CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
|
2012-04-13 11:37:44 +02:00
|
|
|
+CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
|
2012-01-31 14:41:44 +01:00
|
|
|
+CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
|
|
|
|
endif
|
|
|
|
endif
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/config.make.in
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/config.make.in
|
|
|
|
+++ glibc-2.15/config.make.in
|
2012-01-31 14:41:44 +01:00
|
|
|
@@ -36,6 +36,7 @@ asflags-cpu = @libc_cv_cc_submachine@
|
|
|
|
|
|
|
|
config-cflags-sse4 = @libc_cv_cc_sse4@
|
|
|
|
config-cflags-avx = @libc_cv_cc_avx@
|
|
|
|
+config-cflags-sse2avx = @libc_cv_cc_sse2avx@
|
|
|
|
config-cflags-novzeroupper = @libc_cv_cc_novzeroupper@
|
|
|
|
config-asflags-i686 = @libc_cv_as_i686@
|
|
|
|
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/configure
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/configure
|
|
|
|
+++ glibc-2.15/configure
|
|
|
|
@@ -625,6 +625,7 @@ use_ldconfig
|
2012-01-31 14:41:44 +01:00
|
|
|
libc_cv_as_i686
|
|
|
|
libc_cv_cc_fma4
|
|
|
|
libc_cv_cc_novzeroupper
|
|
|
|
+libc_cv_cc_sse2avx
|
|
|
|
libc_cv_cc_avx
|
|
|
|
libc_cv_cc_sse4
|
|
|
|
libc_cv_cpp_asm_debuginfo
|
2012-04-13 11:37:44 +02:00
|
|
|
Index: glibc-2.15/configure.in
|
|
|
|
===================================================================
|
|
|
|
--- glibc-2.15.orig/configure.in
|
|
|
|
+++ glibc-2.15/configure.in
|
|
|
|
@@ -2338,6 +2338,7 @@ dnl sysdeps/CPU/configure.in checks set
|
2012-01-31 14:41:44 +01:00
|
|
|
AC_SUBST(libc_cv_cpp_asm_debuginfo)
|
|
|
|
AC_SUBST(libc_cv_cc_sse4)
|
|
|
|
AC_SUBST(libc_cv_cc_avx)
|
|
|
|
+AC_SUBST(libc_cv_cc_sse2avx)
|
|
|
|
AC_SUBST(libc_cv_cc_novzeroupper)
|
|
|
|
AC_SUBST(libc_cv_cc_fma4)
|
|
|
|
AC_SUBST(libc_cv_as_i686)
|