forked from pool/glibc
glibc/glibc-2.16-avx.patch


commit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880
Author: Ulrich Drepper <drepper@gmail.com>
Date: Thu Jan 26 07:45:14 2012 -0500
Reset bit_AVX in __cpu_features if OS support is missing
2012-01-26 Ulrich Drepper <drepper@gmail.com>
[BZ #13583]
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE.
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If
bit_AVX is set, also check OSXSAVE/XCR0 and reset bit_AVX if necessary.
2012-01-25 Joseph Myers <joseph@codesourcery.com>
* elf/tst-unique3.cc (gets): Remove declaration.
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 65b0ee9..4fabbee 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -1,6 +1,6 @@
/* Initialize CPU feature data.
This file is part of the GNU C Library.
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@redhat.com>.
The GNU C Library is free software; you can redistribute it and/or
@@ -144,6 +144,18 @@ __init_cpu_features (void)
else
kind = arch_kind_other;
+ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
+ {
+ /* Reset the AVX bit in case OSXSAVE is disabled. */
+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
+ || ({ unsigned int xcrlow;
+ unsigned int xcrhigh;
+ asm ("xgetbv"
+ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ (xcrlow & 6) != 6; }))
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
+ }
+
__cpu_features.family = family;
__cpu_features.model = model;
atomic_write_barrier ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 2a1df39..408e5ae 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -1,5 +1,5 @@
/* This file is part of the GNU C Library.
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -27,6 +27,7 @@
#define bit_SSSE3 (1 << 9)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
+#define bit_OSXSAVE (1 << 27)
#define bit_AVX (1 << 28)
#define bit_POPCOUNT (1 << 23)
#define bit_FMA (1 << 12)
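For context, the check this commit adds can be reproduced outside glibc. The following is a minimal standalone sketch, not glibc code: it assumes GCC's <cpuid.h> (which provides __get_cpuid, bit_AVX and bit_OSXSAVE) and GCC-style inline asm, and reports whether AVX is actually usable, i.e. the CPU advertises AVX, OSXSAVE is enabled, and XCR0 shows the OS saving both XMM and YMM state.

#include <cpuid.h>
#include <stdio.h>

/* Standalone equivalent of the __init_cpu_features check above.  */
static int
avx_usable (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;
  if ((ecx & bit_AVX) == 0 || (ecx & bit_OSXSAVE) == 0)
    return 0;

  unsigned int xcrlow, xcrhigh;
  /* xgetbv with %ecx = 0 reads XCR0.  */
  asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
  return (xcrlow & 6) == 6;  /* Bits 1 and 2: XMM and YMM state enabled.  */
}

int
main (void)
{
  printf ("AVX usable: %s\n", avx_usable () ? "yes" : "no");
  return 0;
}

Without the XCR0 test, a CPU that advertises AVX while running under a kernel that does not enable YMM state saving would still be treated as AVX-capable, which is the failure this commit addresses.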
commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf
Author: Ulrich Drepper <drepper@gmail.com>
Date: Thu Jan 26 09:45:54 2012 -0500
Really fix AVX tests
There is no problem with strcmp; it doesn't use the YMM registers.
The math routines might, since gcc may generate such code for them.
Introduce bit_YMM_Usable and use it in the math routines.
[BZ #13583]
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE.
Clean up HAS_* macros.
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If
bit_AVX is set, also check OSXSAVE/XCR0 and set bit_YMM_Usable if
possible.
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_YMM_USABLE, not
HAS_AVX.
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
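The libm_ifunc changes below only switch the dispatch condition from HAS_AVX to HAS_YMM_USABLE; the selection itself still happens once, when the dynamic linker resolves the symbol. Here is a hedged sketch of that load-time dispatch idea using the plain GCC ifunc attribute — my_atan, resolve_my_atan and ymm_usable are invented stand-ins, not glibc's libm_ifunc macro, and the sketch needs a toolchain and libc with IFUNC support:

#include <stdio.h>

static double my_atan_sse2 (double x) { return x; }  /* placeholder implementations */
static double my_atan_avx (double x) { return x; }

static int
ymm_usable (void)
{
  return 0;  /* placeholder for the CPUID/XGETBV test shown earlier */
}

/* The resolver runs once, when the dynamic linker binds my_atan.  */
double (*resolve_my_atan (void)) (double)
{
  return ymm_usable () ? my_atan_avx : my_atan_sse2;
}

double my_atan (double) __attribute__ ((ifunc ("resolve_my_atan")));

int
main (void)
{
  printf ("%g\n", my_atan (1.0));
  return 0;
}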
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 6867c6e..3a615fc 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
libm_ifunc (__ieee754_atan2,
HAS_FMA4 ? __ieee754_atan2_fma4
- : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
+ : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
strong_alias (__ieee754_atan2, __atan2_finite)
# define __ieee754_atan2 __ieee754_atan2_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index 3c65028..7b2320a 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
libm_ifunc (__ieee754_exp,
HAS_FMA4 ? __ieee754_exp_fma4
- : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
+ : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
strong_alias (__ieee754_exp, __exp_finite)
# define __ieee754_exp __ieee754_exp_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index 3b468d0..ab277d6 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double);
libm_ifunc (__ieee754_log,
HAS_FMA4 ? __ieee754_log_fma4
- : (HAS_AVX ? __ieee754_log_avx
+ : (HAS_YMM_USABLE ? __ieee754_log_avx
: __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 3160201..78c7e09 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
# define __atan_fma4 ((void *) 0)
# endif
-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
+ HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
# define atan __atan_sse2
#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 1ba9dbc..417acd0 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
# define __sin_fma4 ((void *) 0)
# endif
-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
+ HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos)
-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
+ HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin)
# define __cos __cos_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index 8f6601e..3047155 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
# define __tan_fma4 ((void *) 0)
# endif
-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
+ HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
# define tan __tan_sse2
#endif
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 4fabbee..76d146c 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -147,13 +147,13 @@ __init_cpu_features (void)
if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
{
/* Reset the AVX bit in case OSXSAVE is disabled. */
- if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
- || ({ unsigned int xcrlow;
- unsigned int xcrhigh;
- asm ("xgetbv"
- : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- (xcrlow & 6) != 6; }))
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
+ && ({ unsigned int xcrlow;
+ unsigned int xcrhigh;
+ asm ("xgetbv"
+ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ (xcrlow & 6) == 6; }))
+ __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
}
__cpu_features.family = family;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 408e5ae..2dc75ab 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -22,6 +22,7 @@
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
+#define bit_YMM_Usable (1 << 6)
#define bit_SSE2 (1 << 26)
#define bit_SSSE3 (1 << 9)
@@ -49,6 +50,7 @@
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
+# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -93,7 +95,7 @@ extern struct cpu_features
extern void __init_cpu_features (void) attribute_hidden;
-#define INIT_ARCH()\
+# define INIT_ARCH() \
do \
if (__cpu_features.kind == arch_kind_unknown) \
__init_cpu_features (); \
@@ -126,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
+# define index_YMM_Usable FEATURE_INDEX_1
-#define HAS_ARCH_FEATURE(idx, bit) \
- ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
+# define HAS_ARCH_FEATURE(name) \
+ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-#define HAS_FAST_REP_STRING \
- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
+# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
-#define HAS_FAST_COPY_BACKWARD \
- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
+# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
-#define HAS_SLOW_BSF \
- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
+# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
-#define HAS_PREFER_SSE_FOR_MEMOP \
- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
-#define HAS_FAST_UNALIGNED_LOAD \
- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+
+# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
#endif /* __ASSEMBLER__ */
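The HAS_ARCH_FEATURE rewrite in the hunk above is a token-pasting pattern: one macro builds both the index_* and bit_* names from a single argument instead of requiring a hand-written pair of macros per feature. A minimal illustration outside glibc (the feature names and values here are invented for the sketch):

#include <stdio.h>

/* Invented feature words mimicking glibc's index_ and bit_ naming scheme.  */
#define bit_Fast_Copy (1 << 0)
#define index_Fast_Copy 0
#define bit_Slow_BSF (1 << 2)
#define index_Slow_BSF 0

static unsigned int feature[1] = { bit_Slow_BSF };

/* ## pastes the argument onto both prefixes, so a single macro serves
   every feature bit.  */
#define HAS_ARCH_FEATURE(name) \
  ((feature[index_##name] & bit_##name) != 0)

int
main (void)
{
  printf ("Fast_Copy: %d, Slow_BSF: %d\n",
          HAS_ARCH_FEATURE (Fast_Copy), HAS_ARCH_FEATURE (Slow_BSF));
  return 0;
}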
commit 73139a7628c424c82eb9297ccb5505c0bc5b65aa
Author: Ulrich Drepper <drepper@gmail.com>
Date: Sat Jan 28 11:19:06 2012 -0500
Simplify use of AVX instructions in internal math macros
2012-01-28 Ulrich Drepper <drepper@gmail.com>
* sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions.
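The simplification below works because adjacent string literals are concatenated, so a single macro body serves both encodings once the mnemonic has been chosen at preprocessing time. A standalone sketch of the same trick — get_mxcsr is an invented name; the asm mirrors the header's STMXCSR usage:

#include <stdio.h>

#ifdef __AVX__
# define STMXCSR "vstmxcsr"
#else
# define STMXCSR "stmxcsr"
#endif

static unsigned int
get_mxcsr (void)
{
  unsigned int mxcsr;
  /* Expands to "vstmxcsr %0" or "stmxcsr %0", chosen at compile time.  */
  asm volatile (STMXCSR " %0" : "=m" (mxcsr));
  return mxcsr;
}

int
main (void)
{
  printf ("MXCSR = %#x\n", get_mxcsr ());
  return 0;
}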
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 7f52d5e..50f765f 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -21,8 +21,12 @@
#ifdef __AVX__
# define MOVD "vmovd"
+# define STMXCSR "vstmxcsr"
+# define LDMXCSR "vldmxcsr"
#else
# define MOVD "movd"
+# define STMXCSR "stmxcsr"
+# define LDMXCSR "ldmxcsr"
#endif
/* Direct movement of float into integer register. */
@@ -173,7 +177,7 @@
#define libc_fegetround() \
({ \
unsigned int mxcsr; \
- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
(mxcsr & 0x6000) >> 3; \
})
#undef libc_fegetroundf
@@ -184,106 +188,63 @@
#define libc_fesetround(r) \
do { \
unsigned int mxcsr; \
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
} while (0)
#undef libc_fesetroundf
#define libc_fesetroundf(r) libc_fesetround (r)
// #define libc_fesetroundl(r) (void) fesetround (r)
#undef libc_feholdexcept
-#ifdef __AVX__
-# define libc_feholdexcept(e) \
- do { \
- unsigned int mxcsr; \
- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \
- (e)->__mxcsr = mxcsr; \
- mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \
- } while (0)
-#else
-# define libc_feholdexcept(e) \
+#define libc_feholdexcept(e) \
do { \
unsigned int mxcsr; \
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
(e)->__mxcsr = mxcsr; \
mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
} while (0)
-#endif
#undef libc_feholdexceptf
#define libc_feholdexceptf(e) libc_feholdexcept (e)
// #define libc_feholdexceptl(e) (void) feholdexcept (e)
#undef libc_feholdexcept_setround
-#ifdef __AVX__
-# define libc_feholdexcept_setround(e, r) \
- do { \
- unsigned int mxcsr; \
- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \
- (e)->__mxcsr = mxcsr; \
- mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \
- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \
- } while (0)
-#else
-# define libc_feholdexcept_setround(e, r) \
+#define libc_feholdexcept_setround(e, r) \
do { \
unsigned int mxcsr; \
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
(e)->__mxcsr = mxcsr; \
mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
} while (0)
-#endif
#undef libc_feholdexcept_setroundf
#define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r)
// #define libc_feholdexcept_setroundl(e, r) ...
#undef libc_fetestexcept
-#ifdef __AVX__
-# define libc_fetestexcept(e) \
- ({ unsigned int mxcsr; asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \
- mxcsr & (e) & FE_ALL_EXCEPT; })
-#else
-# define libc_fetestexcept(e) \
- ({ unsigned int mxcsr; asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
+#define libc_fetestexcept(e) \
+ ({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr & (e) & FE_ALL_EXCEPT; })
-#endif
#undef libc_fetestexceptf
#define libc_fetestexceptf(e) libc_fetestexcept (e)
// #define libc_fetestexceptl(e) fetestexcept (e)
#undef libc_fesetenv
-#ifdef __AVX__
-# define libc_fesetenv(e) \
- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr))
-#else
-# define libc_fesetenv(e) \
- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr))
-#endif
+#define libc_fesetenv(e) \
+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr))
#undef libc_fesetenvf
#define libc_fesetenvf(e) libc_fesetenv (e)
// #define libc_fesetenvl(e) (void) fesetenv (e)
#undef libc_feupdateenv
-#ifdef __AVX__
-# define libc_feupdateenv(e) \
+#define libc_feupdateenv(e) \
do { \
unsigned int mxcsr; \
- asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \
- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr)); \
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)); \
__feraiseexcept (mxcsr & FE_ALL_EXCEPT); \
} while (0)
-#else
-# define libc_feupdateenv(e) \
- do { \
- unsigned int mxcsr; \
- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)); \
- __feraiseexcept (mxcsr & FE_ALL_EXCEPT); \
- } while (0)
-#endif
#undef libc_feupdateenvf
#define libc_feupdateenvf(e) libc_feupdateenv (e)
// #define libc_feupdateenvl(e) (void) feupdateenv (e)
commit 56f6f6a2403cfa7267cad722597113be35ecf70d
Author: Ulrich Drepper <drepper@gmail.com>
Date: Sat Jan 28 14:48:46 2012 -0500
Use -msse2avx option for x86-64 libm functions
2012-01-28 Ulrich Drepper <drepper@gmail.com>
* config.h.in: Define HAVE_SSE2AVX_SUPPORT.
* math/math_private.h: Remove libc_fegetround* and
libc_fesetround*.
* sysdeps/i386/configure.in: Check for -msse2avx.
* sysdeps/x86_64/fpu/math_private.h: Use VEX-encoded instructions
also if SSE2AVX is defined.
Remove libc_fegetround* and libc_fesetround*.
* sysdeps/x86_64/fpu/multiarch/Makefile: Compile *-avx functions
if config-cflags-sse2avx is yes. Also add -DSSE2AVX to defines.
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_AVX again instead
of HAS_YMM_USABLE.
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
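With -msse2avx the compiler itself emits VEX-encoded forms of the SSE instructions, so the hand-written asm in math_private.h has to follow suit or the object would mix legacy and VEX encodings (risking SSE/AVX transition penalties). A compressed sketch of the pattern the header uses after this change — my_sqrt and MY_SQRT_INSN are invented names; the asm strings mirror the ones in the diff:

#if defined __AVX__ || defined SSE2AVX
# define MY_SQRT_INSN "vsqrtsd %1, %0, %0"
#else
# define MY_SQRT_INSN "sqrtsd %1, %0"
#endif

static inline double
my_sqrt (double d)
{
  double res;
  asm (MY_SQRT_INSN : "=x" (res) : "xm" (d));
  return res;
}

Built with -msse2avx -DSSE2AVX (or -mavx), the VEX form is used; a plain SSE2 build keeps the legacy encoding.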
diff --git a/config.h.in b/config.h.in
index 50d53d4..1489476 100644
--- a/config.h.in
+++ b/config.h.in
@@ -90,7 +90,7 @@
certain registers (CR0, MQ, CTR, LR) in asm statements. */
#undef BROKEN_PPC_ASM_CR0
-/* Defined on SPARC if ld doesn't handle R_SPARC_WDISP22 against .hidden
+/* Defined on SPARC if ld does not handle R_SPARC_WDISP22 against .hidden
symbol. sysdeps/sparc/sparc32/elf/configure. */
#undef BROKEN_SPARC_WDISP22
@@ -106,17 +106,20 @@
/* Define if gcc supports AVX. */
#undef HAVE_AVX_SUPPORT
+/* Define if gcc supports VEX encoding. */
+#undef HAVE_SSE2AVX_SUPPORT
+
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
-/* Define if the compiler's exception support is based on libunwind. */
+/* Define if the compiler\'s exception support is based on libunwind. */
#undef HAVE_CC_WITH_LIBUNWIND
/* Define if the access to static and hidden variables is position independent
and does not need relocations. */
#undef PI_STATIC_AND_HIDDEN
-/* Define this to disable the `hidden_proto' et al macros in
+/* Define this to disable the 'hidden_proto' et al macros in
include/libc-symbols.h that avoid PLT slots in the shared objects. */
#undef NO_HIDDEN
diff --git a/math/math_private.h b/math/math_private.h
index 602a271..e4108d8 100644
--- a/math/math_private.h
+++ b/math/math_private.h
@@ -365,14 +365,6 @@ extern void __docos (double __x, double __dx, double __v[]);
know what operations are going to be performed. Therefore we
define additional interfaces. By default they refer to the normal
interfaces. */
-#define libc_fegetround() fegetround ()
-#define libc_fegetroundf() fegetround ()
-#define libc_fegetroundl() fegetround ()
-
-#define libc_fesetround(r) (void) fesetround (r)
-#define libc_fesetroundf(r) (void) fesetround (r)
-#define libc_fesetroundl(r) (void) fesetround (r)
-
#define libc_feholdexcept(e) (void) feholdexcept (e)
#define libc_feholdexceptf(e) (void) feholdexcept (e)
#define libc_feholdexceptl(e) (void) feholdexcept (e)
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure
index ae494e2..bc7900e 100644
--- a/sysdeps/i386/configure
+++ b/sysdeps/i386/configure
@@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
+$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
+if ${libc_cv_cc_sse2avx+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_cc_sse2avx=yes
+else
+ libc_cv_cc_sse2avx=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
+$as_echo "$libc_cv_cc_sse2avx" >&6; }
+if test $libc_cv_cc_sse2avx = yes; then
+ $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
+
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
$as_echo_n "checking for FMA4 support... " >&6; }
if ${libc_cv_cc_fma4+:} false; then :
diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in
index 5a9840e..59a4cd6 100644
--- a/sysdeps/i386/configure.in
+++ b/sysdeps/i386/configure.in
@@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
AC_DEFINE(HAVE_AVX_SUPPORT)
fi
+dnl Check if -msse2avx works.
+AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
+if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then
+ libc_cv_cc_sse2avx=yes
+else
+ libc_cv_cc_sse2avx=no
+fi])
+if test $libc_cv_cc_sse2avx = yes; then
+ AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
+fi
+
dnl Check if -mfma4 works.
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 50f765f..8e79718 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -19,7 +19,7 @@
/* We can do a few things better on x86-64. */
-#ifdef __AVX__
+#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
@@ -90,7 +90,7 @@
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
(__di & 0x7fffffff) < 0x7f800000; })
-#ifdef __AVX__
+#if defined __AVX__ || defined SSE2AVX
# define __ieee754_sqrt(d) \
({ double __res; \
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@@ -116,7 +116,7 @@
#ifdef __SSE4_1__
# ifndef __rint
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __rint(d) \
({ double __res; \
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@@ -129,7 +129,7 @@
# endif
# endif
# ifndef __rintf
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __rintf(d) \
({ float __res; \
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@@ -143,7 +143,7 @@
# endif
# ifndef __floor
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __floor(d) \
({ double __res; \
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@@ -156,7 +156,7 @@
# endif
# endif
# ifndef __floorf
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __floorf(d) \
({ float __res; \
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@@ -173,29 +173,6 @@
/* Specialized variants of the <fenv.h> interfaces which only handle
either the FPU or the SSE unit. */
-#undef libc_fegetround
-#define libc_fegetround() \
- ({ \
- unsigned int mxcsr; \
- asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
- (mxcsr & 0x6000) >> 3; \
- })
-#undef libc_fegetroundf
-#define libc_fegetroundf() libc_fegetround ()
-// #define libc_fegetroundl() fegetround ()
-
-#undef libc_fesetround
-#define libc_fesetround(r) \
- do { \
- unsigned int mxcsr; \
- asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
- mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
- asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
- } while (0)
-#undef libc_fesetroundf
-#define libc_fesetroundf(r) libc_fesetround (r)
-// #define libc_fesetroundl(r) (void) fesetround (r)
-
#undef libc_feholdexcept
#define libc_feholdexcept(e) \
do { \
@@ -224,7 +201,8 @@
#undef libc_fetestexcept
#define libc_fetestexcept(e) \
- ({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
+ ({ unsigned int mxcsr; \
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr & (e) & FE_ALL_EXCEPT; })
#undef libc_fetestexceptf
#define libc_fetestexceptf(e) libc_fetestexcept (e)
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index be68903..4b5c173 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4
CFLAGS-s_tan-fma4.c = -mfma4
endif
-ifeq ($(config-cflags-avx),yes)
+ifeq ($(config-cflags-sse2avx),yes)
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
e_atan2-avx s_sin-avx s_tan-avx \
mplog-avx mpa-avx slowexp-avx \
mpexp-avx
-CFLAGS-e_atan2-avx.c = -mavx
-CFLAGS-e_exp-avx.c = -mavx
-CFLAGS-e_log-avx.c = -mavx
-CFLAGS-mpa-avx.c = -mavx
-CFLAGS-mpexp-avx.c = -mavx
-CFLAGS-mplog-avx.c = -mavx
-CFLAGS-s_atan-avx.c = -mavx
-CFLAGS-s_sin-avx.c = -mavx
-CFLAGS-slowexp-avx.c = -mavx
-CFLAGS-s_tan-avx.c = -mavx
+CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-slowexp-avx.c = -sse2mavx -DSSE2AVX
+CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
endif
endif
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 3a615fc..6867c6e 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
libm_ifunc (__ieee754_atan2,
HAS_FMA4 ? __ieee754_atan2_fma4
- : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
+ : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
strong_alias (__ieee754_atan2, __atan2_finite)
# define __ieee754_atan2 __ieee754_atan2_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index 7b2320a..3c65028 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
libm_ifunc (__ieee754_exp,
HAS_FMA4 ? __ieee754_exp_fma4
- : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
+ : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
strong_alias (__ieee754_exp, __exp_finite)
# define __ieee754_exp __ieee754_exp_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index ab277d6..05f3668 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double);
libm_ifunc (__ieee754_log,
HAS_FMA4 ? __ieee754_log_fma4
- : (HAS_YMM_USABLE ? __ieee754_log_avx
- : __ieee754_log_sse2));
+ : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite)
# define __ieee754_log __ieee754_log_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 78c7e09..ae16d7c 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -13,7 +13,7 @@ extern double __atan_fma4 (double);
# endif
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
- HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
+ HAS_AVX ? __atan_avx : __atan_sse2));
# define atan __atan_sse2
#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 417acd0..a0c2521 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -18,11 +18,11 @@ extern double __sin_fma4 (double);
# endif
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
- HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
+ HAS_AVX ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos)
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
- HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
+ HAS_AVX ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin)
# define __cos __cos_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index 3047155..904308f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -13,7 +13,7 @@ extern double __tan_fma4 (double);
# endif
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
- HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
+ HAS_AVX ? __tan_avx : __tan_sse2));
# define tan __tan_sse2
#endif
commit 3b1004624e54cc2fefd034ff80d5dea4b6db764f
Author: Joseph Myers <joseph@codesourcery.com>
Date: Mon Jan 30 19:55:15 2012 +0000
Fix makefile/configure problems with sse2avx changes.
2012-01-30 Joseph Myers <joseph@codesourcery.com>
* configure.in (libc_cv_cc_sse2avx): AC_SUBST.
* configure: Regenerate.
* config.make.in (config-cflags-sse2avx): Define.
* sysdeps/x86_64/fpu/multiarch/Makefile (CFLAGS-slowexp-avx.c):
Fix typo.
* scripts/config.guess: Update from upstream config git repository.
diff --git a/config.make.in b/config.make.in
index d937952..75061f6 100644
--- a/config.make.in
+++ b/config.make.in
@@ -36,6 +36,7 @@ asflags-cpu = @libc_cv_cc_submachine@
config-cflags-sse4 = @libc_cv_cc_sse4@
config-cflags-avx = @libc_cv_cc_avx@
+config-cflags-sse2avx = @libc_cv_cc_sse2avx@
config-cflags-novzeroupper = @libc_cv_cc_novzeroupper@
config-asflags-i686 = @libc_cv_as_i686@
diff --git a/configure b/configure
index 71e8de1..508e1bb 100755
--- a/configure
+++ b/configure
@@ -620,6 +620,7 @@ use_ldconfig
libc_cv_as_i686
libc_cv_cc_fma4
libc_cv_cc_novzeroupper
+libc_cv_cc_sse2avx
libc_cv_cc_avx
libc_cv_cc_sse4
libc_cv_cpp_asm_debuginfo
diff --git a/configure.in b/configure.in
index 5fd6d85..0499d3c 100644
--- a/configure.in
+++ b/configure.in
@@ -2154,6 +2154,7 @@ dnl sysdeps/CPU/configure.in checks set this via arch-specific asm tests
AC_SUBST(libc_cv_cpp_asm_debuginfo)
AC_SUBST(libc_cv_cc_sse4)
AC_SUBST(libc_cv_cc_avx)
+AC_SUBST(libc_cv_cc_sse2avx)
AC_SUBST(libc_cv_cc_novzeroupper)
AC_SUBST(libc_cv_cc_fma4)
AC_SUBST(libc_cv_as_i686)
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 4b5c173..2a38ffc 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -48,7 +48,7 @@ CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-slowexp-avx.c = -sse2mavx -DSSE2AVX
+CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
endif
endif