diff --git a/check-build.sh b/check-build.sh index fed8e7f..3f33595 100644 --- a/check-build.sh +++ b/check-build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2003, 2004, 2011 SUSE Linux Products GmbH, Germany. All rights reserved. +# Copyright (c) 2003, 2004, 2011,2012 SUSE Linux Products GmbH, Germany. All rights reserved. # # Authors: Thorsten Kukuk # @@ -13,12 +13,18 @@ fi # get kernel version OFS="$IFS" ; IFS=".-" ; version=(`uname -r`) ; IFS="$OIFS" +ARCH="$(uname -m)" if test ${version[0]} -gt 2 ; then : # okay elif test ${version[0]} -lt 2 -o ${version[1]} -lt 6 -o ${version[2]} -lt 16 ; then echo "FATAL: kernel too old, need kernel >= 2.6.16 for this package" 1>&2 exit 1 +elif test "$ARCH" = 'x86_64' ; then + if test ${version[0]} -lt 2 -o ${version[1]} -lt 6 -o ${version[2]} -lt 32 ; then + echo "FATAL: kernel too old, need kernel >= 2.6.32 for this package" 1>&2 + exit 1 + fi fi diff --git a/glibc-2.15-math64crash.patch b/glibc-2.15-math64crash.patch deleted file mode 100644 index 5bcfe5e..0000000 --- a/glibc-2.15-math64crash.patch +++ /dev/null @@ -1,187 +0,0 @@ -The following patch comes from Arch Linux - and is just a workaround -until the real problem is fixed. 
- -diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile -index be68903..a032da8 100644 ---- a/sysdeps/x86_64/fpu/multiarch/Makefile -+++ b/sysdeps/x86_64/fpu/multiarch/Makefile -@@ -1,5 +1,5 @@ - ifeq ($(subdir),math) --libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \ -+libm-sysdep_routines += s_floorf-c s_ceilf-c \ - s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c - - ifeq ($(have-mfma4),yes) -diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c b/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c -deleted file mode 100644 -index 6a5ea3f..0000000 ---- a/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c -+++ /dev/null -@@ -1,2 +0,0 @@ --#define __ceil __ceil_c --#include -diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/s_ceil.S -deleted file mode 100644 -index d0f8da3..0000000 ---- a/sysdeps/x86_64/fpu/multiarch/s_ceil.S -+++ /dev/null -@@ -1,40 +0,0 @@ --/* Copyright (C) 2011 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- Contributed by Ulrich Drepper , 2011. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. 
*/ -- --#include --#include -- -- --ENTRY(__ceil) -- .type __ceil, @gnu_indirect_function -- call __get_cpu_features@plt -- movq %rax, %rdx -- leaq __ceil_sse41(%rip), %rax -- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) -- jnz 2f -- leaq __ceil_c(%rip), %rax --2: ret --END(__ceil) --weak_alias (__ceil, ceil) -- -- --ENTRY(__ceil_sse41) -- roundsd $2, %xmm0, %xmm0 -- ret --END(__ceil_sse41) -diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c -deleted file mode 100644 -index 68733b6..0000000 ---- a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c -+++ /dev/null -@@ -1,3 +0,0 @@ --#undef __floor --#define __floor __floor_c --#include -diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/fpu/multiarch/s_floor.S -deleted file mode 100644 -index 514ea95..0000000 ---- a/sysdeps/x86_64/fpu/multiarch/s_floor.S -+++ /dev/null -@@ -1,40 +0,0 @@ --/* Copyright (C) 2011 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- Contributed by Ulrich Drepper , 2011. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. 
*/ -- --#include --#include -- -- --ENTRY(__floor) -- .type __floor, @gnu_indirect_function -- call __get_cpu_features@plt -- movq %rax, %rdx -- leaq __floor_sse41(%rip), %rax -- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) -- jnz 2f -- leaq __floor_c(%rip), %rax --2: ret --END(__floor) --weak_alias (__floor, floor) -- -- --ENTRY(__floor_sse41) -- roundsd $1, %xmm0, %xmm0 -- ret --END(__floor_sse41) -diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c -deleted file mode 100644 -index 1ba9dbc..0000000 ---- a/sysdeps/x86_64/fpu/multiarch/s_sin.c -+++ /dev/null -@@ -1,31 +0,0 @@ --#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT --# include --# include --# undef NAN -- --extern double __cos_sse2 (double); --extern double __sin_sse2 (double); --extern double __cos_avx (double); --extern double __sin_avx (double); --# ifdef HAVE_FMA4_SUPPORT --extern double __cos_fma4 (double); --extern double __sin_fma4 (double); --# else --# undef HAS_FMA4 --# define HAS_FMA4 0 --# define __cos_fma4 ((void *) 0) --# define __sin_fma4 ((void *) 0) --# endif -- --libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2); --weak_alias (__cos, cos) -- --libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2); --weak_alias (__sin, sin) -- --# define __cos __cos_sse2 --# define __sin __sin_sse2 --#endif -- -- --#include -diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c -deleted file mode 100644 -index 8f6601e..0000000 ---- a/sysdeps/x86_64/fpu/multiarch/s_tan.c -+++ /dev/null -@@ -1,21 +0,0 @@ --#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT --# include --# include -- --extern double __tan_sse2 (double); --extern double __tan_avx (double); --# ifdef HAVE_FMA4_SUPPORT --extern double __tan_fma4 (double); --# else --# undef HAS_FMA4 --# define HAS_FMA4 0 --# define __tan_fma4 ((void *) 0) --# endif -- --libm_ifunc (tan, HAS_FMA4 ? 
__tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2); -- --# define tan __tan_sse2 --#endif -- -- --#include diff --git a/glibc-2.16-avx.patch b/glibc-2.16-avx.patch new file mode 100644 index 0000000..31e699e --- /dev/null +++ b/glibc-2.16-avx.patch @@ -0,0 +1,846 @@ +commit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880 +Author: Ulrich Drepper +Date: Thu Jan 26 07:45:14 2012 -0500 + + Reset bit_AVX in __cpu_features is OS support is missing + +2012-01-26 Ulrich Drepper + + [BZ #13583] + * sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE. + * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If + bit_AVX is set also check OSXAVE/XCR0 and reset bit_AVX if necessary. + + 2012-01-25 Joseph Myers + + * elf/tst-unique3.cc (gets): Remove declaration. +diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c +index 65b0ee9..4fabbee 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.c ++++ b/sysdeps/x86_64/multiarch/init-arch.c +@@ -1,6 +1,6 @@ + /* Initialize CPU feature data. + This file is part of the GNU C Library. +- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Contributed by Ulrich Drepper . + + The GNU C Library is free software; you can redistribute it and/or +@@ -144,6 +144,18 @@ __init_cpu_features (void) + else + kind = arch_kind_other; + ++ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) ++ { ++ /* Reset the AVX bit in case OSXSAVE is disabled. 
*/ ++ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0 ++ || ({ unsigned int xcrlow; ++ unsigned int xcrhigh; ++ asm ("xgetbv" ++ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); ++ (xcrlow & 6) != 6; })) ++ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; ++ } ++ + __cpu_features.family = family; + __cpu_features.model = model; + atomic_write_barrier (); +diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h +index 2a1df39..408e5ae 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.h ++++ b/sysdeps/x86_64/multiarch/init-arch.h +@@ -1,5 +1,5 @@ + /* This file is part of the GNU C Library. +- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. ++ Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public +@@ -27,6 +27,7 @@ + #define bit_SSSE3 (1 << 9) + #define bit_SSE4_1 (1 << 19) + #define bit_SSE4_2 (1 << 20) ++#define bit_OSXSAVE (1 << 27) + #define bit_AVX (1 << 28) + #define bit_POPCOUNT (1 << 23) + #define bit_FMA (1 << 12) + +commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf +Author: Ulrich Drepper +Date: Thu Jan 26 09:45:54 2012 -0500 + + Really fix AVX tests + + There is no problem with strcmp, it doesn't use the YMM registers. + The math routines might since gcc perhaps generates such code. + Introduce bit_YMM_USBALE and use it in the math routines. + + [BZ #13583] + * sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE. + Clean up HAS_* macros. + * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If + bit_AVX is set also check OSXAVE/XCR0 and set bit_YMM_Usable if + possible. + * sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_YMM_USABLE, not + HAS_AVX. + * sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise. 
+ * sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise. + +diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +index 6867c6e..3a615fc 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double); + + libm_ifunc (__ieee754_atan2, + HAS_FMA4 ? __ieee754_atan2_fma4 +- : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); ++ : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); + strong_alias (__ieee754_atan2, __atan2_finite) + + # define __ieee754_atan2 __ieee754_atan2_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c +index 3c65028..7b2320a 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c +@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double); + + libm_ifunc (__ieee754_exp, + HAS_FMA4 ? __ieee754_exp_fma4 +- : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2)); ++ : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2)); + strong_alias (__ieee754_exp, __exp_finite) + + # define __ieee754_exp __ieee754_exp_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c +index 3b468d0..ab277d6 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log.c +@@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double); + + libm_ifunc (__ieee754_log, + HAS_FMA4 ? __ieee754_log_fma4 +- : (HAS_AVX ? __ieee754_log_avx ++ : (HAS_YMM_USABLE ? 
__ieee754_log_avx + : __ieee754_log_sse2)); + strong_alias (__ieee754_log, __log_finite) + +diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c +index 3160201..78c7e09 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c +@@ -12,7 +12,8 @@ extern double __atan_fma4 (double); + # define __atan_fma4 ((void *) 0) + # endif + +-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2); ++libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : ++ HAS_YMM_USABLE ? __atan_avx : __atan_sse2)); + + # define atan __atan_sse2 + #endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c +index 1ba9dbc..417acd0 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c +@@ -17,10 +17,12 @@ extern double __sin_fma4 (double); + # define __sin_fma4 ((void *) 0) + # endif + +-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2); ++libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : ++ HAS_YMM_USABLE ? __cos_avx : __cos_sse2)); + weak_alias (__cos, cos) + +-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2); ++libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : ++ HAS_YMM_USABLE ? __sin_avx : __sin_sse2)); + weak_alias (__sin, sin) + + # define __cos __cos_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c +index 8f6601e..3047155 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c +@@ -12,7 +12,8 @@ extern double __tan_fma4 (double); + # define __tan_fma4 ((void *) 0) + # endif + +-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2); ++libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : ++ HAS_YMM_USABLE ? 
__tan_avx : __tan_sse2)); + + # define tan __tan_sse2 + #endif +diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c +index 4fabbee..76d146c 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.c ++++ b/sysdeps/x86_64/multiarch/init-arch.c +@@ -147,13 +147,13 @@ __init_cpu_features (void) + if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) + { + /* Reset the AVX bit in case OSXSAVE is disabled. */ +- if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0 +- || ({ unsigned int xcrlow; +- unsigned int xcrhigh; +- asm ("xgetbv" +- : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); +- (xcrlow & 6) != 6; })) +- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; ++ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0 ++ && ({ unsigned int xcrlow; ++ unsigned int xcrhigh; ++ asm ("xgetbv" ++ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); ++ (xcrlow & 6) == 6; })) ++ __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable; + } + + __cpu_features.family = family; +diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h +index 408e5ae..2dc75ab 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.h ++++ b/sysdeps/x86_64/multiarch/init-arch.h +@@ -22,6 +22,7 @@ + #define bit_Prefer_SSE_for_memop (1 << 3) + #define bit_Fast_Unaligned_Load (1 << 4) + #define bit_Prefer_PMINUB_for_stringop (1 << 5) ++#define bit_YMM_Usable (1 << 6) + + #define bit_SSE2 (1 << 26) + #define bit_SSSE3 (1 << 9) +@@ -49,6 +50,7 @@ + # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE + # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE + # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE ++# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE + + #else /* __ASSEMBLER__ */ + +@@ -93,7 +95,7 @@ extern struct cpu_features + + + extern void __init_cpu_features (void) attribute_hidden; +-#define INIT_ARCH()\ ++# define INIT_ARCH() \ + do \ + if 
(__cpu_features.kind == arch_kind_unknown) \ + __init_cpu_features (); \ +@@ -126,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void) + # define index_Slow_BSF FEATURE_INDEX_1 + # define index_Prefer_SSE_for_memop FEATURE_INDEX_1 + # define index_Fast_Unaligned_Load FEATURE_INDEX_1 ++# define index_YMM_Usable FEATURE_INDEX_1 + +-#define HAS_ARCH_FEATURE(idx, bit) \ +- ((__get_cpu_features ()->feature[idx] & (bit)) != 0) ++# define HAS_ARCH_FEATURE(name) \ ++ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) + +-#define HAS_FAST_REP_STRING \ +- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String) ++# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) + +-#define HAS_FAST_COPY_BACKWARD \ +- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward) ++# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) + +-#define HAS_SLOW_BSF \ +- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF) ++# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) + +-#define HAS_PREFER_SSE_FOR_MEMOP \ +- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) ++# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) + +-#define HAS_FAST_UNALIGNED_LOAD \ +- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load) ++# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) ++ ++# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable) + + #endif /* __ASSEMBLER__ */ + +commit 73139a7628c424c82eb9297ccb5505c0bc5b65aa +Author: Ulrich Drepper +Date: Sat Jan 28 11:19:06 2012 -0500 + + Simplify use of AVX instructions in internal math macros + +2012-01-28 Ulrich Drepper + + * sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions. 
+ +diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h +index 7f52d5e..50f765f 100644 +--- a/sysdeps/x86_64/fpu/math_private.h ++++ b/sysdeps/x86_64/fpu/math_private.h +@@ -21,8 +21,12 @@ + + #ifdef __AVX__ + # define MOVD "vmovd" ++# define STMXCSR "vstmxcsr" ++# define LDMXCSR "vldmxcsr" + #else + # define MOVD "movd" ++# define STMXCSR "stmxcsr" ++# define LDMXCSR "ldmxcsr" + #endif + + /* Direct movement of float into integer register. */ +@@ -173,7 +177,7 @@ + #define libc_fegetround() \ + ({ \ + unsigned int mxcsr; \ +- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \ ++ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ + (mxcsr & 0x6000) >> 3; \ + }) + #undef libc_fegetroundf +@@ -184,106 +188,63 @@ + #define libc_fesetround(r) \ + do { \ + unsigned int mxcsr; \ +- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ ++ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \ + mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \ +- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ ++ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \ + } while (0) + #undef libc_fesetroundf + #define libc_fesetroundf(r) libc_fesetround (r) + // #define libc_fesetroundl(r) (void) fesetround (r) + + #undef libc_feholdexcept +-#ifdef __AVX__ +-# define libc_feholdexcept(e) \ +- do { \ +- unsigned int mxcsr; \ +- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \ +- (e)->__mxcsr = mxcsr; \ +- mxcsr = (mxcsr | 0x1f80) & ~0x3f; \ +- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \ +- } while (0) +-#else +-# define libc_feholdexcept(e) \ ++#define libc_feholdexcept(e) \ + do { \ + unsigned int mxcsr; \ +- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ ++ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \ + (e)->__mxcsr = mxcsr; \ + mxcsr = (mxcsr | 0x1f80) & ~0x3f; \ +- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ ++ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \ + } while (0) +-#endif + #undef libc_feholdexceptf + #define libc_feholdexceptf(e) libc_feholdexcept (e) + // #define libc_feholdexceptl(e) (void) 
feholdexcept (e) + + #undef libc_feholdexcept_setround +-#ifdef __AVX__ +-# define libc_feholdexcept_setround(e, r) \ +- do { \ +- unsigned int mxcsr; \ +- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \ +- (e)->__mxcsr = mxcsr; \ +- mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \ +- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \ +- } while (0) +-#else +-# define libc_feholdexcept_setround(e, r) \ ++#define libc_feholdexcept_setround(e, r) \ + do { \ + unsigned int mxcsr; \ +- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ ++ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \ + (e)->__mxcsr = mxcsr; \ + mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \ +- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ ++ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \ + } while (0) +-#endif + #undef libc_feholdexcept_setroundf + #define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r) + // #define libc_feholdexcept_setroundl(e, r) ... + + #undef libc_fetestexcept +-#ifdef __AVX__ +-# define libc_fetestexcept(e) \ +- ({ unsigned int mxcsr; asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \ +- mxcsr & (e) & FE_ALL_EXCEPT; }) +-#else +-# define libc_fetestexcept(e) \ +- ({ unsigned int mxcsr; asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \ ++#define libc_fetestexcept(e) \ ++ ({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ + mxcsr & (e) & FE_ALL_EXCEPT; }) +-#endif + #undef libc_fetestexceptf + #define libc_fetestexceptf(e) libc_fetestexcept (e) + // #define libc_fetestexceptl(e) fetestexcept (e) + + #undef libc_fesetenv +-#ifdef __AVX__ +-# define libc_fesetenv(e) \ +- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr)) +-#else +-# define libc_fesetenv(e) \ +- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)) +-#endif ++#define libc_fesetenv(e) \ ++ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)) + #undef libc_fesetenvf + #define libc_fesetenvf(e) libc_fesetenv (e) + // #define libc_fesetenvl(e) (void) fesetenv (e) + + #undef libc_feupdateenv +-#ifdef 
__AVX__ +-# define libc_feupdateenv(e) \ ++#define libc_feupdateenv(e) \ + do { \ + unsigned int mxcsr; \ +- asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \ +- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr)); \ ++ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ ++ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)); \ + __feraiseexcept (mxcsr & FE_ALL_EXCEPT); \ + } while (0) +-#else +-# define libc_feupdateenv(e) \ +- do { \ +- unsigned int mxcsr; \ +- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \ +- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)); \ +- __feraiseexcept (mxcsr & FE_ALL_EXCEPT); \ +- } while (0) +-#endif + #undef libc_feupdateenvf + #define libc_feupdateenvf(e) libc_feupdateenv (e) + // #define libc_feupdateenvl(e) (void) feupdateenv (e) + +commit 56f6f6a2403cfa7267cad722597113be35ecf70d +Author: Ulrich Drepper +Date: Sat Jan 28 14:48:46 2012 -0500 + + Use -msse2avx option for x86-64 libm functions + +2012-01-28 Ulrich Drepper + + * config.h.in: Define HAVE_SSE2AVX_SUPPORT. + * math/math_private.h: Remove libc_fegetround* and + libc_fesetround*. + * sysdeps/i386/configure.in: Check for -msse2avx. + * sysdeps/x86_64/fpu/math_private.h: Use VEX-encoded instructions + also if SSE2AVX is defined. + Remove libc_fegetround* and libc_fesetround*. + * sysdeps/x86_64/fpu/multiarch/Makefile: Compile *-avx functions + if config-cflags-sse2avx is yes. Also add -DSSE2AVX to defines. + * sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_AVX again instead + of HAS_YMM_USABLE. + * sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise. + + 2012-01-19 Adhemerval Zanella +diff --git a/config.h.in b/config.h.in +index 50d53d4..1489476 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -90,7 +90,7 @@ + certain registers (CR0, MQ, CTR, LR) in asm statements. 
*/ + #undef BROKEN_PPC_ASM_CR0 + +-/* Defined on SPARC if ld doesn't handle R_SPARC_WDISP22 against .hidden ++/* Defined on SPARC if ld does not handle R_SPARC_WDISP22 against .hidden + symbol. sysdeps/sparc/sparc32/elf/configure. */ + #undef BROKEN_SPARC_WDISP22 + +@@ -106,17 +106,20 @@ + /* Define if gcc supports AVX. */ + #undef HAVE_AVX_SUPPORT + ++/* Define if gcc supports VEX encoding. */ ++#undef HAVE_SSE2AVX_SUPPORT ++ + /* Define if gcc supports FMA4. */ + #undef HAVE_FMA4_SUPPORT + +-/* Define if the compiler's exception support is based on libunwind. */ ++/* Define if the compiler\'s exception support is based on libunwind. */ + #undef HAVE_CC_WITH_LIBUNWIND + + /* Define if the access to static and hidden variables is position independent + and does not need relocations. */ + #undef PI_STATIC_AND_HIDDEN + +-/* Define this to disable the `hidden_proto' et al macros in ++/* Define this to disable the 'hidden_proto' et al macros in + include/libc-symbols.h that avoid PLT slots in the shared objects. */ + #undef NO_HIDDEN + +diff --git a/math/math_private.h b/math/math_private.h +index 602a271..e4108d8 100644 +--- a/math/math_private.h ++++ b/math/math_private.h +@@ -365,14 +365,6 @@ extern void __docos (double __x, double __dx, double __v[]); + know what operations are going to be performed. Therefore we + define additional interfaces. By default they refer to the normal + interfaces. 
*/ +-#define libc_fegetround() fegetround () +-#define libc_fegetroundf() fegetround () +-#define libc_fegetroundl() fegetround () +- +-#define libc_fesetround(r) (void) fesetround (r) +-#define libc_fesetroundf(r) (void) fesetround (r) +-#define libc_fesetroundl(r) (void) fesetround (r) +- + #define libc_feholdexcept(e) (void) feholdexcept (e) + #define libc_feholdexceptf(e) (void) feholdexcept (e) + #define libc_feholdexceptl(e) (void) feholdexcept (e) +diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure +index ae494e2..bc7900e 100644 +--- a/sysdeps/i386/configure ++++ b/sysdeps/i386/configure +@@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then + + fi + ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5 ++$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; } ++if ${libc_cv_cc_sse2avx+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; }; then ++ libc_cv_cc_sse2avx=yes ++else ++ libc_cv_cc_sse2avx=no ++fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5 ++$as_echo "$libc_cv_cc_sse2avx" >&6; } ++if test $libc_cv_cc_sse2avx = yes; then ++ $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h ++ ++fi ++ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5 + $as_echo_n "checking for FMA4 support... " >&6; } + if ${libc_cv_cc_fma4+:} false; then : +diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in +index 5a9840e..59a4cd6 100644 +--- a/sysdeps/i386/configure.in ++++ b/sysdeps/i386/configure.in +@@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then + AC_DEFINE(HAVE_AVX_SUPPORT) + fi + ++dnl Check if -msse2avx works. 
++AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl ++if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then ++ libc_cv_cc_sse2avx=yes ++else ++ libc_cv_cc_sse2avx=no ++fi]) ++if test $libc_cv_cc_sse2avx = yes; then ++ AC_DEFINE(HAVE_SSE2AVX_SUPPORT) ++fi ++ + dnl Check if -mfma4 works. + AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl + if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then +diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h +index 50f765f..8e79718 100644 +--- a/sysdeps/x86_64/fpu/math_private.h ++++ b/sysdeps/x86_64/fpu/math_private.h +@@ -19,7 +19,7 @@ + + /* We can do a few things better on x86-64. */ + +-#ifdef __AVX__ ++#if defined __AVX__ || defined SSE2AVX + # define MOVD "vmovd" + # define STMXCSR "vstmxcsr" + # define LDMXCSR "vldmxcsr" +@@ -90,7 +90,7 @@ + ({ int __di; GET_FLOAT_WORD (__di, (float) d); \ + (__di & 0x7fffffff) < 0x7f800000; }) + +-#ifdef __AVX__ ++#if defined __AVX__ || defined SSE2AVX + # define __ieee754_sqrt(d) \ + ({ double __res; \ + asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ +@@ -116,7 +116,7 @@ + + #ifdef __SSE4_1__ + # ifndef __rint +-# ifdef __AVX__ ++# if defined __AVX__ || defined SSE2AVX + # define __rint(d) \ + ({ double __res; \ + asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ +@@ -129,7 +129,7 @@ + # endif + # endif + # ifndef __rintf +-# ifdef __AVX__ ++# if defined __AVX__ || defined SSE2AVX + # define __rintf(d) \ + ({ float __res; \ + asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ +@@ -143,7 +143,7 @@ + # endif + + # ifndef __floor +-# ifdef __AVX__ ++# if defined __AVX__ || defined SSE2AVX + # define __floor(d) \ + ({ double __res; \ + asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ +@@ -156,7 +156,7 @@ + # endif + # endif + # ifndef __floorf +-# ifdef __AVX__ ++# if defined __AVX__ || defined SSE2AVX + 
# define __floorf(d) \ + ({ float __res; \ + asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ +@@ -173,29 +173,6 @@ + + /* Specialized variants of the interfaces which only handle + either the FPU or the SSE unit. */ +-#undef libc_fegetround +-#define libc_fegetround() \ +- ({ \ +- unsigned int mxcsr; \ +- asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ +- (mxcsr & 0x6000) >> 3; \ +- }) +-#undef libc_fegetroundf +-#define libc_fegetroundf() libc_fegetround () +-// #define libc_fegetroundl() fegetround () +- +-#undef libc_fesetround +-#define libc_fesetround(r) \ +- do { \ +- unsigned int mxcsr; \ +- asm (STMXCSR " %0" : "=m" (*&mxcsr)); \ +- mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \ +- asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \ +- } while (0) +-#undef libc_fesetroundf +-#define libc_fesetroundf(r) libc_fesetround (r) +-// #define libc_fesetroundl(r) (void) fesetround (r) +- + #undef libc_feholdexcept + #define libc_feholdexcept(e) \ + do { \ +@@ -224,7 +201,8 @@ + + #undef libc_fetestexcept + #define libc_fetestexcept(e) \ +- ({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ ++ ({ unsigned int mxcsr; \ ++ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ + mxcsr & (e) & FE_ALL_EXCEPT; }) + #undef libc_fetestexceptf + #define libc_fetestexceptf(e) libc_fetestexcept (e) +diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile +index be68903..4b5c173 100644 +--- a/sysdeps/x86_64/fpu/multiarch/Makefile ++++ b/sysdeps/x86_64/fpu/multiarch/Makefile +@@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4 + CFLAGS-s_tan-fma4.c = -mfma4 + endif + +-ifeq ($(config-cflags-avx),yes) ++ifeq ($(config-cflags-sse2avx),yes) + libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ + e_atan2-avx s_sin-avx s_tan-avx \ + mplog-avx mpa-avx slowexp-avx \ + mpexp-avx + +-CFLAGS-e_atan2-avx.c = -mavx +-CFLAGS-e_exp-avx.c = -mavx +-CFLAGS-e_log-avx.c = -mavx +-CFLAGS-mpa-avx.c = -mavx +-CFLAGS-mpexp-avx.c = 
-mavx +-CFLAGS-mplog-avx.c = -mavx +-CFLAGS-s_atan-avx.c = -mavx +-CFLAGS-s_sin-avx.c = -mavx +-CFLAGS-slowexp-avx.c = -mavx +-CFLAGS-s_tan-avx.c = -mavx ++CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX ++CFLAGS-slowexp-avx.c = -sse2mavx -DSSE2AVX ++CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX + endif + endif +diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +index 3a615fc..6867c6e 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double); + + libm_ifunc (__ieee754_atan2, + HAS_FMA4 ? __ieee754_atan2_fma4 +- : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); ++ : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); + strong_alias (__ieee754_atan2, __atan2_finite) + + # define __ieee754_atan2 __ieee754_atan2_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c +index 7b2320a..3c65028 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c +@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double); + + libm_ifunc (__ieee754_exp, + HAS_FMA4 ? __ieee754_exp_fma4 +- : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2)); ++ : (HAS_AVX ? 
__ieee754_exp_avx : __ieee754_exp_sse2)); + strong_alias (__ieee754_exp, __exp_finite) + + # define __ieee754_exp __ieee754_exp_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c +index ab277d6..05f3668 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log.c +@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double); + + libm_ifunc (__ieee754_log, + HAS_FMA4 ? __ieee754_log_fma4 +- : (HAS_YMM_USABLE ? __ieee754_log_avx +- : __ieee754_log_sse2)); ++ : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2)); + strong_alias (__ieee754_log, __log_finite) + + # define __ieee754_log __ieee754_log_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c +index 78c7e09..ae16d7c 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c +@@ -13,7 +13,7 @@ extern double __atan_fma4 (double); + # endif + + libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : +- HAS_YMM_USABLE ? __atan_avx : __atan_sse2)); ++ HAS_AVX ? __atan_avx : __atan_sse2)); + + # define atan __atan_sse2 + #endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c +index 417acd0..a0c2521 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c +@@ -18,11 +18,11 @@ extern double __sin_fma4 (double); + # endif + + libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : +- HAS_YMM_USABLE ? __cos_avx : __cos_sse2)); ++ HAS_AVX ? __cos_avx : __cos_sse2)); + weak_alias (__cos, cos) + + libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : +- HAS_YMM_USABLE ? __sin_avx : __sin_sse2)); ++ HAS_AVX ? 
__sin_avx : __sin_sse2)); + weak_alias (__sin, sin) + + # define __cos __cos_sse2 +diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c +index 3047155..904308f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c +@@ -13,7 +13,7 @@ extern double __tan_fma4 (double); + # endif + + libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : +- HAS_YMM_USABLE ? __tan_avx : __tan_sse2)); ++ HAS_AVX ? __tan_avx : __tan_sse2)); + + # define tan __tan_sse2 + #endif + +commit 3b1004624e54cc2fefd034ff80d5dea4b6db764f +Author: Joseph Myers +Date: Mon Jan 30 19:55:15 2012 +0000 + + Fix makefile/configure problems with sse2avx changes. + +2012-01-30 Joseph Myers + + * configure.in (libc_cv_cc_sse2avx): AC_SUBST. + * configure: Regenerate. + * config.make.in (config-cflags-sse2avx): Define. + * sysdeps/x86_64/fpu/multiarch/Makefile (CFLAGS-slowexp-avx.c): + Fix typo. + + * scripts/config.guess: Update from upstream config git repository. +diff --git a/config.make.in b/config.make.in +index d937952..75061f6 100644 +--- a/config.make.in ++++ b/config.make.in +@@ -36,6 +36,7 @@ asflags-cpu = @libc_cv_cc_submachine@ + + config-cflags-sse4 = @libc_cv_cc_sse4@ + config-cflags-avx = @libc_cv_cc_avx@ ++config-cflags-sse2avx = @libc_cv_cc_sse2avx@ + config-cflags-novzeroupper = @libc_cv_cc_novzeroupper@ + config-asflags-i686 = @libc_cv_as_i686@ + +diff --git a/configure b/configure +index 71e8de1..508e1bb 100755 +--- a/configure ++++ b/configure +@@ -620,6 +620,7 @@ use_ldconfig + libc_cv_as_i686 + libc_cv_cc_fma4 + libc_cv_cc_novzeroupper ++libc_cv_cc_sse2avx + libc_cv_cc_avx + libc_cv_cc_sse4 + libc_cv_cpp_asm_debuginfo +diff --git a/configure.in b/configure.in +index 5fd6d85..0499d3c 100644 +--- a/configure.in ++++ b/configure.in +@@ -2154,6 +2154,7 @@ dnl sysdeps/CPU/configure.in checks set this via arch-specific asm tests + AC_SUBST(libc_cv_cpp_asm_debuginfo) + AC_SUBST(libc_cv_cc_sse4) + AC_SUBST(libc_cv_cc_avx) 
++AC_SUBST(libc_cv_cc_sse2avx) + AC_SUBST(libc_cv_cc_novzeroupper) + AC_SUBST(libc_cv_cc_fma4) + AC_SUBST(libc_cv_as_i686) +diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile +index 4b5c173..2a38ffc 100644 +--- a/sysdeps/x86_64/fpu/multiarch/Makefile ++++ b/sysdeps/x86_64/fpu/multiarch/Makefile +@@ -48,7 +48,7 @@ CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX + CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX + CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX + CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX +-CFLAGS-slowexp-avx.c = -sse2mavx -DSSE2AVX ++CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX + CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX + endif + endif diff --git a/glibc-ifunc-2.16.patch b/glibc-ifunc-2.16.patch new file mode 100644 index 0000000..b565dbf --- /dev/null +++ b/glibc-ifunc-2.16.patch @@ -0,0 +1,278 @@ +commit 6ee65ed6ddbf04402fad0bec6aa9c73b9d982ae4 +Author: Ulrich Drepper +Date: Fri Jan 27 15:05:19 2012 -0500 + + Sort objects before relocations + +2012-01-27 Ulrich Drepper + + [BZ #13618] + * elf/dl-open.c (dl_open_worker): Sort objects by dependency before + relocation. + * Makeconfig (libm): Define. + * elf/Makefile: Add rules to build and run tst-relsort1. + * elf/tst-relsort1.c: New file. + * elf/tst-relsort1mod1.c: New file. + * elf/tst-relsort1mod2.c: New file. + +diff --git a/Makeconfig b/Makeconfig +index 8195245..185afbb 100644 +--- a/Makeconfig ++++ b/Makeconfig +@@ -950,6 +950,12 @@ libdl = + endif + endif + ++ifeq ($(build-shared),yes) ++libm = $(common-objpfx)math/libm.so$(libm.so-version) ++else ++libm = $(common-objpfx)math/libm.a ++endif ++ + # These are the subdirectories containing the library source. The order + # is more or less arbitrary. The sorting step will take care of the + # dependencies. 
+diff --git a/elf/Makefile b/elf/Makefile +index 052e763..3f1772a 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -124,7 +124,8 @@ distribute := rtld-Rules \ + tst-initordera1.c tst-initordera2.c tst-initorderb1.c \ + tst-initorderb2.c tst-initordera3.c tst-initordera4.c \ + tst-initorder.c \ +- tst-initorder2.c ++ tst-initorder2.c \ ++ tst-relsort1.c tst-relsort1mod1.c tst-relsort1mod2.c + + CFLAGS-dl-runtime.c = -fexceptions -fasynchronous-unwind-tables + CFLAGS-dl-lookup.c = -fexceptions -fasynchronous-unwind-tables +@@ -227,7 +228,7 @@ tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \ + tst-audit1 tst-audit2 \ + tst-stackguard1 tst-addr1 tst-thrlock \ + tst-unique1 tst-unique2 tst-unique3 tst-unique4 \ +- tst-initorder tst-initorder2 ++ tst-initorder tst-initorder2 tst-relsort1 + # reldep9 + test-srcs = tst-pathopt + selinux-enabled := $(shell cat /selinux/enforce 2> /dev/null) +@@ -290,7 +291,9 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \ + tst-initordera1 tst-initorderb1 \ + tst-initordera2 tst-initorderb2 \ + tst-initordera3 tst-initordera4 \ +- tst-initorder2a tst-initorder2b tst-initorder2c tst-initorder2d ++ tst-initorder2a tst-initorder2b tst-initorder2c \ ++ tst-initorder2d \ ++ tst-relsort1mod1 tst-relsort1mod2 + ifeq (yes,$(have-initfini-array)) + modules-names += tst-array2dep tst-array5dep + endif +@@ -1195,3 +1198,9 @@ CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS) + CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS) + CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS) + endif ++ ++$(objpfx)tst-relsort1: $(libdl) ++$(objpfx)tst-relsort1mod1.so: $(libm) $(objpfx)tst-relsort1mod2.so ++$(objpfx)tst-relsort1mod2.so: $(libm) ++$(objpfx)tst-relsort1.out: $(objpfx)tst-relsort1mod1.so \ ++ $(objpfx)tst-relsort1mod2.so +diff --git a/elf/dl-open.c b/elf/dl-open.c +index a0b5c50..a56bdc1 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -1,5 +1,5 @@ + /* Load a shared object at runtime, relocate it, and run its 
initializer. +- Copyright (C) 1996-2007, 2009, 2010, 2011 Free Software Foundation, Inc. ++ Copyright (C) 1996-2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -302,45 +302,109 @@ dl_open_worker (void *a) + if (GLRO(dl_lazy)) + reloc_mode |= mode & RTLD_LAZY; + +- /* Relocate the objects loaded. We do this in reverse order so that copy +- relocs of earlier objects overwrite the data written by later objects. */ +- ++ /* Sort the objects by dependency for the relocation process. This ++ allows IFUNC relocations to work and it also means copy ++ relocation of dependencies are if necessary overwritten. */ ++ size_t nmaps = 0; + struct link_map *l = new; +- while (l->l_next) +- l = l->l_next; +- while (1) ++ do ++ { ++ if (! l->l_real->l_relocated) ++ ++nmaps; ++ l = l->l_next; ++ } ++ while (l != NULL); ++ struct link_map *maps[nmaps]; ++ nmaps = 0; ++ l = new; ++ do + { + if (! l->l_real->l_relocated) ++ maps[nmaps++] = l; ++ l = l->l_next; ++ } ++ while (l != NULL); ++ if (nmaps > 1) ++ { ++ char seen[nmaps]; ++ memset (seen, '\0', nmaps); ++ size_t i = 0; ++ while (1) + { +-#ifdef SHARED +- if (__builtin_expect (GLRO(dl_profile) != NULL, 0)) ++ ++seen[i]; ++ struct link_map *thisp = maps[i]; ++ ++ /* Find the last object in the list for which the current one is ++ a dependency and move the current object behind the object ++ with the dependency. */ ++ size_t k = nmaps - 1; ++ while (k > i) + { +- /* If this here is the shared object which we want to profile +- make sure the profile is started. We can find out whether +- this is necessary or not by observing the `_dl_profile_map' +- variable. If was NULL but is not NULL afterwars we must +- start the profiling. */ +- struct link_map *old_profile_map = GL(dl_profile_map); ++ struct link_map **runp = maps[k]->l_initfini; ++ if (runp != NULL) ++ /* Look through the dependencies of the object. 
*/ ++ while (*runp != NULL) ++ if (__builtin_expect (*runp++ == thisp, 0)) ++ { ++ /* Move the current object to the back past the last ++ object with it as the dependency. */ ++ memmove (&maps[i], &maps[i + 1], ++ (k - i) * sizeof (maps[0])); ++ maps[k] = thisp; + +- _dl_relocate_object (l, l->l_scope, reloc_mode | RTLD_LAZY, 1); ++ if (seen[i + 1] > 1) ++ { ++ ++i; ++ goto next_clear; ++ } + +- if (old_profile_map == NULL && GL(dl_profile_map) != NULL) +- { +- /* We must prepare the profiling. */ +- _dl_start_profile (); ++ char this_seen = seen[i]; ++ memmove (&seen[i], &seen[i + 1], ++ (k - i) * sizeof (seen[0])); ++ seen[k] = this_seen; + +- /* Prevent unloading the object. */ +- GL(dl_profile_map)->l_flags_1 |= DF_1_NODELETE; +- } ++ goto next; ++ } ++ ++ --k; + } +- else +-#endif +- _dl_relocate_object (l, l->l_scope, reloc_mode, 0); ++ ++ if (++i == nmaps) ++ break; ++ next_clear: ++ memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); ++ next:; + } ++ } ++ ++ for (size_t i = nmaps; i-- > 0; ) ++ { ++ l = maps[i]; ++ ++#ifdef SHARED ++ if (__builtin_expect (GLRO(dl_profile) != NULL, 0)) ++ { ++ /* If this here is the shared object which we want to profile ++ make sure the profile is started. We can find out whether ++ this is necessary or not by observing the `_dl_profile_map' ++ variable. If it was NULL but is not NULL afterwars we must ++ start the profiling. */ ++ struct link_map *old_profile_map = GL(dl_profile_map); ++ ++ _dl_relocate_object (l, l->l_scope, reloc_mode | RTLD_LAZY, 1); + +- if (l == new) +- break; +- l = l->l_prev; ++ if (old_profile_map == NULL && GL(dl_profile_map) != NULL) ++ { ++ /* We must prepare the profiling. */ ++ _dl_start_profile (); ++ ++ /* Prevent unloading the object. 
*/ ++ GL(dl_profile_map)->l_flags_1 |= DF_1_NODELETE; ++ } ++ } ++ else ++#endif ++ _dl_relocate_object (l, l->l_scope, reloc_mode, 0); + } + + /* If the file is not loaded now as a dependency, add the search +diff --git a/elf/tst-relsort1.c b/elf/tst-relsort1.c +new file mode 100644 +index 0000000..972100c +--- /dev/null ++++ b/elf/tst-relsort1.c +@@ -0,0 +1,19 @@ ++#include ++#include ++ ++ ++static int ++do_test () ++{ ++ const char lib[] = "$ORIGIN/tst-relsort1mod1.so"; ++ void *h = dlopen (lib, RTLD_NOW); ++ if (h == NULL) ++ { ++ puts (dlerror ()); ++ return 1; ++ } ++ return 0; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +diff --git a/elf/tst-relsort1mod1.c b/elf/tst-relsort1mod1.c +new file mode 100644 +index 0000000..9e4a943 +--- /dev/null ++++ b/elf/tst-relsort1mod1.c +@@ -0,0 +1,7 @@ ++extern int foo (double); ++ ++int ++bar (void) ++{ ++ return foo (1.2); ++} +diff --git a/elf/tst-relsort1mod2.c b/elf/tst-relsort1mod2.c +new file mode 100644 +index 0000000..a2c3e55 +--- /dev/null ++++ b/elf/tst-relsort1mod2.c +@@ -0,0 +1,7 @@ ++#include ++ ++int ++foo (double d) ++{ ++ return floor (d) != 0.0; ++} diff --git a/glibc.changes b/glibc.changes index 93c823f..795b8de 100644 --- a/glibc.changes +++ b/glibc.changes @@ -1,3 +1,23 @@ +------------------------------------------------------------------- +Tue Jan 31 13:20:37 UTC 2012 - aj@suse.de + +- Fix checks for AVX (patch glibc-2.16-avx.patch) + +------------------------------------------------------------------- +Tue Jan 31 09:44:07 UTC 2012 - aj@suse.de + +- Use Linux 2.6.32 on x86_64 as oldest supported kernel + +------------------------------------------------------------------- +Tue Jan 31 08:57:15 UTC 2012 - aj@suse.de + +- Fix relocation ordering to fix ifunc crash (bnc#740109). 
+ +------------------------------------------------------------------- +Sun Jan 29 22:47:18 CET 2012 - dmueller@suse.de + +- set abi back to gnueabi for ARM (regression from Jan 12) + ------------------------------------------------------------------- Wed Jan 18 09:29:24 UTC 2012 - aj@suse.de @@ -44,7 +64,8 @@ Mon Jan 9 08:17:08 UTC 2012 - aj@suse.de Wed Jan 4 09:34:28 UTC 2012 - aj@suse.de - Remove patch glibc-2.3.4-gb18030-big5hkscs.diff.bz2 and - replace it by gb18030.patch.bz2 (synced with Fedora 16). + replace it by gb18030.patch.bz2 (synced with Fedora 16) + (bnc#743617). ------------------------------------------------------------------- Tue Jan 3 19:42:17 UTC 2012 - aj@suse.de diff --git a/glibc.spec b/glibc.spec index 3bb206f..9247e54 100644 --- a/glibc.spec +++ b/glibc.spec @@ -53,9 +53,14 @@ BuildRequires: libstdc++-devel %define powerpc_optimize_cpu_power7 0 %define powerpc_optimize_cpu_cell 0 %endif # ppc, ppc64 +%ifarch x86_64 +# 2.6.32 is the SLES 11 SP1 kernel +# 2.6.34 is the openSUSE 11.3 kernel +%define enablekernel 2.6.32 +%else # 2.6.16 is the SLES 10 kernel, use this as oldest supported kernel -# since many SUSE build machines have it %define enablekernel 2.6.16 +%endif # ngpt was used in 8.1 and SLES8 Obsoletes: ngpt < 2.2.2 Obsoletes: ngpt-devel < 2.2.2 @@ -70,7 +75,7 @@ Obsoletes: glibc-64bit Obsoletes: glibc-32bit %endif Version: 2.15 -Release: 11 +Release: 0 %define git_id 2ba92745c36e %define glibc_ports_ver 2.15 %define ports_git_id 8a70b2dcabbf @@ -195,10 +200,12 @@ Patch88: pthread-cond-wait-revert.patch Patch89: glibc-2.16-scanf.patch # PATCH-FIX-UPSTREAM Fix getcontext on 32-bit powerpc - aj@suse.de Patch90: glibc-ppc-getcontext.patch -# PATCH-FIX-OPENSUSE Revert some math ifuncs (bnc#740109) - aj@suse.de -Patch91: glibc-2.15-math64crash.patch +# PATCH-FIX-UPSTREAM Fix ifunc relocations (bnc#740109) - aj@suse.de +Patch91: glibc-ifunc-2.16.patch # PATCH-FIX-OPENSUSE Remove netlink optimization (bnc#741021) - aj@suse.de Patch92: 
glibc-revert-netlink-cache.patch +# PATCH-FIX-UPSTREAM Fix tests for AVX features - aj@suse.de +Patch93: glibc-2.16-avx.patch %description The GNU C Library provides the most important standard libraries used @@ -433,6 +440,7 @@ rm nscd/s-stamp %patch90 -p1 %patch91 -p1 %patch92 -p1 -R +%patch93 -p1 # # Inconsistency detected by ld.so: dl-close.c: 719: _dl_close: Assertion `map->l_init_called' failed! @@ -472,9 +480,13 @@ nice # We do not want configure to figure out the system its building one # to support a common ground and thus set build and host to the # target_cpu. +%ifarch %arm +%define target %{_target_cpu}-suse-linux-gnueabi +%else %define target %{_target_cpu}-suse-linux +%endif # Adjust glibc version.h -echo "#define CONFHOST \"%{_target_cpu}-suse-linux\"" >> version.h +echo "#define CONFHOST \"%{target}\"" >> version.h echo "#define CVSDATE \"`date -r ChangeLog +%Y%m%d`\"" >> version.h # # Default CFLAGS and Compiler @@ -536,9 +548,6 @@ PARALLEL="%{?_smp_mflags}" # fails to build otherwise - need to recheck and fix %define enable_stackguard_randomization 0 %endif -# Remove completely, these give some strange crashes with dlopen -# See bnc#740109 -rm -rf sysdeps/x86_64/fpu/multiarch configure_and_build_glibc() { local dirname="$1"; shift