diff --git a/glibc-2.16-avx.patch b/glibc-2.16-avx.patch index 80eaaf4..3784f23 100644 --- a/glibc-2.16-avx.patch +++ b/glibc-2.16-avx.patch @@ -1,4 +1,4 @@ -commit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880 +commit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880 Author: Ulrich Drepper Date: Thu Jan 26 07:45:14 2012 -0500 @@ -11,13 +11,13 @@ Date: Thu Jan 26 07:45:14 2012 -0500 * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If bit_AVX is set also check OSXAVE/XCR0 and reset bit_AVX if necessary. - 2012-01-25 Joseph Myers - - * elf/tst-unique3.cc (gets): Remove declaration. -Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c -=================================================================== ---- glibc-2.15.orig/sysdeps/x86_64/multiarch/init-arch.c -+++ glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c +Also contains selected changes from commits 08cf777f9e7f6d826658a99c7d77a359f73a45bf +and 56f6f6a2403cfa7267cad722597113be35ecf70d. + +diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c +index 65b0ee9..4fabbee 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.c ++++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -1,6 +1,6 @@ /* Initialize CPU feature data. This file is part of the GNU C Library. @@ -33,22 +33,22 @@ Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c + if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) + { + /* Reset the AVX bit in case OSXSAVE is disabled. 
*/ -+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0 -+ && ({ unsigned int xcrlow; -+ unsigned int xcrhigh; -+ asm ("xgetbv" -+ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); -+ (xcrlow & 6) == 6; })) -+ __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable; ++ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0 ++ || ({ unsigned int xcrlow; ++ unsigned int xcrhigh; ++ asm ("xgetbv" ++ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); ++ (xcrlow & 6) != 6; })) ++ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; + } + __cpu_features.family = family; __cpu_features.model = model; atomic_write_barrier (); -Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h -=================================================================== ---- glibc-2.15.orig/sysdeps/x86_64/multiarch/init-arch.h -+++ glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h +diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h +index 2a1df39..408e5ae 100644 +--- a/sysdeps/x86_64/multiarch/init-arch.h ++++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -1,5 +1,5 @@ /* This file is part of the GNU C Library. - Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 
@@ -56,13 +56,7 @@ Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public -@@ -22,11 +22,13 @@ - #define bit_Prefer_SSE_for_memop (1 << 3) - #define bit_Fast_Unaligned_Load (1 << 4) - #define bit_Prefer_PMINUB_for_stringop (1 << 5) -+#define bit_YMM_Usable (1 << 6) - - #define bit_SSE2 (1 << 26) +@@ -27,6 +27,7 @@ #define bit_SSSE3 (1 << 9) #define bit_SSE4_1 (1 << 19) #define bit_SSE4_2 (1 << 20) @@ -70,319 +64,7 @@ Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h #define bit_AVX (1 << 28) #define bit_POPCOUNT (1 << 23) #define bit_FMA (1 << 12) -@@ -48,6 +50,7 @@ - # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE - # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE - # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE -+# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE - - #else /* __ASSEMBLER__ */ - -@@ -92,7 +95,7 @@ extern struct cpu_features - - - extern void __init_cpu_features (void) attribute_hidden; --#define INIT_ARCH()\ -+# define INIT_ARCH() \ - do \ - if (__cpu_features.kind == arch_kind_unknown) \ - __init_cpu_features (); \ -@@ -125,23 +128,21 @@ extern const struct cpu_features *__get_ - # define index_Slow_BSF FEATURE_INDEX_1 - # define index_Prefer_SSE_for_memop FEATURE_INDEX_1 - # define index_Fast_Unaligned_Load FEATURE_INDEX_1 -+# define index_YMM_Usable FEATURE_INDEX_1 - --#define HAS_ARCH_FEATURE(idx, bit) \ -- ((__get_cpu_features ()->feature[idx] & (bit)) != 0) -+# define HAS_ARCH_FEATURE(name) \ -+ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) - --#define HAS_FAST_REP_STRING \ -- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String) -+# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) - --#define HAS_FAST_COPY_BACKWARD \ -- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward) -+# define 
HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) - --#define HAS_SLOW_BSF \ -- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF) -+# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) - --#define HAS_PREFER_SSE_FOR_MEMOP \ -- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) -+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) - --#define HAS_FAST_UNALIGNED_LOAD \ -- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load) -+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) -+ -+# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable) - - #endif /* __ASSEMBLER__ */ -Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/e_log.c -=================================================================== ---- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/e_log.c -+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/e_log.c -@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double - - libm_ifunc (__ieee754_log, - HAS_FMA4 ? __ieee754_log_fma4 -- : (HAS_AVX ? __ieee754_log_avx -- : __ieee754_log_sse2)); -+ : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2)); - strong_alias (__ieee754_log, __log_finite) - - # define __ieee754_log __ieee754_log_sse2 -Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_atan.c -=================================================================== ---- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_atan.c -+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_atan.c -@@ -12,7 +12,8 @@ extern double __atan_fma4 (double); - # define __atan_fma4 ((void *) 0) - # endif - --libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2); -+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : -+ HAS_AVX ? 
__atan_avx : __atan_sse2)); - - # define atan __atan_sse2 - #endif -Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_sin.c -=================================================================== ---- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_sin.c -+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_sin.c -@@ -17,10 +17,12 @@ extern double __sin_fma4 (double); - # define __sin_fma4 ((void *) 0) - # endif - --libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2); -+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : -+ HAS_AVX ? __cos_avx : __cos_sse2)); - weak_alias (__cos, cos) - --libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2); -+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : -+ HAS_AVX ? __sin_avx : __sin_sse2)); - weak_alias (__sin, sin) - - # define __cos __cos_sse2 -Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_tan.c -=================================================================== ---- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_tan.c -+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_tan.c -@@ -12,7 +12,8 @@ extern double __tan_fma4 (double); - # define __tan_fma4 ((void *) 0) - # endif - --libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2); -+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : -+ HAS_AVX ? __tan_avx : __tan_sse2)); - - # define tan __tan_sse2 - #endif -Index: glibc-2.15/sysdeps/x86_64/fpu/math_private.h -=================================================================== ---- glibc-2.15.orig/sysdeps/x86_64/fpu/math_private.h -+++ glibc-2.15/sysdeps/x86_64/fpu/math_private.h -@@ -19,10 +19,14 @@ - - /* We can do a few things better on x86-64. */ - --#ifdef __AVX__ -+#if defined __AVX__ || defined SSE2AVX - # define MOVD "vmovd" -+# define STMXCSR "vstmxcsr" -+# define LDMXCSR "vldmxcsr" - #else - # define MOVD "movd" -+# define STMXCSR "stmxcsr" -+# define LDMXCSR "ldmxcsr" - #endif - - /* Direct movement of float into integer register. 
*/ -@@ -86,7 +90,7 @@ - ({ int __di; GET_FLOAT_WORD (__di, (float) d); \ - (__di & 0x7fffffff) < 0x7f800000; }) - --#ifdef __AVX__ -+#if defined __AVX__ || defined SSE2AVX - # define __ieee754_sqrt(d) \ - ({ double __res; \ - asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ -@@ -112,7 +116,7 @@ - - #ifdef __SSE4_1__ - # ifndef __rint --# ifdef __AVX__ -+# if defined __AVX__ || defined SSE2AVX - # define __rint(d) \ - ({ double __res; \ - asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ -@@ -125,7 +129,7 @@ - # endif - # endif - # ifndef __rintf --# ifdef __AVX__ -+# if defined __AVX__ || defined SSE2AVX - # define __rintf(d) \ - ({ float __res; \ - asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ -@@ -139,7 +143,7 @@ - # endif - - # ifndef __floor --# ifdef __AVX__ -+# if defined __AVX__ || defined SSE2AVX - # define __floor(d) \ - ({ double __res; \ - asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ -@@ -152,7 +156,7 @@ - # endif - # endif - # ifndef __floorf --# ifdef __AVX__ -+# if defined __AVX__ || defined SSE2AVX - # define __floorf(d) \ - ({ float __res; \ - asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ -@@ -169,121 +173,56 @@ - - /* Specialized variants of the interfaces which only handle - either the FPU or the SSE unit. 
*/ --#undef libc_fegetround --#define libc_fegetround() \ -- ({ \ -- unsigned int mxcsr; \ -- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \ -- (mxcsr & 0x6000) >> 3; \ -- }) --#undef libc_fegetroundf --#define libc_fegetroundf() libc_fegetround () --// #define libc_fegetroundl() fegetround () -- --#undef libc_fesetround --#define libc_fesetround(r) \ -- do { \ -- unsigned int mxcsr; \ -- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ -- mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \ -- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ -- } while (0) --#undef libc_fesetroundf --#define libc_fesetroundf(r) libc_fesetround (r) --// #define libc_fesetroundl(r) (void) fesetround (r) -- - #undef libc_feholdexcept --#ifdef __AVX__ --# define libc_feholdexcept(e) \ -- do { \ -- unsigned int mxcsr; \ -- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \ -- (e)->__mxcsr = mxcsr; \ -- mxcsr = (mxcsr | 0x1f80) & ~0x3f; \ -- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \ -- } while (0) --#else --# define libc_feholdexcept(e) \ -+#define libc_feholdexcept(e) \ - do { \ - unsigned int mxcsr; \ -- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ -+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \ - (e)->__mxcsr = mxcsr; \ - mxcsr = (mxcsr | 0x1f80) & ~0x3f; \ -- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ -+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \ - } while (0) --#endif - #undef libc_feholdexceptf - #define libc_feholdexceptf(e) libc_feholdexcept (e) - // #define libc_feholdexceptl(e) (void) feholdexcept (e) - - #undef libc_feholdexcept_setround --#ifdef __AVX__ --# define libc_feholdexcept_setround(e, r) \ -+#define libc_feholdexcept_setround(e, r) \ - do { \ - unsigned int mxcsr; \ -- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \ -+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \ - (e)->__mxcsr = mxcsr; \ - mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \ -- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \ -+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \ - } while (0) --#else --# define 
libc_feholdexcept_setround(e, r) \ -- do { \ -- unsigned int mxcsr; \ -- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ -- (e)->__mxcsr = mxcsr; \ -- mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \ -- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ -- } while (0) --#endif - #undef libc_feholdexcept_setroundf - #define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r) - // #define libc_feholdexcept_setroundl(e, r) ... - - #undef libc_fetestexcept --#ifdef __AVX__ --# define libc_fetestexcept(e) \ -- ({ unsigned int mxcsr; asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \ -+#define libc_fetestexcept(e) \ -+ ({ unsigned int mxcsr; \ -+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ - mxcsr & (e) & FE_ALL_EXCEPT; }) --#else --# define libc_fetestexcept(e) \ -- ({ unsigned int mxcsr; asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \ -- mxcsr & (e) & FE_ALL_EXCEPT; }) --#endif - #undef libc_fetestexceptf - #define libc_fetestexceptf(e) libc_fetestexcept (e) - // #define libc_fetestexceptl(e) fetestexcept (e) - - #undef libc_fesetenv --#ifdef __AVX__ --# define libc_fesetenv(e) \ -- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr)) --#else --# define libc_fesetenv(e) \ -- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)) --#endif -+#define libc_fesetenv(e) \ -+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)) - #undef libc_fesetenvf - #define libc_fesetenvf(e) libc_fesetenv (e) - // #define libc_fesetenvl(e) (void) fesetenv (e) - - #undef libc_feupdateenv --#ifdef __AVX__ --# define libc_feupdateenv(e) \ -+#define libc_feupdateenv(e) \ - do { \ - unsigned int mxcsr; \ -- asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \ -- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr)); \ -+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ -+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)); \ - __feraiseexcept (mxcsr & FE_ALL_EXCEPT); \ - } while (0) --#else --# define libc_feupdateenv(e) \ -- do { \ -- unsigned int mxcsr; \ -- asm volatile ("stmxcsr %0" : 
"=m" (*&mxcsr)); \ -- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)); \ -- __feraiseexcept (mxcsr & FE_ALL_EXCEPT); \ -- } while (0) --#endif - #undef libc_feupdateenvf - #define libc_feupdateenvf(e) libc_feupdateenv (e) - // #define libc_feupdateenvl(e) (void) feupdateenv (e) + Index: glibc-2.15/config.h.in =================================================================== --- glibc-2.15.orig/config.h.in @@ -419,25 +101,6 @@ Index: glibc-2.15/config.h.in include/libc-symbols.h that avoid PLT slots in the shared objects. */ #undef NO_HIDDEN -Index: glibc-2.15/math/math_private.h -=================================================================== ---- glibc-2.15.orig/math/math_private.h -+++ glibc-2.15/math/math_private.h -@@ -365,14 +365,6 @@ extern void __docos (double __x, double - know what operations are going to be performed. Therefore we - define additional interfaces. By default they refer to the normal - interfaces. */ --#define libc_fegetround() fegetround () --#define libc_fegetroundf() fegetround () --#define libc_fegetroundl() fegetround () -- --#define libc_fesetround(r) (void) fesetround (r) --#define libc_fesetroundf(r) (void) fesetround (r) --#define libc_fesetroundl(r) (void) fesetround (r) -- - #define libc_feholdexcept(e) (void) feholdexcept (e) - #define libc_feholdexceptf(e) (void) feholdexcept (e) - #define libc_feholdexceptl(e) (void) feholdexcept (e) Index: glibc-2.15/sysdeps/i386/configure =================================================================== --- glibc-2.15.orig/sysdeps/i386/configure @@ -567,3 +230,19 @@ Index: glibc-2.15/configure.in AC_SUBST(libc_cv_cc_novzeroupper) AC_SUBST(libc_cv_cc_fma4) AC_SUBST(libc_cv_as_i686) + + +--- eglibc-2.15-fma4-orig/sysdeps/x86_64/multiarch/init-arch.c 2012-05-07 10:02:38.869334673 -0700 ++++ eglibc-2.15-fma4-orig/sysdeps/x86_64/multiarch/init-arch.c 2012-05-07 10:01:16.217334959 -0700 +@@ -156,6 +156,11 @@ + __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; + } + ++ /* FMA4 
depends on AVX support */ ++ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4 ++ && (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) == 0) ++ __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx &= ~bit_FMA4; ++ + __cpu_features.family = family; + __cpu_features.model = model; + atomic_write_barrier (); diff --git a/glibc-2.16-mcheck.patch b/glibc-2.16-mcheck.patch index a2287c2..48860fa 100644 --- a/glibc-2.16-mcheck.patch +++ b/glibc-2.16-mcheck.patch @@ -1,8 +1,8 @@ -diff --git a/malloc/mcheck.c b/malloc/mcheck.c -index 9213740..7491c49 100644 ---- a/malloc/mcheck.c -+++ b/malloc/mcheck.c -@@ -370,6 +370,12 @@ mabort (enum mcheck_status status) +Index: glibc-2.15/malloc/mcheck.c +=================================================================== +--- glibc-2.15.orig/malloc/mcheck.c ++++ glibc-2.15/malloc/mcheck.c +@@ -371,6 +371,12 @@ mabort (enum mcheck_status status) #endif } @@ -15,7 +15,7 @@ index 9213740..7491c49 100644 int mcheck (func) void (*func) (enum mcheck_status); -@@ -381,6 +387,7 @@ mcheck (func) +@@ -382,6 +388,7 @@ mcheck (func) { /* We call malloc() once here to ensure it is initialized. */ void *p = malloc (0); diff --git a/glibc.changes b/glibc.changes index 1f3006a..26fb8f7 100644 --- a/glibc.changes +++ b/glibc.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Tue May 8 14:00:52 UTC 2012 - aj@suse.de + +- Refresh glibc-2.16-avx.patch and handle FMA4. + ------------------------------------------------------------------- Thu May 3 11:05:36 UTC 2012 - aj@suse.de