SHA256
1
0
forked from pool/glibc

Accepting request 116779 from Base:System

Refresh glibc-2.16-avx.patch and handle FMA4. (forwarded request 116778 from a_jaeger)

OBS-URL: https://build.opensuse.org/request/show/116779
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/glibc?expand=0&rev=108
This commit is contained in:
Stephan Kulow 2012-05-09 05:25:03 +00:00 committed by Git OBS Bridge
commit dd4f582fb7
3 changed files with 48 additions and 364 deletions

View File

@@ -1,4 +1,4 @@
commit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880
qcommit afc5ed09cbce5d6fd48b3a8c5ec427b31f996880
Author: Ulrich Drepper <drepper@gmail.com>
Date: Thu Jan 26 07:45:14 2012 -0500
@@ -11,13 +11,13 @@ Date: Thu Jan 26 07:45:14 2012 -0500
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If
bit_AVX is set also check OSXAVE/XCR0 and reset bit_AVX if necessary.
2012-01-25 Joseph Myers <joseph@codesourcery.com>
* elf/tst-unique3.cc (gets): Remove declaration.
Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c
===================================================================
--- glibc-2.15.orig/sysdeps/x86_64/multiarch/init-arch.c
+++ glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c
Also contains selected changes from commits 08cf777f9e7f6d826658a99c7d77a359f73a45bf
and 56f6f6a2403cfa7267cad722597113be35ecf70d.
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 65b0ee9..4fabbee 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -1,6 +1,6 @@
/* Initialize CPU feature data.
This file is part of the GNU C Library.
@@ -33,22 +33,22 @@ Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.c
+ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
+ {
+ /* Reset the AVX bit in case OSXSAVE is disabled. */
+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
+ && ({ unsigned int xcrlow;
+ unsigned int xcrhigh;
+ asm ("xgetbv"
+ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ (xcrlow & 6) == 6; }))
+ __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
+ || ({ unsigned int xcrlow;
+ unsigned int xcrhigh;
+ asm ("xgetbv"
+ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ (xcrlow & 6) != 6; }))
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
+ }
+
__cpu_features.family = family;
__cpu_features.model = model;
atomic_write_barrier ();
Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h
===================================================================
--- glibc-2.15.orig/sysdeps/x86_64/multiarch/init-arch.h
+++ glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 2a1df39..408e5ae 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -1,5 +1,5 @@
/* This file is part of the GNU C Library.
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
@@ -56,13 +56,7 @@ Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -22,11 +22,13 @@
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
+#define bit_YMM_Usable (1 << 6)
#define bit_SSE2 (1 << 26)
@@ -27,6 +27,7 @@
#define bit_SSSE3 (1 << 9)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
@@ -70,319 +64,7 @@ Index: glibc-2.15/sysdeps/x86_64/multiarch/init-arch.h
#define bit_AVX (1 << 28)
#define bit_POPCOUNT (1 << 23)
#define bit_FMA (1 << 12)
@@ -48,6 +50,7 @@
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
+# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -92,7 +95,7 @@ extern struct cpu_features
extern void __init_cpu_features (void) attribute_hidden;
-#define INIT_ARCH()\
+# define INIT_ARCH() \
do \
if (__cpu_features.kind == arch_kind_unknown) \
__init_cpu_features (); \
@@ -125,23 +128,21 @@ extern const struct cpu_features *__get_
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
+# define index_YMM_Usable FEATURE_INDEX_1
-#define HAS_ARCH_FEATURE(idx, bit) \
- ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
+# define HAS_ARCH_FEATURE(name) \
+ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-#define HAS_FAST_REP_STRING \
- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
+# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
-#define HAS_FAST_COPY_BACKWARD \
- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
+# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
-#define HAS_SLOW_BSF \
- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
+# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
-#define HAS_PREFER_SSE_FOR_MEMOP \
- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
-#define HAS_FAST_UNALIGNED_LOAD \
- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+
+# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
#endif /* __ASSEMBLER__ */
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/e_log.c
===================================================================
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double
libm_ifunc (__ieee754_log,
HAS_FMA4 ? __ieee754_log_fma4
- : (HAS_AVX ? __ieee754_log_avx
- : __ieee754_log_sse2));
+ : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite)
# define __ieee754_log __ieee754_log_sse2
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_atan.c
===================================================================
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
# define __atan_fma4 ((void *) 0)
# endif
-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
+ HAS_AVX ? __atan_avx : __atan_sse2));
# define atan __atan_sse2
#endif
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_sin.c
===================================================================
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
# define __sin_fma4 ((void *) 0)
# endif
-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
+ HAS_AVX ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos)
-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
+ HAS_AVX ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin)
# define __cos __cos_sse2
Index: glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_tan.c
===================================================================
--- glibc-2.15.orig/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ glibc-2.15/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
# define __tan_fma4 ((void *) 0)
# endif
-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
+ HAS_AVX ? __tan_avx : __tan_sse2));
# define tan __tan_sse2
#endif
Index: glibc-2.15/sysdeps/x86_64/fpu/math_private.h
===================================================================
--- glibc-2.15.orig/sysdeps/x86_64/fpu/math_private.h
+++ glibc-2.15/sysdeps/x86_64/fpu/math_private.h
@@ -19,10 +19,14 @@
/* We can do a few things better on x86-64. */
-#ifdef __AVX__
+#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
+# define STMXCSR "vstmxcsr"
+# define LDMXCSR "vldmxcsr"
#else
# define MOVD "movd"
+# define STMXCSR "stmxcsr"
+# define LDMXCSR "ldmxcsr"
#endif
/* Direct movement of float into integer register. */
@@ -86,7 +90,7 @@
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
(__di & 0x7fffffff) < 0x7f800000; })
-#ifdef __AVX__
+#if defined __AVX__ || defined SSE2AVX
# define __ieee754_sqrt(d) \
({ double __res; \
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@@ -112,7 +116,7 @@
#ifdef __SSE4_1__
# ifndef __rint
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __rint(d) \
({ double __res; \
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@@ -125,7 +129,7 @@
# endif
# endif
# ifndef __rintf
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __rintf(d) \
({ float __res; \
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@@ -139,7 +143,7 @@
# endif
# ifndef __floor
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __floor(d) \
({ double __res; \
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@@ -152,7 +156,7 @@
# endif
# endif
# ifndef __floorf
-# ifdef __AVX__
+# if defined __AVX__ || defined SSE2AVX
# define __floorf(d) \
({ float __res; \
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@@ -169,121 +173,56 @@
/* Specialized variants of the <fenv.h> interfaces which only handle
either the FPU or the SSE unit. */
-#undef libc_fegetround
-#define libc_fegetround() \
- ({ \
- unsigned int mxcsr; \
- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
- (mxcsr & 0x6000) >> 3; \
- })
-#undef libc_fegetroundf
-#define libc_fegetroundf() libc_fegetround ()
-// #define libc_fegetroundl() fegetround ()
-
-#undef libc_fesetround
-#define libc_fesetround(r) \
- do { \
- unsigned int mxcsr; \
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
- mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
- } while (0)
-#undef libc_fesetroundf
-#define libc_fesetroundf(r) libc_fesetround (r)
-// #define libc_fesetroundl(r) (void) fesetround (r)
-
#undef libc_feholdexcept
-#ifdef __AVX__
-# define libc_feholdexcept(e) \
- do { \
- unsigned int mxcsr; \
- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \
- (e)->__mxcsr = mxcsr; \
- mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \
- } while (0)
-#else
-# define libc_feholdexcept(e) \
+#define libc_feholdexcept(e) \
do { \
unsigned int mxcsr; \
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
(e)->__mxcsr = mxcsr; \
mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
} while (0)
-#endif
#undef libc_feholdexceptf
#define libc_feholdexceptf(e) libc_feholdexcept (e)
// #define libc_feholdexceptl(e) (void) feholdexcept (e)
#undef libc_feholdexcept_setround
-#ifdef __AVX__
-# define libc_feholdexcept_setround(e, r) \
+#define libc_feholdexcept_setround(e, r) \
do { \
unsigned int mxcsr; \
- asm ("vstmxcsr %0" : "=m" (*&mxcsr)); \
+ asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
(e)->__mxcsr = mxcsr; \
mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \
- asm volatile ("vldmxcsr %0" : : "m" (*&mxcsr)); \
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
} while (0)
-#else
-# define libc_feholdexcept_setround(e, r) \
- do { \
- unsigned int mxcsr; \
- asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
- (e)->__mxcsr = mxcsr; \
- mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \
- asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
- } while (0)
-#endif
#undef libc_feholdexcept_setroundf
#define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r)
// #define libc_feholdexcept_setroundl(e, r) ...
#undef libc_fetestexcept
-#ifdef __AVX__
-# define libc_fetestexcept(e) \
- ({ unsigned int mxcsr; asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \
+#define libc_fetestexcept(e) \
+ ({ unsigned int mxcsr; \
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr & (e) & FE_ALL_EXCEPT; })
-#else
-# define libc_fetestexcept(e) \
- ({ unsigned int mxcsr; asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
- mxcsr & (e) & FE_ALL_EXCEPT; })
-#endif
#undef libc_fetestexceptf
#define libc_fetestexceptf(e) libc_fetestexcept (e)
// #define libc_fetestexceptl(e) fetestexcept (e)
#undef libc_fesetenv
-#ifdef __AVX__
-# define libc_fesetenv(e) \
- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr))
-#else
-# define libc_fesetenv(e) \
- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr))
-#endif
+#define libc_fesetenv(e) \
+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr))
#undef libc_fesetenvf
#define libc_fesetenvf(e) libc_fesetenv (e)
// #define libc_fesetenvl(e) (void) fesetenv (e)
#undef libc_feupdateenv
-#ifdef __AVX__
-# define libc_feupdateenv(e) \
+#define libc_feupdateenv(e) \
do { \
unsigned int mxcsr; \
- asm volatile ("vstmxcsr %0" : "=m" (*&mxcsr)); \
- asm volatile ("vldmxcsr %0" : : "m" ((e)->__mxcsr)); \
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
+ asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)); \
__feraiseexcept (mxcsr & FE_ALL_EXCEPT); \
} while (0)
-#else
-# define libc_feupdateenv(e) \
- do { \
- unsigned int mxcsr; \
- asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
- asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)); \
- __feraiseexcept (mxcsr & FE_ALL_EXCEPT); \
- } while (0)
-#endif
#undef libc_feupdateenvf
#define libc_feupdateenvf(e) libc_feupdateenv (e)
// #define libc_feupdateenvl(e) (void) feupdateenv (e)
Index: glibc-2.15/config.h.in
===================================================================
--- glibc-2.15.orig/config.h.in
@@ -419,25 +101,6 @@ Index: glibc-2.15/config.h.in
include/libc-symbols.h that avoid PLT slots in the shared objects. */
#undef NO_HIDDEN
Index: glibc-2.15/math/math_private.h
===================================================================
--- glibc-2.15.orig/math/math_private.h
+++ glibc-2.15/math/math_private.h
@@ -365,14 +365,6 @@ extern void __docos (double __x, double
know what operations are going to be performed. Therefore we
define additional interfaces. By default they refer to the normal
interfaces. */
-#define libc_fegetround() fegetround ()
-#define libc_fegetroundf() fegetround ()
-#define libc_fegetroundl() fegetround ()
-
-#define libc_fesetround(r) (void) fesetround (r)
-#define libc_fesetroundf(r) (void) fesetround (r)
-#define libc_fesetroundl(r) (void) fesetround (r)
-
#define libc_feholdexcept(e) (void) feholdexcept (e)
#define libc_feholdexceptf(e) (void) feholdexcept (e)
#define libc_feholdexceptl(e) (void) feholdexcept (e)
Index: glibc-2.15/sysdeps/i386/configure
===================================================================
--- glibc-2.15.orig/sysdeps/i386/configure
@@ -567,3 +230,19 @@ Index: glibc-2.15/configure.in
AC_SUBST(libc_cv_cc_novzeroupper)
AC_SUBST(libc_cv_cc_fma4)
AC_SUBST(libc_cv_as_i686)
--- eglibc-2.15-fma4-orig/sysdeps/x86_64/multiarch/init-arch.c 2012-05-07 10:02:38.869334673 -0700
+++ eglibc-2.15-fma4-orig/sysdeps/x86_64/multiarch/init-arch.c 2012-05-07 10:01:16.217334959 -0700
@@ -156,6 +156,11 @@
__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
}
+ /* FMA4 depends on AVX support */
+ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4
+ && (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) == 0)
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx &= ~bit_FMA4;
+
__cpu_features.family = family;
__cpu_features.model = model;
atomic_write_barrier ();

View File

@@ -1,8 +1,8 @@
diff --git a/malloc/mcheck.c b/malloc/mcheck.c
index 9213740..7491c49 100644
--- a/malloc/mcheck.c
+++ b/malloc/mcheck.c
@@ -370,6 +370,12 @@ mabort (enum mcheck_status status)
Index: glibc-2.15/malloc/mcheck.c
===================================================================
--- glibc-2.15.orig/malloc/mcheck.c
+++ glibc-2.15/malloc/mcheck.c
@@ -371,6 +371,12 @@ mabort (enum mcheck_status status)
#endif
}
@@ -15,7 +15,7 @@ index 9213740..7491c49 100644
int
mcheck (func)
void (*func) (enum mcheck_status);
@@ -381,6 +387,7 @@ mcheck (func)
@@ -382,6 +388,7 @@ mcheck (func)
{
/* We call malloc() once here to ensure it is initialized. */
void *p = malloc (0);

View File

@@ -1,3 +1,8 @@
-------------------------------------------------------------------
Tue May 8 14:00:52 UTC 2012 - aj@suse.de
- Refresh glibc-2.16-avx.patch and handle FMA4.
-------------------------------------------------------------------
Thu May 3 11:05:36 UTC 2012 - aj@suse.de