SHA256
1
0
forked from pool/glibc
glibc/0004-x86_64-Set-DL_RUNTIME_UNALIGNED_VEC_SIZE-to-8.patch

102 lines
3.8 KiB
Diff

2016-02-22 H.J. Lu <hongjiu.lu@intel.com>
[BZ #19679]
* sysdeps/x86_64/dl-trampoline.S (DL_RUNIME_UNALIGNED_VEC_SIZE):
Renamed to ...
(DL_RUNTIME_UNALIGNED_VEC_SIZE): This. Set to 8.
(DL_RUNIME_RESOLVE_REALIGN_STACK): Renamed to ...
(DL_RUNTIME_RESOLVE_REALIGN_STACK): This. Updated.
(DL_RUNIME_RESOLVE_REALIGN_STACK): Renamed to ...
(DL_RUNTIME_RESOLVE_REALIGN_STACK): This.
* sysdeps/x86_64/dl-trampoline.h
(DL_RUNIME_RESOLVE_REALIGN_STACK): Renamed to ...
(DL_RUNTIME_RESOLVE_REALIGN_STACK): This.
Index: glibc-2.23/sysdeps/x86_64/dl-trampoline.S
===================================================================
--- glibc-2.23.orig/sysdeps/x86_64/dl-trampoline.S
+++ glibc-2.23/sysdeps/x86_64/dl-trampoline.S
@@ -33,15 +33,19 @@
# define DL_STACK_ALIGNMENT 8
#endif
-#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE
-/* The maximum size of unaligned vector load and store. */
-# define DL_RUNIME_UNALIGNED_VEC_SIZE 16
+#ifndef DL_RUNTIME_UNALIGNED_VEC_SIZE
+/* The maximum size in bytes of unaligned vector load and store in the
+ dynamic linker. Since SSE optimized memory/string functions with
+ aligned SSE register load and store are used in the dynamic linker,
+ we must set this to 8 so that _dl_runtime_resolve_sse will align the
+ stack before calling _dl_fixup. */
+# define DL_RUNTIME_UNALIGNED_VEC_SIZE 8
#endif
/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */
-#define DL_RUNIME_RESOLVE_REALIGN_STACK \
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
(VEC_SIZE > DL_STACK_ALIGNMENT \
- && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE)
+ && VEC_SIZE > DL_RUNTIME_UNALIGNED_VEC_SIZE)
/* Align vector register save area to 16 bytes. */
#define REGISTER_SAVE_VEC_OFF 0
@@ -76,7 +80,7 @@
#ifdef HAVE_AVX512_ASM_SUPPORT
# define VEC_SIZE 64
# define VMOVA vmovdqa64
-# if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
# define VMOV vmovdqa64
# else
# define VMOV vmovdqu64
@@ -100,7 +104,7 @@ strong_alias (_dl_runtime_profile_avx, _
#define VEC_SIZE 32
#define VMOVA vmovdqa
-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
# define VMOV vmovdqa
#else
# define VMOV vmovdqu
@@ -119,7 +123,7 @@ strong_alias (_dl_runtime_profile_avx, _
/* movaps/movups is 1-byte shorter. */
#define VEC_SIZE 16
#define VMOVA movaps
-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
# define VMOV movaps
#else
# define VMOV movups
Index: glibc-2.23/sysdeps/x86_64/dl-trampoline.h
===================================================================
--- glibc-2.23.orig/sysdeps/x86_64/dl-trampoline.h
+++ glibc-2.23/sysdeps/x86_64/dl-trampoline.h
@@ -30,7 +30,7 @@
#undef REGISTER_SAVE_AREA
#undef LOCAL_STORAGE_AREA
#undef BASE
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8)
/* Local stack area before jumping to function address: RBX. */
# define LOCAL_STORAGE_AREA 8
@@ -57,7 +57,7 @@
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
# if LOCAL_STORAGE_AREA != 8
# error LOCAL_STORAGE_AREA must be 8
# endif
@@ -146,7 +146,7 @@ _dl_runtime_resolve:
VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
mov %RBX_LP, %RSP_LP
cfi_def_cfa_register(%rsp)
movq (%rsp), %rbx