forked from pool/glibc
Accepting request 535960 from home:Andreas_Schwab:Factory
- math-c++-compat.patch: Add more C++ compatibility (BZ #22296) - malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ #22111) - falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove for Qualcomm Falkor - aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling - nss-files-large-buffers.patch: Avoid large buffers with many host addresses (BZ #22078) - sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ #22321) - glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332) - dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265) OBS-URL: https://build.opensuse.org/request/show/535960 OBS-URL: https://build.opensuse.org/package/show/Base:System/glibc?expand=0&rev=483
This commit is contained in:
parent
787f325423
commit
a41899225a
18
aarch64-cpu-features.patch
Normal file
18
aarch64-cpu-features.patch
Normal file
@ -0,0 +1,18 @@
|
||||
2017-10-10 Steve Ellcey <sellcey@cavium.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/aarch64/cpu-features.c (get_midr_from_mcpu):
|
||||
Use strcmp instead of tunable_is_name.
|
||||
|
||||
Index: glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
||||
+++ glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
||||
@@ -37,7 +37,7 @@ static uint64_t
|
||||
get_midr_from_mcpu (const char *mcpu)
|
||||
{
|
||||
for (int i = 0; i < sizeof (cpu_list) / sizeof (struct cpu_list); i++)
|
||||
- if (tunable_is_name (mcpu, cpu_list[i].name) == 0)
|
||||
+ if (strcmp (mcpu, cpu_list[i].name) == 0)
|
||||
return cpu_list[i].midr;
|
||||
|
||||
return UINT64_MAX;
|
851
dl-runtime-resolve-xsave.patch
Normal file
851
dl-runtime-resolve-xsave.patch
Normal file
@ -0,0 +1,851 @@
|
||||
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
[BZ #21265]
|
||||
* sysdeps/x86/cpu-features-offsets.sym (XSAVE_STATE_SIZE_OFFSET):
|
||||
New.
|
||||
* sysdeps/x86/cpu-features.c: Include <libc-pointer-arith.h>.
|
||||
(get_common_indeces): Set xsave_state_size, xsave_state_full_size
|
||||
and bit_arch_XSAVEC_Usable if needed.
|
||||
(init_cpu_features): Remove bit_arch_Use_dl_runtime_resolve_slow
|
||||
and bit_arch_Use_dl_runtime_resolve_opt.
|
||||
* sysdeps/x86/cpu-features.h (bit_arch_Use_dl_runtime_resolve_opt):
|
||||
Removed.
|
||||
(bit_arch_Use_dl_runtime_resolve_slow): Likewise.
|
||||
(bit_arch_Prefer_No_AVX512): Updated.
|
||||
(bit_arch_MathVec_Prefer_No_AVX512): Likewise.
|
||||
(bit_arch_XSAVEC_Usable): New.
|
||||
(STATE_SAVE_OFFSET): Likewise.
|
||||
(STATE_SAVE_MASK): Likewise.
|
||||
[__ASSEMBLER__]: Include <cpu-features-offsets.h>.
|
||||
(cpu_features): Add xsave_state_size and xsave_state_full_size.
|
||||
(index_arch_Use_dl_runtime_resolve_opt): Removed.
|
||||
(index_arch_Use_dl_runtime_resolve_slow): Likewise.
|
||||
(index_arch_XSAVEC_Usable): New.
|
||||
* sysdeps/x86/cpu-tunables.c (TUNABLE_CALLBACK (set_hwcaps)):
|
||||
Support XSAVEC_Usable. Remove Use_dl_runtime_resolve_slow.
|
||||
* sysdeps/x86_64/Makefile (tst-x86_64-1-ENV): New if tunables
|
||||
is enabled.
|
||||
* sysdeps/x86_64/dl-machine.h (elf_machine_runtime_setup):
|
||||
Replace _dl_runtime_resolve_sse, _dl_runtime_resolve_avx,
|
||||
_dl_runtime_resolve_avx_slow, _dl_runtime_resolve_avx_opt,
|
||||
_dl_runtime_resolve_avx512 and _dl_runtime_resolve_avx512_opt
|
||||
with _dl_runtime_resolve_fxsave, _dl_runtime_resolve_xsave and
|
||||
_dl_runtime_resolve_xsavec.
|
||||
* sysdeps/x86_64/dl-trampoline.S (DL_RUNTIME_UNALIGNED_VEC_SIZE):
|
||||
Removed.
|
||||
(DL_RUNTIME_RESOLVE_REALIGN_STACK): Check STATE_SAVE_ALIGNMENT
|
||||
instead of VEC_SIZE.
|
||||
(REGISTER_SAVE_BND0): Removed.
|
||||
(REGISTER_SAVE_BND1, REGISTER_SAVE_BND2): Likewise.
|
||||
(REGISTER_SAVE_BND3): Likewise.
|
||||
(REGISTER_SAVE_RAX): Always defined to 0.
|
||||
(VMOV): Removed.
|
||||
(_dl_runtime_resolve_avx): Likewise.
|
||||
(_dl_runtime_resolve_avx_slow): Likewise.
|
||||
(_dl_runtime_resolve_avx_opt): Likewise.
|
||||
(_dl_runtime_resolve_avx512): Likewise.
|
||||
(_dl_runtime_resolve_avx512_opt): Likewise.
|
||||
(_dl_runtime_resolve_sse): Likewise.
|
||||
(_dl_runtime_resolve_sse_vex): Likewise.
|
||||
(USE_FXSAVE): New.
|
||||
(_dl_runtime_resolve_fxsave): Likewise.
|
||||
(USE_XSAVE): Likewise.
|
||||
(_dl_runtime_resolve_xsave): Likewise.
|
||||
(USE_XSAVEC): Likewise.
|
||||
(_dl_runtime_resolve_xsavec): Likewise.
|
||||
* sysdeps/x86_64/dl-trampoline.h (_dl_runtime_resolve_avx512):
|
||||
Removed.
|
||||
(_dl_runtime_resolve_avx512_opt): Likewise.
|
||||
(_dl_runtime_resolve_avx): Likewise.
|
||||
(_dl_runtime_resolve_avx_opt): Likewise.
|
||||
(_dl_runtime_resolve_sse): Likewise.
|
||||
(_dl_runtime_resolve_sse_vex): Likewise.
|
||||
(_dl_runtime_resolve_fxsave): New.
|
||||
(_dl_runtime_resolve_xsave): Likewise.
|
||||
(_dl_runtime_resolve_xsavec): Likewise.
|
||||
|
||||
Index: glibc-2.26/sysdeps/x86/cpu-features-offsets.sym
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86/cpu-features-offsets.sym
|
||||
+++ glibc-2.26/sysdeps/x86/cpu-features-offsets.sym
|
||||
@@ -15,6 +15,7 @@ CPUID_ECX_OFFSET offsetof (struct cpuid_
|
||||
CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx)
|
||||
FAMILY_OFFSET offsetof (struct cpu_features, family)
|
||||
MODEL_OFFSET offsetof (struct cpu_features, model)
|
||||
+XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
|
||||
FEATURE_OFFSET offsetof (struct cpu_features, feature)
|
||||
FEATURE_SIZE sizeof (unsigned int)
|
||||
|
||||
Index: glibc-2.26/sysdeps/x86/cpu-features.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86/cpu-features.c
|
||||
+++ glibc-2.26/sysdeps/x86/cpu-features.c
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <cpuid.h>
|
||||
#include <cpu-features.h>
|
||||
#include <dl-hwcap.h>
|
||||
+#include <libc-pointer-arith.h>
|
||||
|
||||
#if HAVE_TUNABLES
|
||||
# define TUNABLE_NAMESPACE tune
|
||||
@@ -103,6 +104,76 @@ get_common_indeces (struct cpu_features
|
||||
}
|
||||
}
|
||||
}
|
||||
+
|
||||
+ /* For _dl_runtime_resolve, set xsave_state_size to xsave area
|
||||
+ size + integer register save size and align it to 64 bytes. */
|
||||
+ if (cpu_features->max_cpuid >= 0xd)
|
||||
+ {
|
||||
+ unsigned int eax, ebx, ecx, edx;
|
||||
+
|
||||
+ __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
|
||||
+ if (ebx != 0)
|
||||
+ {
|
||||
+ unsigned int xsave_state_full_size
|
||||
+ = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
|
||||
+
|
||||
+ cpu_features->xsave_state_size
|
||||
+ = xsave_state_full_size;
|
||||
+ cpu_features->xsave_state_full_size
|
||||
+ = xsave_state_full_size;
|
||||
+
|
||||
+ __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
|
||||
+
|
||||
+ /* Check if XSAVEC is available. */
|
||||
+ if ((eax & (1 << 1)) != 0)
|
||||
+ {
|
||||
+ unsigned int xstate_comp_offsets[32];
|
||||
+ unsigned int xstate_comp_sizes[32];
|
||||
+ unsigned int i;
|
||||
+
|
||||
+ xstate_comp_offsets[0] = 0;
|
||||
+ xstate_comp_offsets[1] = 160;
|
||||
+ xstate_comp_offsets[2] = 576;
|
||||
+ xstate_comp_sizes[0] = 160;
|
||||
+ xstate_comp_sizes[1] = 256;
|
||||
+
|
||||
+ for (i = 2; i < 32; i++)
|
||||
+ {
|
||||
+ if ((STATE_SAVE_MASK & (1 << i)) != 0)
|
||||
+ {
|
||||
+ __cpuid_count (0xd, i, eax, ebx, ecx, edx);
|
||||
+ xstate_comp_sizes[i] = eax;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ ecx = 0;
|
||||
+ xstate_comp_sizes[i] = 0;
|
||||
+ }
|
||||
+
|
||||
+ if (i > 2)
|
||||
+ {
|
||||
+ xstate_comp_offsets[i]
|
||||
+ = (xstate_comp_offsets[i - 1]
|
||||
+ + xstate_comp_sizes[i -1]);
|
||||
+ if ((ecx & (1 << 1)) != 0)
|
||||
+ xstate_comp_offsets[i]
|
||||
+ = ALIGN_UP (xstate_comp_offsets[i], 64);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Use XSAVEC. */
|
||||
+ unsigned int size
|
||||
+ = xstate_comp_offsets[31] + xstate_comp_sizes[31];
|
||||
+ if (size)
|
||||
+ {
|
||||
+ cpu_features->xsave_state_size
|
||||
+ = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
|
||||
+ cpu_features->feature[index_arch_XSAVEC_Usable]
|
||||
+ |= bit_arch_XSAVEC_Usable;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,23 +313,6 @@ init_cpu_features (struct cpu_features *
|
||||
else
|
||||
cpu_features->feature[index_arch_Prefer_No_AVX512]
|
||||
|= bit_arch_Prefer_No_AVX512;
|
||||
-
|
||||
- /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
|
||||
- If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt.
|
||||
- Use _dl_runtime_resolve_opt only with AVX512F since it is
|
||||
- slower than _dl_runtime_resolve_slow with AVX. */
|
||||
- cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow]
|
||||
- |= bit_arch_Use_dl_runtime_resolve_slow;
|
||||
- if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
|
||||
- && cpu_features->max_cpuid >= 0xd)
|
||||
- {
|
||||
- unsigned int eax;
|
||||
-
|
||||
- __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
|
||||
- if ((eax & (1 << 2)) != 0)
|
||||
- cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt]
|
||||
- |= bit_arch_Use_dl_runtime_resolve_opt;
|
||||
- }
|
||||
}
|
||||
/* This spells out "AuthenticAMD". */
|
||||
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
|
||||
Index: glibc-2.26/sysdeps/x86/cpu-features.h
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86/cpu-features.h
|
||||
+++ glibc-2.26/sysdeps/x86/cpu-features.h
|
||||
@@ -37,9 +37,8 @@
|
||||
#define bit_arch_Prefer_No_VZEROUPPER (1 << 17)
|
||||
#define bit_arch_Fast_Unaligned_Copy (1 << 18)
|
||||
#define bit_arch_Prefer_ERMS (1 << 19)
|
||||
-#define bit_arch_Use_dl_runtime_resolve_opt (1 << 20)
|
||||
-#define bit_arch_Use_dl_runtime_resolve_slow (1 << 21)
|
||||
-#define bit_arch_Prefer_No_AVX512 (1 << 22)
|
||||
+#define bit_arch_Prefer_No_AVX512 (1 << 20)
|
||||
+#define bit_arch_XSAVEC_Usable (1 << 21)
|
||||
|
||||
/* CPUID Feature flags. */
|
||||
|
||||
@@ -88,6 +87,15 @@
|
||||
/* The current maximum size of the feature integer bit array. */
|
||||
#define FEATURE_INDEX_MAX 1
|
||||
|
||||
+/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
|
||||
+ space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
|
||||
+ aligned to 16 bytes for fxsave and 64 bytes for xsave. */
|
||||
+#define STATE_SAVE_OFFSET (8 * 7 + 8)
|
||||
+
|
||||
+/* Save SSE, AVX, AVX512, mask and bound registers. */
|
||||
+#define STATE_SAVE_MASK \
|
||||
+ ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
|
||||
+
|
||||
#ifdef __ASSEMBLER__
|
||||
|
||||
# include <cpu-features-offsets.h>
|
||||
@@ -123,8 +131,6 @@
|
||||
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_arch_Prefer_ERMS FEATURE_INDEX_1*FEATURE_SIZE
|
||||
-# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE
|
||||
-# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1*FEATURE_SIZE
|
||||
|
||||
|
||||
@@ -214,6 +220,18 @@ struct cpu_features
|
||||
} cpuid[COMMON_CPUID_INDEX_MAX];
|
||||
unsigned int family;
|
||||
unsigned int model;
|
||||
+ /* The state size for XSAVEC or XSAVE. The type must be unsigned long
|
||||
+ int so that we use
|
||||
+
|
||||
+ sub xsave_state_size_offset(%rip) %RSP_LP
|
||||
+
|
||||
+ in _dl_runtime_resolve. */
|
||||
+ unsigned long int xsave_state_size;
|
||||
+ /* The full state size for XSAVE when XSAVEC is disabled by
|
||||
+
|
||||
+ GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable
|
||||
+ */
|
||||
+ unsigned int xsave_state_full_size;
|
||||
unsigned int feature[FEATURE_INDEX_MAX];
|
||||
/* Data cache size for use in memory and string routines, typically
|
||||
L1 size. */
|
||||
@@ -326,9 +344,8 @@ extern const struct cpu_features *__get_
|
||||
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
|
||||
# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1
|
||||
# define index_arch_Prefer_ERMS FEATURE_INDEX_1
|
||||
-# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1
|
||||
-# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1
|
||||
# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1
|
||||
+# define index_arch_XSAVEC_Usable FEATURE_INDEX_1
|
||||
|
||||
#endif /* !__ASSEMBLER__ */
|
||||
|
||||
Index: glibc-2.26/sysdeps/x86/cpu-tunables.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86/cpu-tunables.c
|
||||
+++ glibc-2.26/sysdeps/x86/cpu-tunables.c
|
||||
@@ -240,6 +240,16 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_v
|
||||
Slow_SSE4_2, SSE4_2,
|
||||
disable, 11);
|
||||
break;
|
||||
+ case 13:
|
||||
+ if (disable)
|
||||
+ {
|
||||
+ /* Update xsave_state_size to XSAVE state size. */
|
||||
+ cpu_features->xsave_state_size
|
||||
+ = cpu_features->xsave_state_full_size;
|
||||
+ CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
|
||||
+ XSAVEC_Usable, 13);
|
||||
+ }
|
||||
+ break;
|
||||
case 14:
|
||||
if (disable)
|
||||
{
|
||||
@@ -308,13 +318,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_v
|
||||
disable, 26);
|
||||
}
|
||||
break;
|
||||
- case 27:
|
||||
- {
|
||||
- CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
|
||||
- Use_dl_runtime_resolve_slow,
|
||||
- disable, 27);
|
||||
- }
|
||||
- break;
|
||||
}
|
||||
p += len + 1;
|
||||
}
|
||||
Index: glibc-2.26/sysdeps/x86_64/Makefile
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86_64/Makefile
|
||||
+++ glibc-2.26/sysdeps/x86_64/Makefile
|
||||
@@ -55,6 +55,10 @@ CFLAGS-tst-quad2pie.c = $(PIE-ccflag)
|
||||
tests += tst-x86_64-1
|
||||
modules-names += x86_64/tst-x86_64mod-1
|
||||
LDFLAGS-tst-x86_64mod-1.so = -Wl,-soname,tst-x86_64mod-1.so
|
||||
+ifneq (no,$(have-tunables))
|
||||
+# Test the state size for XSAVE when XSAVEC is disabled.
|
||||
+tst-x86_64-1-ENV = GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable
|
||||
+endif
|
||||
|
||||
$(objpfx)tst-x86_64-1: $(objpfx)x86_64/tst-x86_64mod-1.so
|
||||
|
||||
Index: glibc-2.26/sysdeps/x86_64/dl-machine.h
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86_64/dl-machine.h
|
||||
+++ glibc-2.26/sysdeps/x86_64/dl-machine.h
|
||||
@@ -66,12 +66,9 @@ static inline int __attribute__ ((unused
|
||||
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
|
||||
{
|
||||
Elf64_Addr *got;
|
||||
- extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden;
|
||||
- extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden;
|
||||
- extern void _dl_runtime_resolve_avx_slow (ElfW(Word)) attribute_hidden;
|
||||
- extern void _dl_runtime_resolve_avx_opt (ElfW(Word)) attribute_hidden;
|
||||
- extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden;
|
||||
- extern void _dl_runtime_resolve_avx512_opt (ElfW(Word)) attribute_hidden;
|
||||
+ extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
|
||||
+ extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
|
||||
+ extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
|
||||
extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
|
||||
extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
|
||||
extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
|
||||
@@ -120,29 +117,14 @@ elf_machine_runtime_setup (struct link_m
|
||||
/* This function will get called to fix up the GOT entry
|
||||
indicated by the offset on the stack, and then jump to
|
||||
the resolved address. */
|
||||
- if (HAS_ARCH_FEATURE (AVX512F_Usable))
|
||||
- {
|
||||
- if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt))
|
||||
- *(ElfW(Addr) *) (got + 2)
|
||||
- = (ElfW(Addr)) &_dl_runtime_resolve_avx512_opt;
|
||||
- else
|
||||
- *(ElfW(Addr) *) (got + 2)
|
||||
- = (ElfW(Addr)) &_dl_runtime_resolve_avx512;
|
||||
- }
|
||||
- else if (HAS_ARCH_FEATURE (AVX_Usable))
|
||||
- {
|
||||
- if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt))
|
||||
- *(ElfW(Addr) *) (got + 2)
|
||||
- = (ElfW(Addr)) &_dl_runtime_resolve_avx_opt;
|
||||
- else if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_slow))
|
||||
- *(ElfW(Addr) *) (got + 2)
|
||||
- = (ElfW(Addr)) &_dl_runtime_resolve_avx_slow;
|
||||
- else
|
||||
- *(ElfW(Addr) *) (got + 2)
|
||||
- = (ElfW(Addr)) &_dl_runtime_resolve_avx;
|
||||
- }
|
||||
+ if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
|
||||
+ *(ElfW(Addr) *) (got + 2)
|
||||
+ = (HAS_ARCH_FEATURE (XSAVEC_Usable)
|
||||
+ ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
|
||||
+ : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
|
||||
else
|
||||
- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse;
|
||||
+ *(ElfW(Addr) *) (got + 2)
|
||||
+ = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
|
||||
}
|
||||
}
|
||||
|
||||
Index: glibc-2.26/sysdeps/x86_64/dl-trampoline.S
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86_64/dl-trampoline.S
|
||||
+++ glibc-2.26/sysdeps/x86_64/dl-trampoline.S
|
||||
@@ -34,41 +34,24 @@
|
||||
# define DL_STACK_ALIGNMENT 8
|
||||
#endif
|
||||
|
||||
-#ifndef DL_RUNTIME_UNALIGNED_VEC_SIZE
|
||||
-/* The maximum size in bytes of unaligned vector load and store in the
|
||||
- dynamic linker. Since SSE optimized memory/string functions with
|
||||
- aligned SSE register load and store are used in the dynamic linker,
|
||||
- we must set this to 8 so that _dl_runtime_resolve_sse will align the
|
||||
- stack before calling _dl_fixup. */
|
||||
-# define DL_RUNTIME_UNALIGNED_VEC_SIZE 8
|
||||
-#endif
|
||||
-
|
||||
-/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */
|
||||
+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
|
||||
+ stack to 16 bytes before calling _dl_fixup. */
|
||||
#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
|
||||
- (VEC_SIZE > DL_STACK_ALIGNMENT \
|
||||
- && VEC_SIZE > DL_RUNTIME_UNALIGNED_VEC_SIZE)
|
||||
-
|
||||
-/* Align vector register save area to 16 bytes. */
|
||||
-#define REGISTER_SAVE_VEC_OFF 0
|
||||
+ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
|
||||
+ || 16 > DL_STACK_ALIGNMENT)
|
||||
|
||||
/* Area on stack to save and restore registers used for parameter
|
||||
passing when calling _dl_fixup. */
|
||||
#ifdef __ILP32__
|
||||
-# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
|
||||
# define PRESERVE_BND_REGS_PREFIX
|
||||
#else
|
||||
-/* Align bound register save area to 16 bytes. */
|
||||
-# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
|
||||
-# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
|
||||
-# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
|
||||
-# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
|
||||
-# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
|
||||
# ifdef HAVE_MPX_SUPPORT
|
||||
# define PRESERVE_BND_REGS_PREFIX bnd
|
||||
# else
|
||||
# define PRESERVE_BND_REGS_PREFIX .byte 0xf2
|
||||
# endif
|
||||
#endif
|
||||
+#define REGISTER_SAVE_RAX 0
|
||||
#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
|
||||
#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
|
||||
#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
|
||||
@@ -80,68 +63,56 @@
|
||||
|
||||
#define VEC_SIZE 64
|
||||
#define VMOVA vmovdqa64
|
||||
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
|
||||
-# define VMOV vmovdqa64
|
||||
-#else
|
||||
-# define VMOV vmovdqu64
|
||||
-#endif
|
||||
#define VEC(i) zmm##i
|
||||
-#define _dl_runtime_resolve _dl_runtime_resolve_avx512
|
||||
#define _dl_runtime_profile _dl_runtime_profile_avx512
|
||||
#include "dl-trampoline.h"
|
||||
-#undef _dl_runtime_resolve
|
||||
#undef _dl_runtime_profile
|
||||
#undef VEC
|
||||
-#undef VMOV
|
||||
#undef VMOVA
|
||||
#undef VEC_SIZE
|
||||
|
||||
#define VEC_SIZE 32
|
||||
#define VMOVA vmovdqa
|
||||
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
|
||||
-# define VMOV vmovdqa
|
||||
-#else
|
||||
-# define VMOV vmovdqu
|
||||
-#endif
|
||||
#define VEC(i) ymm##i
|
||||
-#define _dl_runtime_resolve _dl_runtime_resolve_avx
|
||||
-#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx_opt
|
||||
#define _dl_runtime_profile _dl_runtime_profile_avx
|
||||
#include "dl-trampoline.h"
|
||||
-#undef _dl_runtime_resolve
|
||||
-#undef _dl_runtime_resolve_opt
|
||||
#undef _dl_runtime_profile
|
||||
#undef VEC
|
||||
-#undef VMOV
|
||||
#undef VMOVA
|
||||
#undef VEC_SIZE
|
||||
|
||||
/* movaps/movups is 1-byte shorter. */
|
||||
#define VEC_SIZE 16
|
||||
#define VMOVA movaps
|
||||
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
|
||||
-# define VMOV movaps
|
||||
-#else
|
||||
-# define VMOV movups
|
||||
-#endif
|
||||
#define VEC(i) xmm##i
|
||||
-#define _dl_runtime_resolve _dl_runtime_resolve_sse
|
||||
#define _dl_runtime_profile _dl_runtime_profile_sse
|
||||
#undef RESTORE_AVX
|
||||
#include "dl-trampoline.h"
|
||||
-#undef _dl_runtime_resolve
|
||||
#undef _dl_runtime_profile
|
||||
-#undef VMOV
|
||||
+#undef VEC
|
||||
#undef VMOVA
|
||||
+#undef VEC_SIZE
|
||||
|
||||
-/* Used by _dl_runtime_resolve_avx_opt/_dl_runtime_resolve_avx512_opt
|
||||
- to preserve the full vector registers with zero upper bits. */
|
||||
-#define VMOVA vmovdqa
|
||||
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
|
||||
-# define VMOV vmovdqa
|
||||
-#else
|
||||
-# define VMOV vmovdqu
|
||||
-#endif
|
||||
-#define _dl_runtime_resolve _dl_runtime_resolve_sse_vex
|
||||
-#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx512_opt
|
||||
+#define USE_FXSAVE
|
||||
+#define STATE_SAVE_ALIGNMENT 16
|
||||
+#define _dl_runtime_resolve _dl_runtime_resolve_fxsave
|
||||
+#include "dl-trampoline.h"
|
||||
+#undef _dl_runtime_resolve
|
||||
+#undef USE_FXSAVE
|
||||
+#undef STATE_SAVE_ALIGNMENT
|
||||
+
|
||||
+#define USE_XSAVE
|
||||
+#define STATE_SAVE_ALIGNMENT 64
|
||||
+#define _dl_runtime_resolve _dl_runtime_resolve_xsave
|
||||
+#include "dl-trampoline.h"
|
||||
+#undef _dl_runtime_resolve
|
||||
+#undef USE_XSAVE
|
||||
+#undef STATE_SAVE_ALIGNMENT
|
||||
+
|
||||
+#define USE_XSAVEC
|
||||
+#define STATE_SAVE_ALIGNMENT 64
|
||||
+#define _dl_runtime_resolve _dl_runtime_resolve_xsavec
|
||||
#include "dl-trampoline.h"
|
||||
+#undef _dl_runtime_resolve
|
||||
+#undef USE_XSAVEC
|
||||
+#undef STATE_SAVE_ALIGNMENT
|
||||
Index: glibc-2.26/sysdeps/x86_64/dl-trampoline.h
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/x86_64/dl-trampoline.h
|
||||
+++ glibc-2.26/sysdeps/x86_64/dl-trampoline.h
|
||||
@@ -16,140 +16,47 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
-#undef REGISTER_SAVE_AREA_RAW
|
||||
-#ifdef __ILP32__
|
||||
-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
|
||||
- VEC7. */
|
||||
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8)
|
||||
-#else
|
||||
-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
|
||||
- BND0, BND1, BND2, BND3 and VEC0 to VEC7. */
|
||||
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8)
|
||||
-#endif
|
||||
+ .text
|
||||
+#ifdef _dl_runtime_resolve
|
||||
|
||||
-#undef REGISTER_SAVE_AREA
|
||||
-#undef LOCAL_STORAGE_AREA
|
||||
-#undef BASE
|
||||
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
-# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8)
|
||||
-/* Local stack area before jumping to function address: RBX. */
|
||||
-# define LOCAL_STORAGE_AREA 8
|
||||
-# define BASE rbx
|
||||
-# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
|
||||
-# error REGISTER_SAVE_AREA must be multples of VEC_SIZE
|
||||
-# endif
|
||||
-#else
|
||||
-# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW
|
||||
-/* Local stack area before jumping to function address: All saved
|
||||
- registers. */
|
||||
-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
|
||||
-# define BASE rsp
|
||||
-# if (REGISTER_SAVE_AREA % 16) != 8
|
||||
-# error REGISTER_SAVE_AREA must be odd multples of 8
|
||||
-# endif
|
||||
-#endif
|
||||
+# undef REGISTER_SAVE_AREA
|
||||
+# undef LOCAL_STORAGE_AREA
|
||||
+# undef BASE
|
||||
|
||||
- .text
|
||||
-#ifdef _dl_runtime_resolve_opt
|
||||
-/* Use the smallest vector registers to preserve the full YMM/ZMM
|
||||
- registers to avoid SSE transition penalty. */
|
||||
+# if (STATE_SAVE_ALIGNMENT % 16) != 0
|
||||
+# error STATE_SAVE_ALIGNMENT must be multples of 16
|
||||
+# endif
|
||||
|
||||
-# if VEC_SIZE == 32
|
||||
-/* Check if the upper 128 bits in %ymm0 - %ymm7 registers are non-zero
|
||||
- and preserve %xmm0 - %xmm7 registers with the zero upper bits. Since
|
||||
- there is no SSE transition penalty on AVX512 processors which don't
|
||||
- support XGETBV with ECX == 1, _dl_runtime_resolve_avx512_slow isn't
|
||||
- provided. */
|
||||
- .globl _dl_runtime_resolve_avx_slow
|
||||
- .hidden _dl_runtime_resolve_avx_slow
|
||||
- .type _dl_runtime_resolve_avx_slow, @function
|
||||
- .align 16
|
||||
-_dl_runtime_resolve_avx_slow:
|
||||
- cfi_startproc
|
||||
- cfi_adjust_cfa_offset(16) # Incorporate PLT
|
||||
- vorpd %ymm0, %ymm1, %ymm8
|
||||
- vorpd %ymm2, %ymm3, %ymm9
|
||||
- vorpd %ymm4, %ymm5, %ymm10
|
||||
- vorpd %ymm6, %ymm7, %ymm11
|
||||
- vorpd %ymm8, %ymm9, %ymm9
|
||||
- vorpd %ymm10, %ymm11, %ymm10
|
||||
- vpcmpeqd %xmm8, %xmm8, %xmm8
|
||||
- vorpd %ymm9, %ymm10, %ymm10
|
||||
- vptest %ymm10, %ymm8
|
||||
- # Preserve %ymm0 - %ymm7 registers if the upper 128 bits of any
|
||||
- # %ymm0 - %ymm7 registers aren't zero.
|
||||
- PRESERVE_BND_REGS_PREFIX
|
||||
- jnc _dl_runtime_resolve_avx
|
||||
- # Use vzeroupper to avoid SSE transition penalty.
|
||||
- vzeroupper
|
||||
- # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits
|
||||
- # when the upper 128 bits of %ymm0 - %ymm7 registers are zero.
|
||||
- PRESERVE_BND_REGS_PREFIX
|
||||
- jmp _dl_runtime_resolve_sse_vex
|
||||
- cfi_adjust_cfa_offset(-16) # Restore PLT adjustment
|
||||
- cfi_endproc
|
||||
- .size _dl_runtime_resolve_avx_slow, .-_dl_runtime_resolve_avx_slow
|
||||
+# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
|
||||
+# error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT
|
||||
# endif
|
||||
|
||||
-/* Use XGETBV with ECX == 1 to check which bits in vector registers are
|
||||
- non-zero and only preserve the non-zero lower bits with zero upper
|
||||
- bits. */
|
||||
- .globl _dl_runtime_resolve_opt
|
||||
- .hidden _dl_runtime_resolve_opt
|
||||
- .type _dl_runtime_resolve_opt, @function
|
||||
- .align 16
|
||||
-_dl_runtime_resolve_opt:
|
||||
- cfi_startproc
|
||||
- cfi_adjust_cfa_offset(16) # Incorporate PLT
|
||||
- pushq %rax
|
||||
- cfi_adjust_cfa_offset(8)
|
||||
- cfi_rel_offset(%rax, 0)
|
||||
- pushq %rcx
|
||||
- cfi_adjust_cfa_offset(8)
|
||||
- cfi_rel_offset(%rcx, 0)
|
||||
- pushq %rdx
|
||||
- cfi_adjust_cfa_offset(8)
|
||||
- cfi_rel_offset(%rdx, 0)
|
||||
- movl $1, %ecx
|
||||
- xgetbv
|
||||
- movl %eax, %r11d
|
||||
- popq %rdx
|
||||
- cfi_adjust_cfa_offset(-8)
|
||||
- cfi_restore (%rdx)
|
||||
- popq %rcx
|
||||
- cfi_adjust_cfa_offset(-8)
|
||||
- cfi_restore (%rcx)
|
||||
- popq %rax
|
||||
- cfi_adjust_cfa_offset(-8)
|
||||
- cfi_restore (%rax)
|
||||
-# if VEC_SIZE == 32
|
||||
- # For YMM registers, check if YMM state is in use.
|
||||
- andl $bit_YMM_state, %r11d
|
||||
- # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits if
|
||||
- # YMM state isn't in use.
|
||||
- PRESERVE_BND_REGS_PREFIX
|
||||
- jz _dl_runtime_resolve_sse_vex
|
||||
-# elif VEC_SIZE == 16
|
||||
- # For ZMM registers, check if YMM state and ZMM state are in
|
||||
- # use.
|
||||
- andl $(bit_YMM_state | bit_ZMM0_15_state), %r11d
|
||||
- cmpl $bit_YMM_state, %r11d
|
||||
- # Preserve %zmm0 - %zmm7 registers if ZMM state is in use.
|
||||
- PRESERVE_BND_REGS_PREFIX
|
||||
- jg _dl_runtime_resolve_avx512
|
||||
- # Preserve %ymm0 - %ymm7 registers with the zero upper 256 bits if
|
||||
- # ZMM state isn't in use.
|
||||
- PRESERVE_BND_REGS_PREFIX
|
||||
- je _dl_runtime_resolve_avx
|
||||
- # Preserve %xmm0 - %xmm7 registers with the zero upper 384 bits if
|
||||
- # neither YMM state nor ZMM state are in use.
|
||||
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
+/* Local stack area before jumping to function address: RBX. */
|
||||
+# define LOCAL_STORAGE_AREA 8
|
||||
+# define BASE rbx
|
||||
+# ifdef USE_FXSAVE
|
||||
+/* Use fxsave to save XMM registers. */
|
||||
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
|
||||
+# if (REGISTER_SAVE_AREA % 16) != 0
|
||||
+# error REGISTER_SAVE_AREA must be multples of 16
|
||||
+# endif
|
||||
+# endif
|
||||
# else
|
||||
-# error Unsupported VEC_SIZE!
|
||||
+# ifndef USE_FXSAVE
|
||||
+# error USE_FXSAVE must be defined
|
||||
+# endif
|
||||
+/* Use fxsave to save XMM registers. */
|
||||
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
|
||||
+/* Local stack area before jumping to function address: All saved
|
||||
+ registers. */
|
||||
+# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
|
||||
+# define BASE rsp
|
||||
+# if (REGISTER_SAVE_AREA % 16) != 8
|
||||
+# error REGISTER_SAVE_AREA must be odd multples of 8
|
||||
+# endif
|
||||
# endif
|
||||
- cfi_adjust_cfa_offset(-16) # Restore PLT adjustment
|
||||
- cfi_endproc
|
||||
- .size _dl_runtime_resolve_opt, .-_dl_runtime_resolve_opt
|
||||
-#endif
|
||||
+
|
||||
.globl _dl_runtime_resolve
|
||||
.hidden _dl_runtime_resolve
|
||||
.type _dl_runtime_resolve, @function
|
||||
@@ -157,21 +64,30 @@ _dl_runtime_resolve_opt:
|
||||
cfi_startproc
|
||||
_dl_runtime_resolve:
|
||||
cfi_adjust_cfa_offset(16) # Incorporate PLT
|
||||
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
-# if LOCAL_STORAGE_AREA != 8
|
||||
-# error LOCAL_STORAGE_AREA must be 8
|
||||
-# endif
|
||||
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
+# if LOCAL_STORAGE_AREA != 8
|
||||
+# error LOCAL_STORAGE_AREA must be 8
|
||||
+# endif
|
||||
pushq %rbx # push subtracts stack by 8.
|
||||
cfi_adjust_cfa_offset(8)
|
||||
cfi_rel_offset(%rbx, 0)
|
||||
mov %RSP_LP, %RBX_LP
|
||||
cfi_def_cfa_register(%rbx)
|
||||
- and $-VEC_SIZE, %RSP_LP
|
||||
-#endif
|
||||
+ and $-STATE_SAVE_ALIGNMENT, %RSP_LP
|
||||
+# endif
|
||||
+# ifdef REGISTER_SAVE_AREA
|
||||
sub $REGISTER_SAVE_AREA, %RSP_LP
|
||||
-#if !DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
|
||||
-#endif
|
||||
+# endif
|
||||
+# else
|
||||
+ # Allocate stack space of the required size to save the state.
|
||||
+# if IS_IN (rtld)
|
||||
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
|
||||
+# else
|
||||
+ sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
|
||||
+# endif
|
||||
+# endif
|
||||
# Preserve registers otherwise clobbered.
|
||||
movq %rax, REGISTER_SAVE_RAX(%rsp)
|
||||
movq %rcx, REGISTER_SAVE_RCX(%rsp)
|
||||
@@ -180,59 +96,42 @@ _dl_runtime_resolve:
|
||||
movq %rdi, REGISTER_SAVE_RDI(%rsp)
|
||||
movq %r8, REGISTER_SAVE_R8(%rsp)
|
||||
movq %r9, REGISTER_SAVE_R9(%rsp)
|
||||
- VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
|
||||
- VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
|
||||
- VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
|
||||
- VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
|
||||
- VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
|
||||
- VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
|
||||
- VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
|
||||
- VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
|
||||
-#ifndef __ILP32__
|
||||
- # We also have to preserve bound registers. These are nops if
|
||||
- # Intel MPX isn't available or disabled.
|
||||
-# ifdef HAVE_MPX_SUPPORT
|
||||
- bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
|
||||
- bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
|
||||
- bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
|
||||
- bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
|
||||
+# ifdef USE_FXSAVE
|
||||
+ fxsave STATE_SAVE_OFFSET(%rsp)
|
||||
# else
|
||||
-# if REGISTER_SAVE_BND0 == 0
|
||||
- .byte 0x66,0x0f,0x1b,0x04,0x24
|
||||
+ movl $STATE_SAVE_MASK, %eax
|
||||
+ xorl %edx, %edx
|
||||
+ # Clear the XSAVE Header.
|
||||
+# ifdef USE_XSAVE
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
|
||||
+# endif
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
|
||||
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
|
||||
+# ifdef USE_XSAVE
|
||||
+ xsave STATE_SAVE_OFFSET(%rsp)
|
||||
# else
|
||||
- .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
|
||||
+ xsavec STATE_SAVE_OFFSET(%rsp)
|
||||
# endif
|
||||
- .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
|
||||
- .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
|
||||
- .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
|
||||
# endif
|
||||
-#endif
|
||||
# Copy args pushed by PLT in register.
|
||||
# %rdi: link_map, %rsi: reloc_index
|
||||
mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
|
||||
mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
|
||||
call _dl_fixup # Call resolver.
|
||||
mov %RAX_LP, %R11_LP # Save return value
|
||||
-#ifndef __ILP32__
|
||||
- # Restore bound registers. These are nops if Intel MPX isn't
|
||||
- # avaiable or disabled.
|
||||
-# ifdef HAVE_MPX_SUPPORT
|
||||
- bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
|
||||
- bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
|
||||
- bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
|
||||
- bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
|
||||
+ # Get register content back.
|
||||
+# ifdef USE_FXSAVE
|
||||
+ fxrstor STATE_SAVE_OFFSET(%rsp)
|
||||
# else
|
||||
- .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
|
||||
- .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
|
||||
- .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
|
||||
-# if REGISTER_SAVE_BND0 == 0
|
||||
- .byte 0x66,0x0f,0x1a,0x04,0x24
|
||||
-# else
|
||||
- .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
|
||||
-# endif
|
||||
+ movl $STATE_SAVE_MASK, %eax
|
||||
+ xorl %edx, %edx
|
||||
+ xrstor STATE_SAVE_OFFSET(%rsp)
|
||||
# endif
|
||||
-#endif
|
||||
- # Get register content back.
|
||||
movq REGISTER_SAVE_R9(%rsp), %r9
|
||||
movq REGISTER_SAVE_R8(%rsp), %r8
|
||||
movq REGISTER_SAVE_RDI(%rsp), %rdi
|
||||
@@ -240,20 +139,12 @@ _dl_runtime_resolve:
|
||||
movq REGISTER_SAVE_RDX(%rsp), %rdx
|
||||
movq REGISTER_SAVE_RCX(%rsp), %rcx
|
||||
movq REGISTER_SAVE_RAX(%rsp), %rax
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
|
||||
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
|
||||
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
|
||||
mov %RBX_LP, %RSP_LP
|
||||
cfi_def_cfa_register(%rsp)
|
||||
movq (%rsp), %rbx
|
||||
cfi_restore(%rbx)
|
||||
-#endif
|
||||
+# endif
|
||||
# Adjust stack(PLT did 2 pushes)
|
||||
add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
|
||||
cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
|
||||
@@ -262,11 +153,9 @@ _dl_runtime_resolve:
|
||||
jmp *%r11 # Jump to function address.
|
||||
cfi_endproc
|
||||
.size _dl_runtime_resolve, .-_dl_runtime_resolve
|
||||
+#endif
|
||||
|
||||
|
||||
-/* To preserve %xmm0 - %xmm7 registers, dl-trampoline.h is included
|
||||
- twice, for _dl_runtime_resolve_sse and _dl_runtime_resolve_sse_vex.
|
||||
- But we don't need another _dl_runtime_profile for XMM registers. */
|
||||
#if !defined PROF && defined _dl_runtime_profile
|
||||
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
|
||||
# error LR_VECTOR_OFFSET must be multples of VEC_SIZE
|
573
falkor-memcpy-memmove.patch
Normal file
573
falkor-memcpy-memmove.patch
Normal file
@ -0,0 +1,573 @@
|
||||
2017-10-10 Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||||
|
||||
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines): Add
|
||||
memmove_falkor.
|
||||
* sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
||||
(__libc_ifunc_impl_list): Likewise.
|
||||
* sysdeps/aarch64/multiarch/memmove.c: Likewise.
|
||||
* sysdeps/aarch64/multiarch/memmove_falkor.S: New file.
|
||||
|
||||
* benchtests/bench-memmove-walk.c: New file.
|
||||
* benchtests/Makefile (string-benchset): Add it.
|
||||
|
||||
* benchtests/bench-memset-walk.c: New file.
|
||||
* benchtests/Makefile (string-benchset): Add it.
|
||||
|
||||
* benchtests/bench-memcpy-walk.c: New file.
|
||||
* benchtests/Makefile (string-benchset): Add it.
|
||||
|
||||
2017-10-10 Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||||
|
||||
* manual/tunables.texi (Tunable glibc.tune.cpu): Add falkor.
|
||||
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines): Add
|
||||
memcpy_falkor.
|
||||
* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC):
|
||||
Bump.
|
||||
(__libc_ifunc_impl_list): Add __memcpy_falkor.
|
||||
* sysdeps/aarch64/multiarch/memcpy.c: Likewise.
|
||||
* sysdeps/aarch64/multiarch/memcpy_falkor.S: New file.
|
||||
* sysdeps/unix/sysv/linux/aarch64/cpu-features.c (cpu_list):
|
||||
Add falkor.
|
||||
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_FALKOR):
|
||||
New macro.
|
||||
|
||||
Index: glibc-2.26/manual/tunables.texi
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/manual/tunables.texi
|
||||
+++ glibc-2.26/manual/tunables.texi
|
||||
@@ -267,7 +267,7 @@ This tunable is specific to i386 and x86
|
||||
@deftp Tunable glibc.tune.cpu
|
||||
The @code{glibc.tune.cpu=xxx} tunable allows the user to tell @theglibc{} to
|
||||
assume that the CPU is @code{xxx} where xxx may have one of these values:
|
||||
-@code{generic}, @code{thunderxt88}.
|
||||
+@code{generic}, @code{falkor}, @code{thunderxt88}.
|
||||
|
||||
This tunable is specific to aarch64.
|
||||
@end deftp
|
||||
Index: glibc-2.26/sysdeps/aarch64/multiarch/Makefile
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/aarch64/multiarch/Makefile
|
||||
+++ glibc-2.26/sysdeps/aarch64/multiarch/Makefile
|
||||
@@ -1,3 +1,4 @@
|
||||
ifeq ($(subdir),string)
|
||||
-sysdep_routines += memcpy_generic memcpy_thunderx
|
||||
+sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
|
||||
+ memmove_falkor
|
||||
endif
|
||||
Index: glibc-2.26/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
||||
+++ glibc-2.26/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
||||
@@ -25,7 +25,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
/* Maximum number of IFUNC implementations. */
|
||||
-#define MAX_IFUNC 2
|
||||
+#define MAX_IFUNC 3
|
||||
|
||||
size_t
|
||||
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
@@ -40,9 +40,11 @@ __libc_ifunc_impl_list (const char *name
|
||||
/* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */
|
||||
IFUNC_IMPL (i, name, memcpy,
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx)
|
||||
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor)
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
|
||||
IFUNC_IMPL (i, name, memmove,
|
||||
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx)
|
||||
+ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor)
|
||||
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
|
||||
|
||||
return i;
|
||||
Index: glibc-2.26/sysdeps/aarch64/multiarch/memcpy.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/aarch64/multiarch/memcpy.c
|
||||
+++ glibc-2.26/sysdeps/aarch64/multiarch/memcpy.c
|
||||
@@ -30,9 +30,14 @@ extern __typeof (__redirect_memcpy) __li
|
||||
|
||||
extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
|
||||
extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
|
||||
+extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
|
||||
|
||||
libc_ifunc (__libc_memcpy,
|
||||
- IS_THUNDERX (midr) ? __memcpy_thunderx : __memcpy_generic);
|
||||
+ (IS_THUNDERX (midr)
|
||||
+ ? __memcpy_thunderx
|
||||
+ : (IS_FALKOR (midr)
|
||||
+ ? __memcpy_falkor
|
||||
+ : __memcpy_generic)));
|
||||
|
||||
# undef memcpy
|
||||
strong_alias (__libc_memcpy, memcpy);
|
||||
Index: glibc-2.26/sysdeps/aarch64/multiarch/memcpy_falkor.S
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.26/sysdeps/aarch64/multiarch/memcpy_falkor.S
|
||||
@@ -0,0 +1,184 @@
|
||||
+/* Optimized memcpy for Qualcomm Falkor processor.
|
||||
+ Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+
|
||||
+/* Assumptions:
|
||||
+
|
||||
+ ARMv8-a, AArch64, falkor, unaligned accesses. */
|
||||
+
|
||||
+#define dstin x0
|
||||
+#define src x1
|
||||
+#define count x2
|
||||
+#define dst x3
|
||||
+#define srcend x4
|
||||
+#define dstend x5
|
||||
+#define A_l x6
|
||||
+#define A_lw w6
|
||||
+#define A_h x7
|
||||
+#define A_hw w7
|
||||
+#define tmp1 x14
|
||||
+
|
||||
+/* Copies are split into 3 main cases:
|
||||
+
|
||||
+ 1. Small copies of up to 32 bytes
|
||||
+ 2. Medium copies of 33..128 bytes which are fully unrolled
|
||||
+ 3. Large copies of more than 128 bytes.
|
||||
+
|
||||
+ Large copies align the source to a quad word and use an unrolled loop
|
||||
+ processing 64 bytes per iteration.
|
||||
+
|
||||
+ FALKOR-SPECIFIC DESIGN:
|
||||
+
|
||||
+ The smallest copies (32 bytes or less) focus on optimal pipeline usage,
|
||||
+ which is why the redundant copies of 0-3 bytes have been replaced with
|
||||
+ conditionals, since the former would unnecessarily break across multiple
|
||||
+ issue groups. The medium copy group has been enlarged to 128 bytes since
|
||||
+ bumping up the small copies up to 32 bytes allows us to do that without
|
||||
+ cost and also allows us to reduce the size of the prep code before loop64.
|
||||
+
|
||||
+ All copies are done only via two registers x6 and x7. This is to ensure
|
||||
+ that all loads hit a single hardware prefetcher which can get correctly
|
||||
+ trained to prefetch a single stream.
|
||||
+
|
||||
+ The non-temporal stores help optimize cache utilization. */
|
||||
+
|
||||
+#if IS_IN (libc)
|
||||
+ENTRY_ALIGN (__memcpy_falkor, 6)
|
||||
+
|
||||
+ cmp count, 32
|
||||
+ add srcend, src, count
|
||||
+ add dstend, dstin, count
|
||||
+ b.ls L(copy32)
|
||||
+ ldp A_l, A_h, [src]
|
||||
+ cmp count, 128
|
||||
+ stp A_l, A_h, [dstin]
|
||||
+ b.hi L(copy_long)
|
||||
+
|
||||
+ /* Medium copies: 33..128 bytes. */
|
||||
+ sub tmp1, count, 1
|
||||
+ ldp A_l, A_h, [src, 16]
|
||||
+ stp A_l, A_h, [dstin, 16]
|
||||
+ tbz tmp1, 6, 1f
|
||||
+ ldp A_l, A_h, [src, 32]
|
||||
+ stp A_l, A_h, [dstin, 32]
|
||||
+ ldp A_l, A_h, [src, 48]
|
||||
+ stp A_l, A_h, [dstin, 48]
|
||||
+ ldp A_l, A_h, [srcend, -64]
|
||||
+ stp A_l, A_h, [dstend, -64]
|
||||
+ ldp A_l, A_h, [srcend, -48]
|
||||
+ stp A_l, A_h, [dstend, -48]
|
||||
+1:
|
||||
+ ldp A_l, A_h, [srcend, -32]
|
||||
+ stp A_l, A_h, [dstend, -32]
|
||||
+ ldp A_l, A_h, [srcend, -16]
|
||||
+ stp A_l, A_h, [dstend, -16]
|
||||
+ ret
|
||||
+
|
||||
+ .p2align 4
|
||||
+ /* Small copies: 0..32 bytes. */
|
||||
+L(copy32):
|
||||
+ /* 16-32 */
|
||||
+ cmp count, 16
|
||||
+ b.lo 1f
|
||||
+ ldp A_l, A_h, [src]
|
||||
+ stp A_l, A_h, [dstin]
|
||||
+ ldp A_l, A_h, [srcend, -16]
|
||||
+ stp A_l, A_h, [dstend, -16]
|
||||
+ ret
|
||||
+ .p2align 4
|
||||
+1:
|
||||
+ /* 8-15 */
|
||||
+ tbz count, 3, 1f
|
||||
+ ldr A_l, [src]
|
||||
+ str A_l, [dstin]
|
||||
+ ldr A_l, [srcend, -8]
|
||||
+ str A_l, [dstend, -8]
|
||||
+ ret
|
||||
+ .p2align 4
|
||||
+1:
|
||||
+ /* 4-7 */
|
||||
+ tbz count, 2, 1f
|
||||
+ ldr A_lw, [src]
|
||||
+ str A_lw, [dstin]
|
||||
+ ldr A_lw, [srcend, -4]
|
||||
+ str A_lw, [dstend, -4]
|
||||
+ ret
|
||||
+ .p2align 4
|
||||
+1:
|
||||
+ /* 2-3 */
|
||||
+ tbz count, 1, 1f
|
||||
+ ldrh A_lw, [src]
|
||||
+ strh A_lw, [dstin]
|
||||
+ ldrh A_lw, [srcend, -2]
|
||||
+ strh A_lw, [dstend, -2]
|
||||
+ ret
|
||||
+ .p2align 4
|
||||
+1:
|
||||
+ /* 0-1 */
|
||||
+ tbz count, 0, 1f
|
||||
+ ldrb A_lw, [src]
|
||||
+ strb A_lw, [dstin]
|
||||
+1:
|
||||
+ ret
|
||||
+
|
||||
+ /* Align SRC to 16 bytes and copy; that way at least one of the
|
||||
+ accesses is aligned throughout the copy sequence.
|
||||
+
|
||||
+ The count is off by 0 to 15 bytes, but this is OK because we trim
|
||||
+ off the last 64 bytes to copy off from the end. Due to this the
|
||||
+ loop never runs out of bounds. */
|
||||
+ .p2align 6
|
||||
+L(copy_long):
|
||||
+ sub count, count, 64 + 16
|
||||
+ and tmp1, src, 15
|
||||
+ bic src, src, 15
|
||||
+ sub dst, dstin, tmp1
|
||||
+ add count, count, tmp1
|
||||
+
|
||||
+L(loop64):
|
||||
+ ldp A_l, A_h, [src, 16]!
|
||||
+ stnp A_l, A_h, [dst, 16]
|
||||
+ ldp A_l, A_h, [src, 16]!
|
||||
+ subs count, count, 64
|
||||
+ stnp A_l, A_h, [dst, 32]
|
||||
+ ldp A_l, A_h, [src, 16]!
|
||||
+ stnp A_l, A_h, [dst, 48]
|
||||
+ ldp A_l, A_h, [src, 16]!
|
||||
+ stnp A_l, A_h, [dst, 64]
|
||||
+ add dst, dst, 64
|
||||
+ b.hi L(loop64)
|
||||
+
|
||||
+ /* Write the last full set of 64 bytes. The remainder is at most 64
|
||||
+ bytes, so it is safe to always copy 64 bytes from the end even if
|
||||
+ there is just 1 byte left. */
|
||||
+L(last64):
|
||||
+ ldp A_l, A_h, [srcend, -64]
|
||||
+ stnp A_l, A_h, [dstend, -64]
|
||||
+ ldp A_l, A_h, [srcend, -48]
|
||||
+ stnp A_l, A_h, [dstend, -48]
|
||||
+ ldp A_l, A_h, [srcend, -32]
|
||||
+ stnp A_l, A_h, [dstend, -32]
|
||||
+ ldp A_l, A_h, [srcend, -16]
|
||||
+ stnp A_l, A_h, [dstend, -16]
|
||||
+ ret
|
||||
+
|
||||
+END (__memcpy_falkor)
|
||||
+libc_hidden_builtin_def (__memcpy_falkor)
|
||||
+#endif
|
||||
Index: glibc-2.26/sysdeps/aarch64/multiarch/memmove.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/aarch64/multiarch/memmove.c
|
||||
+++ glibc-2.26/sysdeps/aarch64/multiarch/memmove.c
|
||||
@@ -30,9 +30,14 @@ extern __typeof (__redirect_memmove) __l
|
||||
|
||||
extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
|
||||
extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
|
||||
+extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden;
|
||||
|
||||
libc_ifunc (__libc_memmove,
|
||||
- IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic);
|
||||
+ (IS_THUNDERX (midr)
|
||||
+ ? __memmove_thunderx
|
||||
+ : (IS_FALKOR (midr)
|
||||
+ ? __memmove_falkor
|
||||
+ : __memmove_generic)));
|
||||
|
||||
# undef memmove
|
||||
strong_alias (__libc_memmove, memmove);
|
||||
Index: glibc-2.26/sysdeps/aarch64/multiarch/memmove_falkor.S
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.26/sysdeps/aarch64/multiarch/memmove_falkor.S
|
||||
@@ -0,0 +1,232 @@
|
||||
+/* Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+
|
||||
+/* Assumptions: ARMv8-a, AArch64, falkor, unaligned accesses. */
|
||||
+
|
||||
+#define dstin x0
|
||||
+#define src x1
|
||||
+#define count x2
|
||||
+#define dstlen x3
|
||||
+#define dst x3
|
||||
+#define srcend x4
|
||||
+#define dstend x5
|
||||
+#define A_l x6
|
||||
+#define A_lw w6
|
||||
+#define A_h x7
|
||||
+#define A_hw w7
|
||||
+#define B_l x8
|
||||
+#define B_lw w8
|
||||
+#define B_h x9
|
||||
+#define C_l x10
|
||||
+#define C_h x11
|
||||
+#define D_l x12
|
||||
+#define D_h x13
|
||||
+#define E_l src
|
||||
+#define E_h count
|
||||
+#define F_l srcend
|
||||
+#define F_h dst
|
||||
+#define tmp1 x14
|
||||
+
|
||||
+/* Alias with A_l and A_h to train the prefetcher. */
|
||||
+#define Q_l x22
|
||||
+#define Q_h x23
|
||||
+
|
||||
+/* RATIONALE:
|
||||
+
|
||||
+ The copy has 4 distinct parts:
|
||||
+ * Small copies of 16 bytes and under
|
||||
+ * Medium sized copies of 17-96 bytes
|
||||
+ * Large copies where the source address is higher than the destination
|
||||
+ (forward copies)
|
||||
+ * Large copies where the destination address is higher than the source
|
||||
+ (copy backward, or move).
|
||||
+
|
||||
+ We use only two register pairs x6,x7 and x22,x23 for the copies and copy 32
|
||||
+ bytes at a time to correctly train the hardware prefetcher for better
|
||||
+ throughput. */
|
||||
+ENTRY_ALIGN (__memmove_falkor, 6)
|
||||
+
|
||||
+ sub tmp1, dstin, src
|
||||
+ add srcend, src, count
|
||||
+ add dstend, dstin, count
|
||||
+ cmp count, 96
|
||||
+ ccmp tmp1, count, 2, hi
|
||||
+ b.lo L(move_long)
|
||||
+
|
||||
+ cmp count, 16
|
||||
+ b.ls L(copy16)
|
||||
+ cmp count, 96
|
||||
+ b.hi L(copy_long)
|
||||
+
|
||||
+ /* Medium copies: 17..96 bytes. */
|
||||
+ sub tmp1, count, 1
|
||||
+ ldp A_l, A_h, [src]
|
||||
+ tbnz tmp1, 6, L(copy96)
|
||||
+ ldp D_l, D_h, [srcend, -16]
|
||||
+ tbz tmp1, 5, 1f
|
||||
+ ldp B_l, B_h, [src, 16]
|
||||
+ ldp C_l, C_h, [srcend, -32]
|
||||
+ stp B_l, B_h, [dstin, 16]
|
||||
+ stp C_l, C_h, [dstend, -32]
|
||||
+1:
|
||||
+ stp A_l, A_h, [dstin]
|
||||
+ stp D_l, D_h, [dstend, -16]
|
||||
+ ret
|
||||
+
|
||||
+ .p2align 4
|
||||
+ /* Small copies: 0..16 bytes. */
|
||||
+L(copy16):
|
||||
+ cmp count, 8
|
||||
+ b.lo 1f
|
||||
+ ldr A_l, [src]
|
||||
+ ldr A_h, [srcend, -8]
|
||||
+ str A_l, [dstin]
|
||||
+ str A_h, [dstend, -8]
|
||||
+ ret
|
||||
+ .p2align 4
|
||||
+1:
|
||||
+ /* 4-7 */
|
||||
+ tbz count, 2, 1f
|
||||
+ ldr A_lw, [src]
|
||||
+ ldr A_hw, [srcend, -4]
|
||||
+ str A_lw, [dstin]
|
||||
+ str A_hw, [dstend, -4]
|
||||
+ ret
|
||||
+ .p2align 4
|
||||
+1:
|
||||
+ /* 2-3 */
|
||||
+ tbz count, 1, 1f
|
||||
+ ldrh A_lw, [src]
|
||||
+ ldrh A_hw, [srcend, -2]
|
||||
+ strh A_lw, [dstin]
|
||||
+ strh A_hw, [dstend, -2]
|
||||
+ ret
|
||||
+ .p2align 4
|
||||
+1:
|
||||
+ /* 0-1 */
|
||||
+ tbz count, 0, 1f
|
||||
+ ldrb A_lw, [src]
|
||||
+ strb A_lw, [dstin]
|
||||
+1: ret
|
||||
+
|
||||
+ .p2align 4
|
||||
+ /* Copy 65..96 bytes. Copy 64 bytes from the start and
|
||||
+ 32 bytes from the end. */
|
||||
+L(copy96):
|
||||
+ ldp B_l, B_h, [src, 16]
|
||||
+ ldp C_l, C_h, [src, 32]
|
||||
+ ldp D_l, D_h, [src, 48]
|
||||
+ ldp E_l, E_h, [srcend, -32]
|
||||
+ ldp F_l, F_h, [srcend, -16]
|
||||
+ stp A_l, A_h, [dstin]
|
||||
+ stp B_l, B_h, [dstin, 16]
|
||||
+ stp C_l, C_h, [dstin, 32]
|
||||
+ stp D_l, D_h, [dstin, 48]
|
||||
+ stp E_l, E_h, [dstend, -32]
|
||||
+ stp F_l, F_h, [dstend, -16]
|
||||
+ ret
|
||||
+
|
||||
+ /* Align SRC to 16 byte alignment so that we don't cross cache line
|
||||
+ boundaries on both loads and stores. There are at least 96 bytes
|
||||
+ to copy, so copy 16 bytes unaligned and then align. The loop
|
||||
+ copies 32 bytes per iteration and prefetches one iteration ahead. */
|
||||
+
|
||||
+ .p2align 4
|
||||
+L(copy_long):
|
||||
+ sub count, count, 64 + 16 /* Test and readjust count. */
|
||||
+ mov B_l, Q_l
|
||||
+ mov B_h, Q_h
|
||||
+ ldp A_l, A_h, [src]
|
||||
+ and tmp1, src, 15
|
||||
+ bic src, src, 15
|
||||
+ sub dst, dstin, tmp1
|
||||
+ add count, count, tmp1 /* Count is now 16 too large. */
|
||||
+ ldp Q_l, Q_h, [src, 16]!
|
||||
+ stp A_l, A_h, [dstin]
|
||||
+ ldp A_l, A_h, [src, 16]!
|
||||
+
|
||||
+L(loop64):
|
||||
+ subs count, count, 32
|
||||
+ stp Q_l, Q_h, [dst, 16]
|
||||
+ ldp Q_l, Q_h, [src, 16]!
|
||||
+ stp A_l, A_h, [dst, 32]!
|
||||
+ ldp A_l, A_h, [src, 16]!
|
||||
+ b.hi L(loop64)
|
||||
+
|
||||
+ /* Write the last full set of 32 bytes. The remainder is at most 32
|
||||
+ bytes, so it is safe to always copy 32 bytes from the end even if
|
||||
+ there is just 1 byte left. */
|
||||
+L(last64):
|
||||
+ ldp C_l, C_h, [srcend, -32]
|
||||
+ stp Q_l, Q_h, [dst, 16]
|
||||
+ ldp Q_l, Q_h, [srcend, -16]
|
||||
+ stp A_l, A_h, [dst, 32]
|
||||
+ stp C_l, C_h, [dstend, -32]
|
||||
+ stp Q_l, Q_h, [dstend, -16]
|
||||
+ mov Q_l, B_l
|
||||
+ mov Q_h, B_h
|
||||
+ ret
|
||||
+
|
||||
+ .p2align 4
|
||||
+L(move_long):
|
||||
+ cbz tmp1, 3f
|
||||
+
|
||||
+ mov B_l, Q_l
|
||||
+ mov B_h, Q_h
|
||||
+
|
||||
+ /* Align SRCEND to 16 byte alignment so that we don't cross cache line
|
||||
+ boundaries on both loads and stores. There are at least 96 bytes
|
||||
+ to copy, so copy 16 bytes unaligned and then align. The loop
|
||||
+ copies 32 bytes per iteration and prefetches one iteration ahead. */
|
||||
+
|
||||
+ ldp A_l, A_h, [srcend, -16]
|
||||
+ and tmp1, srcend, 15
|
||||
+ sub srcend, srcend, tmp1
|
||||
+ ldp Q_l, Q_h, [srcend, -16]!
|
||||
+ stp A_l, A_h, [dstend, -16]
|
||||
+ sub count, count, tmp1
|
||||
+ ldp A_l, A_h, [srcend, -16]!
|
||||
+ sub dstend, dstend, tmp1
|
||||
+ sub count, count, 64
|
||||
+
|
||||
+1:
|
||||
+ subs count, count, 32
|
||||
+ stp Q_l, Q_h, [dstend, -16]
|
||||
+ ldp Q_l, Q_h, [srcend, -16]!
|
||||
+ stp A_l, A_h, [dstend, -32]!
|
||||
+ ldp A_l, A_h, [srcend, -16]!
|
||||
+ b.hi 1b
|
||||
+
|
||||
+ /* Write the last full set of 32 bytes. The remainder is at most 32
|
||||
+ bytes, so it is safe to always copy 32 bytes from the start even if
|
||||
+ there is just 1 byte left. */
|
||||
+2:
|
||||
+ ldp C_l, C_h, [src, 16]
|
||||
+ stp Q_l, Q_h, [dstend, -16]
|
||||
+ ldp Q_l, Q_h, [src]
|
||||
+ stp A_l, A_h, [dstend, -32]
|
||||
+ stp C_l, C_h, [dstin, 16]
|
||||
+ stp Q_l, Q_h, [dstin]
|
||||
+ mov Q_l, B_l
|
||||
+ mov Q_h, B_h
|
||||
+3: ret
|
||||
+
|
||||
+END (__memmove_falkor)
|
||||
+libc_hidden_builtin_def (__memmove_falkor)
|
||||
Index: glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
||||
+++ glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
|
||||
@@ -28,6 +28,7 @@ struct cpu_list
|
||||
};
|
||||
|
||||
static struct cpu_list cpu_list[] = {
|
||||
+ {"falkor", 0x510FC000},
|
||||
{"thunderxt88", 0x430F0A10},
|
||||
{"generic", 0x0}
|
||||
};
|
||||
Index: glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
|
||||
+++ glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
|
||||
@@ -41,6 +41,9 @@
|
||||
#define IS_THUNDERX(midr) (MIDR_IMPLEMENTOR(midr) == 'C' \
|
||||
&& MIDR_PARTNUM(midr) == 0x0a1)
|
||||
|
||||
+#define IS_FALKOR(midr) (MIDR_IMPLEMENTOR(midr) == 'Q' \
|
||||
+ && MIDR_PARTNUM(midr) == 0xc00)
|
||||
+
|
||||
struct cpu_features
|
||||
{
|
||||
uint64_t midr_el1;
|
@ -10,22 +10,22 @@ Fix fnmatch handling of collating elements (BZ #17396, BZ #16976)
|
||||
* posix/tst-fnmatch4.c: New file.
|
||||
* posix/tst-fnmatch5.c: New file.
|
||||
|
||||
Index: glibc-2.22/posix/Makefile
|
||||
Index: glibc-2.26/posix/Makefile
|
||||
===================================================================
|
||||
--- glibc-2.22.orig/posix/Makefile
|
||||
+++ glibc-2.22/posix/Makefile
|
||||
@@ -87,6 +87,7 @@ tests := tstgetopt testfnm runtests run
|
||||
--- glibc-2.26.orig/posix/Makefile
|
||||
+++ glibc-2.26/posix/Makefile
|
||||
@@ -91,6 +91,7 @@ tests := test-errno tstgetopt testfnm r
|
||||
bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
|
||||
tst-pathconf tst-getaddrinfo4 tst-rxspencer-no-utf8 \
|
||||
tst-fnmatch3 bug-regex36 tst-getaddrinfo5 \
|
||||
+ tst-fnmatch4 tst-fnmatch5 \
|
||||
tst-posix_spawn-fd tst-posix_spawn-setsid \
|
||||
tst-posix_fadvise tst-posix_fadvise64 \
|
||||
tst-sysconf-empty-chroot
|
||||
Index: glibc-2.22/posix/fnmatch.c
|
||||
tst-sysconf-empty-chroot tst-glob-tilde
|
||||
Index: glibc-2.26/posix/fnmatch.c
|
||||
===================================================================
|
||||
--- glibc-2.22.orig/posix/fnmatch.c
|
||||
+++ glibc-2.22/posix/fnmatch.c
|
||||
--- glibc-2.26.orig/posix/fnmatch.c
|
||||
+++ glibc-2.26/posix/fnmatch.c
|
||||
@@ -53,7 +53,6 @@
|
||||
we support a correct implementation only in glibc. */
|
||||
#ifdef _LIBC
|
||||
@ -34,11 +34,11 @@ Index: glibc-2.22/posix/fnmatch.c
|
||||
# include "../locale/coll-lookup.h"
|
||||
# include <shlib-compat.h>
|
||||
|
||||
Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
Index: glibc-2.26/posix/fnmatch_loop.c
|
||||
===================================================================
|
||||
--- glibc-2.22.orig/posix/fnmatch_loop.c
|
||||
+++ glibc-2.22/posix/fnmatch_loop.c
|
||||
@@ -503,26 +503,12 @@ FCT (pattern, string, string_end, no_lea
|
||||
--- glibc-2.26.orig/posix/fnmatch_loop.c
|
||||
+++ glibc-2.26/posix/fnmatch_loop.c
|
||||
@@ -497,26 +497,12 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
{
|
||||
int32_t table_size;
|
||||
const int32_t *symb_table;
|
||||
@ -67,7 +67,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
|
||||
table_size =
|
||||
_NL_CURRENT_WORD (LC_COLLATE,
|
||||
@@ -534,71 +520,55 @@ FCT (pattern, string, string_end, no_lea
|
||||
@@ -528,71 +514,55 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
_NL_CURRENT (LC_COLLATE,
|
||||
_NL_COLLATE_SYMB_EXTRAMB);
|
||||
|
||||
@ -180,7 +180,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
}
|
||||
|
||||
/* Get the collation sequence value. */
|
||||
@@ -606,9 +576,9 @@ FCT (pattern, string, string_end, no_lea
|
||||
@@ -600,9 +570,9 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
# if WIDE_CHAR_VERSION
|
||||
cold = wextra[1 + wextra[idx]];
|
||||
# else
|
||||
@ -192,7 +192,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
cold = *((int32_t *) &extra[idx]);
|
||||
# endif
|
||||
|
||||
@@ -618,10 +588,10 @@ FCT (pattern, string, string_end, no_lea
|
||||
@@ -612,10 +582,10 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
{
|
||||
/* No valid character. Match it as a
|
||||
single byte. */
|
||||
@ -205,7 +205,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
c = *p++;
|
||||
}
|
||||
else
|
||||
@@ -629,7 +599,6 @@ FCT (pattern, string, string_end, no_lea
|
||||
@@ -623,7 +593,6 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -213,7 +213,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
#endif
|
||||
{
|
||||
c = FOLD (c);
|
||||
@@ -721,25 +690,11 @@ FCT (pattern, string, string_end, no_lea
|
||||
@@ -715,25 +684,11 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
{
|
||||
int32_t table_size;
|
||||
const int32_t *symb_table;
|
||||
@ -240,7 +240,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
# endif
|
||||
|
||||
table_size =
|
||||
@@ -752,51 +707,44 @@ FCT (pattern, string, string_end, no_lea
|
||||
@@ -746,51 +701,44 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
_NL_CURRENT (LC_COLLATE,
|
||||
_NL_COLLATE_SYMB_EXTRAMB);
|
||||
|
||||
@ -328,7 +328,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
/* Get the collation sequence value. */
|
||||
is_seqval = 1;
|
||||
# if WIDE_CHAR_VERSION
|
||||
@@ -804,19 +752,18 @@ FCT (pattern, string, string_end, no_lea
|
||||
@@ -798,19 +746,18 @@ FCT (const CHAR *pattern, const CHAR *st
|
||||
# else
|
||||
/* Adjust for the alignment. */
|
||||
idx += 1 + extra[idx];
|
||||
@ -351,10 +351,10 @@ Index: glibc-2.22/posix/fnmatch_loop.c
|
||||
}
|
||||
else
|
||||
{
|
||||
Index: glibc-2.22/posix/tst-fnmatch4.c
|
||||
Index: glibc-2.26/posix/tst-fnmatch4.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.22/posix/tst-fnmatch4.c
|
||||
+++ glibc-2.26/posix/tst-fnmatch4.c
|
||||
@@ -0,0 +1,51 @@
|
||||
+/* Test for fnmatch handling of collating elements
|
||||
+ Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
@ -407,10 +407,10 @@ Index: glibc-2.22/posix/tst-fnmatch4.c
|
||||
+
|
||||
+#define TEST_FUNCTION do_test ()
|
||||
+#include "../test-skeleton.c"
|
||||
Index: glibc-2.22/posix/tst-fnmatch5.c
|
||||
Index: glibc-2.26/posix/tst-fnmatch5.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.22/posix/tst-fnmatch5.c
|
||||
+++ glibc-2.26/posix/tst-fnmatch5.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* Test for fnmatch handling of collating elements
|
||||
+ Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
|
@ -1,3 +1,22 @@
|
||||
-------------------------------------------------------------------
|
||||
Mon Oct 23 09:35:18 UTC 2017 - schwab@suse.de
|
||||
|
||||
- math-c++-compat.patch: Add more C++ compatibility (BZ #22296)
|
||||
- malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ
|
||||
#22111)
|
||||
- falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove
|
||||
for Qualcomm Falkor
|
||||
- aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling
|
||||
- nss-files-large-buffers.patch: Avoid large buffers with many host
|
||||
addresses (BZ #22078)
|
||||
- sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ
|
||||
#22321)
|
||||
- glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE
|
||||
(CVE-2017-15670, CVE-2017-15671, CVE-2017-15804,
|
||||
bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332)
|
||||
- dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in
|
||||
_dl_runtime_resolve (BZ #21265)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Tue Oct 10 15:47:05 UTC 2017 - schwab@suse.de
|
||||
|
||||
@ -22,7 +41,7 @@ Thu Sep 28 07:57:52 UTC 2017 - schwab@suse.de
|
||||
|
||||
- assert-pedantic.patch: Suppress pedantic warning caused by statement
|
||||
expression (BZ #21242, BZ #21972)
|
||||
- math-c++-compat.patch: Add more C++ compatibility
|
||||
- math-c++-compat.patch: Add more C++ compatibility (BZ #22235)
|
||||
- getaddrinfo-errno.patch: Fix errno and h_errno handling in getaddrinfo
|
||||
(BZ #21915, BZ #21922)
|
||||
- resolv-conf-oom.patch: Fix memory handling in OOM situation during
|
||||
|
@ -273,7 +273,7 @@ Patch1000: resolv-context-leak.patch
|
||||
Patch1001: dl-runtime-resolve-opt-avx512f.patch
|
||||
# PATCH-FIX-UPSTREAM Don't use IFUNC resolver for longjmp or system in libpthread (BZ #21041)
|
||||
Patch1002: libpthread-compat-wrappers.patch
|
||||
# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930)
|
||||
# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930, BZ #22146, BZ #22235, BZ #22296)
|
||||
Patch1003: math-c++-compat.patch
|
||||
# PATCH-FIX-UPSTREAM Remove nis and compat from default NSS configs
|
||||
Patch1004: remove-nss-nis-compat.patch
|
||||
@ -295,6 +295,20 @@ Patch1011: nearbyint-inexact.patch
|
||||
Patch1012: nss-compat.patch
|
||||
# PATCH-FIX-UPSTREAM Remove reference to libnsl from nscd
|
||||
Patch1013: nscd-libnsl.patch
|
||||
# PATCH-FIX-UPSTREAM malloc: Fix tcache leak after thread destruction (BZ #22111)
|
||||
Patch1014: malloc-tcache-leak.patch
|
||||
# PATCH-FIX-UPSTREAM aarch64: Optimized implementation of memcpy/memmove for Qualcomm Falkor
|
||||
Patch1015: falkor-memcpy-memmove.patch
|
||||
# PATCH-FIX-UPSTREAM aarch64: Fix glibc.tune.cpu tunable handling
|
||||
Patch1016: aarch64-cpu-features.patch
|
||||
# PATCH-FIX-UPSTREAM nss_files: Avoid large buffers with many host addresses (BZ #22078)
|
||||
Patch1017: nss-files-large-buffers.patch
|
||||
# PATCH-FIX-UPSTREAM sysconf: Fix missing definition of UIO_MAXIOV on Linux (BZ #22321)
|
||||
Patch1018: sysconf-uio-maxiov.patch
|
||||
# PATCH-FIX-UPSTREAM glob: Fix buffer overflows (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, BZ #22320, BZ #22325, BZ #22332)
|
||||
Patch1019: glob-tilde-overflow.patch
|
||||
# PATCH-FIX-UPSTREAM x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265)
|
||||
Patch1020: dl-runtime-resolve-xsave.patch
|
||||
|
||||
###
|
||||
# Patches awaiting upstream approval
|
||||
@ -519,6 +533,13 @@ rm nscd/s-stamp
|
||||
%patch1011 -p1
|
||||
%patch1012 -p1
|
||||
%patch1013 -p1
|
||||
%patch1014 -p1
|
||||
%patch1015 -p1
|
||||
%patch1016 -p1
|
||||
%patch1017 -p1
|
||||
%patch1018 -p1
|
||||
%patch1019 -p1
|
||||
%patch1020 -p1
|
||||
|
||||
%patch2000 -p1
|
||||
%patch2001 -p1
|
||||
|
@ -1,3 +1,22 @@
|
||||
-------------------------------------------------------------------
|
||||
Mon Oct 23 09:35:18 UTC 2017 - schwab@suse.de
|
||||
|
||||
- math-c++-compat.patch: Add more C++ compatibility (BZ #22296)
|
||||
- malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ
|
||||
#22111)
|
||||
- falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove
|
||||
for Qualcomm Falkor
|
||||
- aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling
|
||||
- nss-files-large-buffers.patch: Avoid large buffers with many host
|
||||
addresses (BZ #22078)
|
||||
- sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ
|
||||
#22321)
|
||||
- glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE
|
||||
(CVE-2017-15670, CVE-2017-15671, CVE-2017-15804,
|
||||
bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332)
|
||||
- dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in
|
||||
_dl_runtime_resolve (BZ #21265)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Tue Oct 10 15:47:05 UTC 2017 - schwab@suse.de
|
||||
|
||||
@ -22,7 +41,7 @@ Thu Sep 28 07:57:52 UTC 2017 - schwab@suse.de
|
||||
|
||||
- assert-pedantic.patch: Suppress pedantic warning caused by statement
|
||||
expression (BZ #21242, BZ #21972)
|
||||
- math-c++-compat.patch: Add more C++ compatibility
|
||||
- math-c++-compat.patch: Add more C++ compatibility (BZ #22235)
|
||||
- getaddrinfo-errno.patch: Fix errno and h_errno handling in getaddrinfo
|
||||
(BZ #21915, BZ #21922)
|
||||
- resolv-conf-oom.patch: Fix memory handling in OOM situation during
|
||||
|
@ -273,7 +273,7 @@ Patch1000: resolv-context-leak.patch
|
||||
Patch1001: dl-runtime-resolve-opt-avx512f.patch
|
||||
# PATCH-FIX-UPSTREAM Don't use IFUNC resolver for longjmp or system in libpthread (BZ #21041)
|
||||
Patch1002: libpthread-compat-wrappers.patch
|
||||
# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930)
|
||||
# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930, BZ #22146, BZ #22235, BZ #22296)
|
||||
Patch1003: math-c++-compat.patch
|
||||
# PATCH-FIX-UPSTREAM Remove nis and compat from default NSS configs
|
||||
Patch1004: remove-nss-nis-compat.patch
|
||||
@ -295,6 +295,20 @@ Patch1011: nearbyint-inexact.patch
|
||||
Patch1012: nss-compat.patch
|
||||
# PATCH-FIX-UPSTREAM Remove reference to libnsl from nscd
|
||||
Patch1013: nscd-libnsl.patch
|
||||
# PATCH-FIX-UPSTREAM malloc: Fix tcache leak after thread destruction (BZ #22111)
|
||||
Patch1014: malloc-tcache-leak.patch
|
||||
# PATCH-FIX-UPSTREAM aarch64: Optimized implementation of memcpy/memmove for Qualcomm Falkor
|
||||
Patch1015: falkor-memcpy-memmove.patch
|
||||
# PATCH-FIX-UPSTREAM aarch64: Fix glibc.tune.cpu tunable handling
|
||||
Patch1016: aarch64-cpu-features.patch
|
||||
# PATCH-FIX-UPSTREAM nss_files: Avoid large buffers with many host addresses (BZ #22078)
|
||||
Patch1017: nss-files-large-buffers.patch
|
||||
# PATCH-FIX-UPSTREAM sysconf: Fix missing definition of UIO_MAXIOV on Linux (BZ #22321)
|
||||
Patch1018: sysconf-uio-maxiov.patch
|
||||
# PATCH-FIX-UPSTREAM glob: Fix buffer overflows (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, BZ #22320, BZ #22325, BZ #22332)
|
||||
Patch1019: glob-tilde-overflow.patch
|
||||
# PATCH-FIX-UPSTREAM x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265)
|
||||
Patch1020: dl-runtime-resolve-xsave.patch
|
||||
|
||||
###
|
||||
# Patches awaiting upstream approval
|
||||
@ -519,6 +533,13 @@ rm nscd/s-stamp
|
||||
%patch1011 -p1
|
||||
%patch1012 -p1
|
||||
%patch1013 -p1
|
||||
%patch1014 -p1
|
||||
%patch1015 -p1
|
||||
%patch1016 -p1
|
||||
%patch1017 -p1
|
||||
%patch1018 -p1
|
||||
%patch1019 -p1
|
||||
%patch1020 -p1
|
||||
|
||||
%patch2000 -p1
|
||||
%patch2001 -p1
|
||||
|
@ -1,3 +1,22 @@
|
||||
-------------------------------------------------------------------
|
||||
Mon Oct 23 09:35:18 UTC 2017 - schwab@suse.de
|
||||
|
||||
- math-c++-compat.patch: Add more C++ compatibility (BZ #22296)
|
||||
- malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ
|
||||
#22111)
|
||||
- falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove
|
||||
for Qualcomm Falkor
|
||||
- aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling
|
||||
- nss-files-large-buffers.patch: Avoid large buffers with many host
|
||||
addresses (BZ #22078)
|
||||
- sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ
|
||||
#22321)
|
||||
- glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE
|
||||
(CVE-2017-15670, CVE-2017-15671, CVE-2017-15804,
|
||||
bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332)
|
||||
- dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in
|
||||
_dl_runtime_resolve (BZ #21265)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Tue Oct 10 15:47:05 UTC 2017 - schwab@suse.de
|
||||
|
||||
@ -22,7 +41,7 @@ Thu Sep 28 07:57:52 UTC 2017 - schwab@suse.de
|
||||
|
||||
- assert-pedantic.patch: Suppress pedantic warning caused by statement
|
||||
expression (BZ #21242, BZ #21972)
|
||||
- math-c++-compat.patch: Add more C++ compatibility
|
||||
- math-c++-compat.patch: Add more C++ compatibility (BZ #22235)
|
||||
- getaddrinfo-errno.patch: Fix errno and h_errno handling in getaddrinfo
|
||||
(BZ #21915, BZ #21922)
|
||||
- resolv-conf-oom.patch: Fix memory handling in OOM situation during
|
||||
|
23
glibc.spec
23
glibc.spec
@ -279,7 +279,7 @@ Patch1000: resolv-context-leak.patch
|
||||
Patch1001: dl-runtime-resolve-opt-avx512f.patch
|
||||
# PATCH-FIX-UPSTREAM Don't use IFUNC resolver for longjmp or system in libpthread (BZ #21041)
|
||||
Patch1002: libpthread-compat-wrappers.patch
|
||||
# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930)
|
||||
# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930, BZ #22146, BZ #22235, BZ #22296)
|
||||
Patch1003: math-c++-compat.patch
|
||||
# PATCH-FIX-UPSTREAM Remove nis and compat from default NSS configs
|
||||
Patch1004: remove-nss-nis-compat.patch
|
||||
@ -301,6 +301,20 @@ Patch1011: nearbyint-inexact.patch
|
||||
Patch1012: nss-compat.patch
|
||||
# PATCH-FIX-UPSTREAM Remove reference to libnsl from nscd
|
||||
Patch1013: nscd-libnsl.patch
|
||||
# PATCH-FIX-UPSTREAM malloc: Fix tcache leak after thread destruction (BZ #22111)
|
||||
Patch1014: malloc-tcache-leak.patch
|
||||
# PATCH-FIX-UPSTREAM aarch64: Optimized implementation of memcpy/memmove for Qualcomm Falkor
|
||||
Patch1015: falkor-memcpy-memmove.patch
|
||||
# PATCH-FIX-UPSTREAM aarch64: Fix glibc.tune.cpu tunable handling
|
||||
Patch1016: aarch64-cpu-features.patch
|
||||
# PATCH-FIX-UPSTREAM nss_files: Avoid large buffers with many host addresses (BZ #22078)
|
||||
Patch1017: nss-files-large-buffers.patch
|
||||
# PATCH-FIX-UPSTREAM sysconf: Fix missing definition of UIO_MAXIOV on Linux (BZ #22321)
|
||||
Patch1018: sysconf-uio-maxiov.patch
|
||||
# PATCH-FIX-UPSTREAM glob: Fix buffer overflows (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, BZ #22320, BZ #22325, BZ #22332)
|
||||
Patch1019: glob-tilde-overflow.patch
|
||||
# PATCH-FIX-UPSTREAM x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265)
|
||||
Patch1020: dl-runtime-resolve-xsave.patch
|
||||
|
||||
###
|
||||
# Patches awaiting upstream approval
|
||||
@ -525,6 +539,13 @@ rm nscd/s-stamp
|
||||
%patch1011 -p1
|
||||
%patch1012 -p1
|
||||
%patch1013 -p1
|
||||
%patch1014 -p1
|
||||
%patch1015 -p1
|
||||
%patch1016 -p1
|
||||
%patch1017 -p1
|
||||
%patch1018 -p1
|
||||
%patch1019 -p1
|
||||
%patch1020 -p1
|
||||
|
||||
%patch2000 -p1
|
||||
%patch2001 -p1
|
||||
|
2244
glob-tilde-overflow.patch
Normal file
2244
glob-tilde-overflow.patch
Normal file
File diff suppressed because it is too large
Load Diff
179
malloc-tcache-leak.patch
Normal file
179
malloc-tcache-leak.patch
Normal file
@ -0,0 +1,179 @@
|
||||
2017-10-06 Carlos O'Donell <carlos@redhat.com>
|
||||
|
||||
[BZ #22111]
|
||||
* malloc/malloc.c (tcache_shutting_down): Use bool type.
|
||||
(tcache_thread_freeres): Set tcache_shutting_down before
|
||||
freeing the tcache.
|
||||
* malloc/Makefile (tests): Add tst-malloc-tcache-leak.
|
||||
* malloc/tst-malloc-tcache-leak.c: New file.
|
||||
|
||||
Index: glibc-2.26/malloc/Makefile
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/malloc/Makefile
|
||||
+++ glibc-2.26/malloc/Makefile
|
||||
@@ -34,6 +34,7 @@ tests := mallocbug tst-malloc tst-valloc
|
||||
tst-interpose-nothread \
|
||||
tst-interpose-thread \
|
||||
tst-alloc_buffer \
|
||||
+ tst-malloc-tcache-leak \
|
||||
|
||||
tests-static := \
|
||||
tst-interpose-static-nothread \
|
||||
@@ -242,3 +243,5 @@ tst-dynarray-fail-ENV = MALLOC_TRACE=$(o
|
||||
$(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out
|
||||
$(common-objpfx)malloc/mtrace $(objpfx)tst-dynarray-fail.mtrace > $@; \
|
||||
$(evaluate-test)
|
||||
+
|
||||
+$(objpfx)tst-malloc-tcache-leak: $(shared-thread-library)
|
||||
Index: glibc-2.26/malloc/malloc.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/malloc/malloc.c
|
||||
+++ glibc-2.26/malloc/malloc.c
|
||||
@@ -2940,7 +2940,7 @@ typedef struct tcache_perthread_struct
|
||||
tcache_entry *entries[TCACHE_MAX_BINS];
|
||||
} tcache_perthread_struct;
|
||||
|
||||
-static __thread char tcache_shutting_down = 0;
|
||||
+static __thread bool tcache_shutting_down = false;
|
||||
static __thread tcache_perthread_struct *tcache = NULL;
|
||||
|
||||
/* Caller must ensure that we know tc_idx is valid and there's room
|
||||
@@ -2977,8 +2977,12 @@ tcache_thread_freeres (void)
|
||||
if (!tcache)
|
||||
return;
|
||||
|
||||
+ /* Disable the tcache and prevent it from being reinitialized. */
|
||||
tcache = NULL;
|
||||
+ tcache_shutting_down = true;
|
||||
|
||||
+ /* Free all of the entries and the tcache itself back to the arena
|
||||
+ heap for coalescing. */
|
||||
for (i = 0; i < TCACHE_MAX_BINS; ++i)
|
||||
{
|
||||
while (tcache_tmp->entries[i])
|
||||
@@ -2990,8 +2994,6 @@ tcache_thread_freeres (void)
|
||||
}
|
||||
|
||||
__libc_free (tcache_tmp);
|
||||
-
|
||||
- tcache_shutting_down = 1;
|
||||
}
|
||||
text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
|
||||
|
||||
Index: glibc-2.26/malloc/tst-malloc-tcache-leak.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.26/malloc/tst-malloc-tcache-leak.c
|
||||
@@ -0,0 +1,112 @@
|
||||
+/* Bug 22111: Test that threads do not leak their per thread cache.
|
||||
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* The point of this test is to start and exit a large number of
|
||||
+ threads, while at the same time looking to see if the used
|
||||
+ memory grows with each round of threads run. If the memory
|
||||
+ grows above some linear bound we declare the test failed and
|
||||
+ that the malloc implementation is leaking memory with each
|
||||
+ thread. This is a good indicator that the thread local cache
|
||||
+ is leaking chunks. */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <malloc.h>
|
||||
+#include <pthread.h>
|
||||
+#include <assert.h>
|
||||
+
|
||||
+#include <support/check.h>
|
||||
+#include <support/support.h>
|
||||
+#include <support/xthread.h>
|
||||
+
|
||||
+void *
|
||||
+worker (void *data)
|
||||
+{
|
||||
+ void *ret;
|
||||
+ /* Allocate an arbitrary amount of memory that is known to fit into
|
||||
+ the thread local cache (tcache). If we have at least 64 bins
|
||||
+ (default e.g. TCACHE_MAX_BINS) we should be able to allocate 32
|
||||
+ bytes and force malloc to fill the tcache. We are assuming tcache
|
||||
+ init happens at the first small alloc, but it might in the future
|
||||
+ be deferred to some other point. Therefore to future proof this
|
||||
+ test we include a full alloc/free/alloc cycle for the thread. We
|
||||
+ need a compiler barrier to avoid the removal of the useless
|
||||
+ alloc/free. We send some memory back to main to have the memory
|
||||
+ freed after the thread dies, as just another check that the chunks
|
||||
+ that were previously in the tcache are still OK to free after
|
||||
+ thread death. */
|
||||
+ ret = xmalloc (32);
|
||||
+ __asm__ volatile ("" ::: "memory");
|
||||
+ free (ret);
|
||||
+ return (void *) xmalloc (32);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+do_test (void)
|
||||
+{
|
||||
+ pthread_t *thread;
|
||||
+ struct mallinfo info_before, info_after;
|
||||
+ void *retval;
|
||||
+
|
||||
+ /* This is an arbitrary choice. We choose a total of THREADS
|
||||
+ threads created and joined. This gives us enough iterations to
|
||||
+ show a leak. */
|
||||
+ int threads = 100000;
|
||||
+
|
||||
+ /* Avoid there being 0 malloc'd data at this point by allocating the
|
||||
+ pthread_t required to run the test. */
|
||||
+ thread = (pthread_t *) xcalloc (1, sizeof (pthread_t));
|
||||
+
|
||||
+ info_before = mallinfo ();
|
||||
+
|
||||
+ assert (info_before.uordblks != 0);
|
||||
+
|
||||
+ printf ("INFO: %d (bytes) are in use before starting threads.\n",
|
||||
+ info_before.uordblks);
|
||||
+
|
||||
+ for (int loop = 0; loop < threads; loop++)
|
||||
+ {
|
||||
+ *thread = xpthread_create (NULL, worker, NULL);
|
||||
+ retval = xpthread_join (*thread);
|
||||
+ free (retval);
|
||||
+ }
|
||||
+
|
||||
+ info_after = mallinfo ();
|
||||
+ printf ("INFO: %d (bytes) are in use after all threads joined.\n",
|
||||
+ info_after.uordblks);
|
||||
+
|
||||
+ /* We need to compare the memory in use before and the memory in use
|
||||
+ after starting and joining THREADS threads. We almost always grow
|
||||
+ memory slightly, but not much. Consider that if even 1-byte leaked
|
||||
+ per thread we'd have THREADS bytes of additional memory, and in
|
||||
+ general the in-use at the start of main is quite low. We will
|
||||
+ always leak a full malloc chunk, and never just 1-byte, therefore
|
||||
+ anything above "+ threads" from the start (constant offset) is a
|
||||
+ leak. Obviously this assumes no thread-related malloc'd internal
|
||||
+ libc data structures persist beyond the thread death, and any that
|
||||
+ did would limit the number of times you could call pthread_create,
|
||||
+ which is a QoI we'd want to detect and fix. */
|
||||
+ if (info_after.uordblks > (info_before.uordblks + threads))
|
||||
+ FAIL_EXIT1 ("Memory usage after threads is too high.\n");
|
||||
+
|
||||
+ /* Did not detect excessive memory usage. */
|
||||
+ free (thread);
|
||||
+ exit (0);
|
||||
+}
|
||||
+
|
||||
+#include <support/test-driver.c>
|
@ -1,3 +1,18 @@
|
||||
2017-10-17 Romain Naour <romain.naour@gmail.com> (tiny change)
|
||||
|
||||
[BZ #22296]
|
||||
* math/math.h: Let signbit use the builtin in C++ mode with gcc
|
||||
< 6.x
|
||||
|
||||
2017-10-03 Gabriel F. T. Gomes <gabriel@inconstante.eti.br>
|
||||
|
||||
[BZ #22235]
|
||||
* sysdeps/ieee754/ldbl-96/bits/iscanonical.h (iscanonical):
|
||||
Provide a C++ implementation based on function overloading,
|
||||
rather than using __MATH_TG, which uses C-only builtins.
|
||||
* sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h (iscanonical):
|
||||
Likewise.
|
||||
|
||||
2017-09-22 Gabriel F. T. Gomes <gabriel@inconstante.eti.br>
|
||||
|
||||
[BZ #22146]
|
||||
@ -56,7 +71,23 @@ Index: glibc-2.26/math/math.h
|
||||
# define fpclassify(x) __builtin_fpclassify (FP_NAN, FP_INFINITE, \
|
||||
FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
|
||||
# else
|
||||
@@ -442,8 +448,12 @@ enum
|
||||
@@ -412,6 +418,15 @@ enum
|
||||
/* Return nonzero value if sign of X is negative. */
|
||||
# if __GNUC_PREREQ (6,0)
|
||||
# define signbit(x) __builtin_signbit (x)
|
||||
+# elif defined __cplusplus
|
||||
+ /* In C++ mode, __MATH_TG cannot be used, because it relies on
|
||||
+ __builtin_types_compatible_p, which is a C-only builtin.
|
||||
+ The check for __cplusplus allows the use of the builtin instead of
|
||||
+ __MATH_TG. This is provided for libstdc++, only to let its configure
|
||||
+ test work. No further use of this definition of signbit is expected
|
||||
+ in C++ mode, since libstdc++ provides its own version of signbit
|
||||
+ in cmath (which undefines signbit). */
|
||||
+# define signbit(x) __builtin_signbitl (x)
|
||||
# elif __GNUC_PREREQ (4,0)
|
||||
# define signbit(x) __MATH_TG ((x), __builtin_signbit, (x))
|
||||
# else
|
||||
@@ -442,8 +457,12 @@ enum
|
||||
|
||||
/* Return nonzero value if X is positive or negative infinity. */
|
||||
# if __HAVE_DISTINCT_FLOAT128 && !__GNUC_PREREQ (7,0) \
|
||||
@ -71,7 +102,7 @@ Index: glibc-2.26/math/math.h
|
||||
# define isinf(x) \
|
||||
(__builtin_types_compatible_p (__typeof (x), _Float128) \
|
||||
? __isinff128 (x) : __builtin_isinf_sign (x))
|
||||
@@ -470,7 +480,32 @@ enum
|
||||
@@ -470,7 +489,32 @@ enum
|
||||
# include <bits/iscanonical.h>
|
||||
|
||||
/* Return nonzero value if X is a signaling NaN. */
|
||||
@ -105,7 +136,7 @@ Index: glibc-2.26/math/math.h
|
||||
|
||||
/* Return nonzero value if X is subnormal. */
|
||||
# define issubnormal(x) (fpclassify (x) == FP_SUBNORMAL)
|
||||
@@ -484,15 +519,40 @@ enum
|
||||
@@ -484,15 +528,40 @@ enum
|
||||
# endif
|
||||
# else /* __cplusplus */
|
||||
extern "C++" {
|
||||
@ -182,3 +213,59 @@ Index: glibc-2.26/misc/sys/cdefs.h
|
||||
# define __HAVE_GENERIC_SELECTION 1
|
||||
#else
|
||||
# define __HAVE_GENERIC_SELECTION 0
|
||||
Index: glibc-2.26/sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h
|
||||
+++ glibc-2.26/sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h
|
||||
@@ -37,5 +37,22 @@ extern int __iscanonicall (long double _
|
||||
conversion, before being discarded; in IBM long double, there are
|
||||
encodings that are not consistently handled as corresponding to any
|
||||
particular value of the type, and we return 0 for those. */
|
||||
-# define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
|
||||
-#endif
|
||||
+# ifndef __cplusplus
|
||||
+# define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
|
||||
+# else
|
||||
+/* In C++ mode, __MATH_TG cannot be used, because it relies on
|
||||
+ __builtin_types_compatible_p, which is a C-only builtin. On the
|
||||
+ other hand, overloading provides the means to distinguish between
|
||||
+ the floating-point types. The overloading resolution will match
|
||||
+ the correct parameter (regardless of type qualifiers (i.e.: const
|
||||
+ and volatile)). */
|
||||
+extern "C++" {
|
||||
+inline int iscanonical (float __val) { return __iscanonicalf (__val); }
|
||||
+inline int iscanonical (double __val) { return __iscanonical (__val); }
|
||||
+inline int iscanonical (long double __val) { return __iscanonicall (__val); }
|
||||
+# if __HAVE_DISTINCT_FLOAT128
|
||||
+inline int iscanonical (_Float128 __val) { return __iscanonicalf128 (__val); }
|
||||
+# endif
|
||||
+}
|
||||
+# endif /* __cplusplus */
|
||||
+#endif /* __NO_LONG_DOUBLE_MATH */
|
||||
Index: glibc-2.26/sysdeps/ieee754/ldbl-96/bits/iscanonical.h
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/ieee754/ldbl-96/bits/iscanonical.h
|
||||
+++ glibc-2.26/sysdeps/ieee754/ldbl-96/bits/iscanonical.h
|
||||
@@ -34,4 +34,21 @@ extern int __iscanonicall (long double _
|
||||
conversion, before being discarded; in extended precision, there
|
||||
are encodings that are not consistently handled as corresponding to
|
||||
any particular value of the type, and we return 0 for those. */
|
||||
-#define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
|
||||
+#ifndef __cplusplus
|
||||
+# define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
|
||||
+#else
|
||||
+/* In C++ mode, __MATH_TG cannot be used, because it relies on
|
||||
+ __builtin_types_compatible_p, which is a C-only builtin. On the
|
||||
+ other hand, overloading provides the means to distinguish between
|
||||
+ the floating-point types. The overloading resolution will match
|
||||
+ the correct parameter (regardless of type qualifiers (i.e.: const
|
||||
+ and volatile)). */
|
||||
+extern "C++" {
|
||||
+inline int iscanonical (float __val) { return __iscanonicalf (__val); }
|
||||
+inline int iscanonical (double __val) { return __iscanonical (__val); }
|
||||
+inline int iscanonical (long double __val) { return __iscanonicall (__val); }
|
||||
+# if __HAVE_DISTINCT_FLOAT128
|
||||
+inline int iscanonical (_Float128 __val) { return __iscanonicalf128 (__val); }
|
||||
+# endif
|
||||
+}
|
||||
+#endif /* __cplusplus */
|
||||
|
831
nss-files-large-buffers.patch
Normal file
831
nss-files-large-buffers.patch
Normal file
@ -0,0 +1,831 @@
|
||||
2017-10-11 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
[BZ #22078]
|
||||
Avoid large NSS buffers with many addresses, aliases.
|
||||
* nss/nss_files/files-hosts.c (gethostbyname3_multi): Rewrite
|
||||
using dynarrays and struct alloc_buffer.
|
||||
* nss/Makefile (tests): Add tst-nss-files-hosts-multi.
|
||||
(tst-nss-files-hosts-multi): Link with -ldl.
|
||||
* nss/tst-nss-files-hosts-multi.c: New file.
|
||||
|
||||
2017-10-11 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
[BZ #18023]
|
||||
* nss/nss_files/files-hosts.c (gethostbyname3_multi): Use struct
|
||||
scratch_buffer. Eliminate gotos.
|
||||
|
||||
2017-10-10 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
* nss/nss_files/files-hosts.c (gethostbyname3_multi): New
|
||||
function.
|
||||
(_nss_files_gethostbyname3_r): Call it.
|
||||
|
||||
Index: glibc-2.26/nss/Makefile
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/nss/Makefile
|
||||
+++ glibc-2.26/nss/Makefile
|
||||
@@ -61,6 +61,7 @@ xtests = bug-erange
|
||||
# Tests which need libdl
|
||||
ifeq (yes,$(build-shared))
|
||||
tests += tst-nss-files-hosts-erange
|
||||
+tests += tst-nss-files-hosts-multi
|
||||
endif
|
||||
|
||||
# If we have a thread library then we can test cancellation against
|
||||
@@ -165,3 +166,4 @@ $(objpfx)tst-cancel-getpwuid_r: $(shared
|
||||
endif
|
||||
|
||||
$(objpfx)tst-nss-files-hosts-erange: $(libdl)
|
||||
+$(objpfx)tst-nss-files-hosts-multi: $(libdl)
|
||||
Index: glibc-2.26/nss/nss_files/files-hosts.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/nss/nss_files/files-hosts.c
|
||||
+++ glibc-2.26/nss/nss_files/files-hosts.c
|
||||
@@ -22,6 +22,8 @@
|
||||
#include <arpa/nameser.h>
|
||||
#include <netdb.h>
|
||||
#include <resolv/resolv-internal.h>
|
||||
+#include <scratch_buffer.h>
|
||||
+#include <alloc_buffer.h>
|
||||
|
||||
|
||||
/* Get implementation for some internal functions. */
|
||||
@@ -110,228 +112,250 @@ DB_LOOKUP (hostbyaddr, ,,,
|
||||
}, const void *addr, socklen_t len, int af)
|
||||
#undef EXTRA_ARGS_VALUE
|
||||
|
||||
-enum nss_status
|
||||
-_nss_files_gethostbyname3_r (const char *name, int af, struct hostent *result,
|
||||
- char *buffer, size_t buflen, int *errnop,
|
||||
- int *herrnop, int32_t *ttlp, char **canonp)
|
||||
+/* Type of the address and alias arrays. */
|
||||
+#define DYNARRAY_STRUCT array
|
||||
+#define DYNARRAY_ELEMENT char *
|
||||
+#define DYNARRAY_PREFIX array_
|
||||
+#include <malloc/dynarray-skeleton.c>
|
||||
+
|
||||
+static enum nss_status
|
||||
+gethostbyname3_multi (FILE * stream, const char *name, int af,
|
||||
+ struct hostent *result, char *buffer, size_t buflen,
|
||||
+ int *errnop, int *herrnop, int flags)
|
||||
{
|
||||
- FILE *stream = NULL;
|
||||
- uintptr_t pad = -(uintptr_t) buffer % __alignof__ (struct hostent_data);
|
||||
- buffer += pad;
|
||||
- buflen = buflen > pad ? buflen - pad : 0;
|
||||
+ assert (af == AF_INET || af == AF_INET6);
|
||||
|
||||
- /* Open file. */
|
||||
- enum nss_status status = internal_setent (&stream);
|
||||
+ /* We have to get all host entries from the file. */
|
||||
+ struct scratch_buffer tmp_buffer;
|
||||
+ scratch_buffer_init (&tmp_buffer);
|
||||
+ struct hostent tmp_result_buf;
|
||||
+ struct array addresses;
|
||||
+ array_init (&addresses);
|
||||
+ struct array aliases;
|
||||
+ array_init (&aliases);
|
||||
+ enum nss_status status;
|
||||
+
|
||||
+ /* Preserve the addresses and aliases encountered so far. */
|
||||
+ for (size_t i = 0; result->h_addr_list[i] != NULL; ++i)
|
||||
+ array_add (&addresses, result->h_addr_list[i]);
|
||||
+ for (size_t i = 0; result->h_aliases[i] != NULL; ++i)
|
||||
+ array_add (&aliases, result->h_aliases[i]);
|
||||
+
|
||||
+ /* The output buffer re-uses now-unused space at the end of the
|
||||
+ buffer, starting with the aliases array. It comes last in the
|
||||
+ data produced by internal_getent. (The alias names themselves
|
||||
+ are still located in the line read in internal_getent, which is
|
||||
+ stored at the beginning of the buffer.) */
|
||||
+ struct alloc_buffer outbuf;
|
||||
+ {
|
||||
+ char *bufferend = (char *) result->h_aliases;
|
||||
+ outbuf = alloc_buffer_create (bufferend, buffer + buflen - bufferend);
|
||||
+ }
|
||||
|
||||
- if (status == NSS_STATUS_SUCCESS)
|
||||
+ while (true)
|
||||
{
|
||||
- /* XXX Is using _res to determine whether we want to convert IPv4
|
||||
- addresses to IPv6 addresses really the right thing to do? */
|
||||
- int flags = (res_use_inet6 () ? AI_V4MAPPED : 0);
|
||||
-
|
||||
- while ((status = internal_getent (stream, result, buffer, buflen, errnop,
|
||||
- herrnop, af, flags))
|
||||
- == NSS_STATUS_SUCCESS)
|
||||
+ status = internal_getent (stream, &tmp_result_buf, tmp_buffer.data,
|
||||
+ tmp_buffer.length, errnop, herrnop, af,
|
||||
+ flags);
|
||||
+ /* Enlarge the buffer if necessary. */
|
||||
+ if (status == NSS_STATUS_TRYAGAIN && *herrnop == NETDB_INTERNAL
|
||||
+ && *errnop == ERANGE)
|
||||
{
|
||||
- LOOKUP_NAME_CASE (h_name, h_aliases)
|
||||
+ if (!scratch_buffer_grow (&tmp_buffer))
|
||||
+ {
|
||||
+ *errnop = ENOMEM;
|
||||
+ /* *herrnop and status already have the right value. */
|
||||
+ break;
|
||||
+ }
|
||||
+ /* Loop around and retry with a larger buffer. */
|
||||
}
|
||||
-
|
||||
- if (status == NSS_STATUS_SUCCESS
|
||||
- && _res_hconf.flags & HCONF_FLAG_MULTI)
|
||||
+ else if (status == NSS_STATUS_SUCCESS)
|
||||
{
|
||||
- /* We have to get all host entries from the file. */
|
||||
- size_t tmp_buflen = MIN (buflen, 4096);
|
||||
- char tmp_buffer_stack[tmp_buflen]
|
||||
- __attribute__ ((__aligned__ (__alignof__ (struct hostent_data))));
|
||||
- char *tmp_buffer = tmp_buffer_stack;
|
||||
- struct hostent tmp_result_buf;
|
||||
- int naddrs = 1;
|
||||
- int naliases = 0;
|
||||
- char *bufferend;
|
||||
- bool tmp_buffer_malloced = false;
|
||||
-
|
||||
- while (result->h_aliases[naliases] != NULL)
|
||||
- ++naliases;
|
||||
-
|
||||
- bufferend = (char *) &result->h_aliases[naliases + 1];
|
||||
-
|
||||
- again:
|
||||
- while ((status = internal_getent (stream, &tmp_result_buf, tmp_buffer,
|
||||
- tmp_buflen, errnop, herrnop, af,
|
||||
- flags))
|
||||
- == NSS_STATUS_SUCCESS)
|
||||
+ /* A line was read. Check that it matches the search
|
||||
+ criteria. */
|
||||
+
|
||||
+ int matches = 1;
|
||||
+ struct hostent *old_result = result;
|
||||
+ result = &tmp_result_buf;
|
||||
+ /* The following piece is a bit clumsy but we want to use
|
||||
+ the `LOOKUP_NAME_CASE' value. The optimizer should do
|
||||
+ its job. */
|
||||
+ do
|
||||
{
|
||||
- int matches = 1;
|
||||
- struct hostent *old_result = result;
|
||||
- result = &tmp_result_buf;
|
||||
- /* The following piece is a bit clumsy but we want to use the
|
||||
- `LOOKUP_NAME_CASE' value. The optimizer should do its
|
||||
- job. */
|
||||
- do
|
||||
- {
|
||||
- LOOKUP_NAME_CASE (h_name, h_aliases)
|
||||
- result = old_result;
|
||||
- }
|
||||
- while ((matches = 0));
|
||||
+ LOOKUP_NAME_CASE (h_name, h_aliases)
|
||||
+ result = old_result;
|
||||
+ }
|
||||
+ while ((matches = 0));
|
||||
|
||||
- if (matches)
|
||||
+ /* If the line matches, we need to copy the addresses and
|
||||
+ aliases, so that we can reuse tmp_buffer for the next
|
||||
+ line. */
|
||||
+ if (matches)
|
||||
+ {
|
||||
+ /* Record the addresses. */
|
||||
+ for (size_t i = 0; tmp_result_buf.h_addr_list[i] != NULL; ++i)
|
||||
{
|
||||
- /* We could be very clever and try to recycle a few bytes
|
||||
- in the buffer instead of generating new arrays. But
|
||||
- we are not doing this here since it's more work than
|
||||
- it's worth. Simply let the user provide a bit bigger
|
||||
- buffer. */
|
||||
- char **new_h_addr_list;
|
||||
- char **new_h_aliases;
|
||||
- int newaliases = 0;
|
||||
- size_t newstrlen = 0;
|
||||
- int cnt;
|
||||
-
|
||||
- /* Count the new aliases and the length of the strings. */
|
||||
- while (tmp_result_buf.h_aliases[newaliases] != NULL)
|
||||
+ /* Allocate the target space in the output buffer,
|
||||
+ depending on the address family. */
|
||||
+ void *target;
|
||||
+ if (af == AF_INET)
|
||||
{
|
||||
- char *cp = tmp_result_buf.h_aliases[newaliases];
|
||||
- ++newaliases;
|
||||
- newstrlen += strlen (cp) + 1;
|
||||
+ assert (tmp_result_buf.h_length == 4);
|
||||
+ target = alloc_buffer_alloc (&outbuf, struct in_addr);
|
||||
}
|
||||
- /* If the real name is different add it also to the
|
||||
- aliases. This means that there is a duplication
|
||||
- in the alias list but this is really the user's
|
||||
- problem. */
|
||||
- if (strcmp (old_result->h_name,
|
||||
- tmp_result_buf.h_name) != 0)
|
||||
+ else if (af == AF_INET6)
|
||||
{
|
||||
- ++newaliases;
|
||||
- newstrlen += strlen (tmp_result_buf.h_name) + 1;
|
||||
+ assert (tmp_result_buf.h_length == 16);
|
||||
+ target = alloc_buffer_alloc (&outbuf, struct in6_addr);
|
||||
}
|
||||
+ else
|
||||
+ __builtin_unreachable ();
|
||||
|
||||
- /* Make sure bufferend is aligned. */
|
||||
- assert ((bufferend - (char *) 0) % sizeof (char *) == 0);
|
||||
-
|
||||
- /* Now we can check whether the buffer is large enough.
|
||||
- 16 is the maximal size of the IP address. */
|
||||
- if (bufferend + 16 + (naddrs + 2) * sizeof (char *)
|
||||
- + roundup (newstrlen, sizeof (char *))
|
||||
- + (naliases + newaliases + 1) * sizeof (char *)
|
||||
- >= buffer + buflen)
|
||||
+ if (target == NULL)
|
||||
{
|
||||
+ /* Request a larger output buffer. */
|
||||
*errnop = ERANGE;
|
||||
*herrnop = NETDB_INTERNAL;
|
||||
status = NSS_STATUS_TRYAGAIN;
|
||||
- goto out;
|
||||
- }
|
||||
-
|
||||
- new_h_addr_list =
|
||||
- (char **) (bufferend
|
||||
- + roundup (newstrlen, sizeof (char *))
|
||||
- + 16);
|
||||
- new_h_aliases =
|
||||
- (char **) ((char *) new_h_addr_list
|
||||
- + (naddrs + 2) * sizeof (char *));
|
||||
-
|
||||
- /* Copy the old data in the new arrays. */
|
||||
- for (cnt = 0; cnt < naddrs; ++cnt)
|
||||
- new_h_addr_list[cnt] = old_result->h_addr_list[cnt];
|
||||
-
|
||||
- for (cnt = 0; cnt < naliases; ++cnt)
|
||||
- new_h_aliases[cnt] = old_result->h_aliases[cnt];
|
||||
-
|
||||
- /* Store the new strings. */
|
||||
- cnt = 0;
|
||||
- while (tmp_result_buf.h_aliases[cnt] != NULL)
|
||||
- {
|
||||
- new_h_aliases[naliases++] = bufferend;
|
||||
- bufferend = (__stpcpy (bufferend,
|
||||
- tmp_result_buf.h_aliases[cnt])
|
||||
- + 1);
|
||||
- ++cnt;
|
||||
+ break;
|
||||
}
|
||||
-
|
||||
- if (cnt < newaliases)
|
||||
- {
|
||||
- new_h_aliases[naliases++] = bufferend;
|
||||
- bufferend = __stpcpy (bufferend,
|
||||
- tmp_result_buf.h_name) + 1;
|
||||
- }
|
||||
-
|
||||
- /* Final NULL pointer. */
|
||||
- new_h_aliases[naliases] = NULL;
|
||||
-
|
||||
- /* Round up the buffer end address. */
|
||||
- bufferend += (sizeof (char *)
|
||||
- - ((bufferend - (char *) 0)
|
||||
- % sizeof (char *))) % sizeof (char *);
|
||||
-
|
||||
- /* Now the new address. */
|
||||
- new_h_addr_list[naddrs++] =
|
||||
- memcpy (bufferend, tmp_result_buf.h_addr,
|
||||
- tmp_result_buf.h_length);
|
||||
-
|
||||
- /* Also here a final NULL pointer. */
|
||||
- new_h_addr_list[naddrs] = NULL;
|
||||
-
|
||||
- /* Store the new array pointers. */
|
||||
- old_result->h_aliases = new_h_aliases;
|
||||
- old_result->h_addr_list = new_h_addr_list;
|
||||
-
|
||||
- /* Compute the new buffer end. */
|
||||
- bufferend = (char *) &new_h_aliases[naliases + 1];
|
||||
- assert (bufferend <= buffer + buflen);
|
||||
-
|
||||
- result = old_result;
|
||||
+ memcpy (target, tmp_result_buf.h_addr_list[i],
|
||||
+ tmp_result_buf.h_length);
|
||||
+ array_add (&addresses, target);
|
||||
}
|
||||
- }
|
||||
|
||||
- if (status == NSS_STATUS_TRYAGAIN)
|
||||
- {
|
||||
- size_t newsize = 2 * tmp_buflen;
|
||||
- if (tmp_buffer_malloced)
|
||||
+ /* Record the aliases. */
|
||||
+ for (size_t i = 0; tmp_result_buf.h_aliases[i] != NULL; ++i)
|
||||
{
|
||||
- char *newp = realloc (tmp_buffer, newsize);
|
||||
- if (newp != NULL)
|
||||
- {
|
||||
- assert ((((uintptr_t) newp)
|
||||
- & (__alignof__ (struct hostent_data) - 1))
|
||||
- == 0);
|
||||
- tmp_buffer = newp;
|
||||
- tmp_buflen = newsize;
|
||||
- goto again;
|
||||
- }
|
||||
+ char *alias = tmp_result_buf.h_aliases[i];
|
||||
+ array_add (&aliases,
|
||||
+ alloc_buffer_copy_string (&outbuf, alias));
|
||||
}
|
||||
- else if (!__libc_use_alloca (buflen + newsize))
|
||||
+
|
||||
+ /* If the real name is different, add it also to the
|
||||
+ aliases. This means that there is a duplication in
|
||||
+ the alias list but this is really the user's
|
||||
+ problem. */
|
||||
+ {
|
||||
+ char *new_name = tmp_result_buf.h_name;
|
||||
+ if (strcmp (old_result->h_name, new_name) != 0)
|
||||
+ array_add (&aliases,
|
||||
+ alloc_buffer_copy_string (&outbuf, new_name));
|
||||
+ }
|
||||
+
|
||||
+ /* Report memory allocation failures during the
|
||||
+ expansion of the temporary arrays. */
|
||||
+ if (array_has_failed (&addresses) || array_has_failed (&aliases))
|
||||
{
|
||||
- tmp_buffer = malloc (newsize);
|
||||
- if (tmp_buffer != NULL)
|
||||
- {
|
||||
- assert ((((uintptr_t) tmp_buffer)
|
||||
- & (__alignof__ (struct hostent_data) - 1))
|
||||
- == 0);
|
||||
- tmp_buffer_malloced = true;
|
||||
- tmp_buflen = newsize;
|
||||
- goto again;
|
||||
- }
|
||||
+ *errnop = ENOMEM;
|
||||
+ *herrnop = NETDB_INTERNAL;
|
||||
+ status = NSS_STATUS_UNAVAIL;
|
||||
+ break;
|
||||
}
|
||||
- else
|
||||
+
|
||||
+ /* Request a larger output buffer if we ran out of room. */
|
||||
+ if (alloc_buffer_has_failed (&outbuf))
|
||||
{
|
||||
- tmp_buffer
|
||||
- = extend_alloca (tmp_buffer, tmp_buflen,
|
||||
- newsize
|
||||
- + __alignof__ (struct hostent_data));
|
||||
- tmp_buffer = (char *) (((uintptr_t) tmp_buffer
|
||||
- + __alignof__ (struct hostent_data)
|
||||
- - 1)
|
||||
- & ~(__alignof__ (struct hostent_data)
|
||||
- - 1));
|
||||
- goto again;
|
||||
+ *errnop = ERANGE;
|
||||
+ *herrnop = NETDB_INTERNAL;
|
||||
+ status = NSS_STATUS_TRYAGAIN;
|
||||
+ break;
|
||||
}
|
||||
- }
|
||||
- else
|
||||
- status = NSS_STATUS_SUCCESS;
|
||||
- out:
|
||||
- if (tmp_buffer_malloced)
|
||||
- free (tmp_buffer);
|
||||
+
|
||||
+ result = old_result;
|
||||
+ } /* If match was found. */
|
||||
+
|
||||
+ /* If no match is found, loop around and fetch another
|
||||
+ line. */
|
||||
+
|
||||
+ } /* status == NSS_STATUS_SUCCESS. */
|
||||
+ else
|
||||
+ /* internal_getent returned an error. */
|
||||
+ break;
|
||||
+ } /* while (true) */
|
||||
+
|
||||
+ /* Propagate the NSS_STATUS_TRYAGAIN error to the caller. It means
|
||||
+ that we may not have loaded the complete result.
|
||||
+ NSS_STATUS_NOTFOUND, however, means that we reached the end of
|
||||
+ the file successfully. */
|
||||
+ if (status != NSS_STATUS_TRYAGAIN)
|
||||
+ status = NSS_STATUS_SUCCESS;
|
||||
+
|
||||
+ if (status == NSS_STATUS_SUCCESS)
|
||||
+ {
|
||||
+ /* Copy the address and alias arrays into the output buffer and
|
||||
+ add NULL terminators. The pointed-to elements were directly
|
||||
+ written into the output buffer above and do not need to be
|
||||
+ copied again. */
|
||||
+ size_t addresses_count = array_size (&addresses);
|
||||
+ size_t aliases_count = array_size (&aliases);
|
||||
+ char **out_addresses = alloc_buffer_alloc_array
|
||||
+ (&outbuf, char *, addresses_count + 1);
|
||||
+ char **out_aliases = alloc_buffer_alloc_array
|
||||
+ (&outbuf, char *, aliases_count + 1);
|
||||
+ if (out_addresses == NULL || out_aliases == NULL)
|
||||
+ {
|
||||
+ /* The output buffer is not large enough. */
|
||||
+ *errnop = ERANGE;
|
||||
+ *herrnop = NETDB_INTERNAL;
|
||||
+ status = NSS_STATUS_TRYAGAIN;
|
||||
+ /* Fall through to function exit. */
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Everything is allocated in place. Make the copies and
|
||||
+ adjust the array pointers. */
|
||||
+ memcpy (out_addresses, array_begin (&addresses),
|
||||
+ addresses_count * sizeof (char *));
|
||||
+ out_addresses[addresses_count] = NULL;
|
||||
+ memcpy (out_aliases, array_begin (&aliases),
|
||||
+ aliases_count * sizeof (char *));
|
||||
+ out_aliases[aliases_count] = NULL;
|
||||
+
|
||||
+ result->h_addr_list = out_addresses;
|
||||
+ result->h_aliases = out_aliases;
|
||||
+
|
||||
+ status = NSS_STATUS_SUCCESS;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ scratch_buffer_free (&tmp_buffer);
|
||||
+ array_free (&addresses);
|
||||
+ array_free (&aliases);
|
||||
+ return status;
|
||||
+}
|
||||
+
|
||||
+enum nss_status
|
||||
+_nss_files_gethostbyname3_r (const char *name, int af, struct hostent *result,
|
||||
+ char *buffer, size_t buflen, int *errnop,
|
||||
+ int *herrnop, int32_t *ttlp, char **canonp)
|
||||
+{
|
||||
+ FILE *stream = NULL;
|
||||
+ uintptr_t pad = -(uintptr_t) buffer % __alignof__ (struct hostent_data);
|
||||
+ buffer += pad;
|
||||
+ buflen = buflen > pad ? buflen - pad : 0;
|
||||
+
|
||||
+ /* Open file. */
|
||||
+ enum nss_status status = internal_setent (&stream);
|
||||
+
|
||||
+ if (status == NSS_STATUS_SUCCESS)
|
||||
+ {
|
||||
+ /* XXX Is using _res to determine whether we want to convert IPv4
|
||||
+ addresses to IPv6 addresses really the right thing to do? */
|
||||
+ int flags = (res_use_inet6 () ? AI_V4MAPPED : 0);
|
||||
+
|
||||
+ while ((status = internal_getent (stream, result, buffer, buflen, errnop,
|
||||
+ herrnop, af, flags))
|
||||
+ == NSS_STATUS_SUCCESS)
|
||||
+ {
|
||||
+ LOOKUP_NAME_CASE (h_name, h_aliases)
|
||||
}
|
||||
|
||||
+ if (status == NSS_STATUS_SUCCESS
|
||||
+ && _res_hconf.flags & HCONF_FLAG_MULTI)
|
||||
+ status = gethostbyname3_multi
|
||||
+ (stream, name, af, result, buffer, buflen, errnop, herrnop, flags);
|
||||
+
|
||||
internal_endent (&stream);
|
||||
}
|
||||
|
||||
Index: glibc-2.26/nss/tst-nss-files-hosts-multi.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.26/nss/tst-nss-files-hosts-multi.c
|
||||
@@ -0,0 +1,331 @@
|
||||
+/* Parse /etc/hosts in multi mode with many addresses/aliases.
|
||||
+ Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <dlfcn.h>
|
||||
+#include <errno.h>
|
||||
+#include <gnu/lib-names.h>
|
||||
+#include <netdb.h>
|
||||
+#include <nss.h>
|
||||
+#include <stdbool.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+#include <support/check.h>
|
||||
+#include <support/check_nss.h>
|
||||
+#include <support/namespace.h>
|
||||
+#include <support/support.h>
|
||||
+#include <support/test-driver.h>
|
||||
+#include <support/test-driver.h>
|
||||
+#include <support/xmemstream.h>
|
||||
+#include <support/xstdio.h>
|
||||
+#include <support/xunistd.h>
|
||||
+#include <sys/resource.h>
|
||||
+
|
||||
+struct support_chroot *chroot_env;
|
||||
+
|
||||
+static void
|
||||
+prepare (int argc, char **argv)
|
||||
+{
|
||||
+ chroot_env = support_chroot_create
|
||||
+ ((struct support_chroot_configuration)
|
||||
+ {
|
||||
+ .resolv_conf = "",
|
||||
+ .hosts = "", /* See write_hosts below. */
|
||||
+ .host_conf = "multi on\n",
|
||||
+ });
|
||||
+}
|
||||
+
|
||||
+/* Create the /etc/hosts file from outside the chroot. */
|
||||
+static void
|
||||
+write_hosts (int count)
|
||||
+{
|
||||
+ TEST_VERIFY (count > 0 && count <= 65535);
|
||||
+ FILE *fp = xfopen (chroot_env->path_hosts, "w");
|
||||
+ fputs ("127.0.0.1 localhost localhost.localdomain\n"
|
||||
+ "::1 localhost localhost.localdomain\n",
|
||||
+ fp);
|
||||
+ for (int i = 0; i < count; ++i)
|
||||
+ {
|
||||
+ fprintf (fp, "10.4.%d.%d www4.example.com\n",
|
||||
+ (i / 256) & 0xff, i & 0xff);
|
||||
+ fprintf (fp, "10.46.%d.%d www.example.com\n",
|
||||
+ (i / 256) & 0xff, i & 0xff);
|
||||
+ fprintf (fp, "192.0.2.1 alias.example.com v4-%d.example.com\n", i);
|
||||
+ fprintf (fp, "2001:db8::6:%x www6.example.com\n", i);
|
||||
+ fprintf (fp, "2001:db8::46:%x www.example.com\n", i);
|
||||
+ fprintf (fp, "2001:db8::1 alias.example.com v6-%d.example.com\n", i);
|
||||
+ }
|
||||
+ xfclose (fp);
|
||||
+}
|
||||
+
|
||||
+/* Parameters of a single test. */
|
||||
+struct test_params
|
||||
+{
|
||||
+ const char *name; /* Name to query. */
|
||||
+ const char *marker; /* Address marker for the name. */
|
||||
+ int count; /* Number of addresses/aliases. */
|
||||
+ int family; /* AF_INET, AF_INET6 or AF_UNSPEC. */
|
||||
+ bool canonname; /* True if AI_CANONNAME should be enabled. */
|
||||
+};
|
||||
+
|
||||
+/* Expected result of gethostbyname/gethostbyname2. */
|
||||
+static char *
|
||||
+expected_ghbn (const struct test_params *params)
|
||||
+{
|
||||
+ TEST_VERIFY (params->family == AF_INET || params->family == AF_INET6);
|
||||
+
|
||||
+ struct xmemstream expected;
|
||||
+ xopen_memstream (&expected);
|
||||
+ if (strcmp (params->name, "alias.example.com") == 0)
|
||||
+ {
|
||||
+ fprintf (expected.out, "name: %s\n", params->name);
|
||||
+ char af;
|
||||
+ if (params->family == AF_INET)
|
||||
+ af = '4';
|
||||
+ else
|
||||
+ af = '6';
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ fprintf (expected.out, "alias: v%c-%d.example.com\n", af, i);
|
||||
+
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ if (params->family == AF_INET)
|
||||
+ fputs ("address: 192.0.2.1\n", expected.out);
|
||||
+ else
|
||||
+ fputs ("address: 2001:db8::1\n", expected.out);
|
||||
+ }
|
||||
+ else /* www/www4/www6 name. */
|
||||
+ {
|
||||
+ bool do_ipv4 = params->family == AF_INET
|
||||
+ && strncmp (params->name, "www6", 4) != 0;
|
||||
+ bool do_ipv6 = params->family == AF_INET6
|
||||
+ && strncmp (params->name, "www4", 4) != 0;
|
||||
+ if (do_ipv4 || do_ipv6)
|
||||
+ {
|
||||
+ fprintf (expected.out, "name: %s\n", params->name);
|
||||
+ if (do_ipv4)
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ fprintf (expected.out, "address: 10.%s.%d.%d\n",
|
||||
+ params->marker, i / 256, i % 256);
|
||||
+ if (do_ipv6)
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ fprintf (expected.out, "address: 2001:db8::%s:%x\n",
|
||||
+ params->marker, i);
|
||||
+ }
|
||||
+ else
|
||||
+ fputs ("error: HOST_NOT_FOUND\n", expected.out);
|
||||
+ }
|
||||
+ xfclose_memstream (&expected);
|
||||
+ return expected.buffer;
|
||||
+}
|
||||
+
|
||||
+/* Expected result of getaddrinfo. */
|
||||
+static char *
|
||||
+expected_gai (const struct test_params *params)
|
||||
+{
|
||||
+ bool do_ipv4 = false;
|
||||
+ bool do_ipv6 = false;
|
||||
+ if (params->family == AF_UNSPEC)
|
||||
+ do_ipv4 = do_ipv6 = true;
|
||||
+ else if (params->family == AF_INET)
|
||||
+ do_ipv4 = true;
|
||||
+ else if (params->family == AF_INET6)
|
||||
+ do_ipv6 = true;
|
||||
+
|
||||
+ struct xmemstream expected;
|
||||
+ xopen_memstream (&expected);
|
||||
+ if (strcmp (params->name, "alias.example.com") == 0)
|
||||
+ {
|
||||
+ if (params->canonname)
|
||||
+ fprintf (expected.out,
|
||||
+ "flags: AI_CANONNAME\n"
|
||||
+ "canonname: %s\n",
|
||||
+ params->name);
|
||||
+
|
||||
+ if (do_ipv4)
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ fputs ("address: STREAM/TCP 192.0.2.1 80\n", expected.out);
|
||||
+ if (do_ipv6)
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ fputs ("address: STREAM/TCP 2001:db8::1 80\n", expected.out);
|
||||
+ }
|
||||
+ else /* www/www4/www6 name. */
|
||||
+ {
|
||||
+ if (strncmp (params->name, "www4", 4) == 0)
|
||||
+ do_ipv6 = false;
|
||||
+ else if (strncmp (params->name, "www6", 4) == 0)
|
||||
+ do_ipv4 = false;
|
||||
+ /* Otherwise, we have www as the name, so we do both. */
|
||||
+
|
||||
+ if (do_ipv4 || do_ipv6)
|
||||
+ {
|
||||
+ if (params->canonname)
|
||||
+ fprintf (expected.out,
|
||||
+ "flags: AI_CANONNAME\n"
|
||||
+ "canonname: %s\n",
|
||||
+ params->name);
|
||||
+
|
||||
+ if (do_ipv4)
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ fprintf (expected.out, "address: STREAM/TCP 10.%s.%d.%d 80\n",
|
||||
+ params->marker, i / 256, i % 256);
|
||||
+ if (do_ipv6)
|
||||
+ for (int i = 0; i < params->count; ++i)
|
||||
+ fprintf (expected.out,
|
||||
+ "address: STREAM/TCP 2001:db8::%s:%x 80\n",
|
||||
+ params->marker, i);
|
||||
+ }
|
||||
+ else
|
||||
+ fputs ("error: Name or service not known\n", expected.out);
|
||||
+ }
|
||||
+ xfclose_memstream (&expected);
|
||||
+ return expected.buffer;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+run_gbhn_gai (struct test_params *params)
|
||||
+{
|
||||
+ char *ctx = xasprintf ("name=%s marker=%s count=%d family=%d",
|
||||
+ params->name, params->marker, params->count,
|
||||
+ params->family);
|
||||
+ if (test_verbose > 0)
|
||||
+ printf ("info: %s\n", ctx);
|
||||
+
|
||||
+ /* Check gethostbyname, gethostbyname2. */
|
||||
+ if (params->family == AF_INET)
|
||||
+ {
|
||||
+ char *expected = expected_ghbn (params);
|
||||
+ check_hostent (ctx, gethostbyname (params->name), expected);
|
||||
+ free (expected);
|
||||
+ }
|
||||
+ if (params->family != AF_UNSPEC)
|
||||
+ {
|
||||
+ char *expected = expected_ghbn (params);
|
||||
+ check_hostent (ctx, gethostbyname2 (params->name, params->family),
|
||||
+ expected);
|
||||
+ free (expected);
|
||||
+ }
|
||||
+
|
||||
+ /* Check getaddrinfo. */
|
||||
+ for (int do_canonical = 0; do_canonical < 2; ++do_canonical)
|
||||
+ {
|
||||
+ params->canonname = do_canonical;
|
||||
+ char *expected = expected_gai (params);
|
||||
+ struct addrinfo hints =
|
||||
+ {
|
||||
+ .ai_family = params->family,
|
||||
+ .ai_socktype = SOCK_STREAM,
|
||||
+ .ai_protocol = IPPROTO_TCP,
|
||||
+ };
|
||||
+ if (do_canonical)
|
||||
+ hints.ai_flags |= AI_CANONNAME;
|
||||
+ struct addrinfo *ai;
|
||||
+ int ret = getaddrinfo (params->name, "80", &hints, &ai);
|
||||
+ check_addrinfo (ctx, ai, ret, expected);
|
||||
+ if (ret == 0)
|
||||
+ freeaddrinfo (ai);
|
||||
+ free (expected);
|
||||
+ }
|
||||
+
|
||||
+ free (ctx);
|
||||
+}
|
||||
+
|
||||
+/* Callback for the subprocess which runs the test in a chroot. */
|
||||
+static void
|
||||
+subprocess (void *closure)
|
||||
+{
|
||||
+ struct test_params *params = closure;
|
||||
+
|
||||
+ xchroot (chroot_env->path_chroot);
|
||||
+
|
||||
+ static const int families[] = { AF_INET, AF_INET6, AF_UNSPEC, -1 };
|
||||
+ static const char *const names[] =
|
||||
+ {
|
||||
+ "www.example.com", "www4.example.com", "www6.example.com",
|
||||
+ "alias.example.com",
|
||||
+ NULL
|
||||
+ };
|
||||
+ static const char *const names_marker[] = { "46", "4", "6", "" };
|
||||
+
|
||||
+ for (int family_idx = 0; families[family_idx] >= 0; ++family_idx)
|
||||
+ {
|
||||
+ params->family = families[family_idx];
|
||||
+ for (int names_idx = 0; names[names_idx] != NULL; ++names_idx)
|
||||
+ {
|
||||
+ params->name = names[names_idx];
|
||||
+ params->marker = names_marker[names_idx];
|
||||
+ run_gbhn_gai (params);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Run the test for a specific number of addresses/aliases. */
|
||||
+static void
|
||||
+run_test (int count)
|
||||
+{
|
||||
+ write_hosts (count);
|
||||
+
|
||||
+ struct test_params params =
|
||||
+ {
|
||||
+ .count = count,
|
||||
+ };
|
||||
+
|
||||
+ support_isolate_in_subprocess (subprocess, ¶ms);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+do_test (void)
|
||||
+{
|
||||
+ support_become_root ();
|
||||
+ if (!support_can_chroot ())
|
||||
+ return EXIT_UNSUPPORTED;
|
||||
+
|
||||
+ /* This test should not use gigabytes of memory. */
|
||||
+ {
|
||||
+ struct rlimit limit;
|
||||
+ if (getrlimit (RLIMIT_AS, &limit) != 0)
|
||||
+ {
|
||||
+ printf ("getrlimit (RLIMIT_AS) failed: %m\n");
|
||||
+ return 1;
|
||||
+ }
|
||||
+ long target = 200 * 1024 * 1024;
|
||||
+ if (limit.rlim_cur == RLIM_INFINITY || limit.rlim_cur > target)
|
||||
+ {
|
||||
+ limit.rlim_cur = target;
|
||||
+ if (setrlimit (RLIMIT_AS, &limit) != 0)
|
||||
+ {
|
||||
+ printf ("setrlimit (RLIMIT_AS) failed: %m\n");
|
||||
+ return 1;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ __nss_configure_lookup ("hosts", "files");
|
||||
+ if (dlopen (LIBNSS_FILES_SO, RTLD_LAZY) == NULL)
|
||||
+ FAIL_EXIT1 ("could not load " LIBNSS_FILES_SO ": %s", dlerror ());
|
||||
+
|
||||
+ /* Run the tests with a few different address/alias counts. */
|
||||
+ for (int count = 1; count <= 111; ++count)
|
||||
+ run_test (count);
|
||||
+ run_test (1111);
|
||||
+ run_test (22222);
|
||||
+
|
||||
+ support_chroot_free (chroot_env);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#define PREPARE prepare
|
||||
+#include <support/test-driver.c>
|
125
sysconf-uio-maxiov.patch
Normal file
125
sysconf-uio-maxiov.patch
Normal file
@ -0,0 +1,125 @@
|
||||
2017-10-20 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
[BZ #22321]
|
||||
sysconf: Fix missing definition of UIO_MAXIOV on Linux.
|
||||
* sysdeps/posix/sysconf.c: Include <sys/uio.h>.
|
||||
* sysdeps/unix/sysv/linux/Makefile (tests): Add tst-sysconf-iov_max.
|
||||
(tst-sysconf-iov_max): Link with tst-sysconf-iov_max-uapi.o.
|
||||
* sysdeps/unix/sysv/linux/tst-sysconf-iov_max.c: New file.
|
||||
* sysdeps/unix/sysv/linux/tst-sysconf-iov_max-uapi.c: Likewise.
|
||||
|
||||
Index: glibc-2.26/sysdeps/posix/sysconf.c
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/posix/sysconf.c
|
||||
+++ glibc-2.26/sysdeps/posix/sysconf.c
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <sys/types.h>
|
||||
+#include <sys/uio.h>
|
||||
#include <regex.h>
|
||||
|
||||
#define NEED_SPEC_ARRAY 0
|
||||
Index: glibc-2.26/sysdeps/unix/sysv/linux/Makefile
|
||||
===================================================================
|
||||
--- glibc-2.26.orig/sysdeps/unix/sysv/linux/Makefile
|
||||
+++ glibc-2.26/sysdeps/unix/sysv/linux/Makefile
|
||||
@@ -50,7 +50,7 @@ sysdep_headers += sys/mount.h sys/acct.h
|
||||
bits/siginfo-arch.h bits/siginfo-consts-arch.h
|
||||
|
||||
tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
|
||||
- tst-quota tst-sync_file_range test-errno-linux
|
||||
+ tst-quota tst-sync_file_range test-errno-linux tst-sysconf-iov_max
|
||||
|
||||
# Generate the list of SYS_* macros for the system calls (__NR_* macros).
|
||||
|
||||
@@ -120,7 +120,11 @@ ifndef no_deps
|
||||
-include $(objpfx)bits/syscall.d
|
||||
endif
|
||||
generated += bits/syscall.h bits/syscall.d
|
||||
-endif
|
||||
+
|
||||
+# Separate object file for access to the constant from the UAPI header.
|
||||
+$(objpfx)tst-sysconf-iov_max: $(objpfx)tst-sysconf-iov_max-uapi.o
|
||||
+
|
||||
+endif # $(subdir) == misc
|
||||
|
||||
ifeq ($(subdir),time)
|
||||
sysdep_headers += sys/timex.h bits/timex.h
|
||||
Index: glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max-uapi.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max-uapi.c
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* Check IOV_MAX definition: Helper function to capture UAPI header value.
|
||||
+ Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Use a separate function to avoid header compatibility issues. */
|
||||
+
|
||||
+#include <linux/uio.h>
|
||||
+
|
||||
+long
|
||||
+uio_maxiov_value (void)
|
||||
+{
|
||||
+ return UIO_MAXIOV;
|
||||
+}
|
||||
Index: glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max.c
|
||||
@@ -0,0 +1,40 @@
|
||||
+/* Check IOV_MAX definition for consistency (bug 22321).
|
||||
+ Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+/* Defined in tst-sysconf-iov_max-uapi.c. */
|
||||
+long uio_maxiov_value (void);
|
||||
+
|
||||
+
|
||||
+#include <limits.h>
|
||||
+#include <support/check.h>
|
||||
+#include <sys/uio.h>
|
||||
+#include <unistd.h>
|
||||
+
|
||||
+static int
|
||||
+do_test (void)
|
||||
+{
|
||||
+ TEST_VERIFY (_XOPEN_IOV_MAX == 16); /* Value required by POSIX. */
|
||||
+ TEST_VERIFY (uio_maxiov_value () >= _XOPEN_IOV_MAX);
|
||||
+ TEST_VERIFY (IOV_MAX == uio_maxiov_value ());
|
||||
+ TEST_VERIFY (UIO_MAXIOV == uio_maxiov_value ());
|
||||
+ TEST_VERIFY (sysconf (_SC_UIO_MAXIOV) == uio_maxiov_value ());
|
||||
+ TEST_VERIFY (sysconf (_SC_IOV_MAX) == uio_maxiov_value ());
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#include <support/test-driver.c>
|
Loading…
Reference in New Issue
Block a user