Accepting request 535960 from home:Andreas_Schwab:Factory

- math-c++-compat.patch: Add more C++ compatibility (BZ #22296) - malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ #22111) - falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove for Qualcomm Falkor - aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling - nss-files-large-buffers.patch: Avoid large buffers with many host addresses (BZ #22078) - sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ #22321) - glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332) - dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265) OBS-URL: https://build.opensuse.org/request/show/535960 OBS-URL: https://build.opensuse.org/package/show/Base:System/glibc?expand=0&rev=483
2017-10-23 11:59:05 +00:00 · 2017-10-23 11:59:05 +00:00 · a41899225a
commit a41899225a
parent 787f325423
15 changed files with 5060 additions and 32 deletions
--- a/aarch64-cpu-features.patch
+++ b/aarch64-cpu-features.patch
@ -0,0 +1,18 @@
+2017-10-10  Steve Ellcey  <sellcey@cavium.com>
+
+	* sysdeps/unix/sysv/linux/aarch64/cpu-features.c (get_midr_from_mcpu):
+	Use strcmp instead of tunable_is_name.
+
+Index: glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+@@ -37,7 +37,7 @@ static uint64_t
+ get_midr_from_mcpu (const char *mcpu)
+ {
+   for (int i = 0; i < sizeof (cpu_list) / sizeof (struct cpu_list); i++)
+-    if (tunable_is_name (mcpu, cpu_list[i].name) == 0)
+    if (strcmp (mcpu, cpu_list[i].name) == 0)
+       return cpu_list[i].midr;
+ 
+   return UINT64_MAX;
--- a/dl-runtime-resolve-xsave.patch
+++ b/dl-runtime-resolve-xsave.patch
@ -0,0 +1,851 @@
+2017-10-22  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #21265]
+	* sysdeps/x86/cpu-features-offsets.sym (XSAVE_STATE_SIZE_OFFSET):
+	New.
+	* sysdeps/x86/cpu-features.c: Include <libc-pointer-arith.h>.
+	(get_common_indeces): Set xsave_state_size, xsave_state_full_size
+	and bit_arch_XSAVEC_Usable if needed.
+	(init_cpu_features): Remove bit_arch_Use_dl_runtime_resolve_slow
+	and bit_arch_Use_dl_runtime_resolve_opt.
+	* sysdeps/x86/cpu-features.h (bit_arch_Use_dl_runtime_resolve_opt):
+	Removed.
+	(bit_arch_Use_dl_runtime_resolve_slow): Likewise.
+	(bit_arch_Prefer_No_AVX512): Updated.
+	(bit_arch_MathVec_Prefer_No_AVX512): Likewise.
+	(bit_arch_XSAVEC_Usable): New.
+	(STATE_SAVE_OFFSET): Likewise.
+	(STATE_SAVE_MASK): Likewise.
+	[__ASSEMBLER__]: Include <cpu-features-offsets.h>.
+	(cpu_features): Add xsave_state_size and xsave_state_full_size.
+	(index_arch_Use_dl_runtime_resolve_opt): Removed.
+	(index_arch_Use_dl_runtime_resolve_slow): Likewise.
+	(index_arch_XSAVEC_Usable): New.
+	* sysdeps/x86/cpu-tunables.c (TUNABLE_CALLBACK (set_hwcaps)):
+	Support XSAVEC_Usable.  Remove Use_dl_runtime_resolve_slow.
+	* sysdeps/x86_64/Makefile (tst-x86_64-1-ENV): New if tunables
+	is enabled.
+	* sysdeps/x86_64/dl-machine.h (elf_machine_runtime_setup):
+	Replace _dl_runtime_resolve_sse, _dl_runtime_resolve_avx,
+	_dl_runtime_resolve_avx_slow, _dl_runtime_resolve_avx_opt,
+	_dl_runtime_resolve_avx512 and _dl_runtime_resolve_avx512_opt
+	with _dl_runtime_resolve_fxsave, _dl_runtime_resolve_xsave and
+	_dl_runtime_resolve_xsavec.
+	* sysdeps/x86_64/dl-trampoline.S (DL_RUNTIME_UNALIGNED_VEC_SIZE):
+	Removed.
+	(DL_RUNTIME_RESOLVE_REALIGN_STACK): Check STATE_SAVE_ALIGNMENT
+	instead of VEC_SIZE.
+	(REGISTER_SAVE_BND0): Removed.
+	(REGISTER_SAVE_BND1, REGISTER_SAVE_BND2): Likewise.
+	(REGISTER_SAVE_BND3): Likewise.
+	(REGISTER_SAVE_RAX): Always defined to 0.
+	(VMOV): Removed.
+	(_dl_runtime_resolve_avx): Likewise.
+	(_dl_runtime_resolve_avx_slow): Likewise.
+	(_dl_runtime_resolve_avx_opt): Likewise.
+	(_dl_runtime_resolve_avx512): Likewise.
+	(_dl_runtime_resolve_avx512_opt): Likewise.
+	(_dl_runtime_resolve_sse): Likewise.
+	(_dl_runtime_resolve_sse_vex): Likewise.
+	(USE_FXSAVE): New.
+	(_dl_runtime_resolve_fxsave): Likewise.
+	(USE_XSAVE): Likewise.
+	(_dl_runtime_resolve_xsave): Likewise.
+	(USE_XSAVEC): Likewise.
+	(_dl_runtime_resolve_xsavec): Likewise.
+	* sysdeps/x86_64/dl-trampoline.h (_dl_runtime_resolve_avx512):
+	Removed.
+	(_dl_runtime_resolve_avx512_opt): Likewise.
+	(_dl_runtime_resolve_avx): Likewise.
+	(_dl_runtime_resolve_avx_opt): Likewise.
+	(_dl_runtime_resolve_sse): Likewise.
+	(_dl_runtime_resolve_sse_vex): Likewise.
+	(_dl_runtime_resolve_fxsave): New.
+	(_dl_runtime_resolve_xsave): Likewise.
+	(_dl_runtime_resolve_xsavec): Likewise.
+
+Index: glibc-2.26/sysdeps/x86/cpu-features-offsets.sym
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86/cpu-features-offsets.sym
+++ glibc-2.26/sysdeps/x86/cpu-features-offsets.sym
+@@ -15,6 +15,7 @@ CPUID_ECX_OFFSET	offsetof (struct cpuid_
+ CPUID_EDX_OFFSET	offsetof (struct cpuid_registers, edx)
+ FAMILY_OFFSET		offsetof (struct cpu_features, family)
+ MODEL_OFFSET		offsetof (struct cpu_features, model)
+XSAVE_STATE_SIZE_OFFSET	offsetof (struct cpu_features, xsave_state_size)
+ FEATURE_OFFSET		offsetof (struct cpu_features, feature)
+ FEATURE_SIZE		sizeof (unsigned int)
+ 
+Index: glibc-2.26/sysdeps/x86/cpu-features.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86/cpu-features.c
+++ glibc-2.26/sysdeps/x86/cpu-features.c
+@@ -19,6 +19,7 @@
+ #include <cpuid.h>
+ #include <cpu-features.h>
+ #include <dl-hwcap.h>
+#include <libc-pointer-arith.h>
+ 
+ #if HAVE_TUNABLES
+ # define TUNABLE_NAMESPACE tune
+@@ -103,6 +104,76 @@ get_common_indeces (struct cpu_features
+ 		}
+ 	    }
+ 	}
+
+      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
+	 size + integer register save size and align it to 64 bytes.  */
+      if (cpu_features->max_cpuid >= 0xd)
+	{
+	  unsigned int eax, ebx, ecx, edx;
+
+	  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
+	  if (ebx != 0)
+	    {
+	      unsigned int xsave_state_full_size
+		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+
+	      cpu_features->xsave_state_size
+		= xsave_state_full_size;
+	      cpu_features->xsave_state_full_size
+		= xsave_state_full_size;
+
+	      __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
+
+	      /* Check if XSAVEC is available.  */
+	      if ((eax & (1 << 1)) != 0)
+		{
+		  unsigned int xstate_comp_offsets[32];
+		  unsigned int xstate_comp_sizes[32];
+		  unsigned int i;
+
+		  xstate_comp_offsets[0] = 0;
+		  xstate_comp_offsets[1] = 160;
+		  xstate_comp_offsets[2] = 576;
+		  xstate_comp_sizes[0] = 160;
+		  xstate_comp_sizes[1] = 256;
+
+		  for (i = 2; i < 32; i++)
+		    {
+		      if ((STATE_SAVE_MASK & (1 << i)) != 0)
+			{
+			  __cpuid_count (0xd, i, eax, ebx, ecx, edx);
+			  xstate_comp_sizes[i] = eax;
+			}
+		      else
+			{
+			  ecx = 0;
+			  xstate_comp_sizes[i] = 0;
+			}
+
+		      if (i > 2)
+			{
+			  xstate_comp_offsets[i]
+			    = (xstate_comp_offsets[i - 1]
+			       + xstate_comp_sizes[i -1]);
+			  if ((ecx & (1 << 1)) != 0)
+			    xstate_comp_offsets[i]
+			      = ALIGN_UP (xstate_comp_offsets[i], 64);
+			}
+		    }
+
+		  /* Use XSAVEC.  */
+		  unsigned int size
+		    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
+		  if (size)
+		    {
+		      cpu_features->xsave_state_size
+			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+		      cpu_features->feature[index_arch_XSAVEC_Usable]
+			|= bit_arch_XSAVEC_Usable;
+		    }
+		}
+	    }
+	}
+     }
+ }
+ 
+@@ -242,23 +313,6 @@ init_cpu_features (struct cpu_features *
+       else
+ 	cpu_features->feature[index_arch_Prefer_No_AVX512]
+ 	  |= bit_arch_Prefer_No_AVX512;
+-
+-      /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
+-         If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt.
+-	 Use _dl_runtime_resolve_opt only with AVX512F since it is
+-	 slower than _dl_runtime_resolve_slow with AVX.  */
+-      cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow]
+-	|= bit_arch_Use_dl_runtime_resolve_slow;
+-      if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+-	  && cpu_features->max_cpuid >= 0xd)
+-	{
+-	  unsigned int eax;
+-
+-	  __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
+-	  if ((eax & (1 << 2)) != 0)
+-	    cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt]
+-	      |= bit_arch_Use_dl_runtime_resolve_opt;
+-	}
+     }
+   /* This spells out "AuthenticAMD".  */
+   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+Index: glibc-2.26/sysdeps/x86/cpu-features.h
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86/cpu-features.h
+++ glibc-2.26/sysdeps/x86/cpu-features.h
+@@ -37,9 +37,8 @@
+ #define bit_arch_Prefer_No_VZEROUPPER		(1 << 17)
+ #define bit_arch_Fast_Unaligned_Copy		(1 << 18)
+ #define bit_arch_Prefer_ERMS			(1 << 19)
+-#define bit_arch_Use_dl_runtime_resolve_opt	(1 << 20)
+-#define bit_arch_Use_dl_runtime_resolve_slow	(1 << 21)
+-#define bit_arch_Prefer_No_AVX512		(1 << 22)
+#define bit_arch_Prefer_No_AVX512		(1 << 20)
+#define bit_arch_XSAVEC_Usable			(1 << 21)
+ 
+ /* CPUID Feature flags.  */
+ 
+@@ -88,6 +87,15 @@
+ /* The current maximum size of the feature integer bit array.  */
+ #define FEATURE_INDEX_MAX 1
+ 
+/* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
+   space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
+   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
+#define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* Save SSE, AVX, AVX512, mask and bound registers.  */
+#define STATE_SAVE_MASK \
+  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+
+ #ifdef	__ASSEMBLER__
+ 
+ # include <cpu-features-offsets.h>
+@@ -123,8 +131,6 @@
+ # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
+ # define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1*FEATURE_SIZE
+ # define index_arch_Prefer_ERMS		FEATURE_INDEX_1*FEATURE_SIZE
+-# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE
+-# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE
+ # define index_arch_Prefer_No_AVX512	FEATURE_INDEX_1*FEATURE_SIZE
+ 
+ 
+@@ -214,6 +220,18 @@ struct cpu_features
+   } cpuid[COMMON_CPUID_INDEX_MAX];
+   unsigned int family;
+   unsigned int model;
+  /* The state size for XSAVEC or XSAVE.  The type must be unsigned long
+     int so that we use
+
+	sub xsave_state_size_offset(%rip) %RSP_LP
+
+     in _dl_runtime_resolve.  */
+  unsigned long int xsave_state_size;
+  /* The full state size for XSAVE when XSAVEC is disabled by
+
+     GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable
+   */
+  unsigned int xsave_state_full_size;
+   unsigned int feature[FEATURE_INDEX_MAX];
+   /* Data cache size for use in memory and string routines, typically
+      L1 size.  */
+@@ -326,9 +344,8 @@ extern const struct cpu_features *__get_
+ # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
+ # define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1
+ # define index_arch_Prefer_ERMS		FEATURE_INDEX_1
+-# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1
+-# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1
+ # define index_arch_Prefer_No_AVX512	FEATURE_INDEX_1
+# define index_arch_XSAVEC_Usable	FEATURE_INDEX_1
+ 
+ #endif	/* !__ASSEMBLER__ */
+ 
+Index: glibc-2.26/sysdeps/x86/cpu-tunables.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86/cpu-tunables.c
+++ glibc-2.26/sysdeps/x86/cpu-tunables.c
+@@ -240,6 +240,16 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_v
+ 						Slow_SSE4_2, SSE4_2,
+ 						disable, 11);
+ 	  break;
+	case 13:
+	  if (disable)
+	    {
+	      /* Update xsave_state_size to XSAVE state size.  */
+	      cpu_features->xsave_state_size
+		= cpu_features->xsave_state_full_size;
+	      CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
+					  XSAVEC_Usable, 13);
+	    }
+	  break;
+ 	case 14:
+ 	  if (disable)
+ 	    {
+@@ -308,13 +318,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_v
+ 		 disable, 26);
+ 	    }
+ 	  break;
+-	case 27:
+-	    {
+-	      CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
+-					   Use_dl_runtime_resolve_slow,
+-					   disable, 27);
+-	    }
+-	  break;
+ 	}
+       p += len + 1;
+     }
+Index: glibc-2.26/sysdeps/x86_64/Makefile
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86_64/Makefile
+++ glibc-2.26/sysdeps/x86_64/Makefile
+@@ -55,6 +55,10 @@ CFLAGS-tst-quad2pie.c = $(PIE-ccflag)
+ tests += tst-x86_64-1
+ modules-names += x86_64/tst-x86_64mod-1
+ LDFLAGS-tst-x86_64mod-1.so = -Wl,-soname,tst-x86_64mod-1.so
+ifneq (no,$(have-tunables))
+# Test the state size for XSAVE when XSAVEC is disabled.
+tst-x86_64-1-ENV = GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable
+endif
+ 
+ $(objpfx)tst-x86_64-1: $(objpfx)x86_64/tst-x86_64mod-1.so
+ 
+Index: glibc-2.26/sysdeps/x86_64/dl-machine.h
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86_64/dl-machine.h
+++ glibc-2.26/sysdeps/x86_64/dl-machine.h
+@@ -66,12 +66,9 @@ static inline int __attribute__ ((unused
+ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+ {
+   Elf64_Addr *got;
+-  extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden;
+-  extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden;
+-  extern void _dl_runtime_resolve_avx_slow (ElfW(Word)) attribute_hidden;
+-  extern void _dl_runtime_resolve_avx_opt (ElfW(Word)) attribute_hidden;
+-  extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden;
+-  extern void _dl_runtime_resolve_avx512_opt (ElfW(Word)) attribute_hidden;
+  extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
+  extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
+  extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
+   extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
+   extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
+   extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
+@@ -120,29 +117,14 @@ elf_machine_runtime_setup (struct link_m
+ 	  /* This function will get called to fix up the GOT entry
+ 	     indicated by the offset on the stack, and then jump to
+ 	     the resolved address.  */
+-	  if (HAS_ARCH_FEATURE (AVX512F_Usable))
+-	    {
+-	      if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt))
+-		*(ElfW(Addr) *) (got + 2)
+-		  = (ElfW(Addr)) &_dl_runtime_resolve_avx512_opt;
+-	      else
+-		*(ElfW(Addr) *) (got + 2)
+-		  = (ElfW(Addr)) &_dl_runtime_resolve_avx512;
+-	    }
+-	  else if (HAS_ARCH_FEATURE (AVX_Usable))
+-	    {
+-	      if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt))
+-		*(ElfW(Addr) *) (got + 2)
+-		  = (ElfW(Addr)) &_dl_runtime_resolve_avx_opt;
+-	      else if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_slow))
+-		*(ElfW(Addr) *) (got + 2)
+-		  = (ElfW(Addr)) &_dl_runtime_resolve_avx_slow;
+-	      else
+-		*(ElfW(Addr) *) (got + 2)
+-		  = (ElfW(Addr)) &_dl_runtime_resolve_avx;
+-	    }
+	  if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+	    *(ElfW(Addr) *) (got + 2)
+	      = (HAS_ARCH_FEATURE (XSAVEC_Usable)
+		 ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
+		 : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
+ 	  else
+-	    *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse;
+	    *(ElfW(Addr) *) (got + 2)
+	      = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
+ 	}
+     }
+ 
+Index: glibc-2.26/sysdeps/x86_64/dl-trampoline.S
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86_64/dl-trampoline.S
+++ glibc-2.26/sysdeps/x86_64/dl-trampoline.S
+@@ -34,41 +34,24 @@
+ # define DL_STACK_ALIGNMENT 8
+ #endif
+ 
+-#ifndef DL_RUNTIME_UNALIGNED_VEC_SIZE
+-/* The maximum size in bytes of unaligned vector load and store in the
+-   dynamic linker.  Since SSE optimized memory/string functions with
+-   aligned SSE register load and store are used in the dynamic linker,
+-   we must set this to 8 so that _dl_runtime_resolve_sse will align the
+-   stack before calling _dl_fixup.  */
+-# define DL_RUNTIME_UNALIGNED_VEC_SIZE 8
+-#endif
+-
+-/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes.  */
+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+   stack to 16 bytes before calling _dl_fixup.  */
+ #define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+-  (VEC_SIZE > DL_STACK_ALIGNMENT \
+-   && VEC_SIZE > DL_RUNTIME_UNALIGNED_VEC_SIZE)
+-
+-/* Align vector register save area to 16 bytes.  */
+-#define REGISTER_SAVE_VEC_OFF	0
+  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+   || 16 > DL_STACK_ALIGNMENT)
+ 
+ /* Area on stack to save and restore registers used for parameter
+    passing when calling _dl_fixup.  */
+ #ifdef __ILP32__
+-# define REGISTER_SAVE_RAX	(REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
+ # define PRESERVE_BND_REGS_PREFIX
+ #else
+-/* Align bound register save area to 16 bytes.  */
+-# define REGISTER_SAVE_BND0	(REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
+-# define REGISTER_SAVE_BND1	(REGISTER_SAVE_BND0 + 16)
+-# define REGISTER_SAVE_BND2	(REGISTER_SAVE_BND1 + 16)
+-# define REGISTER_SAVE_BND3	(REGISTER_SAVE_BND2 + 16)
+-# define REGISTER_SAVE_RAX	(REGISTER_SAVE_BND3 + 16)
+ # ifdef HAVE_MPX_SUPPORT
+ #  define PRESERVE_BND_REGS_PREFIX bnd
+ # else
+ #  define PRESERVE_BND_REGS_PREFIX .byte 0xf2
+ # endif
+ #endif
+#define REGISTER_SAVE_RAX	0
+ #define REGISTER_SAVE_RCX	(REGISTER_SAVE_RAX + 8)
+ #define REGISTER_SAVE_RDX	(REGISTER_SAVE_RCX + 8)
+ #define REGISTER_SAVE_RSI	(REGISTER_SAVE_RDX + 8)
+@@ -80,68 +63,56 @@
+ 
+ #define VEC_SIZE		64
+ #define VMOVA			vmovdqa64
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+-# define VMOV			vmovdqa64
+-#else
+-# define VMOV			vmovdqu64
+-#endif
+ #define VEC(i)			zmm##i
+-#define _dl_runtime_resolve	_dl_runtime_resolve_avx512
+ #define _dl_runtime_profile	_dl_runtime_profile_avx512
+ #include "dl-trampoline.h"
+-#undef _dl_runtime_resolve
+ #undef _dl_runtime_profile
+ #undef VEC
+-#undef VMOV
+ #undef VMOVA
+ #undef VEC_SIZE
+ 
+ #define VEC_SIZE		32
+ #define VMOVA			vmovdqa
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+-# define VMOV			vmovdqa
+-#else
+-# define VMOV			vmovdqu
+-#endif
+ #define VEC(i)			ymm##i
+-#define _dl_runtime_resolve	_dl_runtime_resolve_avx
+-#define _dl_runtime_resolve_opt	_dl_runtime_resolve_avx_opt
+ #define _dl_runtime_profile	_dl_runtime_profile_avx
+ #include "dl-trampoline.h"
+-#undef _dl_runtime_resolve
+-#undef _dl_runtime_resolve_opt
+ #undef _dl_runtime_profile
+ #undef VEC
+-#undef VMOV
+ #undef VMOVA
+ #undef VEC_SIZE
+ 
+ /* movaps/movups is 1-byte shorter.  */
+ #define VEC_SIZE		16
+ #define VMOVA			movaps
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+-# define VMOV			movaps
+-#else
+-# define VMOV			movups
+-#endif
+ #define VEC(i)			xmm##i
+-#define _dl_runtime_resolve	_dl_runtime_resolve_sse
+ #define _dl_runtime_profile	_dl_runtime_profile_sse
+ #undef RESTORE_AVX
+ #include "dl-trampoline.h"
+-#undef _dl_runtime_resolve
+ #undef _dl_runtime_profile
+-#undef VMOV
+#undef VEC
+ #undef VMOVA
+#undef VEC_SIZE
+ 
+-/* Used by _dl_runtime_resolve_avx_opt/_dl_runtime_resolve_avx512_opt
+-   to preserve the full vector registers with zero upper bits.  */
+-#define VMOVA			vmovdqa
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+-# define VMOV			vmovdqa
+-#else
+-# define VMOV			vmovdqu
+-#endif
+-#define _dl_runtime_resolve	_dl_runtime_resolve_sse_vex
+-#define _dl_runtime_resolve_opt	_dl_runtime_resolve_avx512_opt
+#define USE_FXSAVE
+#define STATE_SAVE_ALIGNMENT	16
+#define _dl_runtime_resolve	_dl_runtime_resolve_fxsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_FXSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVE
+#define STATE_SAVE_ALIGNMENT	64
+#define _dl_runtime_resolve	_dl_runtime_resolve_xsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVEC
+#define STATE_SAVE_ALIGNMENT	64
+#define _dl_runtime_resolve	_dl_runtime_resolve_xsavec
+ #include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVEC
+#undef STATE_SAVE_ALIGNMENT
+Index: glibc-2.26/sysdeps/x86_64/dl-trampoline.h
+===================================================================
+--- glibc-2.26.orig/sysdeps/x86_64/dl-trampoline.h
+++ glibc-2.26/sysdeps/x86_64/dl-trampoline.h
+@@ -16,140 +16,47 @@
+    License along with the GNU C Library; if not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
+-#undef REGISTER_SAVE_AREA_RAW
+-#ifdef __ILP32__
+-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
+-   VEC7.  */
+-# define REGISTER_SAVE_AREA_RAW	(8 * 7 + VEC_SIZE * 8)
+-#else
+-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
+-   BND0, BND1, BND2, BND3 and VEC0 to VEC7. */
+-# define REGISTER_SAVE_AREA_RAW	(8 * 7 + 16 * 4 + VEC_SIZE * 8)
+-#endif
+	.text
+#ifdef _dl_runtime_resolve
+ 
+-#undef REGISTER_SAVE_AREA
+-#undef LOCAL_STORAGE_AREA
+-#undef BASE
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+-# define REGISTER_SAVE_AREA	(REGISTER_SAVE_AREA_RAW + 8)
+-/* Local stack area before jumping to function address: RBX.  */
+-# define LOCAL_STORAGE_AREA	8
+-# define BASE			rbx
+-# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
+-#  error REGISTER_SAVE_AREA must be multples of VEC_SIZE
+-# endif
+-#else
+-# define REGISTER_SAVE_AREA	REGISTER_SAVE_AREA_RAW
+-/* Local stack area before jumping to function address:  All saved
+-   registers.  */
+-# define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
+-# define BASE			rsp
+-# if (REGISTER_SAVE_AREA % 16) != 8
+-#  error REGISTER_SAVE_AREA must be odd multples of 8
+-# endif
+-#endif
+# undef REGISTER_SAVE_AREA
+# undef LOCAL_STORAGE_AREA
+# undef BASE
+ 
+-	.text
+-#ifdef _dl_runtime_resolve_opt
+-/* Use the smallest vector registers to preserve the full YMM/ZMM
+-   registers to avoid SSE transition penalty.  */
+# if (STATE_SAVE_ALIGNMENT % 16) != 0
+#  error STATE_SAVE_ALIGNMENT must be multples of 16
+# endif
+ 
+-# if VEC_SIZE == 32
+-/* Check if the upper 128 bits in %ymm0 - %ymm7 registers are non-zero
+-   and preserve %xmm0 - %xmm7 registers with the zero upper bits.  Since
+-   there is no SSE transition penalty on AVX512 processors which don't
+-   support XGETBV with ECX == 1, _dl_runtime_resolve_avx512_slow isn't
+-   provided.   */
+-	.globl _dl_runtime_resolve_avx_slow
+-	.hidden _dl_runtime_resolve_avx_slow
+-	.type _dl_runtime_resolve_avx_slow, @function
+-	.align 16
+-_dl_runtime_resolve_avx_slow:
+-	cfi_startproc
+-	cfi_adjust_cfa_offset(16) # Incorporate PLT
+-	vorpd %ymm0, %ymm1, %ymm8
+-	vorpd %ymm2, %ymm3, %ymm9
+-	vorpd %ymm4, %ymm5, %ymm10
+-	vorpd %ymm6, %ymm7, %ymm11
+-	vorpd %ymm8, %ymm9, %ymm9
+-	vorpd %ymm10, %ymm11, %ymm10
+-	vpcmpeqd %xmm8, %xmm8, %xmm8
+-	vorpd %ymm9, %ymm10, %ymm10
+-	vptest %ymm10, %ymm8
+-	# Preserve %ymm0 - %ymm7 registers if the upper 128 bits of any
+-	# %ymm0 - %ymm7 registers aren't zero.
+-	PRESERVE_BND_REGS_PREFIX
+-	jnc _dl_runtime_resolve_avx
+-	# Use vzeroupper to avoid SSE transition penalty.
+-	vzeroupper
+-	# Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits
+-	# when the upper 128 bits of %ymm0 - %ymm7 registers are zero.
+-	PRESERVE_BND_REGS_PREFIX
+-	jmp _dl_runtime_resolve_sse_vex
+-	cfi_adjust_cfa_offset(-16) # Restore PLT adjustment
+-	cfi_endproc
+-	.size _dl_runtime_resolve_avx_slow, .-_dl_runtime_resolve_avx_slow
+# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+#  error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT
+ # endif
+ 
+-/* Use XGETBV with ECX == 1 to check which bits in vector registers are
+-   non-zero and only preserve the non-zero lower bits with zero upper
+-   bits.  */
+-	.globl _dl_runtime_resolve_opt
+-	.hidden _dl_runtime_resolve_opt
+-	.type _dl_runtime_resolve_opt, @function
+-	.align 16
+-_dl_runtime_resolve_opt:
+-	cfi_startproc
+-	cfi_adjust_cfa_offset(16) # Incorporate PLT
+-	pushq %rax
+-	cfi_adjust_cfa_offset(8)
+-	cfi_rel_offset(%rax, 0)
+-	pushq %rcx
+-	cfi_adjust_cfa_offset(8)
+-	cfi_rel_offset(%rcx, 0)
+-	pushq %rdx
+-	cfi_adjust_cfa_offset(8)
+-	cfi_rel_offset(%rdx, 0)
+-	movl $1, %ecx
+-	xgetbv
+-	movl %eax, %r11d
+-	popq %rdx
+-	cfi_adjust_cfa_offset(-8)
+-	cfi_restore (%rdx)
+-	popq %rcx
+-	cfi_adjust_cfa_offset(-8)
+-	cfi_restore (%rcx)
+-	popq %rax
+-	cfi_adjust_cfa_offset(-8)
+-	cfi_restore (%rax)
+-# if VEC_SIZE == 32
+-	# For YMM registers, check if YMM state is in use.
+-	andl $bit_YMM_state, %r11d
+-	# Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits if
+-	# YMM state isn't in use.
+-	PRESERVE_BND_REGS_PREFIX
+-	jz _dl_runtime_resolve_sse_vex
+-# elif VEC_SIZE == 16
+-	# For ZMM registers, check if YMM state and ZMM state are in
+-	# use.
+-	andl $(bit_YMM_state | bit_ZMM0_15_state), %r11d
+-	cmpl $bit_YMM_state, %r11d
+-	# Preserve %zmm0 - %zmm7 registers if ZMM state is in use.
+-	PRESERVE_BND_REGS_PREFIX
+-	jg _dl_runtime_resolve_avx512
+-	# Preserve %ymm0 - %ymm7 registers with the zero upper 256 bits if
+-	# ZMM state isn't in use.
+-	PRESERVE_BND_REGS_PREFIX
+-	je _dl_runtime_resolve_avx
+-	# Preserve %xmm0 - %xmm7 registers with the zero upper 384 bits if
+-	# neither YMM state nor ZMM state are in use.
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+/* Local stack area before jumping to function address: RBX.  */
+#  define LOCAL_STORAGE_AREA	8
+#  define BASE			rbx
+#  ifdef USE_FXSAVE
+/* Use fxsave to save XMM registers.  */
+#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
+#   if (REGISTER_SAVE_AREA % 16) != 0
+#    error REGISTER_SAVE_AREA must be multples of 16
+#   endif
+#  endif
+ # else
+-#  error Unsupported VEC_SIZE!
+#  ifndef USE_FXSAVE
+#   error USE_FXSAVE must be defined
+#  endif
+/* Use fxsave to save XMM registers.  */
+#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
+/* Local stack area before jumping to function address:  All saved
+   registers.  */
+#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
+#  define BASE			rsp
+#  if (REGISTER_SAVE_AREA % 16) != 8
+#   error REGISTER_SAVE_AREA must be odd multples of 8
+#  endif
+ # endif
+-	cfi_adjust_cfa_offset(-16) # Restore PLT adjustment
+-	cfi_endproc
+-	.size _dl_runtime_resolve_opt, .-_dl_runtime_resolve_opt
+-#endif
+
+ 	.globl _dl_runtime_resolve
+ 	.hidden _dl_runtime_resolve
+ 	.type _dl_runtime_resolve, @function
+@@ -157,21 +64,30 @@ _dl_runtime_resolve_opt:
+ 	cfi_startproc
+ _dl_runtime_resolve:
+ 	cfi_adjust_cfa_offset(16) # Incorporate PLT
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+-# if LOCAL_STORAGE_AREA != 8
+-#  error LOCAL_STORAGE_AREA must be 8
+-# endif
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+#  if LOCAL_STORAGE_AREA != 8
+#   error LOCAL_STORAGE_AREA must be 8
+#  endif
+ 	pushq %rbx			# push subtracts stack by 8.
+ 	cfi_adjust_cfa_offset(8)
+ 	cfi_rel_offset(%rbx, 0)
+ 	mov %RSP_LP, %RBX_LP
+ 	cfi_def_cfa_register(%rbx)
+-	and $-VEC_SIZE, %RSP_LP
+-#endif
+	and $-STATE_SAVE_ALIGNMENT, %RSP_LP
+# endif
+# ifdef REGISTER_SAVE_AREA
+ 	sub $REGISTER_SAVE_AREA, %RSP_LP
+-#if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+ 	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+-#endif
+#  endif
+# else
+	# Allocate stack space of the required size to save the state.
+#  if IS_IN (rtld)
+	sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+#  else
+	sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+#  endif
+# endif
+ 	# Preserve registers otherwise clobbered.
+ 	movq %rax, REGISTER_SAVE_RAX(%rsp)
+ 	movq %rcx, REGISTER_SAVE_RCX(%rsp)
+@@ -180,59 +96,42 @@ _dl_runtime_resolve:
+ 	movq %rdi, REGISTER_SAVE_RDI(%rsp)
+ 	movq %r8, REGISTER_SAVE_R8(%rsp)
+ 	movq %r9, REGISTER_SAVE_R9(%rsp)
+-	VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
+-	VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
+-	VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
+-	VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
+-	VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
+-	VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
+-	VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
+-	VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
+-#ifndef __ILP32__
+-	# We also have to preserve bound registers.  These are nops if
+-	# Intel MPX isn't available or disabled.
+-# ifdef HAVE_MPX_SUPPORT
+-	bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
+-	bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
+-	bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
+-	bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# ifdef USE_FXSAVE
+	fxsave STATE_SAVE_OFFSET(%rsp)
+ # else
+-#  if REGISTER_SAVE_BND0 == 0
+-	.byte 0x66,0x0f,0x1b,0x04,0x24
+	movl $STATE_SAVE_MASK, %eax
+	xorl %edx, %edx
+	# Clear the XSAVE Header.
+#  ifdef USE_XSAVE
+	movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
+#  endif
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
+#  ifdef USE_XSAVE
+	xsave STATE_SAVE_OFFSET(%rsp)
+ #  else
+-	.byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
+	xsavec STATE_SAVE_OFFSET(%rsp)
+ #  endif
+-	.byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
+-	.byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
+-	.byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
+ # endif
+-#endif
+ 	# Copy args pushed by PLT in register.
+ 	# %rdi: link_map, %rsi: reloc_index
+ 	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
+ 	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
+ 	call _dl_fixup		# Call resolver.
+ 	mov %RAX_LP, %R11_LP	# Save return value
+-#ifndef __ILP32__
+-	# Restore bound registers.  These are nops if Intel MPX isn't
+-	# avaiable or disabled.
+-# ifdef HAVE_MPX_SUPPORT
+-	bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
+-	bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
+-	bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
+-	bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+	# Get register content back.
+# ifdef USE_FXSAVE
+	fxrstor STATE_SAVE_OFFSET(%rsp)
+ # else
+-	.byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
+-	.byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
+-	.byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
+-#  if REGISTER_SAVE_BND0 == 0
+-	.byte 0x66,0x0f,0x1a,0x04,0x24
+-#  else
+-	.byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
+-#  endif
+	movl $STATE_SAVE_MASK, %eax
+	xorl %edx, %edx
+	xrstor STATE_SAVE_OFFSET(%rsp)
+ # endif
+-#endif
+-	# Get register content back.
+ 	movq REGISTER_SAVE_R9(%rsp), %r9
+ 	movq REGISTER_SAVE_R8(%rsp), %r8
+ 	movq REGISTER_SAVE_RDI(%rsp), %rdi
+@@ -240,20 +139,12 @@ _dl_runtime_resolve:
+ 	movq REGISTER_SAVE_RDX(%rsp), %rdx
+ 	movq REGISTER_SAVE_RCX(%rsp), %rcx
+ 	movq REGISTER_SAVE_RAX(%rsp), %rax
+-	VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
+-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
+-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
+-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
+-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
+-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
+-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
+-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
+-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+ 	mov %RBX_LP, %RSP_LP
+ 	cfi_def_cfa_register(%rsp)
+ 	movq (%rsp), %rbx
+ 	cfi_restore(%rbx)
+-#endif
+# endif
+ 	# Adjust stack(PLT did 2 pushes)
+ 	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
+ 	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
+@@ -262,11 +153,9 @@ _dl_runtime_resolve:
+ 	jmp *%r11		# Jump to function address.
+ 	cfi_endproc
+ 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
+#endif
+ 
+ 
+-/* To preserve %xmm0 - %xmm7 registers, dl-trampoline.h is included
+-   twice, for _dl_runtime_resolve_sse and _dl_runtime_resolve_sse_vex.
+-   But we don't need another _dl_runtime_profile for XMM registers.  */
+ #if !defined PROF && defined _dl_runtime_profile
+ # if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
+ #  error LR_VECTOR_OFFSET must be multples of VEC_SIZE
--- a/falkor-memcpy-memmove.patch
+++ b/falkor-memcpy-memmove.patch
@ -0,0 +1,573 @@
+2017-10-10  Siddhesh Poyarekar  <siddhesh@sourceware.org>
+
+	* sysdeps/aarch64/multiarch/Makefile (sysdep_routines): Add
+	memmove_falkor.
+	* sysdeps/aarch64/multiarch/ifunc-impl-list.c
+	(__libc_ifunc_impl_list): Likewise.
+	* sysdeps/aarch64/multiarch/memmove.c: Likewise.
+	* sysdeps/aarch64/multiarch/memmove_falkor.S: New file.
+
+	* benchtests/bench-memmove-walk.c: New file.
+	* benchtests/Makefile (string-benchset): Add it.
+
+	* benchtests/bench-memset-walk.c: New file.
+	* benchtests/Makefile (string-benchset): Add it.
+
+	* benchtests/bench-memcpy-walk.c: New file.
+	* benchtests/Makefile (string-benchset): Add it.
+
+2017-10-10  Siddhesh Poyarekar  <siddhesh@sourceware.org>
+
+	* manual/tunables.texi (Tunable glibc.tune.cpu): Add falkor.
+	* sysdeps/aarch64/multiarch/Makefile (sysdep_routines): Add
+	memcpy_falkor.
+	* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC):
+	Bump.
+	(__libc_ifunc_impl_list): Add __memcpy_falkor.
+	* sysdeps/aarch64/multiarch/memcpy.c: Likewise.
+	* sysdeps/aarch64/multiarch/memcpy_falkor.S: New file.
+	* sysdeps/unix/sysv/linux/aarch64/cpu-features.c (cpu_list):
+	Add falkor.
+	* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_FALKOR):
+	New macro.
+
+Index: glibc-2.26/manual/tunables.texi
+===================================================================
+--- glibc-2.26.orig/manual/tunables.texi
+++ glibc-2.26/manual/tunables.texi
+@@ -267,7 +267,7 @@ This tunable is specific to i386 and x86
+ @deftp Tunable glibc.tune.cpu
+ The @code{glibc.tune.cpu=xxx} tunable allows the user to tell @theglibc{} to
+ assume that the CPU is @code{xxx} where xxx may have one of these values:
+-@code{generic}, @code{thunderxt88}.
+@code{generic}, @code{falkor}, @code{thunderxt88}.
+ 
+ This tunable is specific to aarch64.
+ @end deftp
+Index: glibc-2.26/sysdeps/aarch64/multiarch/Makefile
+===================================================================
+--- glibc-2.26.orig/sysdeps/aarch64/multiarch/Makefile
+++ glibc-2.26/sysdeps/aarch64/multiarch/Makefile
+@@ -1,3 +1,4 @@
+ ifeq ($(subdir),string)
+-sysdep_routines += memcpy_generic memcpy_thunderx
+sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
+		   memmove_falkor
+ endif
+Index: glibc-2.26/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ glibc-2.26/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+@@ -25,7 +25,7 @@
+ #include <stdio.h>
+ 
+ /* Maximum number of IFUNC implementations.  */
+-#define MAX_IFUNC	2
+#define MAX_IFUNC	3
+ 
+ size_t
+ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -40,9 +40,11 @@ __libc_ifunc_impl_list (const char *name
+   /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c.  */
+   IFUNC_IMPL (i, name, memcpy,
+ 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor)
+ 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
+   IFUNC_IMPL (i, name, memmove,
+ 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx)
+	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor)
+ 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
+ 
+   return i;
+Index: glibc-2.26/sysdeps/aarch64/multiarch/memcpy.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/aarch64/multiarch/memcpy.c
+++ glibc-2.26/sysdeps/aarch64/multiarch/memcpy.c
+@@ -30,9 +30,14 @@ extern __typeof (__redirect_memcpy) __li
+ 
+ extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
+ extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
+ 
+ libc_ifunc (__libc_memcpy,
+-            IS_THUNDERX (midr) ? __memcpy_thunderx : __memcpy_generic);
+            (IS_THUNDERX (midr)
+	     ? __memcpy_thunderx
+	     : (IS_FALKOR (midr)
+		? __memcpy_falkor
+		: __memcpy_generic)));
+ 
+ # undef memcpy
+ strong_alias (__libc_memcpy, memcpy);
+Index: glibc-2.26/sysdeps/aarch64/multiarch/memcpy_falkor.S
+===================================================================
+--- /dev/null
+++ glibc-2.26/sysdeps/aarch64/multiarch/memcpy_falkor.S
+@@ -0,0 +1,184 @@
+/* Optimized memcpy for Qualcomm Falkor processor.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+
+   ARMv8-a, AArch64, falkor, unaligned accesses.  */
+
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define dst	x3
+#define srcend	x4
+#define dstend	x5
+#define A_l	x6
+#define A_lw	w6
+#define A_h	x7
+#define A_hw	w7
+#define tmp1	x14
+
+/* Copies are split into 3 main cases:
+
+   1. Small copies of up to 32 bytes
+   2. Medium copies of 33..128 bytes which are fully unrolled
+   3. Large copies of more than 128 bytes.
+
+   Large copies align the source to a quad word and use an unrolled loop
+   processing 64 bytes per iteration.
+
+   FALKOR-SPECIFIC DESIGN:
+
+   The smallest copies (32 bytes or less) focus on optimal pipeline usage,
+   which is why the redundant copies of 0-3 bytes have been replaced with
+   conditionals, since the former would unnecessarily break across multiple
+   issue groups.  The medium copy group has been enlarged to 128 bytes since
+   bumping up the small copies up to 32 bytes allows us to do that without
+   cost and also allows us to reduce the size of the prep code before loop64.
+
+   All copies are done only via two registers x6 and x7.  This is to ensure
+   that all loads hit a single hardware prefetcher which can get correctly
+   trained to prefetch a single stream.
+
+   The non-temporal stores help optimize cache utilization.  */
+
+#if IS_IN (libc)
+ENTRY_ALIGN (__memcpy_falkor, 6)
+
+	cmp	count, 32
+	add	srcend, src, count
+	add	dstend, dstin, count
+	b.ls	L(copy32)
+	ldp	A_l, A_h, [src]
+	cmp	count, 128
+	stp	A_l, A_h, [dstin]
+	b.hi	L(copy_long)
+
+	/* Medium copies: 33..128 bytes.  */
+	sub	tmp1, count, 1
+	ldp	A_l, A_h, [src, 16]
+	stp	A_l, A_h, [dstin, 16]
+	tbz	tmp1, 6, 1f
+	ldp	A_l, A_h, [src, 32]
+	stp	A_l, A_h, [dstin, 32]
+	ldp	A_l, A_h, [src, 48]
+	stp	A_l, A_h, [dstin, 48]
+	ldp	A_l, A_h, [srcend, -64]
+	stp	A_l, A_h, [dstend, -64]
+	ldp	A_l, A_h, [srcend, -48]
+	stp	A_l, A_h, [dstend, -48]
+1:
+	ldp	A_l, A_h, [srcend, -32]
+	stp	A_l, A_h, [dstend, -32]
+	ldp	A_l, A_h, [srcend, -16]
+	stp	A_l, A_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Small copies: 0..32 bytes.  */
+L(copy32):
+	/* 16-32 */
+	cmp	count, 16
+	b.lo	1f
+	ldp	A_l, A_h, [src]
+	stp	A_l, A_h, [dstin]
+	ldp	A_l, A_h, [srcend, -16]
+	stp	A_l, A_h, [dstend, -16]
+	ret
+	.p2align 4
+1:
+	/* 8-15 */
+	tbz	count, 3, 1f
+	ldr	A_l, [src]
+	str	A_l, [dstin]
+	ldr	A_l, [srcend, -8]
+	str	A_l, [dstend, -8]
+	ret
+	.p2align 4
+1:
+	/* 4-7 */
+	tbz	count, 2, 1f
+	ldr	A_lw, [src]
+	str	A_lw, [dstin]
+	ldr	A_lw, [srcend, -4]
+	str	A_lw, [dstend, -4]
+	ret
+	.p2align 4
+1:
+	/* 2-3 */
+	tbz	count, 1, 1f
+	ldrh	A_lw, [src]
+	strh	A_lw, [dstin]
+	ldrh	A_lw, [srcend, -2]
+	strh	A_lw, [dstend, -2]
+	ret
+	.p2align 4
+1:
+	/* 0-1 */
+	tbz	count, 0, 1f
+	ldrb	A_lw, [src]
+	strb	A_lw, [dstin]
+1:
+	ret
+
+	/* Align SRC to 16 bytes and copy; that way at least one of the
+	   accesses is aligned throughout the copy sequence.
+
+	   The count is off by 0 to 15 bytes, but this is OK because we trim
+	   off the last 64 bytes to copy off from the end.  Due to this the
+	   loop never runs out of bounds.  */
+	.p2align 6
+L(copy_long):
+	sub	count, count, 64 + 16
+	and	tmp1, src, 15
+	bic	src, src, 15
+	sub	dst, dstin, tmp1
+	add	count, count, tmp1
+
+L(loop64):
+	ldp	A_l, A_h, [src, 16]!
+	stnp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [src, 16]!
+	subs	count, count, 64
+	stnp	A_l, A_h, [dst, 32]
+	ldp	A_l, A_h, [src, 16]!
+	stnp	A_l, A_h, [dst, 48]
+	ldp	A_l, A_h, [src, 16]!
+	stnp	A_l, A_h, [dst, 64]
+	add	dst, dst, 64
+	b.hi	L(loop64)
+
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the end even if
+	   there is just 1 byte left.  */
+L(last64):
+	ldp	A_l, A_h, [srcend, -64]
+	stnp	A_l, A_h, [dstend, -64]
+	ldp	A_l, A_h, [srcend, -48]
+	stnp	A_l, A_h, [dstend, -48]
+	ldp	A_l, A_h, [srcend, -32]
+	stnp	A_l, A_h, [dstend, -32]
+	ldp	A_l, A_h, [srcend, -16]
+	stnp	A_l, A_h, [dstend, -16]
+	ret
+
+END (__memcpy_falkor)
+libc_hidden_builtin_def (__memcpy_falkor)
+#endif
+Index: glibc-2.26/sysdeps/aarch64/multiarch/memmove.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/aarch64/multiarch/memmove.c
+++ glibc-2.26/sysdeps/aarch64/multiarch/memmove.c
+@@ -30,9 +30,14 @@ extern __typeof (__redirect_memmove) __l
+ 
+ extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
+ extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
+extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden;
+ 
+ libc_ifunc (__libc_memmove,
+-            IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic);
+            (IS_THUNDERX (midr)
+	     ? __memmove_thunderx
+	     : (IS_FALKOR (midr)
+		? __memmove_falkor
+		: __memmove_generic)));
+ 
+ # undef memmove
+ strong_alias (__libc_memmove, memmove);
+Index: glibc-2.26/sysdeps/aarch64/multiarch/memmove_falkor.S
+===================================================================
+--- /dev/null
+++ glibc-2.26/sysdeps/aarch64/multiarch/memmove_falkor.S
+@@ -0,0 +1,232 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions: ARMv8-a, AArch64, falkor, unaligned accesses.  */
+
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define dstlen	x3
+#define dst	x3
+#define srcend	x4
+#define dstend	x5
+#define A_l	x6
+#define A_lw	w6
+#define A_h	x7
+#define A_hw	w7
+#define B_l	x8
+#define B_lw	w8
+#define B_h	x9
+#define C_l	x10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	src
+#define E_h	count
+#define F_l	srcend
+#define F_h	dst
+#define tmp1	x14
+
+/* Alias with A_l and A_h to train the prefetcher.  */
+#define Q_l	x22
+#define Q_h	x23
+
+/* RATIONALE:
+
+   The copy has 4 distinct parts:
+   * Small copies of 16 bytes and under
+   * Medium sized copies of 17-96 bytes
+   * Large copies where the source address is higher than the destination
+     (forward copies)
+   * Large copies where the destination address is higher than the source
+     (copy backward, or move).
+
+   We use only two register pairs x6,x7 and x22,x23 for the copies and copy 32
+   bytes at a time to correctly train the hardware prefetcher for better
+   throughput.  */
+ENTRY_ALIGN (__memmove_falkor, 6)
+
+	sub	tmp1, dstin, src
+	add	srcend, src, count
+	add	dstend, dstin, count
+	cmp	count, 96
+	ccmp	tmp1, count, 2, hi
+	b.lo	L(move_long)
+
+	cmp	count, 16
+	b.ls	L(copy16)
+	cmp	count, 96
+	b.hi	L(copy_long)
+
+	/* Medium copies: 17..96 bytes.  */
+	sub	tmp1, count, 1
+	ldp	A_l, A_h, [src]
+	tbnz	tmp1, 6, L(copy96)
+	ldp	D_l, D_h, [srcend, -16]
+	tbz	tmp1, 5, 1f
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [srcend, -32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstend, -32]
+1:
+	stp	A_l, A_h, [dstin]
+	stp	D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Small copies: 0..16 bytes.  */
+L(copy16):
+	cmp	count, 8
+	b.lo	1f
+	ldr	A_l, [src]
+	ldr	A_h, [srcend, -8]
+	str	A_l, [dstin]
+	str	A_h, [dstend, -8]
+	ret
+	.p2align 4
+1:
+	/* 4-7 */
+	tbz	count, 2, 1f
+	ldr	A_lw, [src]
+	ldr	A_hw, [srcend, -4]
+	str	A_lw, [dstin]
+	str	A_hw, [dstend, -4]
+	ret
+	.p2align 4
+1:
+	/* 2-3 */
+	tbz	count, 1, 1f
+	ldrh	A_lw, [src]
+	ldrh	A_hw, [srcend, -2]
+	strh	A_lw, [dstin]
+	strh	A_hw, [dstend, -2]
+	ret
+	.p2align 4
+1:
+	/* 0-1 */
+	tbz	count, 0, 1f
+	ldrb	A_lw, [src]
+	strb	A_lw, [dstin]
+1:	ret
+
+	.p2align 4
+	/* Copy 65..96 bytes.  Copy 64 bytes from the start and
+	   32 bytes from the end.  */
+L(copy96):
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [src, 32]
+	ldp	D_l, D_h, [src, 48]
+	ldp	E_l, E_h, [srcend, -32]
+	ldp	F_l, F_h, [srcend, -16]
+	stp	A_l, A_h, [dstin]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin, 32]
+	stp	D_l, D_h, [dstin, 48]
+	stp	E_l, E_h, [dstend, -32]
+	stp	F_l, F_h, [dstend, -16]
+	ret
+
+	/* Align SRC to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 32 bytes per iteration and prefetches one iteration ahead.  */
+
+	.p2align 4
+L(copy_long):
+	sub	count, count, 64 + 16	/* Test and readjust count.  */
+	mov	B_l, Q_l
+	mov	B_h, Q_h
+	ldp	A_l, A_h, [src]
+	and	tmp1, src, 15
+	bic	src, src, 15
+	sub	dst, dstin, tmp1
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	Q_l, Q_h, [src, 16]!
+	stp	A_l, A_h, [dstin]
+	ldp	A_l, A_h, [src, 16]!
+
+L(loop64):
+	subs	count, count, 32
+	stp	Q_l, Q_h, [dst, 16]
+	ldp	Q_l, Q_h, [src, 16]!
+	stp	A_l, A_h, [dst, 32]!
+	ldp	A_l, A_h, [src, 16]!
+	b.hi	L(loop64)
+
+	/* Write the last full set of 32 bytes.  The remainder is at most 32
+	   bytes, so it is safe to always copy 32 bytes from the end even if
+	   there is just 1 byte left.  */
+L(last64):
+	ldp	C_l, C_h, [srcend, -32]
+	stp	Q_l, Q_h, [dst, 16]
+	ldp	Q_l, Q_h, [srcend, -16]
+	stp	A_l, A_h, [dst, 32]
+	stp	C_l, C_h, [dstend, -32]
+	stp	Q_l, Q_h, [dstend, -16]
+	mov	Q_l, B_l
+	mov	Q_h, B_h
+	ret
+
+	.p2align 4
+L(move_long):
+	cbz	tmp1, 3f
+
+	mov	B_l, Q_l
+	mov	B_h, Q_h
+
+	/* Align SRCEND to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 32 bytes per iteration and prefetches one iteration ahead.  */
+
+	ldp	A_l, A_h, [srcend, -16]
+	and	tmp1, srcend, 15
+	sub	srcend, srcend, tmp1
+	ldp	Q_l, Q_h, [srcend, -16]!
+	stp	A_l, A_h, [dstend, -16]
+	sub	count, count, tmp1
+	ldp	A_l, A_h, [srcend, -16]!
+	sub	dstend, dstend, tmp1
+	sub	count, count, 64
+
+1:
+	subs	count, count, 32
+	stp	Q_l, Q_h, [dstend, -16]
+	ldp	Q_l, Q_h, [srcend, -16]!
+	stp	A_l, A_h, [dstend, -32]!
+	ldp	A_l, A_h, [srcend, -16]!
+	b.hi	1b
+
+	/* Write the last full set of 32 bytes.  The remainder is at most 32
+	   bytes, so it is safe to always copy 32 bytes from the start even if
+	   there is just 1 byte left.  */
+2:
+	ldp	C_l, C_h, [src, 16]
+	stp	Q_l, Q_h, [dstend, -16]
+	ldp	Q_l, Q_h, [src]
+	stp	A_l, A_h, [dstend, -32]
+	stp	C_l, C_h, [dstin, 16]
+	stp	Q_l, Q_h, [dstin]
+	mov	Q_l, B_l
+	mov	Q_h, B_h
+3:	ret
+
+END (__memmove_falkor)
+libc_hidden_builtin_def (__memmove_falkor)
+Index: glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+@@ -28,6 +28,7 @@ struct cpu_list
+ };
+ 
+ static struct cpu_list cpu_list[] = {
+      {"falkor",	0x510FC000},
+       {"thunderxt88",	0x430F0A10},
+       {"generic", 	0x0}
+ };
+Index: glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+===================================================================
+--- glibc-2.26.orig/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ glibc-2.26/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+@@ -41,6 +41,9 @@
+ #define IS_THUNDERX(midr) (MIDR_IMPLEMENTOR(midr) == 'C'	\
+ 			   && MIDR_PARTNUM(midr) == 0x0a1)
+ 
+#define IS_FALKOR(midr) (MIDR_IMPLEMENTOR(midr) == 'Q'			      \
+                        && MIDR_PARTNUM(midr) == 0xc00)
+
+ struct cpu_features
+ {
+   uint64_t midr_el1;
--- a/fnmatch-collating-elements.patch
+++ b/fnmatch-collating-elements.patch
@ -10,22 +10,22 @@ Fix fnmatch handling of collating elements (BZ #17396, BZ #16976)
 	* posix/tst-fnmatch4.c: New file.
 	* posix/tst-fnmatch5.c: New file.

-Index: glibc-2.22/posix/Makefile
+Index: glibc-2.26/posix/Makefile
 ===================================================================
--- glibc-2.22.orig/posix/Makefile
-+++ glibc-2.22/posix/Makefile
-@@ -87,6 +87,7 @@ tests		:= tstgetopt testfnm runtests run
+--- glibc-2.26.orig/posix/Makefile
+++ glibc-2.26/posix/Makefile
+@@ -91,6 +91,7 @@ tests		:= test-errno tstgetopt testfnm r
 		   bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
 		   tst-pathconf tst-getaddrinfo4 tst-rxspencer-no-utf8 \
 		   tst-fnmatch3 bug-regex36 tst-getaddrinfo5 \
 +		   tst-fnmatch4 tst-fnmatch5 \
 		   tst-posix_spawn-fd tst-posix_spawn-setsid \
 		   tst-posix_fadvise tst-posix_fadvise64 \
- 		   tst-sysconf-empty-chroot
-Index: glibc-2.22/posix/fnmatch.c
+ 		   tst-sysconf-empty-chroot tst-glob-tilde
+Index: glibc-2.26/posix/fnmatch.c
 ===================================================================
--- glibc-2.22.orig/posix/fnmatch.c
-+++ glibc-2.22/posix/fnmatch.c
+--- glibc-2.26.orig/posix/fnmatch.c
+++ glibc-2.26/posix/fnmatch.c
@@ -53,7 +53,6 @@
    we support a correct implementation only in glibc.  */
 #ifdef _LIBC
@ -34,11 +34,11 @@ Index: glibc-2.22/posix/fnmatch.c
 # include "../locale/coll-lookup.h"
 # include <shlib-compat.h>
 
-Index: glibc-2.22/posix/fnmatch_loop.c
+Index: glibc-2.26/posix/fnmatch_loop.c
 ===================================================================
--- glibc-2.22.orig/posix/fnmatch_loop.c
-+++ glibc-2.22/posix/fnmatch_loop.c
-@@ -503,26 +503,12 @@ FCT (pattern, string, string_end, no_lea
+--- glibc-2.26.orig/posix/fnmatch_loop.c
+++ glibc-2.26/posix/fnmatch_loop.c
+@@ -497,26 +497,12 @@ FCT (const CHAR *pattern, const CHAR *st
 			  {
 			    int32_t table_size;
 			    const int32_t *symb_table;
@ -67,7 +67,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 
 			    table_size =
 			      _NL_CURRENT_WORD (LC_COLLATE,
-@@ -534,71 +520,55 @@ FCT (pattern, string, string_end, no_lea
+@@ -528,71 +514,55 @@ FCT (const CHAR *pattern, const CHAR *st
 			      _NL_CURRENT (LC_COLLATE,
 					   _NL_COLLATE_SYMB_EXTRAMB);
 
@ -180,7 +180,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 				  }
 
 				/* Get the collation sequence value.  */
-@@ -606,9 +576,9 @@ FCT (pattern, string, string_end, no_lea
+@@ -600,9 +570,9 @@ FCT (const CHAR *pattern, const CHAR *st
 # if WIDE_CHAR_VERSION
 				cold = wextra[1 + wextra[idx]];
 # else
@ -192,7 +192,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 				cold = *((int32_t *) &extra[idx]);
 # endif
 
-@@ -618,10 +588,10 @@ FCT (pattern, string, string_end, no_lea
+@@ -612,10 +582,10 @@ FCT (const CHAR *pattern, const CHAR *st
 			      {
 				/* No valid character.  Match it as a
 				   single byte.  */
@ -205,7 +205,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 				c = *p++;
 			      }
 			    else
-@@ -629,7 +599,6 @@ FCT (pattern, string, string_end, no_lea
+@@ -623,7 +593,6 @@ FCT (const CHAR *pattern, const CHAR *st
 			  }
 		      }
 		    else
@ -213,7 +213,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 #endif
 		      {
 			c = FOLD (c);
-@@ -721,25 +690,11 @@ FCT (pattern, string, string_end, no_lea
+@@ -715,25 +684,11 @@ FCT (const CHAR *pattern, const CHAR *st
 			      {
 				int32_t table_size;
 				const int32_t *symb_table;
@ -240,7 +240,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 # endif
 
 				table_size =
-@@ -752,51 +707,44 @@ FCT (pattern, string, string_end, no_lea
+@@ -746,51 +701,44 @@ FCT (const CHAR *pattern, const CHAR *st
 				  _NL_CURRENT (LC_COLLATE,
 					       _NL_COLLATE_SYMB_EXTRAMB);
 
@ -328,7 +328,7 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 				    /* Get the collation sequence value.  */
 				    is_seqval = 1;
 # if WIDE_CHAR_VERSION
-@@ -804,19 +752,18 @@ FCT (pattern, string, string_end, no_lea
+@@ -798,19 +746,18 @@ FCT (const CHAR *pattern, const CHAR *st
 # else
 				    /* Adjust for the alignment.  */
 				    idx += 1 + extra[idx];
@ -351,10 +351,10 @@ Index: glibc-2.22/posix/fnmatch_loop.c
 			  }
 			else
 			  {
-Index: glibc-2.22/posix/tst-fnmatch4.c
+Index: glibc-2.26/posix/tst-fnmatch4.c
 ===================================================================
 --- /dev/null
-+++ glibc-2.22/posix/tst-fnmatch4.c
+++ glibc-2.26/posix/tst-fnmatch4.c
@@ -0,0 +1,51 @@
 +/* Test for fnmatch handling of collating elements
 +   Copyright (C) 2015 Free Software Foundation, Inc.
@ -407,10 +407,10 @@ Index: glibc-2.22/posix/tst-fnmatch4.c
 +
 +#define TEST_FUNCTION do_test ()
 +#include "../test-skeleton.c"
-Index: glibc-2.22/posix/tst-fnmatch5.c
+Index: glibc-2.26/posix/tst-fnmatch5.c
 ===================================================================
 --- /dev/null
-+++ glibc-2.22/posix/tst-fnmatch5.c
+++ glibc-2.26/posix/tst-fnmatch5.c
@@ -0,0 +1,53 @@
 +/* Test for fnmatch handling of collating elements
 +   Copyright (C) 2015 Free Software Foundation, Inc.
--- a/glibc-testsuite.changes
+++ b/glibc-testsuite.changes
@ -1,3 +1,22 @@
+-------------------------------------------------------------------
+Mon Oct 23 09:35:18 UTC 2017 - schwab@suse.de
+
+- math-c++-compat.patch: Add more C++ compatibility (BZ #22296)
+- malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ
+  #22111)
+- falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove
+  for Qualcomm Falkor
+- aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling
+- nss-files-large-buffers.patch: Avoid large buffers with many host
+  addresses (BZ #22078)
+- sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ
+  #22321)
+- glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE
+  (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804,
+  bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332)
+- dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in
+  _dl_runtime_resolve (BZ #21265)
+
 -------------------------------------------------------------------
 Tue Oct 10 15:47:05 UTC 2017 - schwab@suse.de

@ -22,7 +41,7 @@ Thu Sep 28 07:57:52 UTC 2017 - schwab@suse.de

 - assert-pedantic.patch: Suppress pedantic warning caused by statement
  expression (BZ #21242, BZ #21972)
- math-c++-compat.patch: Add more C++ compatibility
+- math-c++-compat.patch: Add more C++ compatibility (BZ #22235)
 - getaddrinfo-errno.patch: Fix errno and h_errno handling in getaddrinfo
  (BZ #21915, BZ #21922)
 - resolv-conf-oom.patch: Fix memory handling in OOM situation during
--- a/glibc-testsuite.spec
+++ b/glibc-testsuite.spec
@ -273,7 +273,7 @@ Patch1000:      resolv-context-leak.patch
 Patch1001:      dl-runtime-resolve-opt-avx512f.patch
 # PATCH-FIX-UPSTREAM Don't use IFUNC resolver for longjmp or system in libpthread (BZ #21041)
 Patch1002:      libpthread-compat-wrappers.patch
-# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930)
+# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930, BZ #22146, BZ #22235, BZ #22296)
 Patch1003:      math-c++-compat.patch
 # PATCH-FIX-UPSTREAM Remove nis and compat from default NSS configs
 Patch1004:      remove-nss-nis-compat.patch
@ -295,6 +295,20 @@ Patch1011:      nearbyint-inexact.patch
 Patch1012:      nss-compat.patch
 # PATCH-FIX-UPSTREAM Remove reference to libnsl from nscd
 Patch1013:      nscd-libnsl.patch
+# PATCH-FIX-UPSTREAM malloc: Fix tcache leak after thread destruction (BZ #22111)
+Patch1014:      malloc-tcache-leak.patch
+# PATCH-FIX-UPSTREAM aarch64: Optimized implementation of memcpy/memmove for Qualcomm Falkor
+Patch1015:      falkor-memcpy-memmove.patch
+# PATCH-FIX-UPSTREAM aarch64: Fix glibc.tune.cpu tunable handling
+Patch1016:      aarch64-cpu-features.patch
+# PATCH-FIX-UPSTREAM nss_files: Avoid large buffers with many host addresses (BZ #22078)
+Patch1017:      nss-files-large-buffers.patch
+# PATCH-FIX-UPSTREAM sysconf: Fix missing definition of UIO_MAXIOV on Linux (BZ #22321)
+Patch1018:      sysconf-uio-maxiov.patch
+# PATCH-FIX-UPSTREAM glob: Fix buffer overflows (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, BZ #22320, BZ #22325, BZ #22332)
+Patch1019:      glob-tilde-overflow.patch
+# PATCH-FIX-UPSTREAM x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265)
+Patch1020:      dl-runtime-resolve-xsave.patch

 ### 
 # Patches awaiting upstream approval
@ -519,6 +533,13 @@ rm nscd/s-stamp
 %patch1011 -p1
 %patch1012 -p1
 %patch1013 -p1
+%patch1014 -p1
+%patch1015 -p1
+%patch1016 -p1
+%patch1017 -p1
+%patch1018 -p1
+%patch1019 -p1
+%patch1020 -p1

 %patch2000 -p1
 %patch2001 -p1
--- a/glibc-utils.changes
+++ b/glibc-utils.changes
@ -1,3 +1,22 @@
+-------------------------------------------------------------------
+Mon Oct 23 09:35:18 UTC 2017 - schwab@suse.de
+
+- math-c++-compat.patch: Add more C++ compatibility (BZ #22296)
+- malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ
+  #22111)
+- falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove
+  for Qualcomm Falkor
+- aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling
+- nss-files-large-buffers.patch: Avoid large buffers with many host
+  addresses (BZ #22078)
+- sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ
+  #22321)
+- glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE
+  (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804,
+  bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332)
+- dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in
+  _dl_runtime_resolve (BZ #21265)
+
 -------------------------------------------------------------------
 Tue Oct 10 15:47:05 UTC 2017 - schwab@suse.de

@ -22,7 +41,7 @@ Thu Sep 28 07:57:52 UTC 2017 - schwab@suse.de

 - assert-pedantic.patch: Suppress pedantic warning caused by statement
  expression (BZ #21242, BZ #21972)
- math-c++-compat.patch: Add more C++ compatibility
+- math-c++-compat.patch: Add more C++ compatibility (BZ #22235)
 - getaddrinfo-errno.patch: Fix errno and h_errno handling in getaddrinfo
  (BZ #21915, BZ #21922)
 - resolv-conf-oom.patch: Fix memory handling in OOM situation during
--- a/glibc-utils.spec
+++ b/glibc-utils.spec
@ -273,7 +273,7 @@ Patch1000:      resolv-context-leak.patch
 Patch1001:      dl-runtime-resolve-opt-avx512f.patch
 # PATCH-FIX-UPSTREAM Don't use IFUNC resolver for longjmp or system in libpthread (BZ #21041)
 Patch1002:      libpthread-compat-wrappers.patch
-# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930)
+# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930, BZ #22146, BZ #22235, BZ #22296)
 Patch1003:      math-c++-compat.patch
 # PATCH-FIX-UPSTREAM Remove nis and compat from default NSS configs
 Patch1004:      remove-nss-nis-compat.patch
@ -295,6 +295,20 @@ Patch1011:      nearbyint-inexact.patch
 Patch1012:      nss-compat.patch
 # PATCH-FIX-UPSTREAM Remove reference to libnsl from nscd
 Patch1013:      nscd-libnsl.patch
+# PATCH-FIX-UPSTREAM malloc: Fix tcache leak after thread destruction (BZ #22111)
+Patch1014:      malloc-tcache-leak.patch
+# PATCH-FIX-UPSTREAM aarch64: Optimized implementation of memcpy/memmove for Qualcomm Falkor
+Patch1015:      falkor-memcpy-memmove.patch
+# PATCH-FIX-UPSTREAM aarch64: Fix glibc.tune.cpu tunable handling
+Patch1016:      aarch64-cpu-features.patch
+# PATCH-FIX-UPSTREAM nss_files: Avoid large buffers with many host addresses (BZ #22078)
+Patch1017:      nss-files-large-buffers.patch
+# PATCH-FIX-UPSTREAM sysconf: Fix missing definition of UIO_MAXIOV on Linux (BZ #22321)
+Patch1018:      sysconf-uio-maxiov.patch
+# PATCH-FIX-UPSTREAM glob: Fix buffer overflows (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, BZ #22320, BZ #22325, BZ #22332)
+Patch1019:      glob-tilde-overflow.patch
+# PATCH-FIX-UPSTREAM x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265)
+Patch1020:      dl-runtime-resolve-xsave.patch

 ### 
 # Patches awaiting upstream approval
@ -519,6 +533,13 @@ rm nscd/s-stamp
 %patch1011 -p1
 %patch1012 -p1
 %patch1013 -p1
+%patch1014 -p1
+%patch1015 -p1
+%patch1016 -p1
+%patch1017 -p1
+%patch1018 -p1
+%patch1019 -p1
+%patch1020 -p1

 %patch2000 -p1
 %patch2001 -p1
--- a/glibc.changes
+++ b/glibc.changes
@ -1,3 +1,22 @@
+-------------------------------------------------------------------
+Mon Oct 23 09:35:18 UTC 2017 - schwab@suse.de
+
+- math-c++-compat.patch: Add more C++ compatibility (BZ #22296)
+- malloc-tcache-leak.patch: Fix tcache leak after thread destruction (BZ
+  #22111)
+- falkor-memcpy-memmove.patch: Optimized implementation of memcpy/memmove
+  for Qualcomm Falkor
+- aarch64-cpu-features.patch: Fix glibc.tune.cpu tunable handling
+- nss-files-large-buffers.patch: Avoid large buffers with many host
+  addresses (BZ #22078)
+- sysconf-uio-maxiov.patch: Fix missing definition of UIO_MAXIOV (BZ
+  #22321)
+- glob-tilde-overflow.patch: Fix buffer overflows with GLOB_TILDE
+  (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804,
+  bsc#1064569, bsc#1064580, bsc#1064583, BZ #22320, BZ #22325, BZ #22332)
+- dl-runtime-resolve-xsave.patch: Use fxsave/xsave/xsavec in
+  _dl_runtime_resolve (BZ #21265)
+
 -------------------------------------------------------------------
 Tue Oct 10 15:47:05 UTC 2017 - schwab@suse.de

@ -22,7 +41,7 @@ Thu Sep 28 07:57:52 UTC 2017 - schwab@suse.de

 - assert-pedantic.patch: Suppress pedantic warning caused by statement
  expression (BZ #21242, BZ #21972)
- math-c++-compat.patch: Add more C++ compatibility
+- math-c++-compat.patch: Add more C++ compatibility (BZ #22235)
 - getaddrinfo-errno.patch: Fix errno and h_errno handling in getaddrinfo
  (BZ #21915, BZ #21922)
 - resolv-conf-oom.patch: Fix memory handling in OOM situation during
--- a/glibc.spec
+++ b/glibc.spec
@ -279,7 +279,7 @@ Patch1000:      resolv-context-leak.patch
 Patch1001:      dl-runtime-resolve-opt-avx512f.patch
 # PATCH-FIX-UPSTREAM Don't use IFUNC resolver for longjmp or system in libpthread (BZ #21041)
 Patch1002:      libpthread-compat-wrappers.patch
-# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930)
+# PATCH-FIX-UPSTREAM Do not use __builtin_types_compatible_p in C++ mode (BZ #21930, BZ #22146, BZ #22235, BZ #22296)
 Patch1003:      math-c++-compat.patch
 # PATCH-FIX-UPSTREAM Remove nis and compat from default NSS configs
 Patch1004:      remove-nss-nis-compat.patch
@ -301,6 +301,20 @@ Patch1011:      nearbyint-inexact.patch
 Patch1012:      nss-compat.patch
 # PATCH-FIX-UPSTREAM Remove reference to libnsl from nscd
 Patch1013:      nscd-libnsl.patch
+# PATCH-FIX-UPSTREAM malloc: Fix tcache leak after thread destruction (BZ #22111)
+Patch1014:      malloc-tcache-leak.patch
+# PATCH-FIX-UPSTREAM aarch64: Optimized implementation of memcpy/memmove for Qualcomm Falkor
+Patch1015:      falkor-memcpy-memmove.patch
+# PATCH-FIX-UPSTREAM aarch64: Fix glibc.tune.cpu tunable handling
+Patch1016:      aarch64-cpu-features.patch
+# PATCH-FIX-UPSTREAM nss_files: Avoid large buffers with many host addresses (BZ #22078)
+Patch1017:      nss-files-large-buffers.patch
+# PATCH-FIX-UPSTREAM sysconf: Fix missing definition of UIO_MAXIOV on Linux (BZ #22321)
+Patch1018:      sysconf-uio-maxiov.patch
+# PATCH-FIX-UPSTREAM glob: Fix buffer overflows (CVE-2017-15670, CVE-2017-15671, CVE-2017-15804, BZ #22320, BZ #22325, BZ #22332)
+Patch1019:      glob-tilde-overflow.patch
+# PATCH-FIX-UPSTREAM x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve (BZ #21265)
+Patch1020:      dl-runtime-resolve-xsave.patch

 ### 
 # Patches awaiting upstream approval
@ -525,6 +539,13 @@ rm nscd/s-stamp
 %patch1011 -p1
 %patch1012 -p1
 %patch1013 -p1
+%patch1014 -p1
+%patch1015 -p1
+%patch1016 -p1
+%patch1017 -p1
+%patch1018 -p1
+%patch1019 -p1
+%patch1020 -p1

 %patch2000 -p1
 %patch2001 -p1
--- a/glob-tilde-overflow.patch
+++ b/glob-tilde-overflow.patch
--- a/malloc-tcache-leak.patch
+++ b/malloc-tcache-leak.patch
@ -0,0 +1,179 @@
+2017-10-06  Carlos O'Donell  <carlos@redhat.com>
+
+	[BZ #22111]
+	* malloc/malloc.c (tcache_shutting_down): Use bool type.
+	(tcache_thread_freeres): Set tcache_shutting_down before
+	freeing the tcache.
+	* malloc/Makefile (tests): Add tst-malloc-tcache-leak.
+	* malloc/tst-malloc-tcache-leak.c: New file.
+
+Index: glibc-2.26/malloc/Makefile
+===================================================================
+--- glibc-2.26.orig/malloc/Makefile
+++ glibc-2.26/malloc/Makefile
+@@ -34,6 +34,7 @@ tests := mallocbug tst-malloc tst-valloc
+ 	 tst-interpose-nothread \
+ 	 tst-interpose-thread \
+ 	 tst-alloc_buffer \
+	 tst-malloc-tcache-leak \
+ 
+ tests-static := \
+ 	 tst-interpose-static-nothread \
+@@ -242,3 +243,5 @@ tst-dynarray-fail-ENV = MALLOC_TRACE=$(o
+ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out
+ 	$(common-objpfx)malloc/mtrace $(objpfx)tst-dynarray-fail.mtrace > $@; \
+ 	$(evaluate-test)
+
+$(objpfx)tst-malloc-tcache-leak: $(shared-thread-library)
+Index: glibc-2.26/malloc/malloc.c
+===================================================================
+--- glibc-2.26.orig/malloc/malloc.c
+++ glibc-2.26/malloc/malloc.c
+@@ -2940,7 +2940,7 @@ typedef struct tcache_perthread_struct
+   tcache_entry *entries[TCACHE_MAX_BINS];
+ } tcache_perthread_struct;
+ 
+-static __thread char tcache_shutting_down = 0;
+static __thread bool tcache_shutting_down = false;
+ static __thread tcache_perthread_struct *tcache = NULL;
+ 
+ /* Caller must ensure that we know tc_idx is valid and there's room
+@@ -2977,8 +2977,12 @@ tcache_thread_freeres (void)
+   if (!tcache)
+     return;
+ 
+  /* Disable the tcache and prevent it from being reinitialized.  */
+   tcache = NULL;
+  tcache_shutting_down = true;
+ 
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+   for (i = 0; i < TCACHE_MAX_BINS; ++i)
+     {
+       while (tcache_tmp->entries[i])
+@@ -2990,8 +2994,6 @@ tcache_thread_freeres (void)
+     }
+ 
+   __libc_free (tcache_tmp);
+-
+-  tcache_shutting_down = 1;
+ }
+ text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
+ 
+Index: glibc-2.26/malloc/tst-malloc-tcache-leak.c
+===================================================================
+--- /dev/null
+++ glibc-2.26/malloc/tst-malloc-tcache-leak.c
+@@ -0,0 +1,112 @@
+/* Bug 22111: Test that threads do not leak their per thread cache.
+   Copyright (C) 2015-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The point of this test is to start and exit a large number of
+   threads, while at the same time looking to see if the used
+   memory grows with each round of threads run.  If the memory
+   grows above some linear bound we declare the test failed and
+   that the malloc implementation is leaking memory with each
+   thread.  This is a good indicator that the thread local cache
+   is leaking chunks.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <pthread.h>
+#include <assert.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xthread.h>
+
+void *
+worker (void *data)
+{
+  void *ret;
+  /* Allocate an arbitrary amount of memory that is known to fit into
+     the thread local cache (tcache).  If we have at least 64 bins
+     (default e.g. TCACHE_MAX_BINS) we should be able to allocate 32
+     bytes and force malloc to fill the tcache.  We are assuming tcache
+     init happens at the first small alloc, but it might in the future
+     be deferred to some other point.  Therefore to future proof this
+     test we include a full alloc/free/alloc cycle for the thread.  We
+     need a compiler barrier to avoid the removal of the useless
+     alloc/free.  We send some memory back to main to have the memory
+     freed after the thread dies, as just another check that the chunks
+     that were previously in the tcache are still OK to free after
+     thread death.  */
+  ret = xmalloc (32);
+  __asm__ volatile ("" ::: "memory");
+  free (ret);
+  return (void *) xmalloc (32);
+}
+
+static int
+do_test (void)
+{
+  pthread_t *thread;
+  struct mallinfo info_before, info_after;
+  void *retval;
+
+  /* This is an arbitrary choice. We choose a total of THREADS
+     threads created and joined.  This gives us enough iterations to
+     show a leak.  */
+  int threads = 100000;
+
+  /* Avoid there being 0 malloc'd data at this point by allocating the
+     pthread_t required to run the test.  */
+  thread = (pthread_t *) xcalloc (1, sizeof (pthread_t));
+
+  info_before = mallinfo ();
+
+  assert (info_before.uordblks != 0);
+
+  printf ("INFO: %d (bytes) are in use before starting threads.\n",
+          info_before.uordblks);
+
+  for (int loop = 0; loop < threads; loop++)
+    {
+      *thread = xpthread_create (NULL, worker, NULL);
+      retval = xpthread_join (*thread);
+      free (retval);
+    }
+
+  info_after = mallinfo ();
+  printf ("INFO: %d (bytes) are in use after all threads joined.\n",
+          info_after.uordblks);
+
+  /* We need to compare the memory in use before and the memory in use
+     after starting and joining THREADS threads.  We almost always grow
+     memory slightly, but not much. Consider that if even 1-byte leaked
+     per thread we'd have THREADS bytes of additional memory, and in
+     general the in-use at the start of main is quite low.  We will
+     always leak a full malloc chunk, and never just 1-byte, therefore
+     anything above "+ threads" from the start (constant offset) is a
+     leak.  Obviously this assumes no thread-related malloc'd internal
+     libc data structures persist beyond the thread death, and any that
+     did would limit the number of times you could call pthread_create,
+     which is a QoI we'd want to detect and fix.  */
+  if (info_after.uordblks > (info_before.uordblks + threads))
+    FAIL_EXIT1 ("Memory usage after threads is too high.\n");
+
+  /* Did not detect excessive memory usage.  */
+  free (thread);
+  exit (0);
+}
+
+#include <support/test-driver.c>
--- a/math-c++-compat.patch
+++ b/math-c++-compat.patch
@ -1,3 +1,18 @@
+2017-10-17  Romain Naour  <romain.naour@gmail.com>  (tiny change)
+
+	[BZ #22296]
+	* math/math.h: Let signbit use the builtin in C++ mode with gcc
+	< 6.x
+
+2017-10-03  Gabriel F. T. Gomes  <gabriel@inconstante.eti.br>
+
+	[BZ #22235]
+	* sysdeps/ieee754/ldbl-96/bits/iscanonical.h (iscanonical):
+	Provide a C++ implementation based on function overloading,
+	rather than using __MATH_TG, which uses C-only builtins.
+	* sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h (iscanonical):
+	Likewise.
+
 2017-09-22  Gabriel F. T. Gomes  <gabriel@inconstante.eti.br>

 	[BZ #22146]
@ -56,7 +71,23 @@ Index: glibc-2.26/math/math.h
 #  define fpclassify(x) __builtin_fpclassify (FP_NAN, FP_INFINITE,	      \
      FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
 # else
-@@ -442,8 +448,12 @@ enum
+@@ -412,6 +418,15 @@ enum
+ /* Return nonzero value if sign of X is negative.  */
+ # if __GNUC_PREREQ (6,0)
+ #  define signbit(x) __builtin_signbit (x)
+# elif defined __cplusplus
+  /* In C++ mode, __MATH_TG cannot be used, because it relies on
+     __builtin_types_compatible_p, which is a C-only builtin.
+     The check for __cplusplus allows the use of the builtin instead of
+     __MATH_TG. This is provided for libstdc++, only to let its configure
+     test work. No further use of this definition of signbit is expected
+     in C++ mode, since libstdc++ provides its own version of signbit
+     in cmath (which undefines signbit). */
+#  define signbit(x) __builtin_signbitl (x)
+ # elif __GNUC_PREREQ (4,0)
+ #  define signbit(x) __MATH_TG ((x), __builtin_signbit, (x))
+ # else
+@@ -442,8 +457,12 @@ enum
 
 /* Return nonzero value if X is positive or negative infinity.  */
 # if __HAVE_DISTINCT_FLOAT128 && !__GNUC_PREREQ (7,0) \
@ -71,7 +102,7 @@ Index: glibc-2.26/math/math.h
 #  define isinf(x) \
     (__builtin_types_compatible_p (__typeof (x), _Float128) \
      ? __isinff128 (x) : __builtin_isinf_sign (x))
-@@ -470,7 +480,32 @@ enum
+@@ -470,7 +489,32 @@ enum
 # include <bits/iscanonical.h>
 
 /* Return nonzero value if X is a signaling NaN.  */
@ -105,7 +136,7 @@ Index: glibc-2.26/math/math.h
 
 /* Return nonzero value if X is subnormal.  */
 # define issubnormal(x) (fpclassify (x) == FP_SUBNORMAL)
-@@ -484,15 +519,40 @@ enum
+@@ -484,15 +528,40 @@ enum
 #  endif
 # else	/* __cplusplus */
 extern "C++" {
@ -182,3 +213,59 @@ Index: glibc-2.26/misc/sys/cdefs.h
 # define __HAVE_GENERIC_SELECTION 1
 #else
 # define __HAVE_GENERIC_SELECTION 0
+Index: glibc-2.26/sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h
+===================================================================
+--- glibc-2.26.orig/sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h
+++ glibc-2.26/sysdeps/ieee754/ldbl-128ibm/bits/iscanonical.h
+@@ -37,5 +37,22 @@ extern int __iscanonicall (long double _
+    conversion, before being discarded; in IBM long double, there are
+    encodings that are not consistently handled as corresponding to any
+    particular value of the type, and we return 0 for those.  */
+-# define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
+-#endif
+# ifndef __cplusplus
+#  define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
+# else
+/* In C++ mode, __MATH_TG cannot be used, because it relies on
+   __builtin_types_compatible_p, which is a C-only builtin.  On the
+   other hand, overloading provides the means to distinguish between
+   the floating-point types.  The overloading resolution will match
+   the correct parameter (regardless of type qualifiers (i.e.: const
+   and volatile)).  */
+extern "C++" {
+inline int iscanonical (float __val) { return __iscanonicalf (__val); }
+inline int iscanonical (double __val) { return __iscanonical (__val); }
+inline int iscanonical (long double __val) { return __iscanonicall (__val); }
+#  if __HAVE_DISTINCT_FLOAT128
+inline int iscanonical (_Float128 __val) { return __iscanonicalf128 (__val); }
+#  endif
+}
+# endif /* __cplusplus */
+#endif /* __NO_LONG_DOUBLE_MATH */
+Index: glibc-2.26/sysdeps/ieee754/ldbl-96/bits/iscanonical.h
+===================================================================
+--- glibc-2.26.orig/sysdeps/ieee754/ldbl-96/bits/iscanonical.h
+++ glibc-2.26/sysdeps/ieee754/ldbl-96/bits/iscanonical.h
+@@ -34,4 +34,21 @@ extern int __iscanonicall (long double _
+    conversion, before being discarded; in extended precision, there
+    are encodings that are not consistently handled as corresponding to
+    any particular value of the type, and we return 0 for those.  */
+-#define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
+#ifndef __cplusplus
+# define iscanonical(x) __MATH_TG ((x), __iscanonical, (x))
+#else
+/* In C++ mode, __MATH_TG cannot be used, because it relies on
+   __builtin_types_compatible_p, which is a C-only builtin.  On the
+   other hand, overloading provides the means to distinguish between
+   the floating-point types.  The overloading resolution will match
+   the correct parameter (regardless of type qualifiers (i.e.: const
+   and volatile)).  */
+extern "C++" {
+inline int iscanonical (float __val) { return __iscanonicalf (__val); }
+inline int iscanonical (double __val) { return __iscanonical (__val); }
+inline int iscanonical (long double __val) { return __iscanonicall (__val); }
+# if __HAVE_DISTINCT_FLOAT128
+inline int iscanonical (_Float128 __val) { return __iscanonicalf128 (__val); }
+# endif
+}
+#endif /* __cplusplus */
--- a/nss-files-large-buffers.patch
+++ b/nss-files-large-buffers.patch
@ -0,0 +1,831 @@
+2017-10-11  Florian Weimer  <fweimer@redhat.com>
+
+	[BZ #22078]
+	Avoid large NSS buffers with many addresses, aliases.
+	* nss/nss_files/files-hosts.c (gethostbyname3_multi): Rewrite
+	using dynarrays and struct alloc_buffer.
+	* nss/Makefile (tests): Add tst-nss-files-hosts-multi.
+	(tst-nss-files-hosts-multi): Link with -ldl.
+	* nss/tst-nss-files-hosts-multi.c: New file.
+
+2017-10-11  Florian Weimer  <fweimer@redhat.com>
+
+	[BZ #18023]
+	* nss/nss_files/files-hosts.c (gethostbyname3_multi): Use struct
+	scratch_buffer.  Eliminate gotos.
+
+2017-10-10  Florian Weimer  <fweimer@redhat.com>
+
+	* nss/nss_files/files-hosts.c (gethostbyname3_multi): New
+	function.
+	(_nss_files_gethostbyname3_r): Call it.
+
+Index: glibc-2.26/nss/Makefile
+===================================================================
+--- glibc-2.26.orig/nss/Makefile
+++ glibc-2.26/nss/Makefile
+@@ -61,6 +61,7 @@ xtests			= bug-erange
+ # Tests which need libdl
+ ifeq (yes,$(build-shared))
+ tests += tst-nss-files-hosts-erange
+tests += tst-nss-files-hosts-multi
+ endif
+ 
+ # If we have a thread library then we can test cancellation against
+@@ -165,3 +166,4 @@ $(objpfx)tst-cancel-getpwuid_r: $(shared
+ endif
+ 
+ $(objpfx)tst-nss-files-hosts-erange: $(libdl)
+$(objpfx)tst-nss-files-hosts-multi: $(libdl)
+Index: glibc-2.26/nss/nss_files/files-hosts.c
+===================================================================
+--- glibc-2.26.orig/nss/nss_files/files-hosts.c
+++ glibc-2.26/nss/nss_files/files-hosts.c
+@@ -22,6 +22,8 @@
+ #include <arpa/nameser.h>
+ #include <netdb.h>
+ #include <resolv/resolv-internal.h>
+#include <scratch_buffer.h>
+#include <alloc_buffer.h>
+ 
+ 
+ /* Get implementation for some internal functions.  */
+@@ -110,228 +112,250 @@ DB_LOOKUP (hostbyaddr, ,,,
+ 	   }, const void *addr, socklen_t len, int af)
+ #undef EXTRA_ARGS_VALUE
+ 
+-enum nss_status
+-_nss_files_gethostbyname3_r (const char *name, int af, struct hostent *result,
+-			     char *buffer, size_t buflen, int *errnop,
+-			     int *herrnop, int32_t *ttlp, char **canonp)
+/* Type of the address and alias arrays.  */
+#define DYNARRAY_STRUCT array
+#define DYNARRAY_ELEMENT char *
+#define DYNARRAY_PREFIX array_
+#include <malloc/dynarray-skeleton.c>
+
+static enum nss_status
+gethostbyname3_multi (FILE * stream, const char *name, int af,
+		      struct hostent *result, char *buffer, size_t buflen,
+		      int *errnop, int *herrnop, int flags)
+ {
+-  FILE *stream = NULL;
+-  uintptr_t pad = -(uintptr_t) buffer % __alignof__ (struct hostent_data);
+-  buffer += pad;
+-  buflen = buflen > pad ? buflen - pad : 0;
+  assert (af == AF_INET || af == AF_INET6);
+ 
+-  /* Open file.  */
+-  enum nss_status status = internal_setent (&stream);
+  /* We have to get all host entries from the file.  */
+  struct scratch_buffer tmp_buffer;
+  scratch_buffer_init (&tmp_buffer);
+  struct hostent tmp_result_buf;
+  struct array addresses;
+  array_init (&addresses);
+  struct array aliases;
+  array_init (&aliases);
+  enum nss_status status;
+
+  /* Preserve the addresses and aliases encountered so far.  */
+  for (size_t i = 0; result->h_addr_list[i] != NULL; ++i)
+    array_add (&addresses, result->h_addr_list[i]);
+  for (size_t i = 0; result->h_aliases[i] != NULL; ++i)
+    array_add (&aliases, result->h_aliases[i]);
+
+  /* The output buffer re-uses now-unused space at the end of the
+     buffer, starting with the aliases array.  It comes last in the
+     data produced by internal_getent.  (The alias names themselves
+     are still located in the line read in internal_getent, which is
+     stored at the beginning of the buffer.)  */
+  struct alloc_buffer outbuf;
+  {
+    char *bufferend = (char *) result->h_aliases;
+    outbuf = alloc_buffer_create (bufferend, buffer + buflen - bufferend);
+  }
+ 
+-  if (status == NSS_STATUS_SUCCESS)
+  while (true)
+     {
+-      /* XXX Is using _res to determine whether we want to convert IPv4
+-         addresses to IPv6 addresses really the right thing to do?  */
+-      int flags = (res_use_inet6 () ? AI_V4MAPPED : 0);
+-
+-      while ((status = internal_getent (stream, result, buffer, buflen, errnop,
+-					herrnop, af, flags))
+-	     == NSS_STATUS_SUCCESS)
+      status = internal_getent (stream, &tmp_result_buf, tmp_buffer.data,
+				tmp_buffer.length, errnop, herrnop, af,
+				flags);
+      /* Enlarge the buffer if necessary.  */
+      if (status == NSS_STATUS_TRYAGAIN && *herrnop == NETDB_INTERNAL
+	  && *errnop == ERANGE)
+ 	{
+-	  LOOKUP_NAME_CASE (h_name, h_aliases)
+	  if (!scratch_buffer_grow (&tmp_buffer))
+	    {
+	      *errnop = ENOMEM;
+	      /* *herrnop and status already have the right value.  */
+	      break;
+	    }
+	  /* Loop around and retry with a larger buffer.  */
+ 	}
+-
+-      if (status == NSS_STATUS_SUCCESS
+-	  && _res_hconf.flags & HCONF_FLAG_MULTI)
+      else if (status == NSS_STATUS_SUCCESS)
+ 	{
+-	  /* We have to get all host entries from the file.  */
+-	  size_t tmp_buflen = MIN (buflen, 4096);
+-	  char tmp_buffer_stack[tmp_buflen]
+-	    __attribute__ ((__aligned__ (__alignof__ (struct hostent_data))));
+-	  char *tmp_buffer = tmp_buffer_stack;
+-	  struct hostent tmp_result_buf;
+-	  int naddrs = 1;
+-	  int naliases = 0;
+-	  char *bufferend;
+-	  bool tmp_buffer_malloced = false;
+-
+-	  while (result->h_aliases[naliases] != NULL)
+-	    ++naliases;
+-
+-	  bufferend = (char *) &result->h_aliases[naliases + 1];
+-
+-	again:
+-	  while ((status = internal_getent (stream, &tmp_result_buf, tmp_buffer,
+-					    tmp_buflen, errnop, herrnop, af,
+-					    flags))
+-		 == NSS_STATUS_SUCCESS)
+	  /* A line was read.  Check that it matches the search
+	     criteria.  */
+
+	  int matches = 1;
+	  struct hostent *old_result = result;
+	  result = &tmp_result_buf;
+	  /* The following piece is a bit clumsy but we want to use
+	     the `LOOKUP_NAME_CASE' value.  The optimizer should do
+	     its job.  */
+	  do
+ 	    {
+-	      int matches = 1;
+-	      struct hostent *old_result = result;
+-	      result = &tmp_result_buf;
+-	      /* The following piece is a bit clumsy but we want to use the
+-		 `LOOKUP_NAME_CASE' value.  The optimizer should do its
+-		 job.  */
+-	      do
+-		{
+-		  LOOKUP_NAME_CASE (h_name, h_aliases)
+-		  result = old_result;
+-		}
+-	      while ((matches = 0));
+	      LOOKUP_NAME_CASE (h_name, h_aliases)
+		result = old_result;
+	    }
+	  while ((matches = 0));
+ 
+-	      if (matches)
+	  /* If the line matches, we need to copy the addresses and
+	     aliases, so that we can reuse tmp_buffer for the next
+	     line.  */
+	  if (matches)
+	    {
+	      /* Record the addresses.  */
+	      for (size_t i = 0; tmp_result_buf.h_addr_list[i] != NULL; ++i)
+ 		{
+-		  /* We could be very clever and try to recycle a few bytes
+-		     in the buffer instead of generating new arrays.  But
+-		     we are not doing this here since it's more work than
+-		     it's worth.  Simply let the user provide a bit bigger
+-		     buffer.  */
+-		  char **new_h_addr_list;
+-		  char **new_h_aliases;
+-		  int newaliases = 0;
+-		  size_t newstrlen = 0;
+-		  int cnt;
+-
+-		  /* Count the new aliases and the length of the strings.  */
+-		  while (tmp_result_buf.h_aliases[newaliases] != NULL)
+		  /* Allocate the target space in the output buffer,
+		     depending on the address family.  */
+		  void *target;
+		  if (af == AF_INET)
+ 		    {
+-		      char *cp = tmp_result_buf.h_aliases[newaliases];
+-		      ++newaliases;
+-		      newstrlen += strlen (cp) + 1;
+		      assert (tmp_result_buf.h_length == 4);
+		      target = alloc_buffer_alloc (&outbuf, struct in_addr);
+ 		    }
+-		  /* If the real name is different add it also to the
+-		     aliases.  This means that there is a duplication
+-		     in the alias list but this is really the user's
+-		     problem.  */
+-		  if (strcmp (old_result->h_name,
+-			      tmp_result_buf.h_name) != 0)
+		  else if (af == AF_INET6)
+ 		    {
+-		      ++newaliases;
+-		      newstrlen += strlen (tmp_result_buf.h_name) + 1;
+		      assert (tmp_result_buf.h_length == 16);
+		      target = alloc_buffer_alloc (&outbuf, struct in6_addr);
+ 		    }
+		  else
+		    __builtin_unreachable ();
+ 
+-		  /* Make sure bufferend is aligned.  */
+-		  assert ((bufferend - (char *) 0) % sizeof (char *) == 0);
+-
+-		  /* Now we can check whether the buffer is large enough.
+-		     16 is the maximal size of the IP address.  */
+-		  if (bufferend + 16 + (naddrs + 2) * sizeof (char *)
+-		      + roundup (newstrlen, sizeof (char *))
+-		      + (naliases + newaliases + 1) * sizeof (char *)
+-		      >= buffer + buflen)
+		  if (target == NULL)
+ 		    {
+		      /* Request a larger output buffer.  */
+ 		      *errnop = ERANGE;
+ 		      *herrnop = NETDB_INTERNAL;
+ 		      status = NSS_STATUS_TRYAGAIN;
+-		      goto out;
+-		    }
+-
+-		  new_h_addr_list =
+-		    (char **) (bufferend
+-			       + roundup (newstrlen, sizeof (char *))
+-			       + 16);
+-		  new_h_aliases =
+-		    (char **) ((char *) new_h_addr_list
+-			       + (naddrs + 2) * sizeof (char *));
+-
+-		  /* Copy the old data in the new arrays.  */
+-		  for (cnt = 0; cnt < naddrs; ++cnt)
+-		    new_h_addr_list[cnt] = old_result->h_addr_list[cnt];
+-
+-		  for (cnt = 0; cnt < naliases; ++cnt)
+-		    new_h_aliases[cnt] = old_result->h_aliases[cnt];
+-
+-		  /* Store the new strings.  */
+-		  cnt = 0;
+-		  while (tmp_result_buf.h_aliases[cnt] != NULL)
+-		    {
+-		      new_h_aliases[naliases++] = bufferend;
+-		      bufferend = (__stpcpy (bufferend,
+-					     tmp_result_buf.h_aliases[cnt])
+-				   + 1);
+-		      ++cnt;
+		      break;
+ 		    }
+-
+-		  if (cnt < newaliases)
+-		    {
+-		      new_h_aliases[naliases++] = bufferend;
+-		      bufferend = __stpcpy (bufferend,
+-					    tmp_result_buf.h_name) + 1;
+-		    }
+-
+-		  /* Final NULL pointer.  */
+-		  new_h_aliases[naliases] = NULL;
+-
+-		  /* Round up the buffer end address.  */
+-		  bufferend += (sizeof (char *)
+-				- ((bufferend - (char *) 0)
+-				   % sizeof (char *))) % sizeof (char *);
+-
+-		  /* Now the new address.  */
+-		  new_h_addr_list[naddrs++] =
+-		    memcpy (bufferend, tmp_result_buf.h_addr,
+-			    tmp_result_buf.h_length);
+-
+-		  /* Also here a final NULL pointer.  */
+-		  new_h_addr_list[naddrs] = NULL;
+-
+-		  /* Store the new array pointers.  */
+-		  old_result->h_aliases = new_h_aliases;
+-		  old_result->h_addr_list = new_h_addr_list;
+-
+-		  /* Compute the new buffer end.  */
+-		  bufferend = (char *) &new_h_aliases[naliases + 1];
+-		  assert (bufferend <= buffer + buflen);
+-
+-		  result = old_result;
+		  memcpy (target, tmp_result_buf.h_addr_list[i],
+			  tmp_result_buf.h_length);
+		  array_add (&addresses, target);
+ 		}
+-	    }
+ 
+-	  if (status == NSS_STATUS_TRYAGAIN)
+-	    {
+-	      size_t newsize = 2 * tmp_buflen;
+-	      if (tmp_buffer_malloced)
+	      /* Record the aliases.  */
+	      for (size_t i = 0; tmp_result_buf.h_aliases[i] != NULL; ++i)
+ 		{
+-		  char *newp = realloc (tmp_buffer, newsize);
+-		  if (newp != NULL)
+-		    {
+-		      assert ((((uintptr_t) newp)
+-			       & (__alignof__ (struct hostent_data) - 1))
+-			      == 0);
+-		      tmp_buffer = newp;
+-		      tmp_buflen = newsize;
+-		      goto again;
+-		    }
+		  char *alias = tmp_result_buf.h_aliases[i];
+		  array_add (&aliases,
+			     alloc_buffer_copy_string (&outbuf, alias));
+ 		}
+-	      else if (!__libc_use_alloca (buflen + newsize))
+
+	      /* If the real name is different, add it also to the
+		 aliases.  This means that there is a duplication in
+		 the alias list but this is really the user's
+		 problem.  */
+	      {
+		char *new_name = tmp_result_buf.h_name;
+		if (strcmp (old_result->h_name, new_name) != 0)
+		  array_add (&aliases,
+			     alloc_buffer_copy_string (&outbuf, new_name));
+	      }
+
+	      /* Report memory allocation failures during the
+		 expansion of the temporary arrays.  */
+	      if (array_has_failed (&addresses) || array_has_failed (&aliases))
+ 		{
+-		  tmp_buffer = malloc (newsize);
+-		  if (tmp_buffer != NULL)
+-		    {
+-		      assert ((((uintptr_t) tmp_buffer)
+-			       & (__alignof__ (struct hostent_data) - 1))
+-			      == 0);
+-		      tmp_buffer_malloced = true;
+-		      tmp_buflen = newsize;
+-		      goto again;
+-		    }
+		  *errnop = ENOMEM;
+		  *herrnop = NETDB_INTERNAL;
+		  status = NSS_STATUS_UNAVAIL;
+		  break;
+ 		}
+-	      else
+
+	      /* Request a larger output buffer if we ran out of room.  */
+	      if (alloc_buffer_has_failed (&outbuf))
+ 		{
+-		  tmp_buffer
+-		    = extend_alloca (tmp_buffer, tmp_buflen,
+-				     newsize
+-				     + __alignof__ (struct hostent_data));
+-		  tmp_buffer = (char *) (((uintptr_t) tmp_buffer
+-					  + __alignof__ (struct hostent_data)
+-					  - 1)
+-					 & ~(__alignof__ (struct hostent_data)
+-					     - 1));
+-		  goto again;
+		  *errnop = ERANGE;
+		  *herrnop = NETDB_INTERNAL;
+		  status = NSS_STATUS_TRYAGAIN;
+		  break;
+ 		}
+-	    }
+-	  else
+-	    status = NSS_STATUS_SUCCESS;
+-	out:
+-	  if (tmp_buffer_malloced)
+-	    free (tmp_buffer);
+
+	      result = old_result;
+	    } /* If match was found.  */
+
+	  /* If no match is found, loop around and fetch another
+	     line.  */
+
+	} /* status == NSS_STATUS_SUCCESS.  */
+      else
+	/* internal_getent returned an error.  */
+	break;
+    } /* while (true) */
+
+  /* Propagate the NSS_STATUS_TRYAGAIN error to the caller.  It means
+     that we may not have loaded the complete result.
+     NSS_STATUS_NOTFOUND, however, means that we reached the end of
+     the file successfully.  */
+  if (status != NSS_STATUS_TRYAGAIN)
+    status = NSS_STATUS_SUCCESS;
+
+  if (status == NSS_STATUS_SUCCESS)
+    {
+      /* Copy the address and alias arrays into the output buffer and
+	 add NULL terminators.  The pointed-to elements were directly
+	 written into the output buffer above and do not need to be
+	 copied again.  */
+      size_t addresses_count = array_size (&addresses);
+      size_t aliases_count = array_size (&aliases);
+      char **out_addresses = alloc_buffer_alloc_array
+	(&outbuf, char *, addresses_count + 1);
+      char **out_aliases = alloc_buffer_alloc_array
+	(&outbuf, char *, aliases_count + 1);
+      if (out_addresses == NULL || out_aliases == NULL)
+	{
+	  /* The output buffer is not large enough.  */
+	  *errnop = ERANGE;
+	  *herrnop = NETDB_INTERNAL;
+	  status = NSS_STATUS_TRYAGAIN;
+	  /* Fall through to function exit.  */
+	}
+      else
+	{
+	  /* Everything is allocated in place.  Make the copies and
+	     adjust the array pointers.  */
+	  memcpy (out_addresses, array_begin (&addresses),
+		  addresses_count * sizeof (char *));
+	  out_addresses[addresses_count] = NULL;
+	  memcpy (out_aliases, array_begin (&aliases),
+		  aliases_count * sizeof (char *));
+	  out_aliases[aliases_count] = NULL;
+
+	  result->h_addr_list = out_addresses;
+	  result->h_aliases = out_aliases;
+
+	  status = NSS_STATUS_SUCCESS;
+	}
+    }
+
+  scratch_buffer_free (&tmp_buffer);
+  array_free (&addresses);
+  array_free (&aliases);
+  return status;
+}
+
+enum nss_status
+_nss_files_gethostbyname3_r (const char *name, int af, struct hostent *result,
+			     char *buffer, size_t buflen, int *errnop,
+			     int *herrnop, int32_t *ttlp, char **canonp)
+{
+  FILE *stream = NULL;
+  uintptr_t pad = -(uintptr_t) buffer % __alignof__ (struct hostent_data);
+  buffer += pad;
+  buflen = buflen > pad ? buflen - pad : 0;
+
+  /* Open file.  */
+  enum nss_status status = internal_setent (&stream);
+
+  if (status == NSS_STATUS_SUCCESS)
+    {
+      /* XXX Is using _res to determine whether we want to convert IPv4
+         addresses to IPv6 addresses really the right thing to do?  */
+      int flags = (res_use_inet6 () ? AI_V4MAPPED : 0);
+
+      while ((status = internal_getent (stream, result, buffer, buflen, errnop,
+					herrnop, af, flags))
+	     == NSS_STATUS_SUCCESS)
+	{
+	  LOOKUP_NAME_CASE (h_name, h_aliases)
+ 	}
+ 
+      if (status == NSS_STATUS_SUCCESS
+	  && _res_hconf.flags & HCONF_FLAG_MULTI)
+	status = gethostbyname3_multi
+	  (stream, name, af, result, buffer, buflen, errnop, herrnop, flags);
+
+       internal_endent (&stream);
+     }
+ 
+Index: glibc-2.26/nss/tst-nss-files-hosts-multi.c
+===================================================================
+--- /dev/null
+++ glibc-2.26/nss/tst-nss-files-hosts-multi.c
+@@ -0,0 +1,331 @@
+/* Parse /etc/hosts in multi mode with many addresses/aliases.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <gnu/lib-names.h>
+#include <netdb.h>
+#include <nss.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/check_nss.h>
+#include <support/namespace.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/test-driver.h>
+#include <support/xmemstream.h>
+#include <support/xstdio.h>
+#include <support/xunistd.h>
+#include <sys/resource.h>
+
+struct support_chroot *chroot_env; /* Assigned in prepare.  */
+
+static void
+prepare (int argc, char **argv)
+{ /* Installed via PREPARE; ARGC and ARGV are not used here.  */
+  chroot_env = support_chroot_create
+    ((struct support_chroot_configuration)
+     {
+       .resolv_conf = "",
+       .hosts = "",             /* See write_hosts below.  */
+       .host_conf = "multi on\n", /* Multi mode is what is tested.  */
+     });
+}
+
+/* Create /etc/hosts from outside the chroot: COUNT lines of each kind.  */
+static void
+write_hosts (int count)
+{
+  TEST_VERIFY (count > 0 && count <= 65535); /* I fits in two octets.  */
+  FILE *fp = xfopen (chroot_env->path_hosts, "w");
+  fputs ("127.0.0.1   localhost localhost.localdomain\n"
+         "::1         localhost localhost.localdomain\n",
+         fp);
+  for (int i = 0; i < count; ++i) /* I makes each address/alias unique.  */
+    {
+      fprintf (fp, "10.4.%d.%d www4.example.com\n",
+               (i / 256) & 0xff, i & 0xff);
+      fprintf (fp, "10.46.%d.%d www.example.com\n",
+               (i / 256) & 0xff, i & 0xff);
+      fprintf (fp, "192.0.2.1 alias.example.com v4-%d.example.com\n", i);
+      fprintf (fp, "2001:db8::6:%x www6.example.com\n", i);
+      fprintf (fp, "2001:db8::46:%x www.example.com\n", i);
+      fprintf (fp, "2001:db8::1 alias.example.com v6-%d.example.com\n", i);
+    }
+  xfclose (fp);
+}
+
+/* Parameters of a single test.  */
+struct test_params
+{
+  const char *name;             /* Name to query.  */
+  const char *marker;           /* Address part tied to NAME, e.g. "46".  */
+  int count;                    /* Number of addresses/aliases.  */
+  int family;                   /* AF_INET, AF_INET6 or AF_UNSPEC.  */
+  bool canonname;               /* True if AI_CANONNAME should be enabled.  */
+};
+
+/* Expected gethostbyname/gethostbyname2 result; the caller frees it.  */
+static char *
+expected_ghbn (const struct test_params *params)
+{
+  TEST_VERIFY (params->family == AF_INET || params->family == AF_INET6);
+
+  struct xmemstream expected;
+  xopen_memstream (&expected);
+  if (strcmp (params->name, "alias.example.com") == 0)
+    {
+      fprintf (expected.out, "name: %s\n", params->name);
+      char af; /* Digit used in the v4-N/v6-N alias names.  */
+      if (params->family == AF_INET)
+        af = '4';
+      else
+        af = '6';
+      for (int i = 0; i < params->count; ++i)
+        fprintf (expected.out, "alias: v%c-%d.example.com\n", af, i);
+
+      for (int i = 0; i < params->count; ++i) /* Same address each time.  */
+        if (params->family == AF_INET)
+          fputs ("address: 192.0.2.1\n", expected.out);
+        else
+          fputs ("address: 2001:db8::1\n", expected.out);
+    }
+  else /* www/www4/www6 name.  */
+    {
+      bool do_ipv4 = params->family == AF_INET
+        && strncmp (params->name, "www6", 4) != 0;
+      bool do_ipv6 = params->family == AF_INET6
+        && strncmp (params->name, "www4", 4) != 0;
+      if (do_ipv4 || do_ipv6)
+        {
+          fprintf (expected.out, "name: %s\n", params->name);
+          if (do_ipv4)
+            for (int i = 0; i < params->count; ++i)
+              fprintf (expected.out, "address: 10.%s.%d.%d\n",
+                       params->marker, i / 256, i % 256);
+          if (do_ipv6)
+            for (int i = 0; i < params->count; ++i)
+              fprintf (expected.out, "address: 2001:db8::%s:%x\n",
+                       params->marker, i);
+        }
+      else /* www4 queried as AF_INET6, or www6 queried as AF_INET.  */
+        fputs ("error: HOST_NOT_FOUND\n", expected.out);
+    }
+  xfclose_memstream (&expected);
+  return expected.buffer;
+}
+
+/* Expected getaddrinfo result for service "80"; the caller frees it.  */
+static char *
+expected_gai (const struct test_params *params)
+{
+  bool do_ipv4 = false; /* Address families permitted by the query.  */
+  bool do_ipv6 = false;
+  if (params->family == AF_UNSPEC)
+    do_ipv4 = do_ipv6 = true;
+  else if (params->family == AF_INET)
+    do_ipv4 = true;
+  else if (params->family == AF_INET6)
+    do_ipv6 = true;
+
+  struct xmemstream expected;
+  xopen_memstream (&expected);
+  if (strcmp (params->name, "alias.example.com") == 0)
+    {
+      if (params->canonname)
+        fprintf (expected.out,
+                 "flags: AI_CANONNAME\n"
+                 "canonname: %s\n",
+                 params->name);
+
+      if (do_ipv4)
+        for (int i = 0; i < params->count; ++i)
+          fputs ("address: STREAM/TCP 192.0.2.1 80\n", expected.out);
+      if (do_ipv6)
+        for (int i = 0; i < params->count; ++i)
+          fputs ("address: STREAM/TCP 2001:db8::1 80\n", expected.out);
+    }
+  else /* www/www4/www6 name.  */
+    {
+      if (strncmp (params->name, "www4", 4) == 0)
+        do_ipv6 = false;
+      else if (strncmp (params->name, "www6", 4) == 0)
+        do_ipv4 = false;
+      /* Otherwise the name is www: keep what the family allows.  */
+
+      if (do_ipv4 || do_ipv6)
+        {
+          if (params->canonname)
+            fprintf (expected.out,
+                     "flags: AI_CANONNAME\n"
+                     "canonname: %s\n",
+                     params->name);
+
+          if (do_ipv4)
+            for (int i = 0; i < params->count; ++i)
+              fprintf (expected.out, "address: STREAM/TCP 10.%s.%d.%d 80\n",
+                       params->marker, i / 256, i % 256);
+          if (do_ipv6)
+            for (int i = 0; i < params->count; ++i)
+              fprintf (expected.out,
+                       "address: STREAM/TCP 2001:db8::%s:%x 80\n",
+                       params->marker, i);
+        }
+      else /* www4 queried as AF_INET6, or www6 queried as AF_INET.  */
+        fputs ("error: Name or service not known\n", expected.out);
+    }
+  xfclose_memstream (&expected);
+  return expected.buffer;
+}
+
+static void
+run_gbhn_gai (struct test_params *params)
+{ /* Run all lookups for PARAMS and check them against expected output.  */
+  char *ctx = xasprintf ("name=%s marker=%s count=%d family=%d",
+                         params->name, params->marker, params->count,
+                         params->family);
+  if (test_verbose > 0)
+    printf ("info: %s\n", ctx);
+
+  /* Check gethostbyname (AF_INET only) and gethostbyname2.  */
+  if (params->family == AF_INET)
+    {
+      char *expected = expected_ghbn (params);
+      check_hostent (ctx, gethostbyname (params->name), expected);
+      free (expected);
+    }
+  if (params->family != AF_UNSPEC)
+    {
+      char *expected = expected_ghbn (params);
+      check_hostent (ctx, gethostbyname2 (params->name, params->family),
+                     expected);
+      free (expected);
+    }
+
+  /* Check getaddrinfo, first without and then with AI_CANONNAME.  */
+  for (int do_canonical = 0; do_canonical < 2; ++do_canonical)
+    {
+      params->canonname = do_canonical; /* Read by expected_gai.  */
+      char *expected = expected_gai (params);
+      struct addrinfo hints =
+        {
+          .ai_family = params->family,
+          .ai_socktype = SOCK_STREAM,
+          .ai_protocol = IPPROTO_TCP,
+        };
+      if (do_canonical)
+        hints.ai_flags |= AI_CANONNAME;
+      struct addrinfo *ai;
+      int ret = getaddrinfo (params->name, "80", &hints, &ai);
+      check_addrinfo (ctx, ai, ret, expected);
+      if (ret == 0) /* Free the result list only on success.  */
+        freeaddrinfo (ai);
+      free (expected);
+    }
+
+  free (ctx);
+}
+
+/* Callback for the subprocess which runs the test in a chroot.  */
+static void
+subprocess (void *closure)
+{
+  struct test_params *params = closure; /* run_test presets only count.  */
+
+  xchroot (chroot_env->path_chroot);
+
+  static const int families[] = { AF_INET, AF_INET6, AF_UNSPEC, -1 };
+  static const char *const names[] =
+    {
+      "www.example.com", "www4.example.com", "www6.example.com",
+      "alias.example.com",
+      NULL
+    }; /* NULL-terminated; names_marker below is indexed in parallel.  */
+  static const char *const names_marker[] = { "46", "4", "6", "" };
+
+  for (int family_idx = 0; families[family_idx] >= 0; ++family_idx)
+    { /* The trailing -1 entry terminates families.  */
+      params->family = families[family_idx];
+      for (int names_idx = 0; names[names_idx] != NULL; ++names_idx)
+        {
+          params->name = names[names_idx];
+          params->marker = names_marker[names_idx];
+          run_gbhn_gai (params);
+        }
+    }
+}
+
+/* Run the test for a specific number of addresses/aliases.  */
+static void
+run_test (int count)
+{
+  write_hosts (count); /* Written here, before the subprocess chroots.  */
+
+  struct test_params params =
+    {
+      .count = count,
+    }; /* name, marker, family and canonname are filled in later.  */
+
+  support_isolate_in_subprocess (subprocess, &params);
+}
+
+static int
+do_test (void)
+{ /* Returns 0 when done, 1 on rlimit errors, or EXIT_UNSUPPORTED.  */
+  support_become_root ();
+  if (!support_can_chroot ())
+    return EXIT_UNSUPPORTED;
+
+  /* This test should not use gigabytes of memory.  */
+  {
+    struct rlimit limit;
+    if (getrlimit (RLIMIT_AS, &limit) != 0)
+      {
+        printf ("getrlimit (RLIMIT_AS) failed: %m\n");
+        return 1;
+      }
+    long target = 200 * 1024 * 1024; /* 200 MiB address-space cap.  */
+    if (limit.rlim_cur == RLIM_INFINITY || limit.rlim_cur > target)
+      {
+        limit.rlim_cur = target; /* Only ever lowers the soft limit.  */
+        if (setrlimit (RLIMIT_AS, &limit) != 0)
+          {
+            printf ("setrlimit (RLIMIT_AS) failed: %m\n");
+            return 1;
+          }
+      }
+  }
+
+  __nss_configure_lookup ("hosts", "files"); /* Use only "files".  */
+  if (dlopen (LIBNSS_FILES_SO, RTLD_LAZY) == NULL) /* Before xchroot.  */
+    FAIL_EXIT1 ("could not load " LIBNSS_FILES_SO ": %s", dlerror ());
+
+  /* Run the tests with a few different address/alias counts.  */
+  for (int count = 1; count <= 111; ++count)
+    run_test (count);
+  run_test (1111);
+  run_test (22222);
+
+  support_chroot_free (chroot_env);
+  return 0;
+}
+
+#define PREPARE prepare
+#include <support/test-driver.c>
--- a/sysconf-uio-maxiov.patch
+++ b/sysconf-uio-maxiov.patch
@ -0,0 +1,125 @@
+2017-10-20  Florian Weimer  <fweimer@redhat.com>
+
+	[BZ #22321]
+	sysconf: Fix missing definition of UIO_MAXIOV on Linux.
+	* sysdeps/posix/sysconf.c: Include <sys/uio.h>.
+	* sysdeps/unix/sysv/linux/Makefile (tests): Add tst-sysconf-iov_max.
+	(tst-sysconf-iov_max): Link with tst-sysconf-iov_max-uapi.o.
+	* sysdeps/unix/sysv/linux/tst-sysconf-iov_max.c: New file.
+	* sysdeps/unix/sysv/linux/tst-sysconf-iov_max-uapi.c: Likewise.
+
+Index: glibc-2.26/sysdeps/posix/sysconf.c
+===================================================================
+--- glibc-2.26.orig/sysdeps/posix/sysconf.c
+++ glibc-2.26/sysdeps/posix/sysconf.c
+@@ -29,6 +29,7 @@
+ #include <sys/stat.h>
+ #include <sys/sysinfo.h>
+ #include <sys/types.h>
+#include <sys/uio.h>
+ #include <regex.h>
+ 
+ #define NEED_SPEC_ARRAY 0
+Index: glibc-2.26/sysdeps/unix/sysv/linux/Makefile
+===================================================================
+--- glibc-2.26.orig/sysdeps/unix/sysv/linux/Makefile
+++ glibc-2.26/sysdeps/unix/sysv/linux/Makefile
+@@ -50,7 +50,7 @@ sysdep_headers += sys/mount.h sys/acct.h
+ 		  bits/siginfo-arch.h bits/siginfo-consts-arch.h
+ 
+ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
+-	 tst-quota tst-sync_file_range test-errno-linux
+	 tst-quota tst-sync_file_range test-errno-linux tst-sysconf-iov_max
+ 
+ # Generate the list of SYS_* macros for the system calls (__NR_* macros).
+ 
+@@ -120,7 +120,11 @@ ifndef no_deps
+ -include $(objpfx)bits/syscall.d
+ endif
+ generated += bits/syscall.h bits/syscall.d
+-endif
+
+# Separate object file for access to the constant from the UAPI header.
+$(objpfx)tst-sysconf-iov_max: $(objpfx)tst-sysconf-iov_max-uapi.o
+
+endif # $(subdir) == misc
+ 
+ ifeq ($(subdir),time)
+ sysdep_headers += sys/timex.h bits/timex.h
+Index: glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max-uapi.c
+===================================================================
+--- /dev/null
+++ glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max-uapi.c
+@@ -0,0 +1,27 @@
+/* Check IOV_MAX definition: Helper function to capture UAPI header value.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Use a separate function to avoid header compatibility issues.  */
+
+#include <linux/uio.h>
+
+long
+uio_maxiov_value (void)
+{
+  return UIO_MAXIOV; /* As defined by the <linux/uio.h> UAPI header.  */
+}
+Index: glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max.c
+===================================================================
+--- /dev/null
+++ glibc-2.26/sysdeps/unix/sysv/linux/tst-sysconf-iov_max.c
+@@ -0,0 +1,40 @@
+/* Check IOV_MAX definition for consistency (bug 22321).
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Defined in tst-sysconf-iov_max-uapi.c.  */
+long uio_maxiov_value (void);
+
+
+#include <limits.h>
+#include <support/check.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+static int
+do_test (void)
+{ /* Every IOV_MAX variant must equal the UAPI value.  */
+  TEST_VERIFY (_XOPEN_IOV_MAX == 16); /* Value required by POSIX.  */
+  TEST_VERIFY (uio_maxiov_value () >= _XOPEN_IOV_MAX);
+  TEST_VERIFY (IOV_MAX == uio_maxiov_value ());
+  TEST_VERIFY (UIO_MAXIOV == uio_maxiov_value ()); /* Not <linux/uio.h>.  */
+  TEST_VERIFY (sysconf (_SC_UIO_MAXIOV) == uio_maxiov_value ());
+  TEST_VERIFY (sysconf (_SC_IOV_MAX) == uio_maxiov_value ());
+  return 0;
+}
+
+#include <support/test-driver.c>