glibc/glibc-memset-nontemporal.diff
Andreas Schwab 7db7ce4082 Accepting request 361238 from home:Andreas_Schwab:Factory
- no-long-double.patch: Don't use long double functions if NO_LONG_DOUBLE

- Update to glibc 2.23 release.
  * Unicode 8.0.0 Support
  * sched_setaffinity, pthread_setaffinity_np no longer attempt to guess
    the kernel-internal CPU set size
  * The fts.h header can now be used with -D_FILE_OFFSET_BITS=64
  * getaddrinfo now detects certain invalid responses on an internal
    netlink socket
  * A defect in the malloc implementation, present since glibc 2.15 (2012)
    or glibc 2.10 via --enable-experimental-malloc (2009), could result in
    the unnecessary serialization of memory allocation requests across
    threads
  * The obsolete header <regexp.h> has been removed
  * The obsolete functions bdflush, create_module, get_kernel_syms,
    query_module and uselib are no longer available to newly linked
    binaries
  * Optimized string, wcsmbs and memory functions for IBM z13.
  * Newly linked programs that define a variable called signgam will no
    longer have it set by the lgamma, lgammaf and lgammal functions
- Removed patches:
  * dont-remove-nodelete-flag.patch
  * openat64-readd-o-largefile.patch
  * mntent-blank-line.patch
  * opendir-o-directory-check.patch
  * strcoll-remove-strdiff-opt.patch
  * ld-pointer-guard.patch
  * tls-dtor-list-mangling.patch
  * powerpc-lock-elision-race.patch
  * prelink-elf-rtype-class.patch

OBS-URL: https://build.opensuse.org/request/show/361238
OBS-URL: https://build.opensuse.org/package/show/Base:System/glibc?expand=0&rev=423
2016-02-24 11:39:39 +00:00

41 lines
875 B
Diff

Fix for bnc #868622: memset is slow for large block sizes. Use non-temporal stores when the block is larger than the shared cache size.
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index db4fb84..9c42018 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -84,6 +84,9 @@ L(loop_start):
movdqu %xmm0, -48(%rdi,%rdx)
movdqu %xmm0, 48(%rdi)
movdqu %xmm0, -64(%rdi,%rdx)
+ mov __x86_shared_cache_size(%rip),%r9d # The largest cache size
+ cmp %r9,%rdx
+ ja L(nt_move)
addq %rdi, %rdx
andq $-64, %rdx
cmpq %rdx, %rcx
@@ -99,6 +102,23 @@ L(loop):
jne L(loop)
rep
ret
+L(nt_move): # Taken when %rdx is above (unsigned) the loaded __x86_shared_cache_size value
+ addq %rdi, %rdx # %rdx = %rdi + %rdx; presumably one past the end of the block (TODO confirm)
+ andq $-64, %rdx # Round down to a 64-byte boundary: this is the loop end address
+ cmpq %rdx, %rcx # %rcx is set up before this hunk; presumably the aligned start (TODO confirm)
+ je L(return) # Start equals end: no full 64-byte chunk for the loop to store
+ .p2align 4 # Align the loop entry to 16 bytes
+L(nt_loop): # Each iteration stores 64 bytes at %rcx from %xmm0
+ movntdq %xmm0, (%rcx) # Non-temporal store; movntdq requires a 16-byte aligned address
+ movntdq %xmm0, 16(%rcx) # Bytes 16..31 of the current chunk
+ movntdq %xmm0, 32(%rcx) # Bytes 32..47 of the current chunk
+ movntdq %xmm0, 48(%rcx) # Bytes 48..63 of the current chunk
+ addq $64, %rcx # Advance to the next 64-byte chunk
+ cmpq %rcx, %rdx # Reached the end address computed above?
+ jne L(nt_loop) # Exact-equality exit: relies on both addresses being 64-byte multiples
+ sfence # Order the weakly-ordered non-temporal stores before returning
+ rep # Prefix for the ret below; same rep/ret pair as the return after the regular loop
+ ret
L(less_16_bytes):
movq %xmm0, %rcx
testb $24, %dl