SHA256
1
0
forked from pool/glibc
glibc/glibc-memset-nontemporal.diff
Stephan Kulow 3d30c029a1 Accepting request 237681 from Base:System
- psfaa.patch: copy filename argument in posix_spawn_file_actions_addopen
  (CVE-2014-4043, bnc#882600, BZ #17048)

- glibc-memset-nontemporal.diff: Speedup memset on x86_64
  (bnc#868622, BZ #16830) (forwarded request 237680 from Andreas_Schwab)

OBS-URL: https://build.opensuse.org/request/show/237681
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/glibc?expand=0&rev=174
2014-06-23 07:23:33 +00:00

41 lines
875 B
Diff

Fix for bnc#868622 (memset is slow for large block sizes): use non-temporal stores when the length exceeds the shared cache size.
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index db4fb84..9c42018 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -84,6 +84,9 @@ L(loop_start):
movdqu %xmm8, -48(%rdi,%rdx)
movdqu %xmm8, 48(%rdi)
movdqu %xmm8, -64(%rdi,%rdx)
+ mov __x86_shared_cache_size(%rip),%r9d # The largest cache size
+ cmp %r9,%rdx
+ ja L(nt_move)
addq %rdi, %rdx
andq $-64, %rdx
cmpq %rdx, %rcx
@@ -99,6 +102,23 @@ L(loop):
jne L(loop)
rep
ret
+L(nt_move): /* Non-temporal fill path; reached via ja L(nt_move) when %rdx is above __x86_shared_cache_size.  */
+ addq %rdi, %rdx /* %rdx = %rdi + %rdx; presumably the end address (start + length) - confirm against function entry.  */
+ andq $-64, %rdx /* Round %rdx down to a multiple of 64: the loop stop address.  */
+ cmpq %rdx, %rcx /* %rcx is the store cursor, set up before this hunk.  */
+ je L(return) /* Cursor already at the stop address: no 64-byte block to write.  */
+ .p2align 4 /* Align the loop head to 16 bytes.  */
+L(nt_loop): /* Each iteration writes 64 bytes from %xmm8 with cache-bypassing stores.  */
+ movntdq %xmm8, (%rcx) /* movntdq needs a 16-byte aligned address; assumes %rcx was aligned earlier - confirm.  */
+ movntdq %xmm8, 16(%rcx)
+ movntdq %xmm8, 32(%rcx)
+ movntdq %xmm8, 48(%rcx)
+ addq $64, %rcx /* Advance the cursor by the 64 bytes just written.  */
+ cmpq %rcx, %rdx /* Exit test is equality only, so %rcx must reach %rdx in exact 64-byte steps.  */
+ jne L(nt_loop)
+ sfence /* Order the weakly-ordered non-temporal stores ahead of any later stores.  */
+ rep /* Same rep; ret return sequence as the exit of L(loop).  */
+ ret
L(less_16_bytes):
movq %xmm8, %rcx
testb $24, %dl