SHA256
1
0
forked from pool/xxhash

Enable dispatch towards AVX512

This commit is contained in:
Jan Engelhardt 2024-09-04 13:47:51 +02:00
parent 679f82e202
commit 6587662ecc
5 changed files with 146 additions and 5 deletions

2
.gitignore vendored
View File

@ -1 +1,3 @@
.osc
*.tar
xh

8
Makefile Normal file
View File

@ -0,0 +1,8 @@
# Benchmark harness for xxhash dispatch testing: streams five kernel tarballs,
# each repeated eight times, through ./xh and times the run.

# The test recipe relies on brace expansion and the download recipe on
# pipefail; neither is available in a plain POSIX /bin/sh.
SHELL = /bin/bash

# Remove a target whose recipe failed, so that an interrupted or broken
# download does not leave a truncated tarball that looks up to date.
.DELETE_ON_ERROR:

# "test" names an action, not a file.
.PHONY: test

test: xh linux-6.6.1.tar linux-6.6.2.tar linux-6.6.3.tar linux-6.6.4.tar linux-6.6.5.tar
	cat linux-6.*.tar{,,,,,,,} | time ./xh

# Fetch and decompress a kernel release tarball on demand.
%.tar:
	set -o pipefail; wget -O- https://cdn.kernel.org/pub/linux/kernel/v6.x/$@.xz | xz -cd >$@

xh: xh.cpp
	g++ -o xh xh.cpp -lxxhash -Wall -O2 -g

111
cpu-1135g7 Normal file
View File

@ -0,0 +1,111 @@
=== test ===
standard openSUSE xxhash packages of today
$ make
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.24user 6.69system 0:13.69elapsed 72%CPU (0avgtext+0avgdata 3824maxresident)k
0inputs+0outputs (0major+178minor)pagefaults 0swaps
14:04 f3:../xx/xxhash $ make
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.29user 6.05system 0:13.04elapsed 71%CPU (0avgtext+0avgdata 3720maxresident)k
0inputs+0outputs (0major+178minor)pagefaults 0swaps
14:05 f3:../xx/xxhash $ make
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.20user 6.29system 0:13.19elapsed 72%CPU (0avgtext+0avgdata 3664maxresident)k
0inputs+0outputs (0major+177minor)pagefaults 0swaps
14:05 f3:../xx/xxhash $ make
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.38user 5.91system 0:12.89elapsed 72%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+178minor)pagefaults 0swaps
Mean/Spread: 13.20 ± 0.31 sec (2.3% error)
=== switch packages ===
# rpm -U ...
=== verify AVX512 is now in use ===
Watch this space closely.
$ gdb xh
Reading symbols from xh...
(gdb) b main
Breakpoint 1 at 0x4010b0: file xh.cpp, line 7.
(gdb) r <xh.cpp
Starting program: /home/jengelh/obs_nosave/zu/home/jengelh/xx/xxhash/xh <xh.cpp
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Breakpoint 1, main () at xh.cpp:7
7 auto state = XXH3_createState();
(gdb) b XXH_setDispatch
Breakpoint 2 at 0x7ffff7f9a6f4: file /usr/src/debug/xxHash-0.8.2/xxh_x86dispatch.c, line 706.
(gdb) r
Restart?-yes
Starting program: /home/jengelh/obs_nosave/zu/home/jengelh/xx/xxhash/xh <xh.cpp
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Breakpoint 2, XXH_setDispatch () at /usr/src/debug/xxHash-0.8.2/xxh_x86dispatch.c:706
706 {
(gdb) bt
#0 XXH_setDispatch () at /usr/src/debug/xxHash-0.8.2/xxh_x86dispatch.c:706
#1 0x00007ffff7fca53e in call_init (env=0x7fffffffd998, argv=0x7fffffffd988, argc=1, l=<optimized out>) at dl-init.c:74
#2 call_init (l=<optimized out>, argc=1, argv=0x7fffffffd988, env=0x7fffffffd998) at dl-init.c:26
#3 0x00007ffff7fca63c in _dl_init (main_map=0x7ffff7fbe000, argc=1, argv=0x7fffffffd988, env=0x7fffffffd998) at dl-init.c:121
#4 0x00007ffff7fe2ba0 in _dl_start_user () from /lib64/ld-linux-x86-64.so.2
#5 0x0000000000000001 in ?? ()
#6 0x00007fffffffde8c in ?? ()
#7 0x0000000000000000 in ?? ()
(gdb) n
707 int vecID = XXH_featureTest();
(gdb)
719 XXH_g_dispatch = XXH_kDispatch[vecID];
(gdb) p vecID
$1 = 3 [= XXH_AVX512!]
=== test ===
$ make test
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.37user 6.16system 0:13.23elapsed 72%CPU (0avgtext+0avgdata 3708maxresident)k
0inputs+0outputs (0major+177minor)pagefaults 0swaps
14:07 f3:../xx/xxhash $ make test
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.23user 6.27system 0:13.27elapsed 71%CPU (0avgtext+0avgdata 3664maxresident)k
0inputs+0outputs (0major+176minor)pagefaults 0swaps
14:07 f3:../xx/xxhash $ make test
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.26user 6.77system 0:13.75elapsed 73%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+178minor)pagefaults 0swaps
14:07 f3:../xx/xxhash $ make test
cat linux-6.*.tar{,,,,,,,} | time ./xh
5df8e93736b5a2bddd0b324a455b61fc
3.19user 6.07system 0:12.96elapsed 71%CPU (0avgtext+0avgdata 3712maxresident)k
0inputs+0outputs (0major+178minor)pagefaults 0swaps
Mean/Spread: 13.30 ± 0.34 sec (2.5% error)
== Lessons learned ==
Prerequisite for good measurement:
* consistent(!) error rate (4 individual measurements is a bit on the low side, but ... anyway)
Result:
* The observed 13.2 s mean is within the margin of measurement error of the 13.3 s mean, and vice versa
Conclusion:
* DISPATCH=1 made no impact on this machine

18
xh.cpp Normal file
View File

@ -0,0 +1,18 @@
#include <xxhash.h>
#include <unistd.h>
#include <cstdio>
int main()
{
auto state = XXH3_createState();
XXH3_128bits_reset(state);
char buffer[1048576];
ssize_t count;
while ((count = read(STDIN_FILENO, buffer, sizeof(buffer))) > 0)
XXH3_128bits_update(state, buffer, count);
XXH128_canonical_t canon{};
XXH128_canonicalFromHash(&canon, XXH3_128bits_digest(state));
for (auto u : canon.digest)
printf("%02x", u);
printf("\n");
}

View File

@ -60,9 +60,10 @@ Headers and other development files for xxHash.
%build
# ALLOW_AVX just means "we guarantee we policed our %%optflags".
export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1"
export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1 -Og"
export CXXFLAGS="$CFLAGS"
export LDFLAGS="%{?build_ldflags}"
export DISPATCH=1
# DISPATCH=1 if you want AVX2/AVX512. But it does not seem to perform any
# better than the lowest-common-denominator code on at least the 1135G7 and 5950X
# CPUs, and for both LP64 as well as ILP32 — it seems to be all within margin
@ -70,16 +71,18 @@ export LDFLAGS="%{?build_ldflags}"
%make_build prefix=%{_prefix} libdir=%{_libdir}
%install
export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1"
export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1 -Og"
export CXXFLAGS="%{optflags}"
export LDFLAGS="%{?build_ldflags}"
export DISPATCH=1
%make_install prefix=%{_prefix} libdir=%{_libdir}
rm -rf %{buildroot}%{_libdir}/libxxhash.a
%check
export CFLAGS="%{optflags}"
export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1"
export CXXFLAGS="%{optflags}"
export LDFLAGS="%{?build_ldflags}"
export DISPATCH=1
# not safe for parallel execution as it removes xxhash.o and recreates it with different flags
# the list is taken from test-all with non-working/irrelevant ones (such as ones that change the toolchain) removed
%make_build -j1 test test-unicode listL120 trailingWhitespace test-xxh-nnn-sums
@ -102,8 +105,7 @@ export LDFLAGS="%{?build_ldflags}"
%{_libdir}/libxxhash.so.*
%files devel
%{_includedir}/xxhash.h
%{_includedir}/xxh3.h
%{_includedir}/*
%{_libdir}/pkgconfig/libxxhash.pc
%{_libdir}/libxxhash.so