From 8980706c5cfe2b65998c1270fdbaba01c9983d462b1352caa90f8e55a0411c6f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 4 Sep 2024 13:47:51 +0200 Subject: [PATCH] Enable dispatch towards AVX512 --- .gitignore | 2 + Makefile | 8 ++++ cpu-1135g7 | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++ xh.cpp | 18 +++++++++ xxhash.spec | 12 +++--- 5 files changed, 146 insertions(+), 5 deletions(-) create mode 100644 Makefile create mode 100644 cpu-1135g7 create mode 100644 xh.cpp diff --git a/.gitignore b/.gitignore index 57affb6..9148b72 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .osc +*.tar +xh diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6c4a19e --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +test: xh linux-6.6.1.tar linux-6.6.2.tar linux-6.6.3.tar linux-6.6.4.tar linux-6.6.5.tar + cat linux-6.*.tar{,,,,,,,} | time ./xh + +%.tar: + wget -O- https://cdn.kernel.org/pub/linux/kernel/v6.x/$@.xz | xz -cd >$@ + +xh: xh.cpp + g++ -o xh xh.cpp -lxxhash -Wall -O2 -g diff --git a/cpu-1135g7 b/cpu-1135g7 new file mode 100644 index 0000000..68c96a3 --- /dev/null +++ b/cpu-1135g7 @@ -0,0 +1,111 @@ + +=== test === + +standard openSUSE xxhash packages of today + +$ make +cat linux-6.*.tar{,,,,,,,} | time ./xh +5df8e93736b5a2bddd0b324a455b61fc +3.24user 6.69system 0:13.69elapsed 72%CPU (0avgtext+0avgdata 3824maxresident)k +0inputs+0outputs (0major+178minor)pagefaults 0swaps +14:04 f3:../xx/xxhash $ make +cat linux-6.*.tar{,,,,,,,} | time ./xh +5df8e93736b5a2bddd0b324a455b61fc +3.29user 6.05system 0:13.04elapsed 71%CPU (0avgtext+0avgdata 3720maxresident)k +0inputs+0outputs (0major+178minor)pagefaults 0swaps +14:05 f3:../xx/xxhash $ make +cat linux-6.*.tar{,,,,,,,} | time ./xh +5df8e93736b5a2bddd0b324a455b61fc +3.20user 6.29system 0:13.19elapsed 72%CPU (0avgtext+0avgdata 3664maxresident)k +0inputs+0outputs (0major+177minor)pagefaults 0swaps +14:05 f3:../xx/xxhash $ make +cat linux-6.*.tar{,,,,,,,} | time ./xh 
+5df8e93736b5a2bddd0b324a455b61fc +3.38user 5.91system 0:12.89elapsed 72%CPU (0avgtext+0avgdata 3712maxresident)k +0inputs+0outputs (0major+178minor)pagefaults 0swaps + +Mean/Spread: 13.20 ± 0.31 sec (2.3% error) + + +=== switch packages === + +# rpm -U ... + + +=== verify AVX512 is now in use === + +Watch this space closely. + +$ gdb xh +Reading symbols from xh... +(gdb) b main +Breakpoint 1 at 0x4010b0: file xh.cpp, line 7. +(gdb) r ) at dl-init.c:74 +#2 call_init (l=, argc=1, argv=0x7fffffffd988, env=0x7fffffffd998) at dl-init.c:26 +#3 0x00007ffff7fca63c in _dl_init (main_map=0x7ffff7fbe000, argc=1, argv=0x7fffffffd988, env=0x7fffffffd998) at dl-init.c:121 +#4 0x00007ffff7fe2ba0 in _dl_start_user () from /lib64/ld-linux-x86-64.so.2 +#5 0x0000000000000001 in ?? () +#6 0x00007fffffffde8c in ?? () +#7 0x0000000000000000 in ?? () +(gdb) n +707 int vecID = XXH_featureTest(); +(gdb) +719 XXH_g_dispatch = XXH_kDispatch[vecID]; +(gdb) p vecID +$1 = 3 [= XXH_AVX512!] + + +=== test === + +$ make test +cat linux-6.*.tar{,,,,,,,} | time ./xh +5df8e93736b5a2bddd0b324a455b61fc +3.37user 6.16system 0:13.23elapsed 72%CPU (0avgtext+0avgdata 3708maxresident)k +0inputs+0outputs (0major+177minor)pagefaults 0swaps +14:07 f3:../xx/xxhash $ make test +cat linux-6.*.tar{,,,,,,,} | time ./xh +5df8e93736b5a2bddd0b324a455b61fc +3.23user 6.27system 0:13.27elapsed 71%CPU (0avgtext+0avgdata 3664maxresident)k +0inputs+0outputs (0major+176minor)pagefaults 0swaps +14:07 f3:../xx/xxhash $ make test +cat linux-6.*.tar{,,,,,,,} | time ./xh +5df8e93736b5a2bddd0b324a455b61fc +3.26user 6.77system 0:13.75elapsed 73%CPU (0avgtext+0avgdata 3712maxresident)k +0inputs+0outputs (0major+178minor)pagefaults 0swaps +14:07 f3:../xx/xxhash $ make test +cat linux-6.*.tar{,,,,,,,} | time ./xh +5df8e93736b5a2bddd0b324a455b61fc +3.19user 6.07system 0:12.96elapsed 71%CPU (0avgtext+0avgdata 3712maxresident)k +0inputs+0outputs (0major+178minor)pagefaults 0swaps + +Mean/Spread: 13.30 ± 0.34s (2.5% error) + +== Lessons 
learned == + +Prerequisite for good measurement: + * consistent(!) error rate (4 individual measurements is a bit on the low side, but ... anyway) + +Result: + * The observed 13.2s mean is within the margin of measurement error of the 13.3s mean and vice versa + +Conclusion: + * DISPATCH=1 had no measurable impact on this machine diff --git a/xh.cpp b/xh.cpp new file mode 100644 index 0000000..54d136e --- /dev/null +++ b/xh.cpp @@ -0,0 +1,18 @@ +#include +#include +#include + +int main() +{ + auto state = XXH3_createState(); + XXH3_128bits_reset(state); + char buffer[1048576]; + ssize_t count; + while ((count = read(STDIN_FILENO, buffer, sizeof(buffer))) > 0) + XXH3_128bits_update(state, buffer, count); + XXH128_canonical_t canon{}; + XXH128_canonicalFromHash(&canon, XXH3_128bits_digest(state)); + for (auto u : canon.digest) + printf("%02x", u); + printf("\n"); +} diff --git a/xxhash.spec b/xxhash.spec index a1bf827..5c7bca6 100644 --- a/xxhash.spec +++ b/xxhash.spec @@ -60,9 +60,10 @@ Headers and other development files for xxHash. %build # ALLOW_AVX just means "we guarantee we policed our %%optflags". -export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1" +export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1 -Og" export CXXFLAGS="$CFLAGS" export LDFLAGS="%{?build_ldflags}" +export DISPATCH=1 # DISPATCH=1 if you want AVX2/AVX512. 
But it does not seem to perform any # better than the lowest-denomimation code on at least the 1135G7 and 5950X # CPUs, and for both LP64 as well as ILP32 — it seems to be all within margin @@ -70,16 +71,18 @@ export LDFLAGS="%{?build_ldflags}" %make_build prefix=%{_prefix} libdir=%{_libdir} %install -export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1" +export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1 -Og" export CXXFLAGS="%{optflags}" export LDFLAGS="%{?build_ldflags}" +export DISPATCH=1 %make_install prefix=%{_prefix} libdir=%{_libdir} rm -rf %{buildroot}%{_libdir}/libxxhash.a %check -export CFLAGS="%{optflags}" +export CFLAGS="%{optflags} -DXXH_X86DISPATCH_ALLOW_AVX=1" export CXXFLAGS="%{optflags}" export LDFLAGS="%{?build_ldflags}" +export DISPATCH=1 # not safe for parallel execution as it removes xxhash.o and recreates it with different flags # the list is taken from test-all with non-working/irrelevant ones (such as ones that change the toolchain) removed %make_build -j1 test test-unicode listL120 trailingWhitespace test-xxh-nnn-sums @@ -102,8 +105,7 @@ export LDFLAGS="%{?build_ldflags}" %{_libdir}/libxxhash.so.* %files devel -%{_includedir}/xxhash.h -%{_includedir}/xxh3.h +%{_includedir}/* %{_libdir}/pkgconfig/libxxhash.pc %{_libdir}/libxxhash.so