SHA256
8
0
forked from pool/xsimd

- Add upstream changes fixing some krita issues:

* 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch
* 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch

OBS-URL: https://build.opensuse.org/package/show/devel:libraries:c_c++/xsimd?expand=0&rev=17
This commit is contained in:
2024-07-10 12:27:34 +00:00
committed by Git OBS Bridge
commit 07399423af
7 changed files with 480 additions and 0 deletions

23
.gitattributes vendored Normal file
View File

@@ -0,0 +1,23 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.osc

View File

@@ -0,0 +1,87 @@
From 4f91d4a44eb9476572cf49a96cbe658eb871f47c Mon Sep 17 00:00:00 2001
From: Dmitry Kazakov <dimula73@gmail.com>
Date: Fri, 14 Jun 2024 10:19:55 +0200
Subject: [PATCH 1/2] Fix xsimd::available_architectures().has() for sve and
rvv archs
Ideally, the patched CPU detection code should also check whether the length
of SVE and RVV is actually supported by the current CPU implementation
(i.e. ZCR_Elx.LEN register for SVE and something else for RVV), but
I don't have such CPUs/emulators handy, so I cannot add such checks.
Given that xsimd::available_architectures().has() is a new feature
of XSIMD13 and the length check has never been present in XSIMD, this
bug is not a regression at least.
The patch also adds a unittest that reproduces the error the patch fixes
---
include/xsimd/config/xsimd_cpuid.hpp | 12 ++++++++++--
test/test_arch.cpp | 15 +++++++++++++++
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
index f22089b..30a9da2 100644
--- a/include/xsimd/config/xsimd_cpuid.hpp
+++ b/include/xsimd/config/xsimd_cpuid.hpp
@@ -42,6 +42,10 @@ namespace xsimd
#define ARCH_FIELD_EX(arch, field_name) \
unsigned field_name; \
XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
+
+#define ARCH_FIELD_EX_REUSE(arch, field_name) \
+ XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
+
#define ARCH_FIELD(name) ARCH_FIELD_EX(name, name)
ARCH_FIELD(sse2)
@@ -72,8 +76,12 @@ namespace xsimd
ARCH_FIELD(neon)
ARCH_FIELD(neon64)
ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
- ARCH_FIELD(sve)
- ARCH_FIELD(rvv)
+ ARCH_FIELD_EX(detail::sve<512>, sve)
+ ARCH_FIELD_EX_REUSE(detail::sve<256>, sve)
+ ARCH_FIELD_EX_REUSE(detail::sve<128>, sve)
+ ARCH_FIELD_EX(detail::rvv<512>, rvv)
+ ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv)
+ ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv)
ARCH_FIELD(wasm)
#undef ARCH_FIELD
diff --git a/test/test_arch.cpp b/test/test_arch.cpp
index b420733..f1f50d5 100644
--- a/test/test_arch.cpp
+++ b/test/test_arch.cpp
@@ -38,6 +38,16 @@ struct check_supported
}
};
+struct check_cpu_has_intruction_set
+{
+ template <class Arch>
+ void operator()(Arch arch) const
+ {
+ static_assert(std::is_same<decltype(xsimd::available_architectures().has(arch)), bool>::value,
+ "cannot test instruction set availability on CPU");
+ }
+};
+
struct check_available
{
template <class Arch>
@@ -71,6 +81,11 @@ TEST_CASE("[multi arch support]")
xsimd::supported_architectures::for_each(check_supported {});
}
+ SUBCASE("xsimd::available_architectures::has")
+ {
+ xsimd::all_architectures::for_each(check_cpu_has_intruction_set {});
+ }
+
SUBCASE("xsimd::default_arch::name")
{
constexpr char const* name = xsimd::default_arch::name();
--
2.45.2

View File

@@ -0,0 +1,147 @@
From c2974c874e14557490eab76d2eebf9f8b9eb88f1 Mon Sep 17 00:00:00 2001
From: Dmitry Kazakov <dimula73@gmail.com>
Date: Tue, 28 May 2024 22:21:08 +0200
Subject: [PATCH 2/2] Fix detection of SSE/AVX/AVX512 when they are explicitly
disabled by OS
Some CPU vulnerability mitigations may disable AVX functionality
on the hardware level via the XCR0 register. We should check that
manually to verify that OS actually allows us to use this feature.
See https://bugs.kde.org/show_bug.cgi?id=484622
Fix #1025
---
include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------
1 file changed, 72 insertions(+), 19 deletions(-)
diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
index 30a9da2..8021fce 100644
--- a/include/xsimd/config/xsimd_cpuid.hpp
+++ b/include/xsimd/config/xsimd_cpuid.hpp
@@ -122,6 +122,35 @@ namespace xsimd
#endif
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
+
+ auto get_xcr0_low = []() noexcept
+ {
+ uint32_t xcr0;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+ xcr0 = (uint32_t)_xgetbv(0);
+
+#elif defined(__GNUC__)
+
+ __asm__(
+ "xorl %%ecx, %%ecx\n"
+ "xgetbv\n"
+ : "=a"(xcr0)
+ :
+#if defined(__i386__)
+ : "ecx", "edx"
+#else
+ : "rcx", "rdx"
+#endif
+ );
+
+#else /* _MSC_VER < 1400 */
+#error "_MSC_VER < 1400 is not supported"
+#endif /* _MSC_VER && _MSC_VER >= 1400 */
+ return xcr0;
+ };
+
auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
{
@@ -156,19 +185,43 @@ namespace xsimd
get_cpuid(regs1, 0x1);
- sse2 = regs1[3] >> 26 & 1;
- sse3 = regs1[2] >> 0 & 1;
- ssse3 = regs1[2] >> 9 & 1;
- sse4_1 = regs1[2] >> 19 & 1;
- sse4_2 = regs1[2] >> 20 & 1;
- fma3_sse42 = regs1[2] >> 12 & 1;
+ // OS can explicitly disable the usage of SSE/AVX extensions
+ // by leaving the corresponding state bit cleared in the XCR0 register
+ //
+ // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html
+
+ unsigned sse_state_os_enabled = 1;
+ unsigned avx_state_os_enabled = 1;
+ unsigned avx512_state_os_enabled = 1;
+
+ // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit
+ // 18] to enable XSETBV/XGETBV instructions to access XCR0 and
+ // to support processor extended state management using
+ // XSAVE/XRSTOR.
+ bool osxsave = regs1[2] >> 27 & 1;
+ if (osxsave)
+ {
+
+ uint32_t xcr0 = get_xcr0_low();
+
+ sse_state_os_enabled = xcr0 >> 1 & 1;
+ avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
+ avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
+ }
+
+ sse2 = regs1[3] >> 26 & sse_state_os_enabled;
+ sse3 = regs1[2] >> 0 & sse_state_os_enabled;
+ ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
+ sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
+ sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
+ fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
- avx = regs1[2] >> 28 & 1;
+ avx = regs1[2] >> 28 & avx_state_os_enabled;
fma3_avx = avx && fma3_sse42;
int regs8[4];
get_cpuid(regs8, 0x80000001);
- fma4 = regs8[2] >> 16 & 1;
+ fma4 = regs8[2] >> 16 & avx_state_os_enabled;
// sse4a = regs[2] >> 6 & 1;
@@ -176,23 +229,23 @@ namespace xsimd
int regs7[4];
get_cpuid(regs7, 0x7);
- avx2 = regs7[1] >> 5 & 1;
+ avx2 = regs7[1] >> 5 & avx_state_os_enabled;
int regs7a[4];
get_cpuid(regs7a, 0x7, 0x1);
- avxvnni = regs7a[0] >> 4 & 1;
+ avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
fma3_avx2 = avx2 && fma3_sse42;
- avx512f = regs7[1] >> 16 & 1;
- avx512cd = regs7[1] >> 28 & 1;
- avx512dq = regs7[1] >> 17 & 1;
- avx512bw = regs7[1] >> 30 & 1;
- avx512er = regs7[1] >> 27 & 1;
- avx512pf = regs7[1] >> 26 & 1;
- avx512ifma = regs7[1] >> 21 & 1;
- avx512vbmi = regs7[2] >> 1 & 1;
- avx512vnni_bw = regs7[2] >> 11 & 1;
+ avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
+ avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
+ avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
+ avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
+ avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
+ avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
+ avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
+ avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
+ avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
#endif
}
--
2.45.2

3
xsimd-13.0.0.tar.gz Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8bdbbad0c3e7afa38d88d0d484d70a1671a1d8aefff03f4223ab2eb6a41110a3
size 259967

101
xsimd.changes Normal file
View File

@@ -0,0 +1,101 @@
-------------------------------------------------------------------
Thu Jul 4 20:21:59 UTC 2024 - Christophe Marin <christophe@krop.fr>
- Add upstream changes fixing some krita issues:
* 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch
* 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch
-------------------------------------------------------------------
Fri May 17 09:38:45 UTC 2024 - Dirk Müller <dmueller@suse.com>
- update to 13.0.0:
* Most xsimd functions are flagged as always_inline
* Fix some xsimd scalar version (abs, bitofsign, signbit,
bitwise_cast, exp10)
* Move from batch_constant<batch<T, A>, Csts...> to
batch_constant<T, A, Csts...>
* Move from batch_bool_constant<batch<T, A>, Csts...> to
batch_bool_constant<T, A, Csts...>
* Provide an as_batch() (resp. as_batch_bool()) method for
batch_constant (resp. batch_bool_constant)
* New architecture emulated<N> for batches of N bits emulated
using scalar operations.
* Remove the version method from all architectures
* Support xsimd::avg and xsimd::avgr vector operation
* Model i8mm arm extension
* Fix dispatching mechanism
-------------------------------------------------------------------
Tue Mar 12 09:30:22 UTC 2024 - Bernhard Wiedemann <bwiedemann@suse.com>
- Move install commands into %install section
-------------------------------------------------------------------
Sun Dec 17 10:14:02 UTC 2023 - Dirk Müller <dmueller@suse.com>
- update to 12.1.1:
* Update readme with a section on adoption, and a section on
the history of the project
* Fix/avx512vnni implementation
* Fix regression on XSIMD_NO_SUPPORTED_ARCHITECTURE
-------------------------------------------------------------------
Sun Nov 26 13:53:34 UTC 2023 - Dirk Müller <dmueller@suse.com>
- update to 11.2.0:
* Provide shuffle operations of floating point batches
* Provide a generic implementation of xsimd::swizzle with
dynamic indices
* Implement rotl, rotr, rotate_left and rotate_right
* Let CMake figure out pkgconfig directories
* Add missing boolean operators in xsimd_api.hpp
* Initial Implementation for the new WASM based instruction set
* Provide a generic version for float to uint32_t conversion
-------------------------------------------------------------------
Tue Jun 27 15:32:48 UTC 2023 - Dirk Müller <dmueller@suse.com>
- update to 11.1.0:
* Introduce XSIMD_DEFAULT_ARCH to force default architecture
(if any)
* Remove C++ requirement on xsimd::exp10 scalar implementation
* Improve and test documentation
* Provide a generic reducer
* Fix ``find_package(xsimd)`` for xtl enabled xsimd, reloaded
* Cleanup benchmark code
* Provide avx512f implementation of FMA and variant
* Hexadecimal floating points are not a C++11 feature
* back to slow implementation of exp10 on Windows
* Changed bitwise_cast API
* Provide generic signed /unsigned type conversion
* Fixed sde location
* Feature/incr decr
* Cleanup documentation
-------------------------------------------------------------------
Sat Dec 3 16:55:40 UTC 2022 - Dirk Müller <dmueller@suse.com>
- update to 10.0.0:
* SVE: Implement scatter/gather
* SVE: Implement nearbyint and ldexp
* Use intrinsic instead of generic implementation of ldexp on avx512f
* SVE: Implement fused operations for all types
- drop 0001-Only-use-_mm_insert_epi64-on-64-bit-architecture.patch (upstream)
-------------------------------------------------------------------
Wed Nov 9 10:25:40 UTC 2022 - Christophe Giboudeaux <christophe@krop.fr>
- Update to 9.0.1. No changelog.
- Spec cleanup. Prepare xsimd for the factory submission
- Add upstream change:
* 0001-Only-use-_mm_insert_epi64-on-64-bit-architecture.patch
-------------------------------------------------------------------
Sun Jul 17 19:28:30 UTC 2022 - Matwey Kornilov <matwey.kornilov@gmail.com>
- Update to version 8.1.0
-------------------------------------------------------------------
Fri Mar 26 03:51:36 UTC 2021 - Daniel Weatherill <plasteredparrot@gmail.com>
- initial build from upstream version 7.4.9

118
xsimd.spec Normal file
View File

@@ -0,0 +1,118 @@
#
# spec file for package xsimd
#
# Copyright (c) 2024 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
Name: xsimd
Version: 13.0.0
Release: 0
Summary: C++ wrappers for SIMD intrinsics
License: BSD-3-Clause
Group: Development/Libraries/C and C++
URL: https://xsimd.readthedocs.io/en/latest/
Source0: https://github.com/xtensor-stack/xsimd/archive/refs/tags/%{version}.tar.gz#/%{name}-%{version}.tar.gz
# PATCH-FIX-UPSTREAM -- https://mail.kde.org/pipermail/distributions/2024-July/001511.html
Patch0: 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch
Patch1: 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch
BuildRequires: cmake
BuildRequires: doctest-devel
BuildRequires: doxygen
BuildRequires: gcc-c++
BuildRequires: make
BuildRequires: pkgconfig
BuildRequires: python3-breathe
BuildRequires: cmake(GTest)
%description
SIMD (Single Instruction, Multiple Data) is a feature of microprocessors that
has been available for many years. SIMD instructions perform a single operation
on a batch of values at once, and thus provide a way to significantly accelerate
code execution. However, these instructions differ between microprocessor
vendors and compilers.
xsimd provides a unified means for using these features for library authors.
Namely, it enables manipulation of batches of numbers with the same arithmetic
operators as for single values. It also provides accelerated implementation of
common mathematical functions operating on batches.
%prep
%autosetup -p1
%build
%cmake -DBUILD_TESTS:BOOL=ON
%cmake_build
# Build documentation
pushd %{_builddir}/%{name}-%{version}/docs
%make_build html
popd
%install
%cmake_install
# Install documentation
mkdir -p %{buildroot}%{_docdir}/%{name}
cp -r %{_builddir}/%{name}-%{version}/docs/build/html/* %{buildroot}%{_docdir}/%{name}
%check
%ctest
%package devel
Summary: Development files for xsimd
%description devel
SIMD (Single Instruction, Multiple Data) is a feature of microprocessors that
has been available for many years. SIMD instructions perform a single operation
on a batch of values at once, and thus provide a way to significantly accelerate
code execution. However, these instructions differ between microprocessor
vendors and compilers.
xsimd provides a unified means for using these features for library authors.
Namely, it enables manipulation of batches of numbers with the same arithmetic
operators as for single values. It also provides accelerated implementation of
common mathematical functions operating on batches.
This package contains the development files needed to use xsimd
%files devel
%license LICENSE
%{_includedir}/xsimd
%{_libdir}/cmake/xsimd
%{_libdir}/pkgconfig/xsimd.pc
%package doc
Summary: Documentation for xsimd
Group: Documentation/HTML
%description doc
SIMD (Single Instruction, Multiple Data) is a feature of microprocessors that
has been available for many years. SIMD instructions perform a single operation
on a batch of values at once, and thus provide a way to significantly accelerate
code execution. However, these instructions differ between microprocessor
vendors and compilers.
xsimd provides a unified means for using these features for library authors.
Namely, it enables manipulation of batches of numbers with the same arithmetic
operators as for single values. It also provides accelerated implementation of
common mathematical functions operating on batches.
This package contains the xsimd documentation
%files doc
%doc %{_docdir}/%{name}
%changelog