- Update to 3.5.3:

* https://github.com/oneapi-src/oneDNN/releases/tag/v3.5.3 OBS-URL: https://build.opensuse.org/package/show/science:machinelearning/onednn?expand=0&rev=30
2024-09-23 11:55:12 +00:00 · 2024-09-23 11:55:12 +00:00 · 094c796357
commit 094c796357
7 changed files with 546 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,23 @@
+## Default LFS
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.bsp filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gem filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.lz filter=lfs diff=lfs merge=lfs -text
+*.lzma filter=lfs diff=lfs merge=lfs -text
+*.obscpio filter=lfs diff=lfs merge=lfs -text
+*.oxt filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.rpm filter=lfs diff=lfs merge=lfs -text
+*.tbz filter=lfs diff=lfs merge=lfs -text
+*.tbz2 filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.ttf filter=lfs diff=lfs merge=lfs -text
+*.txz filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+.osc
--- a/8
+++ b/8
@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<constraints>
+  <hardware>
+    <memory>
+      <size unit="G">8</size>
+    </memory>
+  </hardware>
+</constraints>
--- a/oneDNN-3.4.1.tar.gz
+++ b/oneDNN-3.4.1.tar.gz
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66a6512405664c2cd004811922173adabaa50d6aadc9352291d2d85f8b0f3d10
+size 13282745
--- a/oneDNN-3.5.3.tar.gz
+++ b/oneDNN-3.5.3.tar.gz
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddbc26c75978c5e864050f699dbefbf5bff9c0b8d2af827845708e1376471f17
+size 13190748
--- a/onednn.changes
+++ b/onednn.changes
@ -0,0 +1,316 @@
+-------------------------------------------------------------------
+Mon Sep 23 10:04:43 UTC 2024 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Update to 3.5.3:
+  * https://github.com/oneapi-src/oneDNN/releases/tag/v3.5.3
+
+-------------------------------------------------------------------
+Fri Apr 19 17:27:48 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
+
+- Update to 3.4.1:
+  * Fixed an issue with caching and serialization of primitives in 
+    deterministic mode (7ed604a)
+  * Introduced memory descriptor serialization API 
+    (4cad420, 929a27a, 9b848c8)
+  * Fixed incorrect results in fp64 convolution and deconvolution 
+    on Intel GPUs based on Xe-LPG architecture (ebe77b5, 0b399ac, 
+    d748d64, 9f4f3d5, 21a8cae)
+  * Fixed incorrect results in reorder with large sizes on 
+    Intel CPUs and GPUs (69a111e, 4b72361, 74a343b)
+  * Reduced creation time for deconvolution primitive on 
+    Intel CPUs (bec487e, 1eab005)
+  * Fixed performance regression in deconvolution on 
+    Intel CPUs (fbe5b97, 1dd3c6a)
+  * Removed dangling symbols from static builds
+    (e92c404, 6f5621a)
+  * Fixed crash during platform detection on some 
+    AArch64-based systems (406a079)
+  * Fixed performance regression in int8 deconvolution on 
+    Intel CPUs (7e50e15)
+  * Fixed handling of zero points for matmul in verbose 
+    logs converter (15c7916) 
+
+-------------------------------------------------------------------
+Fri Dec  1 04:33:49 UTC 2023 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
+
+- Update to 3.3.3:
+- This is a patch release containing the following changes to v3.3.2:
+  * Fixed performance regression in int8 convolutions on processors with Intel AVX-512 and Intel DL Boost support (a00661f)
+  * Fixed race condition during library initialization on Intel Data Center GPU Max Series (7dfcd11)
+  * Fixed accuracy issue in experimental Graph Compiler with LLVM code generator (8892e7e)
+  * Disabled int8 RNN implementation for cases with non-trivial strides (2195e4b)
+  * Fixed incorrect results in bfloat16 convolution implementation on processors with Intel AMX support (9f00af9)
+  * Fixed incorrect results in fp16 and int8 convolution on Intel Core Ultra integrated GPUs (69cef84, 79bc6cc, c9c0b09)
+
+-------------------------------------------------------------------
+Fri Dec  1 04:33:49 UTC 2023 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
+
+- Update to 3.3.1:
+- This is a patch release containing the following changes to v3.3:
+  * Fixed int8 convolution accuracy issue on Intel GPUs (09c87c7)
+  * Switched internal stream to in-order mode for NVIDIA and AMD GPUs to avoid synchronization issues (db01d62)
+  * Fixed runtime error for avgpool_bwd operation in Graph API (d025ef6, 9e0602a, e0dc1b3)
+  * Fixed benchdnn error reporting for some Graph API cases (98dc9db)
+  * Fixed accuracy issue in experimental Graph Compiler for int8 MHA variant from StarCoder model (5476ef7)
+  * Fixed incorrect results for layer normalization with trivial dimensions on Intel GPUs (a2ec0a0)
+  * Removed redundant synchronization for out-of-order SYCL queues (a96e9b1)
+  * Fixed runtime error in experimental Graph Compiler for int8 MLP subgraph from LLAMA model (595543d)
+  * Fixed SEGFAULT in experimental Graph Compiler for fp32 MLP subgraph (4207105)
+  * Fixed incorrect results in experimental Graph Compiler for MLP subgraph (57e14b5)
+  * Fixed the issue with f16 inner product primitive with s8 output returning unimplemented on Intel GPUs (bf12207, 800b5e9, ec7054a)
+  * Fixed incorrect results for int8 deconvolution with zero-points on processors with Intel AMX instructions support (55d2cec)
+
+-------------------------------------------------------------------
+Tue Oct 10 05:53:25 UTC 2023 - Paolo Stivanin <info@paolostivanin.com>
+
+- Update to 3.3:
+  * 3.3: https://github.com/oneapi-src/oneDNN/releases/tag/v3.3
+  * 3.2: https://github.com/oneapi-src/oneDNN/releases/tag/v3.2
+  * 3.1: https://github.com/oneapi-src/oneDNN/releases/tag/v3.1
+- Drop upstreamed onednn-fix-gcc13.patch
+
+-------------------------------------------------------------------
+Tue Mar 21 10:27:40 UTC 2023 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Update to 3.0.1:
+  * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0.1
+- Skipped 3.0:
+  * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0
+- Add patch to fix build with GCC13:
+  * onednn-fix-gcc13.patch
+- Disable Arm Compute library support until fixed upstream
+  https://github.com/oneapi-src/oneDNN/issues/1599
+- Drop upstream patches:
+  * 1428.patch
+  * fa93750.patch
+
+-------------------------------------------------------------------
+Tue Sep 20 08:26:43 UTC 2022 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Add patch to fix build with latest Arm Compute Library:
+  * 1428.patch
+  * fa93750.patch (dep for 1428.patch)
+
+-------------------------------------------------------------------
+Tue Sep 13 05:22:52 UTC 2022 - Paolo Stivanin <info@paolostivanin.com>
+
+- Update to 2.6.2:
+  * https://github.com/oneapi-src/oneDNN/releases
+- Removed onednn-1045.patch.
+- Removed onednn-xbyak-aarch64.patch.
+
+-------------------------------------------------------------------
+Tue Jun 15 12:10:39 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Fix build on aarch64:
+  * onednn-xbyak-aarch64.patch
+
+-------------------------------------------------------------------
+Tue Jun 15 08:31:16 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Update to version 2.2.4:
+  * Fixed build error with GCC 11 (eda1add)
+  * Fixed an issue with reorder reporting unimplemented when
+    quantizing f32 weights to s8 (4f05b76, 5d3d1e1, cc77eef)
+  * Updated name for GPU gen12 architecture to xe (3d202c2)
+- Drop upstream patch:
+  * 0001-common-gpu-include-thread-and-limit-headers-to-fix-G.patch
+
+-------------------------------------------------------------------
+Thu Jun  3 01:38:56 UTC 2021 - Ferdinand Thiessen <rpm@fthiessen.de>
+
+- Update to version 2.2.3
+  * Fixed a bug in int8 depthwise convolution primitive with groups
+    and 1d spatial size for processors with AVX-512 and AVX2 support
+  * Fixed correctness issue for PReLU primitive
+  * Fixed correctness issue in reorder for blocked layouts with
+    zero padding
+  * Improved performance of weights reorders used by BRGEMM-based
+    convolution primitive for processors with AVX-512 support
+  * Added -fp-model=precise build flag for DPC++ code
+  * Fixed potential memory leak in matmul primitive
+  * Fixed performance of matmul primitive when fused with bias
+    update and sum
+  * Fixed a bug in matmul primitive when writing to non-contiguous
+    destination buffer
+- Add upstream patch for GCC11 support
+  * 0001-common-gpu-include-thread-and-limit-headers-to-fix-G.patch
+
+-------------------------------------------------------------------
+Thu May 27 08:10:13 UTC 2021 - Jan Engelhardt <jengelh@inai.de>
+
+- Update descriptions.
+
+-------------------------------------------------------------------
+Wed May 26 13:29:27 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Update to 2.2.2, changes:
+  * Fixed performance regression in fp32 forward inner product for
+  shapes with number of output channels equal to 1 for processors
+  with Intel AVX-512 support (714b1fd)
+  * Fixed performance regression in forward convolutions with groups
+  for processors with Intel AVX-512 support(3555d4a)
+  * Removed -std=c++11 build flag for DPC++ headers (1fcb867)
+  * Fixed buffer access in initializing workspace in RNN
+  implementation on GPU (9b03091)
+  * Fixed a bug in convolution with 1x1 kernel and mixed
+  strides on processors with Intel AVX-512 support (d0b3e3f)
+  * Used getauxval for Linux to get CPU features on AArch64
+  systems (25c4cea)
+  * Added -fp-model=precise build flag for DPC++ code (3e40e5e)
+  * Fixed out-of-bounds writes in elementwise primitive on
+  Intel Processor Graphics (bcf823c)
+- Fix build with Arm Compute Library:
+  * onednn-1045.patch
+
+-------------------------------------------------------------------
+Tue Apr 13 07:53:16 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Update to 2.2.1, changes:
+  * From 2.2:
+  Fixed segfault for cases when primitive descriptor or attributes contain NaN (e6d05ec, dbca1e9, 0326b09, 0326b09)
+  Fixed engine creation failure for GPU subdevices (4c3a114)
+  Fixed long lines clipping in verbose output (70d70a8)
+  Fixed segfault in bfloat16 convolution weight gradient implementation on processors with Intel AMX support (a3a73a3)
+  Fixed performance regression in binary primitive with per_oc broadcast strategy (9ac85d8)
+  Worked around a bug with Microsoft Visual C++ compiler version detection in CMake 3.19 (2f39155)
+  Removed -std=c++11 build flag for DPC++ code to align with SYCL standard (1b026f5)
+  * Changes between 2.1 and 2.2:
+  Performance Optimizations
+    Intel Architecture processors
+      Improved performance of int8 compute functionality for future Intel Xeon Scalable processor (code name Sapphire Rapids). The functionality is disabled by default and should be enabled via CPU dispatcher control.
+      Improved performance of compute functionality for future Intel Core processor with Intel AVX2 and Intel DL Boost instructions support (code name Alder Lake).
+      Improved fp32 inner product forward propagation performance for processors with Intel AVX-512 support.
+      Improved dnnl_gemm performance for cases with n=1 on all supported processors.
+    Intel Graphics products
+      Introduced NHWC format support for activations for int8 primitives.
+    AArch64-based processors
+      Improved performance of fp32 and int8 convolution, and softmax primitives for processors with SVE 512 support.
+      Improved performance of fp32 convolution via Arm Compute Library (ACL).
+      Improved performance of convolution with a combination of sum and relu post-ops via ACL.
+  Functionality
+    Extended eltwise primitive with support for mish and hardswish algorithms.
+    Extended binary primitive with support for comparison operators.
+    Introduced support for post-ops in GPU resampling implementation.
+    Introduced asymmetric quantization support for int8 deconvolution.
+    Introduced binary post-ops support for matmul primitive.
+  Usability
+    Improved presentation of oneDNN primitives in VTune Amplifier.
+    Introduced Linux perf support for AArch64.
+    Introduced support for Fujitsu C++ compiler.
+    Introduced a build time check for minimal supported ACL version. Currently oneDNN requires ACL 21.02 or later.
+    Added support for cuDNN 8.x
+
+-------------------------------------------------------------------
+Wed Feb 17 14:17:47 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Update to 2.1
+- Add Arm ComputeLibrary support on aarch64
+
+-------------------------------------------------------------------
+Mon Oct  5 06:16:30 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Obsoletes mkl-dnn* <= %{version}
+
+-------------------------------------------------------------------
+Fri Oct  2 12:47:08 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Rename mkl-dnn to onednn to follow upstream
+
+-------------------------------------------------------------------
+Wed Sep 23 13:36:02 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Update to 1.6.3
+- Drop upstream patch:
+  * cmake-no-install-ocl-cmake.patch
+
+-------------------------------------------------------------------
+Wed Sep 23 13:16:39 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
+
+- Build on aarch64 and ppc64le which are now also supported
+- Provide oneDNN and oneDNN-devel as it is the new official name
+
+-------------------------------------------------------------------
+Tue May  5 07:38:34 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
+
+- Update to 1.4:
+  * Performance improvements all over the board
+- Rebase patch cmake-no-install-ocl-cmake.patch
+
+-------------------------------------------------------------------
+Tue Mar 24 10:50:57 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
+
+- Add constraints to not crash during testing on OOM
+
+-------------------------------------------------------------------
+Thu Feb 27 12:44:00 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
+
+- Do not disable LTO, there is no actual reason for that
+- Export LD_LIBRARY_PATH to fix older releases build
+
+-------------------------------------------------------------------
+Wed Feb 26 10:36:26 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
+
+- There is no actual reason to not use github tag for tarball
+  fetching -> remove the service
+- Format with spec-cleaner
+- Use proper %cmake macros everywhere
+- Add configure options for cmake to set it up in a way we really
+  want
+- Add patch from Debian to not install OpenCL cmake finder:
+  * cmake-no-install-ocl-cmake.patch
+
+-------------------------------------------------------------------
+Thu Feb 20 10:26:52 UTC 2020 - Christian Goll <cgoll@suse.com>
+
+- enabled tests 
+
+-------------------------------------------------------------------
+Thu Jan 30 14:20:22 UTC 2020 - Christian Goll <cgoll@suse.com>
+
+- packaged separate benchdnn package with its input files
+- updated to v1.1.3 which includes
+ * Fixed the mean and variance memory descriptors in layer 
+   normalization (65f1908)
+ * Fixed the layer normalization formula (c176ceb)
+
+-------------------------------------------------------------------
+Wed Jan  8 15:21:54 UTC 2020 - Christian Goll <cgoll@suse.com>
+
+- updated to v1.1.2 
+  * Fixed threading over the spatial in bfloat16 batched
+    normalization (017b6c9)
+  * Fixed read past end-of-buffer error for int8 convolution (7d6f45e)
+  * Fixed condition for dispatching optimized channel blocking in 
+    fp32 backward convolution on Intel Xeon Phi(TM) processor (846eba1)
+  * Fixed fp32 backward convolution for shapes with spatial strides 
+    over the depth dimension (002e3ab)
+  * Fixed softmax with zero sizes on GPU (936bff4)
+  * Fixed int8 deconvolution with dilation when ih <= dh (3e3bacb)
+  * Enabled back fp32 -> u8 reorder for RNN (a2c2507)
+  * Fixed segmentation fault in bfloat16 backward convolution from 
+    kd_padding=0 computation (52d476c)
+  * Fixed segmentation fault in bfloat16 forward convolution due 
+    to push/pop imbalance (4f6e3d5)
+  * Fixed library version for OS X build (0d85005)
+  * Fixed padding by channels in concat (a265c7d)
+  * Added full text of third party licenses and 
+    copyright notices to LICENSE file (79f204c)
+  * Added separate README for binary packages (28f4c96)
+  * Fixed computing per-oc mask in RNN (ff3ffab)
+  * Added workaround for number of cores calculation in Xbyak (301b088)
+
+-------------------------------------------------------------------
+Mon Feb 11 16:35:48 UTC 2019 - cgoll@suse.com
+
+- added ARCH_OPT_FLAGS=""
+
+-------------------------------------------------------------------
+Tue Feb  5 07:45:53 UTC 2019 - Christian Goll <cgoll@suse.com>
+
+- Initial checkin of the Intel(R) Math Kernel Library for
+  Deep Neural Networks which can be used by:
+  * tensorflow
+  * Caffe
+  * PyTorch
+  and other machine learning tools
--- a/onednn.spec
+++ b/onednn.spec
@ -0,0 +1,192 @@
+#
+# spec file for package onednn
+#
+# Copyright (c) 2024 SUSE LLC
+#
+# All modifications and additions to the file contributed by third parties
+# remain the property of their copyright owners, unless otherwise agreed
+# upon. The license for this file, and modifications and additions to the
+# file, is the same license as for the pristine package itself (unless the
+# license for the pristine package is not an Open Source License, in which
+# case the license is the MIT License). An "Open Source License" is a
+# license that conforms to the Open Source Definition (Version 1.9)
+# published by the Open Source Initiative.
+
+# Please submit bugfixes or comments via https://bugs.opensuse.org/
+#
+
+
+%ifarch x86_64
+%bcond_without opencl
+%else
+# Build broken on non-x86, with openCL
+%bcond_with opencl
+%endif
+
+%ifarch aarch64
+# Disable ACL until fixed upstream - https://github.com/oneapi-src/oneDNN/issues/2109
+%bcond_with acl
+%else
+%bcond_with acl
+%endif
+
+%define libname libdnnl3
+Name:           onednn
+Version:        3.5.3
+Release:        0
+Summary:        Intel Math Kernel Library for Deep Neural Networks
+License:        Apache-2.0
+URL:            https://01.org/onednn
+Source0:        https://github.com/oneapi-src/oneDNN/archive/v%{version}/oneDNN-%{version}.tar.gz
+BuildRequires:  chrpath
+BuildRequires:  cmake
+BuildRequires:  doxygen
+BuildRequires:  fdupes
+BuildRequires:  gcc-c++
+BuildRequires:  graphviz
+BuildRequires:  texlive-dvips-bin
+%if %{with acl}
+BuildRequires:  ComputeLibrary-devel >= 24.04
+%endif
+%if %{with opencl}
+BuildRequires:  opencl-headers
+BuildRequires:  pkgconfig
+BuildRequires:  pkgconfig(OpenCL)
+%endif
+ExclusiveArch:  x86_64 aarch64 ppc64le
+Provides:       mkl-dnn = %{version}
+Obsoletes:      mkl-dnn <= %{version}
+Provides:       oneDNN = %{version}
+
+%description
+Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
+open-source performance library for deep-learning applications. The library
+accelerates deep-learning applications and frameworks on Intel architecture.
+Intel MKL-DNN contains vectorized and threaded building blocks that you can use
+to implement deep neural networks (DNN) with C and C++ interfaces.
+
+%package -n benchdnn
+Summary:        Benchmark utility for Intel Math Kernel Library for Deep Neural Networks
+Requires:       %{libname} = %{version}
+
+%description -n benchdnn
+Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
+open-source performance library for deep-learning applications. The library
+accelerates deep-learning applications and frameworks on Intel architecture.
+Intel MKL-DNN contains vectorized and threaded building blocks that you can use
+to implement deep neural networks (DNN) with C and C++ interfaces.
+
+This package only includes the benchmark utility including its input files.
+
+%package devel
+Summary:        Header files of Intel Math Kernel Library
+Requires:       %{libname} = %{version}
+Provides:       mkl-dnn-devel = %{version}
+Obsoletes:      mkl-dnn-devel <= %{version}
+Provides:       oneDNN-devel = %{version}
+
+%description devel
+Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
+open-source performance library for deep-learning applications. The library
+accelerates deep-learning applications and frameworks on Intel architecture.
+Intel MKL-DNN contains vectorized and threaded building blocks that you can use
+to implement deep neural networks (DNN) with C and C++ interfaces.
+
+This package includes the required headers and library files to develop software
+with the Intel MKL-DNN.
+
+%package doc
+Summary:        Reference documentation for the Intel Math Kernel Library
+BuildArch:      noarch
+
+%description doc
+The reference documentation for the Intel Math Kernel Library can be installed
+with this package.
+
+%package -n %{libname}
+Summary:        Shared library of Intel Math Kernel Library for Deep Neural Networks
+
+%description -n %{libname}
+Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
+open-source performance library for deep-learning applications. The library
+accelerates deep-learning applications and frameworks on Intel architecture.
+Intel MKL-DNN contains vectorized and threaded building blocks that you can use
+to implement deep neural networks (DNN) with C and C++ interfaces.
+
+%prep
+%autosetup -p1 -n oneDNN-%{version}
+
+%build
+%cmake \
+  -DCMAKE_INSTALL_LIBDIR=%{_lib} \
+  -DMKLDNN_ARCH_OPT_FLAGS="" \
+  -DDNNL_CPU_RUNTIME=OMP \
+%if %{with acl}
+  -DDNNL_AARCH64_USE_ACL=ON \
+  -DACL_INCLUDE_DIR=%{_includedir} \
+  -DACL_LIBRARY=%{_libdir}/libarm_compute.so  \
+%endif
+%if %{with opencl}
+  -DDNNL_GPU_RUNTIME=OCL \
+%endif
+  -DDNNL_INSTALL_MODE=DEFAULT \
+  -DDNNL_BUILD_TESTS=ON \
+  -DONEDNN_BUILD_GRAPH=OFF \
+  -DDNNL_WERROR=OFF
+%cmake_build
+%cmake_build doc_doxygen
+
+%install
+%cmake_install
+# move the built doxygen data to normal location
+mkdir -p %{buildroot}%{_docdir}/%{name}
+mv %{buildroot}%{_datadir}/doc/dnnl/reference/* %{buildroot}%{_docdir}/%{name}
+%fdupes %{buildroot}%{_docdir}/%{name}
+# drop the remaining upstream-installed docs; license/docu are installed via macros
+rm -r %{buildroot}%{_datadir}/doc/dnnl
+# Keep compatibility with mkl-dnn
+pushd %{buildroot}%{_includedir}
+ln -s . mkl-dnn
+popd
+# install the benchmark
+install -D build/tests/benchdnn/benchdnn %{buildroot}/%{_bindir}/benchdnn
+# install the benchmark input files
+mkdir -vp %{buildroot}%{_datadir}/benchdnn
+cp -vr build/tests/benchdnn/inputs  %{buildroot}%{_datadir}/benchdnn
+
+chrpath -d %{buildroot}/%{_bindir}/benchdnn
+
+%check
+# do not use macro so we can exclude all gpu and cross (gpu and cpu) tests (they need gpu set up)
+pushd build
+export LD_LIBRARY_PATH=%{buildroot}%{_libdir}
+ctest --output-on-failure --force-new-ctest-process %{_smp_mflags} -E '(gpu|cross)'
+popd
+
+%post -n %{libname} -p /sbin/ldconfig
+%postun -n %{libname} -p /sbin/ldconfig
+
+%files -n benchdnn
+%{_bindir}/benchdnn
+%{_datadir}/benchdnn
+
+%files devel
+%doc README.md
+%license LICENSE
+%{_includedir}/mkl-dnn
+%{_includedir}/dnnl*.h*
+%dir %{_includedir}/oneapi
+%dir %{_includedir}/oneapi/dnnl
+%{_includedir}/oneapi/dnnl/dnnl*.h*
+%{_libdir}/libdnnl.so
+%dir %{_libdir}/cmake/dnnl
+%{_libdir}/cmake/dnnl/*.cmake
+
+%files doc
+%{_docdir}/%{name}
+
+%files -n %{libname}
+%license LICENSE
+%{_libdir}/libdnnl.so.*
+
+%changelog