forked from pool/onednn
- Update to 3.5.3:
* https://github.com/oneapi-src/oneDNN/releases/tag/v3.5.3 OBS-URL: https://build.opensuse.org/package/show/science:machinelearning/onednn?expand=0&rev=30
This commit is contained in:
commit
094c796357
23
.gitattributes
vendored
Normal file
23
.gitattributes
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
## Default LFS
|
||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bsp filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gem filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lzma filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.obscpio filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.oxt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.png filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rpm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ttf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.txz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.whl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.osc
|
8
_constraints
Normal file
8
_constraints
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<constraints>
|
||||||
|
<hardware>
|
||||||
|
<memory>
|
||||||
|
<size unit="G">8</size>
|
||||||
|
</memory>
|
||||||
|
</hardware>
|
||||||
|
</constraints>
|
3
oneDNN-3.4.1.tar.gz
Normal file
3
oneDNN-3.4.1.tar.gz
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:66a6512405664c2cd004811922173adabaa50d6aadc9352291d2d85f8b0f3d10
|
||||||
|
size 13282745
|
3
oneDNN-3.5.3.tar.gz
Normal file
3
oneDNN-3.5.3.tar.gz
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ddbc26c75978c5e864050f699dbefbf5bff9c0b8d2af827845708e1376471f17
|
||||||
|
size 13190748
|
316
onednn.changes
Normal file
316
onednn.changes
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Sep 23 10:04:43 UTC 2024 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 3.5.3:
|
||||||
|
* https://github.com/oneapi-src/oneDNN/releases/tag/v3.5.3
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Apr 19 17:27:48 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 3.4.1:
|
||||||
|
* Fixed an issue with caching and serialization of primitives in
|
||||||
|
deterministic mode (7ed604a)
|
||||||
|
* Introduced memory descriptor serialization API
|
||||||
|
(4cad420, 929a27a, 9b848c8)
|
||||||
|
* Fixed incorrect results in fp64 convolution and deconvolution
|
||||||
|
on Intel GPUs based on Xe-LPG architecture (ebe77b5, 0b399ac,
|
||||||
|
d748d64, 9f4f3d5, 21a8cae)
|
||||||
|
* Fixed incorrect results in reorder with large sizes on
|
||||||
|
Intel CPUs and GPUs (69a111e, 4b72361, 74a343b)
|
||||||
|
* Reduced creation time for deconvolution primitive on
|
||||||
|
Intel CPUs (bec487e, 1eab005)
|
||||||
|
* Fixed performance regression in deconvolution on
|
||||||
|
Intel CPUs (fbe5b97, 1dd3c6a)
|
||||||
|
* Removed dangling symblols from static builds
|
||||||
|
(e92c404, 6f5621a)
|
||||||
|
* Fixed crash during platform detection on some
|
||||||
|
AArch64-based systems (406a079)
|
||||||
|
* Fixed performance regression in int8 deconvolution on
|
||||||
|
Intel CPUs (7e50e15)
|
||||||
|
* Fixed handling of zero points for matmul in verbose
|
||||||
|
logs converter (15c7916)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Dec 1 04:33:49 UTC 2023 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 3.3.3:
|
||||||
|
- This is a patch release containing the following changes to v3.3.2:
|
||||||
|
* Fixed performance regression in int8 convolutions on processors with Intel AVX-512 and Intel DL Boost support (a00661f)
|
||||||
|
* Fixed race condition during library initialization on Intel Data Center GPU Max Series (7dfcd11)
|
||||||
|
* Fixed accuracy issue in experimental Graph Compiler with LLVM code generator (8892e7e)
|
||||||
|
* Disabled int8 RNN implementation for cases with non-trivial strides (2195e4b)
|
||||||
|
* Fixed incorrect results in bfloat16 convolution implementation on processors with Intel AMX support (9f00af9)
|
||||||
|
* Fixed incorrect results in fp16 and int8 convolution on Intel Core Ultra integrated GPUs (69cef84, 79bc6cc, c9c0b09)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Dec 1 04:33:49 UTC 2023 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 3.3.1:
|
||||||
|
- This is a patch release containing the following changes to v3.3:
|
||||||
|
* Fixed int8 convolution accuracy issue on Intel GPUs (09c87c7)
|
||||||
|
* Switched internal stream to in-order mode for NVIDIA and AMD GPUs to avoid synchronization issues (db01d62)
|
||||||
|
* Fixed runtime error for avgpool_bwd operation in Graph API (d025ef6, 9e0602a, e0dc1b3)
|
||||||
|
* Fixed benchdnn error reporting for some Graph API cases (98dc9db)
|
||||||
|
* Fixed accuracy issue in experimental Graph Compiler for int8 MHA variant from StarCoder model (5476ef7)
|
||||||
|
* Fixed incorrect results for layer normalization with trivial dimensions on Intel GPUs (a2ec0a0)
|
||||||
|
* Removed redundant synchronization for out-of-order SYCL queues (a96e9b1)
|
||||||
|
* Fixed runtime error in experimental Graph Compiler for int8 MLP subgraph from LLAMA model (595543d)
|
||||||
|
* Fixed SEGFAULT in experimental Graph Compiler for fp32 MLP subgraph (4207105)
|
||||||
|
* Fixed incorrect results in experimental Graph Compiler for MLP subgraph (57e14b5)
|
||||||
|
* Fixed the issue with f16 inner product primitive with s8 output returning unimplemented on Intel GPUs (bf12207, 800b5e9, ec7054a)
|
||||||
|
* Fixed incorrect results for int8 deconvolution with zero-points on processors with Intel AMX instructions support (55d2cec)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Oct 10 05:53:25 UTC 2023 - Paolo Stivanin <info@paolostivanin.com>
|
||||||
|
|
||||||
|
- Update to 3.3:
|
||||||
|
* 3.3: https://github.com/oneapi-src/oneDNN/releases/tag/v3.3
|
||||||
|
* 3.2: https://github.com/oneapi-src/oneDNN/releases/tag/v3.2
|
||||||
|
* 3.1: https://github.com/oneapi-src/oneDNN/releases/tag/v3.1
|
||||||
|
- Drop upstreamed onednn-fix-gcc13.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Mar 21 10:27:40 UTC 2023 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 3.0.1:
|
||||||
|
* Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0.1
|
||||||
|
- Skipped 3.0:
|
||||||
|
* Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0
|
||||||
|
- Add patch to fix build with GCC13:
|
||||||
|
* onednn-fix-gcc13.patch
|
||||||
|
- Disable Arm Compute library support until fixed upstream
|
||||||
|
https://github.com/oneapi-src/oneDNN/issues/1599
|
||||||
|
- Drop upstream patches:
|
||||||
|
* 1428.patch
|
||||||
|
* fa93750.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Sep 20 08:26:43 UTC 2022 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Add patch to fix build with latest Arm Compute Library:
|
||||||
|
* 1428.patch
|
||||||
|
* fa93750.patch (dep for 1428.patch)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Sep 13 05:22:52 UTC 2022 - Paolo Stivanin <info@paolostivanin.com>
|
||||||
|
|
||||||
|
- Update to 2.6.2:
|
||||||
|
* https://github.com/oneapi-src/oneDNN/releases
|
||||||
|
- Removed onednn-1045.patch.
|
||||||
|
- Removed onednn-xbyak-aarch64.patch.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Jun 15 12:10:39 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Fix build on aarch64:
|
||||||
|
* onednn-xbyak-aarch64.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Jun 15 08:31:16 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Update to version 2.2.4:
|
||||||
|
* Fixed build error with GCC 11 (eda1add)
|
||||||
|
* Fixed an issue with reorder reporting unimplemented when
|
||||||
|
quantizing f32 weights to s8 (4f05b76, 5d3d1e1, cc77eef)
|
||||||
|
* Updated name for GPU gen12 architecture to xe (3d202c2)
|
||||||
|
- Drop upstream patch:
|
||||||
|
* 0001-common-gpu-include-thread-and-limit-headers-to-fix-G.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Jun 3 01:38:56 UTC 2021 - Ferdinand Thiessen <rpm@fthiessen.de>
|
||||||
|
|
||||||
|
- Update to version 2.2.3
|
||||||
|
* Fixed a bug in int8 depthwise convolution ptimitive with groups
|
||||||
|
and 1d spatial size for processors with AVX-512 and AVX2 support
|
||||||
|
* Fixed correctness issue for PReLU primitive
|
||||||
|
* Fixed corretness issue in reorder for blocked layouts with
|
||||||
|
zero padding
|
||||||
|
* Improved performance of weights reorders used by BRGEMM-based
|
||||||
|
convolution primitive for processors with AVX-512 support
|
||||||
|
* Added -fp-model=precise build flag for DPC++ code
|
||||||
|
* Fixed potential memory leak in matmul primitive
|
||||||
|
* Fixed performance of matmul primitive when fused with bias
|
||||||
|
update and sum
|
||||||
|
* Fixed a bug in matmul primitive when writing to non-contiguous
|
||||||
|
destination buffer
|
||||||
|
- Add upstream patch for GCC11 support
|
||||||
|
* 0001-common-gpu-include-thread-and-limit-headers-to-fix-G.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu May 27 08:10:13 UTC 2021 - Jan Engelhardt <jengelh@inai.de>
|
||||||
|
|
||||||
|
- Update descriptions.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed May 26 13:29:27 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 2.2.2, changes:
|
||||||
|
* Fixed performance regression in fp32 forward inner product for
|
||||||
|
shapes with number of output channels equal to 1 for processors
|
||||||
|
with Intel AVX-512 support (714b1fd)
|
||||||
|
* Fixed performance regression in forward convolutions with groups
|
||||||
|
for processors with Intel AVX-512 support(3555d4a)
|
||||||
|
* Removed -std=c++11 build flag for DPC++ headers (1fcb867)
|
||||||
|
* Fixed buffer access in initializing workspace in RNN
|
||||||
|
implementation on GPU (9b03091)
|
||||||
|
* Fixed fix a bug in convolution with 1x1 kernel and mixed
|
||||||
|
strides on processors with Intel AVX-512 support (d0b3e3f)
|
||||||
|
* Used getauxval for Linux to get CPU features on for AArch64
|
||||||
|
systems (25c4cea)
|
||||||
|
* Added -fp-model=precise build flag for DPC++ code (3e40e5e)
|
||||||
|
* Fixed out-of-bounds writes in elementwise primitive on
|
||||||
|
Intel Processor Graphics (bcf823c)
|
||||||
|
- Fix build with Arm Compute Library:
|
||||||
|
* onednn-1045.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Apr 13 07:53:16 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 2.2.1, changes:
|
||||||
|
* From 2.2:
|
||||||
|
Fixed segfault for cases when primitive descriptor or attributed contain NaN (e6d05ec, dbca1e9, 0326b09, 0326b09)
|
||||||
|
Fixed engine creation failure for GPU subdevices (4c3a114)
|
||||||
|
Fixed long lines clipping in verbose output (70d70a8)
|
||||||
|
Fixed segfault in bfloat16 convolution weight gradient implementation on processors with Intel AMX support (a3a73a3)
|
||||||
|
Fixed performance regression in binary primitive with per_oc broadcast strategy (9ac85d8)
|
||||||
|
Worked around a bug with Microsoft Visual C++ compiler version detection in CMake 3.19 (2f39155)
|
||||||
|
Removed -std=c++11 build flag for DPC++ code to align with SYCL standard (1b026f5)
|
||||||
|
* Changes between 2.1 and 2.2:
|
||||||
|
Performance Optimizations
|
||||||
|
Intel Architecture processors
|
||||||
|
Improved performance of int8 compute functionality for future Intel Xeon Scalable processor (code name Sapphire Rapids). The functionality is disabled by default and should be enabled via CPU dispatcher control.
|
||||||
|
Improved performance of compute functionality for future Intel Core processor with Intel AVX2 and Intel DL Boost instructions support (code name Alder Lake).
|
||||||
|
Improved fp32 inner product forward propagation performance for processors with Intel AVX-512 support.
|
||||||
|
Improved dnnl_gemm performance for cases with n=1 on all supported processors.
|
||||||
|
Intel Graphics products
|
||||||
|
Introduced NHWC format support for activations for int8 primitives.
|
||||||
|
AArch64-based processors
|
||||||
|
Improved performance of fp32 and int8 convolution, and softmax primitives for processors with SVE 512 support.
|
||||||
|
Improved performance of fp32 convolution via Arm Compute Library (ACL).
|
||||||
|
Improved performance of convolution with a combination of sum and relu post-ops via ACL.
|
||||||
|
Functionality
|
||||||
|
Extended eltwise primitive with support for mish and hardswish algorithms.
|
||||||
|
Extended binary primitive with support for comparison operators.
|
||||||
|
Introduced support for post-ops in GPU resampling implementation.
|
||||||
|
Introduced asymmetric quantization support for int8 deconvolution.
|
||||||
|
Introduced binary post-ops support for matmul primitive.
|
||||||
|
Usability
|
||||||
|
Improved presentation of oneDNN primitives in VTune Amplifier.
|
||||||
|
Introduced Linux perf support for AArch64.
|
||||||
|
Introduced support for Fujitsu C++ compiler.
|
||||||
|
Introduced a build time check for minimal supported ACL version. Currently oneDNN requires ACL 21.02 or later.
|
||||||
|
Added support for cuDNN 8.x
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Feb 17 14:17:47 UTC 2021 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 2.1
|
||||||
|
- Add Arm ComputeLibrary support on aarch64
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Oct 5 06:16:30 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Obsoletes mkl-dnn* <= %{version}
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Oct 2 12:47:08 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Rename mkl-dnn to onednn to follow upstream
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Sep 23 13:36:02 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Update to 1.6.3
|
||||||
|
- Drop upstream patch:
|
||||||
|
* cmake-no-install-ocl-cmake.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Sep 23 13:16:39 UTC 2020 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Build on aarch64 and ppc64le which are now also supported
|
||||||
|
- Provide oneDNN and oneDNN-devel as it is the new official name
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue May 5 07:38:34 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
|
||||||
|
|
||||||
|
- Update to 1.4:
|
||||||
|
* Performance improvements all over the board
|
||||||
|
- Rebase patch cmake-no-install-ocl-cmake.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Mar 24 10:50:57 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
|
||||||
|
|
||||||
|
- Add constraints to not crash during testing on OOM
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Feb 27 12:44:00 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
|
||||||
|
|
||||||
|
- Do not disable LTO there is no actual reason for that
|
||||||
|
- Export LD_LIBRARY_PATH to fix older releases build
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Feb 26 10:36:26 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
|
||||||
|
|
||||||
|
- There is no actual reason to not use github tag for tarball
|
||||||
|
fetching -> remove the service
|
||||||
|
- Format with spec-cleaner
|
||||||
|
- Use proper %cmake macros everywhere
|
||||||
|
- Add configure options for cmake to set it up in a way we really
|
||||||
|
want
|
||||||
|
- Add patch from Debian to not install OpenCL cmake finder:
|
||||||
|
* cmake-no-install-ocl-cmake.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Feb 20 10:26:52 UTC 2020 - Christian Goll <cgoll@suse.com>
|
||||||
|
|
||||||
|
- enabled tests
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Jan 30 14:20:22 UTC 2020 - Christian Goll <cgoll@suse.com>
|
||||||
|
|
||||||
|
- packaged separate benchnn packae with its input files
|
||||||
|
- updated to v1.1.3 which includes
|
||||||
|
* Fixed the mean and variance memory descriptors in layer
|
||||||
|
normalization (65f1908)
|
||||||
|
* Fixed the layer normalization formula (c176ceb)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Jan 8 15:21:54 UTC 2020 - Christian Goll <cgoll@suse.com>
|
||||||
|
|
||||||
|
- updated to v1.1.2
|
||||||
|
* Fixed threading over the spatial in bfloat16 batched
|
||||||
|
normalization (017b6c9)
|
||||||
|
* Fixed read past end-of-buffer error for int8 convolution (7d6f45e)
|
||||||
|
* Fixed condition for dispatching optimized channel blocking in
|
||||||
|
fp32 backward convolution on Intel Xeon Phi(TM) processor (846eba1)
|
||||||
|
* Fixed fp32 backward convolution for shapes with spatial strides
|
||||||
|
over the depth dimension (002e3ab)
|
||||||
|
* Fixed softmax with zero sizes on GPU (936bff4)
|
||||||
|
* Fixed int8 deconvolution with dilation when ih <= dh (3e3bacb)
|
||||||
|
* Enabled back fp32 -> u8 reorder for RNN (a2c2507)
|
||||||
|
* Fixed segmentation fault in bfloat16 backward convolution from
|
||||||
|
kd_padding=0 computation (52d476c)
|
||||||
|
* Fixed segmentation fault in bfloat16 forward convolution due
|
||||||
|
to push/pop imbalance (4f6e3d5)
|
||||||
|
* Fixed library version for OS X build (0d85005)
|
||||||
|
* Fixed padding by channels in concat (a265c7d)
|
||||||
|
* Added full text of third party licenses and
|
||||||
|
copyright notices to LICENSE file (79f204c)
|
||||||
|
* Added separate README for binary packages (28f4c96)
|
||||||
|
* Fixed computing per-oc mask in RNN (ff3ffab)
|
||||||
|
* Added workaround for number of cores calculation in Xbyak (301b088)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Feb 11 16:35:48 UTC 2019 - cgoll@suse.com
|
||||||
|
|
||||||
|
- added ARCH_OPT_FLAGS=""
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Feb 5 07:45:53 UTC 2019 - Christian Goll <cgoll@suse.com>
|
||||||
|
|
||||||
|
- Initial checking of the Intel(R) Math Kernel Library for
|
||||||
|
Deep Neural Networks which can be used by:
|
||||||
|
* tensorflow
|
||||||
|
* Caffee
|
||||||
|
* PyTorch
|
||||||
|
and other machine learning tools
|
192
onednn.spec
Normal file
192
onednn.spec
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
#
|
||||||
|
# spec file for package onednn
|
||||||
|
#
|
||||||
|
# Copyright (c) 2024 SUSE LLC
|
||||||
|
#
|
||||||
|
# All modifications and additions to the file contributed by third parties
|
||||||
|
# remain the property of their copyright owners, unless otherwise agreed
|
||||||
|
# upon. The license for this file, and modifications and additions to the
|
||||||
|
# file, is the same license as for the pristine package itself (unless the
|
||||||
|
# license for the pristine package is not an Open Source License, in which
|
||||||
|
# case the license is the MIT License). An "Open Source License" is a
|
||||||
|
# license that conforms to the Open Source Definition (Version 1.9)
|
||||||
|
# published by the Open Source Initiative.
|
||||||
|
|
||||||
|
# Please submit bugfixes or comments via https://bugs.opensuse.org/
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
%ifarch x86_64
|
||||||
|
%bcond_without opencl
|
||||||
|
%else
|
||||||
|
# Build broken on non-x86, with openCL
|
||||||
|
%bcond_with opencl
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifarch aarch64
|
||||||
|
# Disable ACL until fixed upstream - https://github.com/oneapi-src/oneDNN/issues/2109
|
||||||
|
%bcond_with acl
|
||||||
|
%else
|
||||||
|
%bcond_with acl
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define libname libdnnl3
|
||||||
|
Name: onednn
|
||||||
|
Version: 3.5.3
|
||||||
|
Release: 0
|
||||||
|
Summary: Intel Math Kernel Library for Deep Neural Networks
|
||||||
|
License: Apache-2.0
|
||||||
|
URL: https://01.org/onednn
|
||||||
|
Source0: https://github.com/oneapi-src/oneDNN/archive/v%{version}/oneDNN-%{version}.tar.gz
|
||||||
|
BuildRequires: chrpath
|
||||||
|
BuildRequires: cmake
|
||||||
|
BuildRequires: doxygen
|
||||||
|
BuildRequires: fdupes
|
||||||
|
BuildRequires: gcc-c++
|
||||||
|
BuildRequires: graphviz
|
||||||
|
BuildRequires: texlive-dvips-bin
|
||||||
|
%if %{with acl}
|
||||||
|
BuildRequires: ComputeLibrary-devel >= 24.04
|
||||||
|
%endif
|
||||||
|
%if %{with opencl}
|
||||||
|
BuildRequires: opencl-headers
|
||||||
|
BuildRequires: pkgconfig
|
||||||
|
BuildRequires: pkgconfig(OpenCL)
|
||||||
|
%endif
|
||||||
|
ExclusiveArch: x86_64 aarch64 ppc64le
|
||||||
|
Provides: mkl-dnn = %{version}
|
||||||
|
Obsoletes: mkl-dnn <= %{version}
|
||||||
|
Provides: oneDNN = %{version}
|
||||||
|
|
||||||
|
%description
|
||||||
|
Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
|
||||||
|
open-source performance library for deep-learning applications. The library
|
||||||
|
accelerates deep-learning applications and frameworks on Intel architecture.
|
||||||
|
Intel MKL-DNN contains vectorized and threaded building blocks that you can use
|
||||||
|
to implement deep neural networks (DNN) with C and C++ interfaces.
|
||||||
|
|
||||||
|
%package -n benchdnn
|
||||||
|
Summary: Header files of Intel Math Kernel Library
|
||||||
|
Requires: %{libname} = %{version}
|
||||||
|
|
||||||
|
%description -n benchdnn
|
||||||
|
Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
|
||||||
|
open-source performance library for deep-learning applications. The library
|
||||||
|
accelerates deep-learning applications and frameworks on Intel architecture.
|
||||||
|
Intel MKL-DNN contains vectorized and threaded building blocks that you can use
|
||||||
|
to implement deep neural networks (DNN) with C and C++ interfaces.
|
||||||
|
|
||||||
|
This package only includes the benchmark utility including its input files.
|
||||||
|
|
||||||
|
%package devel
|
||||||
|
Summary: Header files of Intel Math Kernel Library
|
||||||
|
Requires: %{libname} = %{version}
|
||||||
|
Provides: mkl-dnn-devel = %{version}
|
||||||
|
Obsoletes: mkl-dnn-devel <= %{version}
|
||||||
|
Provides: oneDNN-devel = %{version}
|
||||||
|
|
||||||
|
%description devel
|
||||||
|
Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
|
||||||
|
open-source performance library for deep-learning applications. The library
|
||||||
|
accelerates deep-learning applications and frameworks on Intel architecture.
|
||||||
|
Intel MKL-DNN contains vectorized and threaded building blocks that you can use
|
||||||
|
to implement deep neural networks (DNN) with C and C++ interfaces.
|
||||||
|
|
||||||
|
This package includes the required headers and library files to develop software
|
||||||
|
with the Intel MKL-DNN.
|
||||||
|
|
||||||
|
%package doc
|
||||||
|
Summary: Reference documentation for the Intel Math Kernel Library
|
||||||
|
BuildArch: noarch
|
||||||
|
|
||||||
|
%description doc
|
||||||
|
The reference documentation for the Intel Math Kernel Library can be installed
|
||||||
|
with this package.
|
||||||
|
|
||||||
|
%package -n %{libname}
|
||||||
|
Summary: Header files of Intel Math Kernel Library
|
||||||
|
|
||||||
|
%description -n %{libname}
|
||||||
|
Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an
|
||||||
|
open-source performance library for deep-learning applications. The library
|
||||||
|
accelerates deep-learning applications and frameworks on Intel architecture.
|
||||||
|
Intel MKL-DNN contains vectorized and threaded building blocks that you can use
|
||||||
|
to implement deep neural networks (DNN) with C and C++ interfaces.
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%autosetup -p1 -n oneDNN-%{version}
|
||||||
|
|
||||||
|
%build
|
||||||
|
%cmake \
|
||||||
|
-DCMAKE_INSTALL_LIBDIR=%{_lib} \
|
||||||
|
-DMKLDNN_ARCH_OPT_FLAGS="" \
|
||||||
|
-DDNNL_CPU_RUNTIME=OMP \
|
||||||
|
%if %{with acl}
|
||||||
|
-DDNNL_AARCH64_USE_ACL=ON \
|
||||||
|
-DACL_INCLUDE_DIR=%{_includedir} \
|
||||||
|
-DACL_LIBRARY=%{_libdir}/libarm_compute.so \
|
||||||
|
%endif
|
||||||
|
%if %{with opencl}
|
||||||
|
-DDNNL_GPU_RUNTIME=OCL \
|
||||||
|
%endif
|
||||||
|
-DDNNL_INSTALL_MODE=DEFAULT \
|
||||||
|
-DDNNL_BUILD_TESTS=ON \
|
||||||
|
-DONEDNN_BUILD_GRAPH=OFF \
|
||||||
|
-DDNNL_WERROR=OFF
|
||||||
|
%cmake_build
|
||||||
|
%cmake_build doc_doxygen
|
||||||
|
|
||||||
|
%install
|
||||||
|
%cmake_install
|
||||||
|
# move the built doxygen data to normal location
|
||||||
|
mkdir -p %{buildroot}%{_docdir}/%{name}
|
||||||
|
mv %{buildroot}%{_datadir}/doc/dnnl/reference/* %{buildroot}%{_docdir}/%{name}
|
||||||
|
%fdupes %{buildroot}%{_docdir}/%{name}
|
||||||
|
# do use macros to install license/docu
|
||||||
|
rm -r %{buildroot}%{_datadir}/doc/dnnl
|
||||||
|
# Keep compatibility with mkl-dnn
|
||||||
|
pushd %{buildroot}%{_includedir}
|
||||||
|
ln -s . mkl-dnn
|
||||||
|
popd
|
||||||
|
# install the benchmark
|
||||||
|
install -D build/tests/benchdnn/benchdnn %{buildroot}/%{_bindir}/benchdnn
|
||||||
|
# move install shared lib
|
||||||
|
mkdir -vp %{buildroot}%{_datadir}/benchdnn
|
||||||
|
cp -vr build/tests/benchdnn/inputs %{buildroot}%{_datadir}/benchdnn
|
||||||
|
|
||||||
|
chrpath -d %{buildroot}/%{_bindir}/benchdnn
|
||||||
|
|
||||||
|
%check
|
||||||
|
# do not use macro so we can exclude all gpu and cross (gpu and cpu) tests (they need gpu set up)
|
||||||
|
pushd build
|
||||||
|
export LD_LIBRARY_PATH=%{buildroot}%{_libdir}
|
||||||
|
ctest --output-on-failure --force-new-ctest-process %{_smp_mflags} -E '(gpu|cross)'
|
||||||
|
popd
|
||||||
|
|
||||||
|
%post -n %{libname} -p /sbin/ldconfig
|
||||||
|
%postun -n %{libname} -p /sbin/ldconfig
|
||||||
|
|
||||||
|
%files -n benchdnn
|
||||||
|
%{_bindir}/benchdnn
|
||||||
|
%{_datadir}/benchdnn
|
||||||
|
|
||||||
|
%files devel
|
||||||
|
%doc README.md
|
||||||
|
%license LICENSE
|
||||||
|
%{_includedir}/mkl-dnn
|
||||||
|
%{_includedir}/dnnl*.h*
|
||||||
|
%dir %{_includedir}/oneapi
|
||||||
|
%dir %{_includedir}/oneapi/dnnl
|
||||||
|
%{_includedir}/oneapi/dnnl/dnnl*.h*
|
||||||
|
%{_libdir}/libdnnl.so
|
||||||
|
%dir %{_libdir}/cmake/dnnl
|
||||||
|
%{_libdir}/cmake/dnnl/*.cmake
|
||||||
|
|
||||||
|
%files doc
|
||||||
|
%{_docdir}/%{name}
|
||||||
|
|
||||||
|
%files -n %{libname}
|
||||||
|
%license LICENSE
|
||||||
|
%{_libdir}/libdnnl.so.*
|
||||||
|
|
||||||
|
%changelog
|
Loading…
Reference in New Issue
Block a user