commit 094c796357060ff3a7629e16050634caa51bb991cb970468f7aff6829bb30253 Author: Guillaume GARDET Date: Mon Sep 23 11:55:12 2024 +0000 - Update to 3.5.3: * https://github.com/oneapi-src/oneDNN/releases/tag/v3.5.3 OBS-URL: https://build.opensuse.org/package/show/science:machinelearning/onednn?expand=0&rev=30 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/_constraints b/_constraints new file mode 100644 index 0000000..e81bd89 --- /dev/null +++ b/_constraints @@ -0,0 +1,8 @@ + + + + + 8 + + + diff --git a/oneDNN-3.4.1.tar.gz b/oneDNN-3.4.1.tar.gz new file mode 100644 index 0000000..9da5c62 --- /dev/null +++ b/oneDNN-3.4.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a6512405664c2cd004811922173adabaa50d6aadc9352291d2d85f8b0f3d10 +size 13282745 diff --git 
a/oneDNN-3.5.3.tar.gz b/oneDNN-3.5.3.tar.gz new file mode 100644 index 0000000..613c0dd --- /dev/null +++ b/oneDNN-3.5.3.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbc26c75978c5e864050f699dbefbf5bff9c0b8d2af827845708e1376471f17 +size 13190748 diff --git a/onednn.changes b/onednn.changes new file mode 100644 index 0000000..9f66777 --- /dev/null +++ b/onednn.changes @@ -0,0 +1,316 @@ +------------------------------------------------------------------- +Mon Sep 23 10:04:43 UTC 2024 - Guillaume GARDET + +- Update to 3.5.3: + * https://github.com/oneapi-src/oneDNN/releases/tag/v3.5.3 + +------------------------------------------------------------------- +Fri Apr 19 17:27:48 UTC 2024 - Alessandro de Oliveira Faria + +- Update to 3.4.1: + * Fixed an issue with caching and serialization of primitives in + deterministic mode (7ed604a) + * Introduced memory descriptor serialization API + (4cad420, 929a27a, 9b848c8) + * Fixed incorrect results in fp64 convolution and deconvolution + on Intel GPUs based on Xe-LPG architecture (ebe77b5, 0b399ac, + d748d64, 9f4f3d5, 21a8cae) + * Fixed incorrect results in reorder with large sizes on + Intel CPUs and GPUs (69a111e, 4b72361, 74a343b) + * Reduced creation time for deconvolution primitive on + Intel CPUs (bec487e, 1eab005) + * Fixed performance regression in deconvolution on + Intel CPUs (fbe5b97, 1dd3c6a) + * Removed dangling symbols from static builds + (e92c404, 6f5621a) + * Fixed crash during platform detection on some + AArch64-based systems (406a079) + * Fixed performance regression in int8 deconvolution on + Intel CPUs (7e50e15) + * Fixed handling of zero points for matmul in verbose + logs converter (15c7916) + +------------------------------------------------------------------- +Fri Dec 1 04:33:49 UTC 2023 - Alessandro de Oliveira Faria + +- Update to 3.3.3: +- This is a patch release containing the following changes to v3.3.2: + * Fixed performance regression in int8 convolutions on 
processors with Intel AVX-512 and Intel DL Boost support (a00661f) + * Fixed race condition during library initialization on Intel Data Center GPU Max Series (7dfcd11) + * Fixed accuracy issue in experimental Graph Compiler with LLVM code generator (8892e7e) + * Disabled int8 RNN implementation for cases with non-trivial strides (2195e4b) + * Fixed incorrect results in bfloat16 convolution implementation on processors with Intel AMX support (9f00af9) + * Fixed incorrect results in fp16 and int8 convolution on Intel Core Ultra integrated GPUs (69cef84, 79bc6cc, c9c0b09) + +------------------------------------------------------------------- +Fri Dec 1 04:33:49 UTC 2023 - Alessandro de Oliveira Faria + +- Update to 3.3.1: +- This is a patch release containing the following changes to v3.3: + * Fixed int8 convolution accuracy issue on Intel GPUs (09c87c7) + * Switched internal stream to in-order mode for NVIDIA and AMD GPUs to avoid synchronization issues (db01d62) + * Fixed runtime error for avgpool_bwd operation in Graph API (d025ef6, 9e0602a, e0dc1b3) + * Fixed benchdnn error reporting for some Graph API cases (98dc9db) + * Fixed accuracy issue in experimental Graph Compiler for int8 MHA variant from StarCoder model (5476ef7) + * Fixed incorrect results for layer normalization with trivial dimensions on Intel GPUs (a2ec0a0) + * Removed redundant synchronization for out-of-order SYCL queues (a96e9b1) + * Fixed runtime error in experimental Graph Compiler for int8 MLP subgraph from LLAMA model (595543d) + * Fixed SEGFAULT in experimental Graph Compiler for fp32 MLP subgraph (4207105) + * Fixed incorrect results in experimental Graph Compiler for MLP subgraph (57e14b5) + * Fixed the issue with f16 inner product primitive with s8 output returning unimplemented on Intel GPUs (bf12207, 800b5e9, ec7054a) + * Fixed incorrect results for int8 deconvolution with zero-points on processors with Intel AMX instructions support (55d2cec) + 
+------------------------------------------------------------------- +Tue Oct 10 05:53:25 UTC 2023 - Paolo Stivanin + +- Update to 3.3: + * 3.3: https://github.com/oneapi-src/oneDNN/releases/tag/v3.3 + * 3.2: https://github.com/oneapi-src/oneDNN/releases/tag/v3.2 + * 3.1: https://github.com/oneapi-src/oneDNN/releases/tag/v3.1 +- Drop upstreamed onednn-fix-gcc13.patch + +------------------------------------------------------------------- +Tue Mar 21 10:27:40 UTC 2023 - Guillaume GARDET + +- Update to 3.0.1: + * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0.1 +- Skipped 3.0: + * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0 +- Add patch to fix build with GCC13: + * onednn-fix-gcc13.patch +- Disable Arm Compute library support until fixed upstream + https://github.com/oneapi-src/oneDNN/issues/1599 +- Drop upstream patches: + * 1428.patch + * fa93750.patch + +------------------------------------------------------------------- +Tue Sep 20 08:26:43 UTC 2022 - Guillaume GARDET + +- Add patch to fix build with latest Arm Compute Library: + * 1428.patch + * fa93750.patch (dep for 1428.patch) + +------------------------------------------------------------------- +Tue Sep 13 05:22:52 UTC 2022 - Paolo Stivanin + +- Update to 2.6.2: + * https://github.com/oneapi-src/oneDNN/releases +- Removed onednn-1045.patch. +- Removed onednn-xbyak-aarch64.patch. 
+ +------------------------------------------------------------------- +Tue Jun 15 12:10:39 UTC 2021 - Guillaume GARDET + +- Fix build on aarch64: + * onednn-xbyak-aarch64.patch + +------------------------------------------------------------------- +Tue Jun 15 08:31:16 UTC 2021 - Guillaume GARDET + +- Update to version 2.2.4: + * Fixed build error with GCC 11 (eda1add) + * Fixed an issue with reorder reporting unimplemented when + quantizing f32 weights to s8 (4f05b76, 5d3d1e1, cc77eef) + * Updated name for GPU gen12 architecture to xe (3d202c2) +- Drop upstream patch: + * 0001-common-gpu-include-thread-and-limit-headers-to-fix-G.patch + +------------------------------------------------------------------- +Thu Jun 3 01:38:56 UTC 2021 - Ferdinand Thiessen + +- Update to version 2.2.3 + * Fixed a bug in int8 depthwise convolution primitive with groups + and 1d spatial size for processors with AVX-512 and AVX2 support + * Fixed correctness issue for PReLU primitive + * Fixed correctness issue in reorder for blocked layouts with + zero padding + * Improved performance of weights reorders used by BRGEMM-based + convolution primitive for processors with AVX-512 support + * Added -fp-model=precise build flag for DPC++ code + * Fixed potential memory leak in matmul primitive + * Fixed performance of matmul primitive when fused with bias + update and sum + * Fixed a bug in matmul primitive when writing to non-contiguous + destination buffer +- Add upstream patch for GCC11 support + * 0001-common-gpu-include-thread-and-limit-headers-to-fix-G.patch + +------------------------------------------------------------------- +Thu May 27 08:10:13 UTC 2021 - Jan Engelhardt + +- Update descriptions. 
+ +------------------------------------------------------------------- +Wed May 26 13:29:27 UTC 2021 - Guillaume GARDET + +- Update to 2.2.2, changes: + * Fixed performance regression in fp32 forward inner product for + shapes with number of output channels equal to 1 for processors + with Intel AVX-512 support (714b1fd) + * Fixed performance regression in forward convolutions with groups + for processors with Intel AVX-512 support (3555d4a) + * Removed -std=c++11 build flag for DPC++ headers (1fcb867) + * Fixed buffer access in initializing workspace in RNN + implementation on GPU (9b03091) + * Fixed a bug in convolution with 1x1 kernel and mixed + strides on processors with Intel AVX-512 support (d0b3e3f) + * Used getauxval for Linux to get CPU features on AArch64 + systems (25c4cea) + * Added -fp-model=precise build flag for DPC++ code (3e40e5e) + * Fixed out-of-bounds writes in elementwise primitive on + Intel Processor Graphics (bcf823c) +- Fix build with Arm Compute Library: + * onednn-1045.patch + +------------------------------------------------------------------- +Tue Apr 13 07:53:16 UTC 2021 - Guillaume GARDET + +- Update to 2.2.1, changes: + * From 2.2: + Fixed segfault for cases when primitive descriptor or attributes contain NaN (e6d05ec, dbca1e9, 0326b09, 0326b09) + Fixed engine creation failure for GPU subdevices (4c3a114) + Fixed long lines clipping in verbose output (70d70a8) + Fixed segfault in bfloat16 convolution weight gradient implementation on processors with Intel AMX support (a3a73a3) + Fixed performance regression in binary primitive with per_oc broadcast strategy (9ac85d8) + Worked around a bug with Microsoft Visual C++ compiler version detection in CMake 3.19 (2f39155) + Removed -std=c++11 build flag for DPC++ code to align with SYCL standard (1b026f5) + * Changes between 2.1 and 2.2: + Performance Optimizations + Intel Architecture processors + Improved performance of int8 compute functionality for future Intel Xeon Scalable 
processor (code name Sapphire Rapids). The functionality is disabled by default and should be enabled via CPU dispatcher control. + Improved performance of compute functionality for future Intel Core processor with Intel AVX2 and Intel DL Boost instructions support (code name Alder Lake). + Improved fp32 inner product forward propagation performance for processors with Intel AVX-512 support. + Improved dnnl_gemm performance for cases with n=1 on all supported processors. + Intel Graphics products + Introduced NHWC format support for activations for int8 primitives. + AArch64-based processors + Improved performance of fp32 and int8 convolution, and softmax primitives for processors with SVE 512 support. + Improved performance of fp32 convolution via Arm Compute Library (ACL). + Improved performance of convolution with a combination of sum and relu post-ops via ACL. + Functionality + Extended eltwise primitive with support for mish and hardswish algorithms. + Extended binary primitive with support for comparison operators. + Introduced support for post-ops in GPU resampling implementation. + Introduced asymmetric quantization support for int8 deconvolution. + Introduced binary post-ops support for matmul primitive. + Usability + Improved presentation of oneDNN primitives in VTune Amplifier. + Introduced Linux perf support for AArch64. + Introduced support for Fujitsu C++ compiler. + Introduced a build time check for minimal supported ACL version. Currently oneDNN requires ACL 21.02 or later. 
+ Added support for cuDNN 8.x + +------------------------------------------------------------------- +Wed Feb 17 14:17:47 UTC 2021 - Guillaume GARDET + +- Update to 2.1 +- Add Arm ComputeLibrary support on aarch64 + +------------------------------------------------------------------- +Mon Oct 5 06:16:30 UTC 2020 - Guillaume GARDET + +- Obsoletes mkl-dnn* <= %{version} + +------------------------------------------------------------------- +Fri Oct 2 12:47:08 UTC 2020 - Guillaume GARDET + +- Rename mkl-dnn to onednn to follow upstream + +------------------------------------------------------------------- +Wed Sep 23 13:36:02 UTC 2020 - Guillaume GARDET + +- Update to 1.6.3 +- Drop upstream patch: + * cmake-no-install-ocl-cmake.patch + +------------------------------------------------------------------- +Wed Sep 23 13:16:39 UTC 2020 - Guillaume GARDET + +- Build on aarch64 and ppc64le which are now also supported +- Provide oneDNN and oneDNN-devel as it is the new official name + +------------------------------------------------------------------- +Tue May 5 07:38:34 UTC 2020 - Tomáš Chvátal + +- Update to 1.4: + * Performance improvements all over the board +- Rebase patch cmake-no-install-ocl-cmake.patch + +------------------------------------------------------------------- +Tue Mar 24 10:50:57 UTC 2020 - Tomáš Chvátal + +- Add constraints to not crash during testing on OOM + +------------------------------------------------------------------- +Thu Feb 27 12:44:00 UTC 2020 - Tomáš Chvátal + +- Do not disable LTO there is no actual reason for that +- Export LD_LIBRARY_PATH to fix older releases build + +------------------------------------------------------------------- +Wed Feb 26 10:36:26 UTC 2020 - Tomáš Chvátal + +- There is no actual reason to not use github tag for tarball + fetching -> remove the service +- Format with spec-cleaner +- Use proper %cmake macros everywhere +- Add configure options for cmake to set it up in a way we really + want +- Add patch from 
Debian to not install OpenCL cmake finder: + * cmake-no-install-ocl-cmake.patch + +------------------------------------------------------------------- +Thu Feb 20 10:26:52 UTC 2020 - Christian Goll + +- enabled tests + +------------------------------------------------------------------- +Thu Jan 30 14:20:22 UTC 2020 - Christian Goll + +- packaged separate benchdnn package with its input files +- updated to v1.1.3 which includes + * Fixed the mean and variance memory descriptors in layer + normalization (65f1908) + * Fixed the layer normalization formula (c176ceb) + +------------------------------------------------------------------- +Wed Jan 8 15:21:54 UTC 2020 - Christian Goll + +- updated to v1.1.2 + * Fixed threading over the spatial in bfloat16 batched + normalization (017b6c9) + * Fixed read past end-of-buffer error for int8 convolution (7d6f45e) + * Fixed condition for dispatching optimized channel blocking in + fp32 backward convolution on Intel Xeon Phi(TM) processor (846eba1) + * Fixed fp32 backward convolution for shapes with spatial strides + over the depth dimension (002e3ab) + * Fixed softmax with zero sizes on GPU (936bff4) + * Fixed int8 deconvolution with dilation when ih <= dh (3e3bacb) + * Enabled back fp32 -> u8 reorder for RNN (a2c2507) + * Fixed segmentation fault in bfloat16 backward convolution from + kd_padding=0 computation (52d476c) + * Fixed segmentation fault in bfloat16 forward convolution due + to push/pop imbalance (4f6e3d5) + * Fixed library version for OS X build (0d85005) + * Fixed padding by channels in concat (a265c7d) + * Added full text of third party licenses and + copyright notices to LICENSE file (79f204c) + * Added separate README for binary packages (28f4c96) + * Fixed computing per-oc mask in RNN (ff3ffab) + * Added workaround for number of cores calculation in Xbyak (301b088) + +------------------------------------------------------------------- +Mon Feb 11 16:35:48 UTC 2019 - cgoll@suse.com + +- added ARCH_OPT_FLAGS="" + 
+------------------------------------------------------------------- +Tue Feb 5 07:45:53 UTC 2019 - Christian Goll + +- Initial check-in of the Intel(R) Math Kernel Library for + Deep Neural Networks which can be used by: + * TensorFlow + * Caffe + * PyTorch + and other machine learning tools diff --git a/onednn.spec b/onednn.spec new file mode 100644 index 0000000..aaeb3b3 --- /dev/null +++ b/onednn.spec @@ -0,0 +1,192 @@ +# +# spec file for package onednn +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%ifarch x86_64 +%bcond_without opencl +%else +# Build with OpenCL is broken on non-x86 architectures +%bcond_with opencl +%endif + +%ifarch aarch64 +# Disable ACL until fixed upstream - https://github.com/oneapi-src/oneDNN/issues/2109 +%bcond_with acl +%else +%bcond_with acl +%endif + +%define libname libdnnl3 +Name: onednn +Version: 3.5.3 +Release: 0 +Summary: Intel Math Kernel Library for Deep Neural Networks +License: Apache-2.0 +URL: https://01.org/onednn +Source0: https://github.com/oneapi-src/oneDNN/archive/v%{version}/oneDNN-%{version}.tar.gz +BuildRequires: chrpath +BuildRequires: cmake +BuildRequires: doxygen +BuildRequires: fdupes +BuildRequires: gcc-c++ +BuildRequires: graphviz +BuildRequires: texlive-dvips-bin +%if %{with acl} +BuildRequires: ComputeLibrary-devel >= 24.04 +%endif +%if %{with opencl} +BuildRequires: opencl-headers +BuildRequires: pkgconfig +BuildRequires: pkgconfig(OpenCL) +%endif +ExclusiveArch: x86_64 aarch64 ppc64le +Provides: mkl-dnn = %{version} +Obsoletes: mkl-dnn <= %{version} +Provides: oneDNN = %{version} + +%description +Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an +open-source performance library for deep-learning applications. The library +accelerates deep-learning applications and frameworks on Intel architecture. +Intel MKL-DNN contains vectorized and threaded building blocks that you can use +to implement deep neural networks (DNN) with C and C++ interfaces. + +%package -n benchdnn +Summary: Benchmark utility for Intel Math Kernel Library for Deep Neural Networks +Requires: %{libname} = %{version} + +%description -n benchdnn +Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an +open-source performance library for deep-learning applications. The library +accelerates deep-learning applications and frameworks on Intel architecture. 
+Intel MKL-DNN contains vectorized and threaded building blocks that you can use +to implement deep neural networks (DNN) with C and C++ interfaces. + +This package only contains the benchmark utility and its input files. + +%package devel +Summary: Header files of Intel Math Kernel Library +Requires: %{libname} = %{version} +Provides: mkl-dnn-devel = %{version} +Obsoletes: mkl-dnn-devel <= %{version} +Provides: oneDNN-devel = %{version} + +%description devel +Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an +open-source performance library for deep-learning applications. The library +accelerates deep-learning applications and frameworks on Intel architecture. +Intel MKL-DNN contains vectorized and threaded building blocks that you can use +to implement deep neural networks (DNN) with C and C++ interfaces. + +This package includes the required headers and library files to develop software +with the Intel MKL-DNN. + +%package doc +Summary: Reference documentation for the Intel Math Kernel Library +BuildArch: noarch + +%description doc +The reference documentation for the Intel Math Kernel Library can be installed +with this package. + +%package -n %{libname} +Summary: Shared library of Intel Math Kernel Library for Deep Neural Networks + +%description -n %{libname} +Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN) is an +open-source performance library for deep-learning applications. The library +accelerates deep-learning applications and frameworks on Intel architecture. +Intel MKL-DNN contains vectorized and threaded building blocks that you can use +to implement deep neural networks (DNN) with C and C++ interfaces. 
+ +%prep +%autosetup -p1 -n oneDNN-%{version} + +%build +%cmake \ + -DCMAKE_INSTALL_LIBDIR=%{_lib} \ + -DDNNL_ARCH_OPT_FLAGS="" \ + -DDNNL_CPU_RUNTIME=OMP \ +%if %{with acl} + -DDNNL_AARCH64_USE_ACL=ON \ + -DACL_INCLUDE_DIR=%{_includedir} \ + -DACL_LIBRARY=%{_libdir}/libarm_compute.so \ +%endif +%if %{with opencl} + -DDNNL_GPU_RUNTIME=OCL \ +%endif + -DDNNL_INSTALL_MODE=DEFAULT \ + -DDNNL_BUILD_TESTS=ON \ + -DONEDNN_BUILD_GRAPH=OFF \ + -DDNNL_WERROR=OFF +%cmake_build +%cmake_build doc_doxygen + +%install +%cmake_install +# move the built doxygen data to the normal documentation location +mkdir -p %{buildroot}%{_docdir}/%{name} +mv %{buildroot}%{_datadir}/doc/dnnl/reference/* %{buildroot}%{_docdir}/%{name} +%fdupes %{buildroot}%{_docdir}/%{name} +# drop the upstream-installed doc directory; license and README are packaged via the files sections +rm -r %{buildroot}%{_datadir}/doc/dnnl +# Keep compatibility with mkl-dnn +pushd %{buildroot}%{_includedir} +ln -s . mkl-dnn +popd +# install the benchmark +install -D build/tests/benchdnn/benchdnn %{buildroot}/%{_bindir}/benchdnn +# install the benchdnn input files +mkdir -vp %{buildroot}%{_datadir}/benchdnn +cp -vr build/tests/benchdnn/inputs %{buildroot}%{_datadir}/benchdnn + +chrpath -d %{buildroot}/%{_bindir}/benchdnn + +%check +# do not use macro so we can exclude all gpu and cross (gpu and cpu) tests (they need gpu set up) +pushd build +export LD_LIBRARY_PATH=%{buildroot}%{_libdir} +ctest --output-on-failure --force-new-ctest-process %{_smp_mflags} -E '(gpu|cross)' +popd + +%post -n %{libname} -p /sbin/ldconfig +%postun -n %{libname} -p /sbin/ldconfig + +%files -n benchdnn +%{_bindir}/benchdnn +%{_datadir}/benchdnn + +%files devel +%doc README.md +%license LICENSE +%{_includedir}/mkl-dnn +%{_includedir}/dnnl*.h* +%dir %{_includedir}/oneapi +%dir %{_includedir}/oneapi/dnnl +%{_includedir}/oneapi/dnnl/dnnl*.h* +%{_libdir}/libdnnl.so +%dir %{_libdir}/cmake/dnnl +%{_libdir}/cmake/dnnl/*.cmake + +%files doc +%{_docdir}/%{name} + +%files -n %{libname} +%license LICENSE +%{_libdir}/libdnnl.so.* + +%changelog