diff --git a/1428.patch b/1428.patch deleted file mode 100644 index 2021fa9..0000000 --- a/1428.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 24d4c1379d3c4cbf423f979fa25283b1914159c7 Mon Sep 17 00:00:00 2001 -From: Diana Bite -Date: Thu, 21 Jul 2022 12:27:17 +0100 -Subject: [PATCH] cpu: aarch64: acl: disallow large padding in ACL wino to be - consist with oneDNN - --- - src/cpu/aarch64/acl_convolution_utils.cpp | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp -index db3bde6a48..542033df0e 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.cpp -+++ b/src/cpu/aarch64/acl_convolution_utils.cpp -@@ -307,10 +307,17 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, - // General Compute Library checks, memory tags are also set there - CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); - -- const bool wino_shape_ok // unit strides only, no dilations -+ const bool shape_ok -+ // only unit strides allowed - = (acp.padstride_info.stride() == std::pair<unsigned int, unsigned int> {1, 1}) -+ // Note: Compute Library supports arbitrary padding for wino kernels -+ // but we only allow small padding to be consistent with oneDNN -+ && (acp.padstride_info.pad().first <= 1) // padding left/right -+ && (acp.padstride_info.pad().second <= 1) // padding top/bottom -+ // only non-dilated convolutions allowed - && (acp.dilation_info == arm_compute::Size2D(1, 1)); -- if (!wino_shape_ok) return status::unimplemented; -+ -+ ACL_CHECK_SUPPORT(!shape_ok, "shape not supported by winograd kernels"); - - // clang-format off - // Validate convolution manually to check for return status diff --git a/fa93750.patch b/fa93750.patch deleted file mode 100644 index e3a03b0..0000000 --- a/fa93750.patch +++ /dev/null @@ -1,1222 +0,0 @@ -From fa93750bfb821fe05e3190b36f52b5bd88a57110 Mon Sep 17 00:00:00 2001 -From: Diana Bite -Date: Thu, 24 Feb 2022 14:25:49 +0000 -Subject: [PATCH] cpu: aarch64: acl: fix inner_prod test failure and improve - validation - --- - src/cpu/aarch64/acl_binary.hpp | 8 +- - src/cpu/aarch64/acl_convolution_utils.cpp | 65 ++----- - src/cpu/aarch64/acl_eltwise.hpp | 4 +- - src/cpu/aarch64/acl_eltwise_utils.cpp | 17 +- - src/cpu/aarch64/acl_gemm_convolution.hpp | 6 +- - .../aarch64/acl_indirect_gemm_convolution.hpp | 6 +- - src/cpu/aarch64/acl_inner_product.cpp | 13 +- - src/cpu/aarch64/acl_inner_product.hpp | 179 +++++++++++++++-- - src/cpu/aarch64/acl_inner_product_utils.cpp | 181 ------------------ - src/cpu/aarch64/acl_inner_product_utils.hpp | 62 ------ - src/cpu/aarch64/acl_softmax.hpp | 17 +- - src/cpu/aarch64/acl_utils.cpp | 4 +- - src/cpu/aarch64/acl_utils.hpp | 32 +++- - src/cpu/aarch64/acl_winograd_convolution.hpp | 6 +- - src/cpu/aarch64/matmul/acl_matmul.cpp | 4 +- - src/cpu/aarch64/matmul/acl_matmul.hpp | 4 +- - src/cpu/aarch64/matmul/acl_matmul_utils.cpp | 45 ++--- - src/cpu/aarch64/matmul/acl_matmul_utils.hpp | 6 +- - tests/benchdnn/inputs/ip/test_ip_acl | 26 +++ - 19 files changed, 281 insertions(+), 404 deletions(-) - delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.cpp - delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.hpp - create mode 100644 tests/benchdnn/inputs/ip/test_ip_acl - -diff --git a/src/cpu/aarch64/acl_binary.hpp b/src/cpu/aarch64/acl_binary.hpp -index 77adb45bef..122b094587 100644 ---- a/src/cpu/aarch64/acl_binary.hpp -+++ b/src/cpu/aarch64/acl_binary.hpp -@@ -125,7 +125,7 @@ struct acl_binary_t : public primitive_t { - -
status_t init(engine_t *engine) { - -- using namespace acl_common_utils; -+ using namespace acl_utils; - - // Only support f32 and s32 for now - data_type_t ddt = dst_md(0)->data_type; -@@ -179,11 +179,7 @@ struct acl_binary_t : public primitive_t { - } - - // Call operator specific validate function to check support -- arm_compute::Status acl_st = validate(asp_); -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } -+ ACL_CHECK_VALID(validate(asp_)); - - // Initialize the ACL threads - acl_thread_bind(); -diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp -index ca91de49e3..e072dc5490 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.cpp -+++ b/src/cpu/aarch64/acl_convolution_utils.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2020-2021 Arm Ltd. and affiliates -+* Copyright 2020-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -156,10 +156,10 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC - : arm_compute::DataLayout::NCHW; - -- auto acl_src_data_t = acl_common_utils::get_acl_data_t(src_d.data_type()); -- auto acl_wei_data_t = acl_common_utils::get_acl_data_t(wei_d.data_type()); -- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(dst_d.data_type()); -- auto acl_bia_data_t = acl_common_utils::get_acl_data_t(bia_d.data_type()); -+ auto acl_src_data_t = acl_utils::get_acl_data_t(src_d.data_type()); -+ auto acl_wei_data_t = acl_utils::get_acl_data_t(wei_d.data_type()); -+ auto acl_dst_data_t = acl_utils::get_acl_data_t(dst_d.data_type()); -+ auto acl_bia_data_t = acl_utils::get_acl_data_t(bia_d.data_type()); - - if (acl_bia_data_t == arm_compute::DataType::UNKNOWN) - acl_bia_data_t = arm_compute::DataType::F32; -@@ -212,33 +212,14 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - // is_eltwise(true) here stands for eltwise.scale == 1.f check - acp.sum_with_eltwise = (post_ops.len() == 2) && post_ops.entry_[0].is_sum() - && post_ops.entry_[1].is_eltwise(true); -- acp.act_info = acl_common_utils::get_acl_act(attr); -+ acp.act_info = acl_utils::get_acl_act(attr); - - if (acp.sum_with_eltwise) { -- // clang-format off -- // Validate activation layer manually to check for return status -- auto acl_al_st = arm_compute::NEActivationLayer::validate( -- &acp.dst_info, -- &acp.dst_info, -- acp.act_info); -- // clang-format on -- if (acl_al_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_al_st.error_description().c_str()); -- return status::unimplemented; -- } -- -- // clang-format off -- // Validate arithmetic addition manually to check for return status -- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate( -- &acp.dst_info, -- &acp.dst_info, -- &acp.dst_info, -- arm_compute::ConvertPolicy::SATURATE); -- // clang-format on -- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str()); -- return status::unimplemented; -- } -+ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate( // eltwise -+ &acp.dst_info, &acp.dst_info, acp.act_info)); -+ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate( // sum -+ &acp.dst_info, 
&acp.dst_info, &acp.dst_info, -+ arm_compute::ConvertPolicy::SATURATE)); - } - - return status::success; -@@ -254,7 +235,7 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - - // clang-format off - // Validate convolution manually to check for return status -- auto acl_st = arm_compute::NEGEMMConvolutionLayer::validate( -+ ACL_CHECK_VALID(arm_compute::NEGEMMConvolutionLayer::validate( - &acp.src_info, - &acp.wei_info, - acp.with_bias ? &acp.bia_info : nullptr, -@@ -263,12 +244,8 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - acp.weights_info, - acp.dilation_info, - acp.act_info, -- acp.fast_math); -+ acp.fast_math)); - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } - - return status::success; - } -@@ -289,7 +266,7 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - - // clang-format off - // NOTE: indirect convolution method supports only nhwc layout. -- auto acl_st = arm_compute::NEGEMMConv2d::validate( -+ ACL_CHECK_VALID(arm_compute::NEGEMMConv2d::validate( - &acp.src_info, - &acp.wei_info, - acp.with_bias ? &acp.bia_info : nullptr, -@@ -298,12 +275,8 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - acp.dilation_info, - acp.act_info, - acp.fast_math, -- 1)); -+ 1))); - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } - - return status::success; - } -@@ -336,19 +309,15 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, - - // clang-format off - // Validate convolution manually to check for return status -- auto acl_st = arm_compute::NEWinogradConvolutionLayer::validate( -+ ACL_CHECK_VALID(arm_compute::NEWinogradConvolutionLayer::validate( - &acp.src_info, - &acp.wei_info, - acp.with_bias ? &acp.bia_info : nullptr, - &acp.dst_info, - acp.padstride_info, - acp.act_info, -- true); // enable_fast_math flag in ACL Winograd -+ true)); // enable_fast_math flag in ACL Winograd - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_eltwise.hpp b/src/cpu/aarch64/acl_eltwise.hpp -index a55b89272c..381368aabb 100644 ---- a/src/cpu/aarch64/acl_eltwise.hpp -+++ b/src/cpu/aarch64/acl_eltwise.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -78,7 +78,7 @@ struct acl_eltwise_fwd_t : public primitive_t { - aep_, data_md_, *desc(), *attr()); - if (conf_status != status::success) return status::unimplemented; - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_eltwise_utils.cpp b/src/cpu/aarch64/acl_eltwise_utils.cpp -index 35e809e042..880b6aeaae 100644 ---- a/src/cpu/aarch64/acl_eltwise_utils.cpp -+++ b/src/cpu/aarch64/acl_eltwise_utils.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -46,7 +46,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - - const alg_kind_t eltwise_alg = ed.alg_kind; - -- bool activation_supported = acl_common_utils::acl_act_ok(eltwise_alg); -+ bool activation_supported = acl_utils::acl_act_ok(eltwise_alg); - if (!activation_supported) { return status::unimplemented; } - - // batch size -@@ -69,8 +69,8 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC - : arm_compute::DataLayout::NCHW; - -- auto acl_src_data_t = acl_common_utils::get_acl_data_t(data_d.data_type()); -- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(data_d.data_type()); -+ auto acl_src_data_t = acl_utils::get_acl_data_t(data_d.data_type()); -+ auto acl_dst_data_t = acl_utils::get_acl_data_t(data_d.data_type()); - - // clang-format off - aep.src_info = arm_compute::TensorInfo( -@@ -93,7 +93,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - aep.dst_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); - } - -- aep.act_info = acl_common_utils::get_acl_act(ed); -+ aep.act_info = acl_utils::get_acl_act(ed); - - return status::success; - } -@@ -105,14 +105,11 @@ status_t init_conf_eltwise(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - CHECK(acl_eltwise_check(aep, data_md, ed, attr)); - - // clang-format off -- auto acl_st = arm_compute::NEActivationLayer::validate( -+ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate( - &aep.src_info, - &aep.dst_info, -- aep.act_info); -+ aep.act_info)); - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- return status::unimplemented; -- } - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_gemm_convolution.hpp b/src/cpu/aarch64/acl_gemm_convolution.hpp -index 3e7542b6bf..496f501211 100644 ---- a/src/cpu/aarch64/acl_gemm_convolution.hpp -+++ b/src/cpu/aarch64/acl_gemm_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2020-2021 Arm Ltd. and affiliates -+* Copyright 2020-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -108,7 +108,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t { - src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr()); - if (conf_status != status::success) return status::unimplemented; - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -@@ -146,7 +146,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t { - // sum+eltwise post-ops - if (eltwise_only || sum_with_eltwise) { - const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 0); -diff --git a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -index 0a0021aeee..18e757a2c9 100644 ---- a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -+++ b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -109,7 +109,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { - *attr()); - if (conf_status != status::success) return status::unimplemented; - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -@@ -134,7 +134,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { - // sum+eltwise post-ops - if (eltwise_only || sum_with_eltwise) { - const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 0); -diff --git a/src/cpu/aarch64/acl_inner_product.cpp b/src/cpu/aarch64/acl_inner_product.cpp -index 7a316135f8..f355a657c7 100644 ---- a/src/cpu/aarch64/acl_inner_product.cpp -+++ b/src/cpu/aarch64/acl_inner_product.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -21,23 +21,18 @@ namespace impl { - namespace cpu { - namespace aarch64 { - --using namespace dnnl::impl::status; --using namespace dnnl::impl::memory_tracking::names; --using namespace dnnl::impl::utils; -- - status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const { - // Lock here is needed because resource_mapper does not support - // concurrent multithreaded access. 
- std::lock_guard<std::mutex> _lock {this->mtx}; - -- status_t status = status::success; - auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC); - auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS); - auto bia_base = CTX_IN_MEM(const data_t *, DNNL_ARG_BIAS); - auto dst_base = CTX_OUT_MEM(data_t *, DNNL_ARG_DST); - -- bool with_bias = pd()->aip_.with_bias; -- bool with_sum = pd()->aip_.with_sum; -+ bool with_bias = pd()->aip.with_bias; -+ bool with_sum = pd()->aip.with_sum; - - // Retrieve primitive resource and configured Compute Library objects - auto *acl_resource -@@ -64,7 +59,7 @@ status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const { - acl_obj.dst_tensor.allocator()->free(); - if (with_bias) { acl_obj.bia_tensor.allocator()->free(); } - -- return status; -+ return status::success; - } - - } // namespace aarch64 -diff --git a/src/cpu/aarch64/acl_inner_product.hpp b/src/cpu/aarch64/acl_inner_product.hpp -index dd742ea0bc..e5a9bdcc8a 100644 ---- a/src/cpu/aarch64/acl_inner_product.hpp -+++ b/src/cpu/aarch64/acl_inner_product.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -17,15 +17,34 @@ - #ifndef CPU_AARCH64_ACL_INNER_PRODUCT_HPP - #define CPU_AARCH64_ACL_INNER_PRODUCT_HPP - -+#include "cpu/aarch64/acl_utils.hpp" - #include "cpu/cpu_inner_product_pd.hpp" - --#include "cpu/aarch64/acl_inner_product_utils.hpp" -- - namespace dnnl { - namespace impl { - namespace cpu { - namespace aarch64 { - -+struct acl_ip_obj_t { -+ arm_compute::NEFullyConnectedLayer fc; -+ arm_compute::NEArithmeticAddition add; -+ arm_compute::Tensor src_tensor; -+ arm_compute::Tensor wei_tensor; -+ arm_compute::Tensor bia_tensor; -+ arm_compute::Tensor dst_tensor; -+ arm_compute::Tensor dst_acc_tensor; -+}; -+ -+struct acl_ip_conf_t { -+ bool with_bias; -+ bool with_sum; -+ arm_compute::TensorInfo src_info; -+ arm_compute::TensorInfo wei_info; -+ arm_compute::TensorInfo bia_info; -+ arm_compute::TensorInfo dst_info; -+ arm_compute::FullyConnectedLayerInfo fc_info; -+}; -+ - struct acl_ip_resource_t : public resource_t { - acl_ip_resource_t() : acl_ip_obj_(utils::make_unique<acl_ip_obj_t>()) {} - -@@ -71,33 +90,26 @@ struct acl_inner_product_fwd_t : public primitive_t { - struct pd_t : public cpu_inner_product_fwd_pd_t { - using cpu_inner_product_fwd_pd_t::cpu_inner_product_fwd_pd_t; - -- DECLARE_COMMON_PD_T("inner_product:acl", acl_inner_product_fwd_t); -+ DECLARE_COMMON_PD_T("acl", acl_inner_product_fwd_t); - - status_t init(engine_t *engine) { -- using namespace utils; -- - const bool ok = is_fwd() && !has_zero_dim_memory() - && expect_data_types(data_type::f32, data_type::f32, - data_type::f32, data_type::f32, data_type::f32) - && attr()->has_default_values( - primitive_attr_t::skip_mask_t::post_ops, - data_type::f32) -- && (set_default_params() == status::success) -- && post_ops_ok(); -+ && set_default_params() == status::success && post_ops_ok(); - - if (!ok) return status::unimplemented; - -- auto conf_status = acl_inner_product_utils::init_conf_ip(aip_, -- src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr()); -- // conf_status here can be either status::success or status::unimplemented -- if (conf_status != status::success) return conf_status; -- -- acl_common_utils::acl_thread_bind(); -+
CHECK(init_conf_ip(aip, src_md_, weights_md_, dst_md_, bias_md_, -+ *desc(), *attr())); - - return status::success; - } - -- acl_ip_conf_t aip_; -+ acl_ip_conf_t aip; - - protected: - bool post_ops_ok() const { -@@ -111,16 +123,149 @@ struct acl_inner_product_fwd_t : public primitive_t { - // Compute Library supports here only one eltwise post-op or sum - if (po.len() == 1 && is_eltwise(0)) { - const auto act_type = po.entry_[0].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 1 && is_sum(0)) - || (po.len() == 0); - } -+ -+ status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, -+ memory_desc_t &wei_md, memory_desc_t &dst_md, -+ memory_desc_t &bias_md, const inner_product_desc_t &ipd, -+ const primitive_attr_t &attr) { -+ -+ ACL_CHECK_SUPPORT(src_md.ndims != wei_md.ndims, -+ "source and weights dimensions must match"); -+ -+ const int ndims = src_md.ndims; -+ -+ const bool is_2d = (ndims == 2); -+ const bool is_4d = (ndims == 4); -+ -+ ACL_CHECK_SUPPORT( -+ !(is_2d || is_4d), "ACL supports only 2d or 4d cases"); -+ -+ // batch size -+ const int n = src_md.dims[0]; -+ -+ // input and output channels -+ const int ic = src_md.dims[1]; -+ const int oc = dst_md.dims[1]; -+ -+ // source spatial dimensions -+ const int ih = is_4d ? src_md.dims[ndims - 2] : 0; -+ const int iw = is_4d ? src_md.dims[ndims - 1] : 0; -+ -+ // weights spatial dimensions -+ const int kh = is_4d ? wei_md.dims[ndims - 2] : 0; -+ const int kw = is_4d ? wei_md.dims[ndims - 1] : 0; -+ -+ // Only NCHW or NHWC derivatives supported by ACL kernels -+ using namespace format_tag; -+ auto src_tag = memory_desc_matches_one_of_tag( -+ src_md, nhwc, nchw, nc, cn); -+ auto wei_tag = memory_desc_matches_one_of_tag( -+ wei_md, ohwi, oihw, oi, io); -+ auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn); -+ -+ ACL_CHECK_SUPPORT( -+ utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag), -+ "unsupported memory layout"); -+ -+ ACL_CHECK_SUPPORT(is_2d && src_tag != dst_tag, -+ "for src and dst layouts must match"); -+ -+ arm_compute::TensorShape src_shape, wei_shape; -+ if (is_2d) { -+ src_shape = (src_tag == nc) ? arm_compute::TensorShape(ic, n) -+ : arm_compute::TensorShape(n, ic); -+ -+ wei_shape = (wei_tag == io) ? arm_compute::TensorShape(oc, ic) -+ : arm_compute::TensorShape(ic, oc); -+ } -+ if (is_4d) { -+ src_shape = (src_tag == nhwc) -+ ? arm_compute::TensorShape(ic, iw, ih, n) -+ : arm_compute::TensorShape(iw, ih, ic, n); -+ -+ // ACL requires the weights to be in 2D flattened shape -+ const int flattened_ic = is_4d ? ic * kh * kw : ic; -+ wei_shape = arm_compute::TensorShape(flattened_ic, oc); -+ } -+ -+ arm_compute::DataLayout src_layout = (src_tag == nhwc) -+ ? arm_compute::DataLayout::NHWC -+ : arm_compute::DataLayout::NCHW; -+ -+ arm_compute::DataLayout wei_layout = (wei_tag == ohwi) -+ ? arm_compute::DataLayout::NHWC -+ : arm_compute::DataLayout::NCHW; -+ -+ aip.src_info = arm_compute::TensorInfo( -+ src_shape, 1, arm_compute::DataType::F32, src_layout); -+ -+ aip.wei_info = arm_compute::TensorInfo( -+ wei_shape, 1, arm_compute::DataType::F32, wei_layout); -+ -+ aip.dst_info -+ = arm_compute::TensorInfo(arm_compute::TensorShape(oc, n), -+ 1, arm_compute::DataType::F32); -+ -+ aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef; -+ aip.bia_info = arm_compute::TensorInfo(aip.with_bias -+ ? 
arm_compute::TensorShape(oc) -+ : arm_compute::TensorShape(), -+ 1, arm_compute::DataType::F32); -+ -+ aip.fc_info.weights_trained_layout = wei_layout; -+ if (is_2d && wei_tag != src_tag) { -+ // weights are already transposed -+ aip.fc_info.transpose_weights = false; -+ } -+ -+ // Either activation or sum is supported as post-op at the moment -+ aip.fc_info.activation_info = acl_utils::get_acl_act(attr); -+ const auto &post_ops = attr.post_ops_; -+ aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum(); -+ -+ // Fast math mode -+ auto math_mode = get_fpmath_mode(); -+ bool is_fastmath_enabled = utils::one_of( -+ math_mode, fpmath_mode::bf16, fpmath_mode::any); -+ aip.fc_info.enable_fast_math = is_fastmath_enabled; -+ -+ // clang-format off -+ // Validate fully connected layer manually to check for return status -+ ACL_CHECK_VALID(arm_compute::NEFullyConnectedLayer::validate( -+ &aip.src_info, -+ &aip.wei_info, -+ aip.with_bias ? &aip.bia_info : nullptr, -+ &aip.dst_info, -+ aip.fc_info)); -+ -+ if (aip.with_sum) { -+ // Validate arithmetic addition manually to check for return status -+ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate( -+ &aip.dst_info, -+ &aip.dst_info, -+ &aip.dst_info, -+ arm_compute::ConvertPolicy::SATURATE)); -+ // clang-format on -+ } -+ -+ return status::success; -+ } - }; // pd_t - - acl_inner_product_fwd_t(const pd_t *apd) : primitive_t(apd) {} - -+ status_t init(engine_t *engine) override { -+ acl_utils::acl_thread_bind(); -+ -+ return status::success; -+ } -+ - status_t create_resource( - engine_t *engine, resource_mapper_t &mapper) const override { - if (mapper.has_resource(this)) return status::success; -@@ -129,7 +274,7 @@ struct acl_inner_product_fwd_t : public primitive_t { - if (!r) return status::out_of_memory; - - // Configure the resource based on information from primitive descriptor -- auto st = r->configure(pd()->aip_); -+ auto st = r->configure(pd()->aip); - if (st == status::success) { mapper.add(this, std::move(r)); } - - return st; -diff --git a/src/cpu/aarch64/acl_inner_product_utils.cpp b/src/cpu/aarch64/acl_inner_product_utils.cpp -deleted file mode 100644 -index c8fab86f22..0000000000 ---- a/src/cpu/aarch64/acl_inner_product_utils.cpp -+++ /dev/null -@@ -1,181 +0,0 @@ --/******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates --* --* Licensed under the Apache License, Version 2.0 (the "License"); --* you may not use this file except in compliance with the License. --* You may obtain a copy of the License at --* --* http://www.apache.org/licenses/LICENSE-2.0 --* --* Unless required by applicable law or agreed to in writing, software --* distributed under the License is distributed on an "AS IS" BASIS, --* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --* See the License for the specific language governing permissions and --* limitations under the License. 
--*******************************************************************************/ -- --#include "cpu/aarch64/acl_inner_product_utils.hpp" -- --namespace dnnl { --namespace impl { --namespace cpu { --namespace aarch64 { -- --namespace acl_inner_product_utils { -- --using namespace format_tag; --using namespace utils; --using namespace status; -- --status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, -- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md, -- const inner_product_desc_t &ipd, const primitive_attr_t &attr) { -- const memory_desc_wrapper src_d(&src_md); -- const memory_desc_wrapper wei_d(&wei_md); -- const memory_desc_wrapper dst_d(&dst_md); -- const memory_desc_wrapper bia_d(&bias_md); -- -- // Compute Library currently supports forward propagation only -- const prop_kind_t prop_kind = ipd.prop_kind; -- const bool is_fwd = (prop_kind == dnnl_forward_training) -- || (prop_kind == dnnl_forward_inference); -- if (!is_fwd) return status::unimplemented; -- -- const int with_groups = wei_d.ndims() == src_d.ndims() + 1; -- const int ndims = src_d.ndims(); -- -- // There are two sub-cases: src & wei tensors are either 2- or 4-dimensional -- const bool is_2d = (ndims == 2) && (wei_d.ndims() == 2); -- const bool is_4d = (ndims == 4) && (wei_d.ndims() == 4); -- -- // Compute Library unsupported shape scenarios -- // FP32 only is supported at the moment -- if (one_of(true, !(is_4d || is_2d), with_groups)) { return unimplemented; } -- -- // batch size -- const int mb = src_d.dims()[0]; -- -- // src/input channels, height, width -- const int ic = src_d.dims()[1]; -- const int ih = is_4d ? src_d.dims()[ndims - 2] : 0; -- const int iw = is_4d ? src_d.dims()[ndims - 1] : 0; -- -- // dst/output channels -- const int oc = dst_d.dims()[1]; -- -- // weights height, width -- const int kh = is_4d ? wei_d.dims()[with_groups + ndims - 2] : 0; -- const int kw = is_4d ? wei_d.dims()[with_groups + ndims - 1] : 0; -- -- aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef; -- -- // Data layout is already defined thus should only be checked -- auto src_tag = memory_desc_matches_one_of_tag(src_md, nhwc, nchw, nc, cn); -- auto wei_tag = memory_desc_matches_one_of_tag(wei_md, ohwi, oihw, oi, io); -- auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn); -- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { -- return status::unimplemented; -- } -- -- arm_compute::TensorShape src_shape {(src_tag == nc) -- ? arm_compute::TensorShape(ic, mb) -- : arm_compute::TensorShape(mb, ic)}; -- if (is_4d) { -- src_shape = (src_tag == nhwc) -- ? arm_compute::TensorShape(ic, iw, ih, mb) -- : arm_compute::TensorShape(iw, ih, ic, mb); -- } -- -- // Compute Library requires the weights to be 2-dimensional for FC layer -- arm_compute::TensorShape wei_shape { -- arm_compute::TensorShape(is_4d ? ic * kh * kw : ic, oc)}; -- if (is_2d && wei_tag == io) { -- wei_shape = arm_compute::TensorShape(oc, ic); -- } -- -- arm_compute::DataLayout wei_layout {(wei_tag == ohwi || wei_tag == oi) -- ? arm_compute::DataLayout::NHWC -- : arm_compute::DataLayout::NCHW}; -- -- // clang-format off -- aip.src_info = arm_compute::TensorInfo( -- src_shape, -- 1, -- arm_compute::DataType::F32, -- (src_tag == nhwc || src_tag == nc) ? -- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW); -- -- aip.wei_info = arm_compute::TensorInfo( -- wei_shape, -- 1, -- arm_compute::DataType::F32, -- wei_layout); -- -- aip.dst_info = arm_compute::TensorInfo( -- (dst_tag == nhwc || dst_tag == nc) ? 
-- arm_compute::TensorShape(oc, mb) : arm_compute::TensorShape(mb, oc), -- 1, -- arm_compute::DataType::F32, -- (dst_tag == nhwc || dst_tag == nc) ? -- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW); -- -- aip.bia_info = arm_compute::TensorInfo( -- aip.with_bias ? -- arm_compute::TensorShape(oc) : arm_compute::TensorShape(), -- 1, -- arm_compute::DataType::F32); -- // clang-format on -- -- aip.fc_info.weights_trained_layout = wei_layout; -- if (is_2d && wei_tag != src_tag) { aip.fc_info.transpose_weights = false; } -- -- // Either activation or sum is supported as post-op at the moment -- aip.fc_info.activation_info = acl_common_utils::get_acl_act(attr); -- const auto &post_ops = attr.post_ops_; -- aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum(); -- -- // Fast math mode -- auto math_mode = get_fpmath_mode(); -- bool is_fastmath_enabled -- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); -- aip.fc_info.enable_fast_math = is_fastmath_enabled; -- -- // clang-format off -- // Validate fully connected layer manually to check for return status -- auto acl_st = arm_compute::NEFullyConnectedLayer::validate( -- &aip.src_info, -- &aip.wei_info, -- aip.with_bias ? &aip.bia_info : nullptr, -- &aip.dst_info, -- aip.fc_info); -- // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } -- -- if (aip.with_sum) { -- // clang-format off -- // Validate arithmetic addition manually to check for return status -- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate( -- &aip.dst_info, -- &aip.dst_info, -- &aip.dst_info, -- arm_compute::ConvertPolicy::SATURATE); -- -- // clang-format on -- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str()); -- return status::unimplemented; -- } -- } -- -- return status::success; --} -- --} // namespace acl_inner_product_utils -- --} // namespace aarch64 --} // namespace cpu --} // namespace impl --} // namespace dnnl -diff --git a/src/cpu/aarch64/acl_inner_product_utils.hpp b/src/cpu/aarch64/acl_inner_product_utils.hpp -deleted file mode 100644 -index 022d0e3349..0000000000 ---- a/src/cpu/aarch64/acl_inner_product_utils.hpp -+++ /dev/null -@@ -1,62 +0,0 @@ --/******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates --* --* Licensed under the Apache License, Version 2.0 (the "License"); --* you may not use this file except in compliance with the License. --* You may obtain a copy of the License at --* --* http://www.apache.org/licenses/LICENSE-2.0 --* --* Unless required by applicable law or agreed to in writing, software --* distributed under the License is distributed on an "AS IS" BASIS, --* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --* See the License for the specific language governing permissions and --* limitations under the License. 
--*******************************************************************************/ -- --#ifndef CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP --#define CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP -- --#include "cpu/cpu_inner_product_pd.hpp" -- --#include "cpu/aarch64/acl_utils.hpp" -- --namespace dnnl { --namespace impl { --namespace cpu { --namespace aarch64 { -- --struct acl_ip_obj_t { -- arm_compute::NEFullyConnectedLayer fc; -- arm_compute::NEArithmeticAddition add; -- arm_compute::Tensor src_tensor; -- arm_compute::Tensor wei_tensor; -- arm_compute::Tensor bia_tensor; -- arm_compute::Tensor dst_tensor; -- arm_compute::Tensor dst_acc_tensor; --}; -- --struct acl_ip_conf_t { -- bool with_bias; -- bool with_sum; -- arm_compute::TensorInfo src_info; -- arm_compute::TensorInfo wei_info; -- arm_compute::TensorInfo bia_info; -- arm_compute::TensorInfo dst_info; -- arm_compute::FullyConnectedLayerInfo fc_info; --}; -- --namespace acl_inner_product_utils { -- --status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, -- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md, -- const inner_product_desc_t &ipd, const primitive_attr_t &attr); -- --} // namespace acl_inner_product_utils -- --} // namespace aarch64 --} // namespace cpu --} // namespace impl --} // namespace dnnl -- --#endif // CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP -diff --git a/src/cpu/aarch64/acl_softmax.hpp b/src/cpu/aarch64/acl_softmax.hpp -index a4bfd0c3bd..4ee7139a93 100644 ---- a/src/cpu/aarch64/acl_softmax.hpp -+++ b/src/cpu/aarch64/acl_softmax.hpp -@@ -129,7 +129,7 @@ struct acl_softmax_fwd_t : public primitive_t { - arm_compute::DataLayout acl_layout = arm_compute::DataLayout::NHWC; - - const arm_compute::DataType acl_data_t -- = acl_common_utils::get_acl_data_t(data_type); -+ = acl_utils::get_acl_data_t(data_type); - - const int threads = dnnl_get_max_threads(); - if (inner_size_ == 1) { -@@ -189,20 +189,15 @@ struct acl_softmax_fwd_t : public primitive_t { - } - - // Validate manually to check for return status -- arm_compute::Status acl_st; - if (asp_.is_logsoftmax) { -- acl_st = arm_compute::NELogSoftmaxLayer::validate( -- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis); -+ ACL_CHECK_VALID(arm_compute::NELogSoftmaxLayer::validate( -+ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis)); - } else { -- acl_st = arm_compute::NESoftmaxLayer::validate( -- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis); -- } -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -+ ACL_CHECK_VALID(arm_compute::NESoftmaxLayer::validate( -+ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis)); - } - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_utils.cpp b/src/cpu/aarch64/acl_utils.cpp -index a69f14b6f9..098217f50e 100644 ---- a/src/cpu/aarch64/acl_utils.cpp -+++ b/src/cpu/aarch64/acl_utils.cpp -@@ -21,7 +21,7 @@ namespace impl { - namespace cpu { - namespace aarch64 { - --namespace acl_common_utils { -+namespace acl_utils { - - using namespace dnnl::impl::alg_kind; - using namespace data_type; -@@ -247,7 +247,7 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed, - return status::success; - } - --} // namespace acl_common_utils -+} // namespace acl_utils - - } // namespace aarch64 - } // namespace cpu -diff --git a/src/cpu/aarch64/acl_utils.hpp b/src/cpu/aarch64/acl_utils.hpp -index 
565cde66a9..bb8efc998c 100644 ---- a/src/cpu/aarch64/acl_utils.hpp -+++ b/src/cpu/aarch64/acl_utils.hpp -@@ -21,13 +21,10 @@ - - #include "oneapi/dnnl/dnnl_types.h" - --#include "common/bfloat16.hpp" --#include "common/c_types_map.hpp" - #include "common/dnnl_thread.hpp" - #include "common/memory_tracking.hpp" - #include "common/primitive.hpp" - #include "common/utils.hpp" -- - #include "cpu/cpu_engine.hpp" - - #include "arm_compute/runtime/NEON/NEFunctions.h" -@@ -38,7 +35,7 @@ namespace impl { - namespace cpu { - namespace aarch64 { - --namespace acl_common_utils { -+namespace acl_utils { - - arm_compute::DataType get_acl_data_t(const dnnl_data_type_t dt); - arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr); -@@ -68,12 +65,33 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed, - const memory_desc_t *d0, const memory_desc_t *d1, - const memory_desc_t *d2); - --#define MAYBE_REPORT_ACL_ERROR(msg) \ -+// Logs a custom 'info' line describing an unsupported case -+#define LOG_ACL_UNSUPPORTED(msg) \ -+ do { \ -+ if (get_verbose() >= 2) \ -+ printf("onednn_verbose,cpu,acl,unsupported: %s\n", (msg)); \ -+ } while (0) -+ -+// Returns unimplemented if error code x is NOT OK -+#define ACL_CHECK_VALID(x) \ -+ do { \ -+ arm_compute::Status s = x; \ -+ if (s.error_code() != arm_compute::ErrorCode::OK) { \ -+ LOG_ACL_UNSUPPORTED(s.error_description().c_str()); \ -+ return dnnl::impl::status::unimplemented; \ -+ } \ -+ } while (0) -+ -+// Returns unimplemented on condition x == true -+#define ACL_CHECK_SUPPORT(x, msg) \ - do { \ -- if (get_verbose()) printf("onednn_verbose,cpu,error,acl,%s\n", (msg)); \ -+ if (x) { \ -+ LOG_ACL_UNSUPPORTED(msg); \ -+ return dnnl::impl::status::unimplemented; \ -+ } \ - } while (0) - --} // namespace acl_common_utils -+} // namespace acl_utils - - } // namespace aarch64 - } // namespace cpu -diff --git a/src/cpu/aarch64/acl_winograd_convolution.hpp b/src/cpu/aarch64/acl_winograd_convolution.hpp -index 29e44eb189..223b3bc9b8 100644 ---- a/src/cpu/aarch64/acl_winograd_convolution.hpp -+++ b/src/cpu/aarch64/acl_winograd_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2020-2021 Arm Ltd. and affiliates -+* Copyright 2020-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -107,7 +107,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t { - - set_default_alg_kind(alg_kind::convolution_winograd); - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -@@ -130,7 +130,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t { - // sum+eltwise post-ops - if (eltwise_only || sum_with_eltwise) { - const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 0); -diff --git a/src/cpu/aarch64/matmul/acl_matmul.cpp b/src/cpu/aarch64/matmul/acl_matmul.cpp -index 3945fda6fc..6f9bb9b9ad 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul.cpp -+++ b/src/cpu/aarch64/matmul/acl_matmul.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. 
and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -84,4 +84,4 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { - } // namespace aarch64 - } // namespace cpu - } // namespace impl --} // namespace dnnl -\ No newline at end of file -+} // namespace dnnl -diff --git a/src/cpu/aarch64/matmul/acl_matmul.hpp b/src/cpu/aarch64/matmul/acl_matmul.hpp -index 6ba17e86dd..e69f4d9592 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul.hpp -+++ b/src/cpu/aarch64/matmul/acl_matmul.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -87,7 +87,7 @@ struct acl_matmul_t : public primitive_t { - if (conf_status != status::success) return status::unimplemented; - // Number of threads in Compute Library is set by OMP_NUM_THREADS - // dnnl_get_max_threads() == OMP_NUM_THREADS -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -index 76599d8bb1..ba266b4303 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -22,14 +22,10 @@ namespace dnnl { - namespace impl { - namespace cpu { - namespace aarch64 { --namespace matmul { - --using namespace dnnl::impl::status; --using namespace dnnl::impl::utils; --using namespace dnnl::impl::cpu::matmul; --using namespace prop_kind; -+using namespace alg_kind; -+using namespace cpu::matmul; - using namespace format_tag; --using namespace dnnl::impl::alg_kind; - - namespace acl_matmul_utils { - -@@ -58,7 +54,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, - wei_md, abcd, abdc, abc, acb, ab, ba); - auto dst_tag = memory_desc_matches_one_of_tag( - dst_md, abcd, abdc, abc, acb, ab, ba); -- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { -+ if (utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { - return status::unimplemented; - } - amp.is_transA = helper.transA() == 'T'; -@@ -85,7 +81,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, - // Fast-math mode - auto math_mode = get_fpmath_mode(); - bool is_fastmath_enabled -- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); -+ = utils::one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); - amp.gemm_info.set_fast_math(is_fastmath_enabled); - - // Fused ReLU activation -@@ -95,29 +91,15 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, - amp.alpha = attr.output_scales_.scales_[0]; - - // Validate ACL transpose -- if (amp.is_transA) { -- auto acl_transA_st = arm_compute::NETranspose::validate( -- &amp.src_acc_info, &amp.src_info); -- if (acl_transA_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_transA_st.error_description().c_str()); -- return status::unimplemented; -- } -- } -- if (amp.is_transB) { -- auto acl_transB_st = arm_compute::NETranspose::validate( -- &amp.wei_acc_info, &amp.wei_info); -- if (acl_transB_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_transB_st.error_description().c_str()); -- return status::unimplemented; -- } -- } -+ if (amp.is_transA) -+ ACL_CHECK_VALID(arm_compute::NETranspose::validate( -+ &amp.src_acc_info, &amp.src_info)); -+ if (amp.is_transB) -+ ACL_CHECK_VALID(arm_compute::NETranspose::validate( -+ &amp.wei_acc_info, &amp.wei_info)); - // Validate ACL GEMM -- auto acl_st = arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info, -- nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info); -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } -+ ACL_CHECK_VALID(arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info, -+ nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info)); - - return status::success; - } -@@ -175,7 +157,6 @@ bool acl_act_ok(alg_kind_t eltwise_activation) { - - } // namespace acl_matmul_utils - --} // namespace matmul - } // namespace aarch64 - } // namespace cpu - } // namespace impl -diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -index 1411dc4f4b..248dbe5a09 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
-@@ -25,7 +25,6 @@ namespace dnnl { - namespace impl { - namespace cpu { - namespace aarch64 { --namespace matmul { - - struct acl_matmul_obj_t { - arm_compute::NEGEMM gemm; -@@ -61,10 +60,9 @@ arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr); - bool acl_act_ok(alg_kind_t eltwise_activation); - } // namespace acl_matmul_utils - --} // namespace matmul - } // namespace aarch64 - } // namespace cpu - } // namespace impl - } // namespace dnnl - --#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP -\ No newline at end of file -+#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP -diff --git a/tests/benchdnn/inputs/ip/test_ip_acl b/tests/benchdnn/inputs/ip/test_ip_acl -new file mode 100644 -index 0000000000..a8873c30a8 ---- /dev/null -+++ b/tests/benchdnn/inputs/ip/test_ip_acl -@@ -0,0 +1,26 @@ -+--reset -+ -+# do not test other implementations -+--skip-impl='ref,jit' -+ -+# test format::any -+--batch=shapes_ci -+ -+# only inference, with and without bias -+--dir=FWD_I,FWD_B -+# test all shapes -+--batch=set_all -+ -+# with and without batches -+--mb=0,2 -+ -+# test non-spatial layout combinations -+--stag=ab,ba -+--wtag=ab,ba -+--batch=shapes_0d -+ -+# test spatial layout combinations -+--stag=abx,axb -+--wtag=abx,axb -+# 2d-spatial dimensions -+--batch=shapes_googlenet_v1 diff --git a/oneDNN-2.6.2.tar.gz b/oneDNN-2.6.2.tar.gz deleted file mode 100644 index cb5bc1f..0000000 --- a/oneDNN-2.6.2.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:baed0a7426189c6e5f1cb242c6d282d3638802cbd05772e1c4810a83bd7bc4df -size 5846494 diff --git a/oneDNN-3.0.1.tar.gz b/oneDNN-3.0.1.tar.gz new file mode 100644 index 0000000..4a68b60 --- /dev/null +++ b/oneDNN-3.0.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7aca425e9895b791a942ae755edc28f1b9f9fe9bf94291c59f33ebcf5859f2c +size 7189577 diff --git a/onednn-fix-gcc13.patch b/onednn-fix-gcc13.patch new file mode 100644 index 0000000..fd1569a --- /dev/null +++ b/onednn-fix-gcc13.patch @@ -0,0 +1,10 @@ +--- oneDNN-3.0.1/src/cpu/aarch64/xbyak_aarch64/xbyak_aarch64/xbyak_aarch64.h.orig 2023-02-24 03:36:27.000000000 +0100 ++++ oneDNN-3.0.1/src/cpu/aarch64/xbyak_aarch64/xbyak_aarch64/xbyak_aarch64.h 2023-03-21 13:44:05.786852048 +0100 +@@ -25,6 +25,7 @@ + #endif + + #include ++#include + #include + #include + #include diff --git a/onednn.changes b/onednn.changes index 73885c3..baf05a5 100644 --- a/onednn.changes +++ b/onednn.changes @@ -1,3 +1,18 @@ +------------------------------------------------------------------- +Tue Mar 21 10:27:40 UTC 2023 - Guillaume GARDET + +- Update to 3.0.1: + * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0.1 +- Skipped 3.0: + * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0 +- Add patch to fix build with GCC13: + * onednn-fix-gcc13.patch +- Disable Arm Compute library support until fixed upstream + https://github.com/oneapi-src/oneDNN/issues/1599 +- Drop upstream patches: + * 1428.patch + * fa93750.patch + ------------------------------------------------------------------- Tue Sep 20 08:26:43 UTC 2022 - Guillaume GARDET diff --git a/onednn.spec b/onednn.spec index 0d5849a..dd6b5c0 100644 --- a/onednn.spec +++ b/onednn.spec @@ -1,7 +1,7 @@ # # spec file for package onednn # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -24,23 +24,22 @@ %endif %ifarch 
aarch64 -%bcond_without acl +# Disable ACL until fixed upstream - https://github.com/oneapi-src/oneDNN/issues/1599 +%bcond_with acl %else %bcond_with acl %endif -%define libname libdnnl2 +%define libname libdnnl3 Name: onednn -Version: 2.6.2 +Version: 3.0.1 Release: 0 Summary: Intel Math Kernel Library for Deep Neural Networks License: Apache-2.0 URL: https://01.org/onednn Source0: https://github.com/oneapi-src/oneDNN/archive/v%{version}/oneDNN-%{version}.tar.gz -# PATCH-FIX-UPSTREAM - deps for Patch2 -Patch1: fa93750.patch -# PATCH-FIX-UPSTREAM - Fix build with latest ACL - https://github.com/oneapi-src/oneDNN/pull/1428 -Patch2: 1428.patch +# PATCH-FIX-UPSTREAM - https://github.com/oneapi-src/oneDNN/issues/1600 +Patch1: onednn-fix-gcc13.patch BuildRequires: chrpath BuildRequires: cmake BuildRequires: doxygen @@ -49,7 +48,7 @@ BuildRequires: gcc-c++ BuildRequires: graphviz BuildRequires: texlive-dvips-bin %if %{with acl} -BuildRequires: ComputeLibrary-devel +BuildRequires: ComputeLibrary-devel >= 22.08 %endif %if %{with opencl} BuildRequires: opencl-headers