diff --git a/1428.patch b/1428.patch new file mode 100644 index 0000000..2021fa9 --- /dev/null +++ b/1428.patch @@ -0,0 +1,34 @@ +From 24d4c1379d3c4cbf423f979fa25283b1914159c7 Mon Sep 17 00:00:00 2001 +From: Diana Bite +Date: Thu, 21 Jul 2022 12:27:17 +0100 +Subject: [PATCH] cpu: aarch64: acl: disallow large padding in ACL wino to be + consist with oneDNN + +--- + src/cpu/aarch64/acl_convolution_utils.cpp | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp +index db3bde6a48..542033df0e 100644 +--- a/src/cpu/aarch64/acl_convolution_utils.cpp ++++ b/src/cpu/aarch64/acl_convolution_utils.cpp +@@ -307,10 +307,17 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, + // General Compute Library checks, memory tags are also set there + CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); + +- const bool wino_shape_ok // unit strides only, no dilations ++ const bool shape_ok ++ // only unit strides allowed + = (acp.padstride_info.stride() == std::pair {1, 1}) ++ // Note: Compute Library supports arbitrary padding for wino kernels ++ // but we only allow small padding to be consistent with oneDNN ++ && (acp.padstride_info.pad().first <= 1) // padding left/right ++ && (acp.padstride_info.pad().second <= 1) // padding top/bottom ++ // only non-dilated convolutions allowed + && (acp.dilation_info == arm_compute::Size2D(1, 1)); +- if (!wino_shape_ok) return status::unimplemented; ++ ++ ACL_CHECK_SUPPORT(!shape_ok, "shape not supported by winograd kernels"); + + // clang-format off + // Validate convolution manually to check for return status diff --git a/fa93750.patch b/fa93750.patch new file mode 100644 index 0000000..e3a03b0 --- /dev/null +++ b/fa93750.patch @@ -0,0 +1,1222 @@ +From fa93750bfb821fe05e3190b36f52b5bd88a57110 Mon Sep 17 00:00:00 2001 +From: Diana Bite +Date: Thu, 24 Feb 2022 14:25:49 +0000 +Subject: [PATCH] 
cpu: aarch64: acl: fix inner_prod test failure and improve + validation + +--- + src/cpu/aarch64/acl_binary.hpp | 8 +- + src/cpu/aarch64/acl_convolution_utils.cpp | 65 ++----- + src/cpu/aarch64/acl_eltwise.hpp | 4 +- + src/cpu/aarch64/acl_eltwise_utils.cpp | 17 +- + src/cpu/aarch64/acl_gemm_convolution.hpp | 6 +- + .../aarch64/acl_indirect_gemm_convolution.hpp | 6 +- + src/cpu/aarch64/acl_inner_product.cpp | 13 +- + src/cpu/aarch64/acl_inner_product.hpp | 179 +++++++++++++++-- + src/cpu/aarch64/acl_inner_product_utils.cpp | 181 ------------------ + src/cpu/aarch64/acl_inner_product_utils.hpp | 62 ------ + src/cpu/aarch64/acl_softmax.hpp | 17 +- + src/cpu/aarch64/acl_utils.cpp | 4 +- + src/cpu/aarch64/acl_utils.hpp | 32 +++- + src/cpu/aarch64/acl_winograd_convolution.hpp | 6 +- + src/cpu/aarch64/matmul/acl_matmul.cpp | 4 +- + src/cpu/aarch64/matmul/acl_matmul.hpp | 4 +- + src/cpu/aarch64/matmul/acl_matmul_utils.cpp | 45 ++--- + src/cpu/aarch64/matmul/acl_matmul_utils.hpp | 6 +- + tests/benchdnn/inputs/ip/test_ip_acl | 26 +++ + 19 files changed, 281 insertions(+), 404 deletions(-) + delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.cpp + delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.hpp + create mode 100644 tests/benchdnn/inputs/ip/test_ip_acl + +diff --git a/src/cpu/aarch64/acl_binary.hpp b/src/cpu/aarch64/acl_binary.hpp +index 77adb45bef..122b094587 100644 +--- a/src/cpu/aarch64/acl_binary.hpp ++++ b/src/cpu/aarch64/acl_binary.hpp +@@ -125,7 +125,7 @@ struct acl_binary_t : public primitive_t { + + status_t init(engine_t *engine) { + +- using namespace acl_common_utils; ++ using namespace acl_utils; + + // Only support f32 and s32 for now + data_type_t ddt = dst_md(0)->data_type; +@@ -179,11 +179,7 @@ struct acl_binary_t : public primitive_t { + } + + // Call operator specific validate function to check support +- arm_compute::Status acl_st = validate(asp_); +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- 
MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); +- return status::unimplemented; +- } ++ ACL_CHECK_VALID(validate(asp_)); + + // Initialize the ACL threads + acl_thread_bind(); +diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp +index ca91de49e3..e072dc5490 100644 +--- a/src/cpu/aarch64/acl_convolution_utils.cpp ++++ b/src/cpu/aarch64/acl_convolution_utils.cpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2020-2021 Arm Ltd. and affiliates ++* Copyright 2020-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -156,10 +156,10 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, + const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC + : arm_compute::DataLayout::NCHW; + +- auto acl_src_data_t = acl_common_utils::get_acl_data_t(src_d.data_type()); +- auto acl_wei_data_t = acl_common_utils::get_acl_data_t(wei_d.data_type()); +- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(dst_d.data_type()); +- auto acl_bia_data_t = acl_common_utils::get_acl_data_t(bia_d.data_type()); ++ auto acl_src_data_t = acl_utils::get_acl_data_t(src_d.data_type()); ++ auto acl_wei_data_t = acl_utils::get_acl_data_t(wei_d.data_type()); ++ auto acl_dst_data_t = acl_utils::get_acl_data_t(dst_d.data_type()); ++ auto acl_bia_data_t = acl_utils::get_acl_data_t(bia_d.data_type()); + + if (acl_bia_data_t == arm_compute::DataType::UNKNOWN) + acl_bia_data_t = arm_compute::DataType::F32; +@@ -212,33 +212,14 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, + // is_eltwise(true) here stands for eltwise.scale == 1.f check + acp.sum_with_eltwise = (post_ops.len() == 2) && post_ops.entry_[0].is_sum() + && post_ops.entry_[1].is_eltwise(true); +- acp.act_info = acl_common_utils::get_acl_act(attr); ++ 
acp.act_info = acl_utils::get_acl_act(attr); + + if (acp.sum_with_eltwise) { +- // clang-format off +- // Validate activation layer manually to check for return status +- auto acl_al_st = arm_compute::NEActivationLayer::validate( +- &acp.dst_info, +- &acp.dst_info, +- acp.act_info); +- // clang-format on +- if (acl_al_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_al_st.error_description().c_str()); +- return status::unimplemented; +- } +- +- // clang-format off +- // Validate arithmetic addition manually to check for return status +- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate( +- &acp.dst_info, +- &acp.dst_info, +- &acp.dst_info, +- arm_compute::ConvertPolicy::SATURATE); +- // clang-format on +- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str()); +- return status::unimplemented; +- } ++ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate( // eltwise ++ &acp.dst_info, &acp.dst_info, acp.act_info)); ++ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate( // sum ++ &acp.dst_info, &acp.dst_info, &acp.dst_info, ++ arm_compute::ConvertPolicy::SATURATE)); + } + + return status::success; +@@ -254,7 +235,7 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, + + // clang-format off + // Validate convolution manually to check for return status +- auto acl_st = arm_compute::NEGEMMConvolutionLayer::validate( ++ ACL_CHECK_VALID(arm_compute::NEGEMMConvolutionLayer::validate( + &acp.src_info, + &acp.wei_info, + acp.with_bias ? 
&acp.bia_info : nullptr, +@@ -263,12 +244,8 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, + acp.weights_info, + acp.dilation_info, + acp.act_info, +- acp.fast_math); ++ acp.fast_math)); + // clang-format on +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); +- return status::unimplemented; +- } + + return status::success; + } +@@ -289,7 +266,7 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, + + // clang-format off + // NOTE: indirect convolution method supports only nhwc layout. +- auto acl_st = arm_compute::NEGEMMConv2d::validate( ++ ACL_CHECK_VALID(arm_compute::NEGEMMConv2d::validate( + &acp.src_info, + &acp.wei_info, + acp.with_bias ? &acp.bia_info : nullptr, +@@ -298,12 +275,8 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, + acp.dilation_info, + acp.act_info, + acp.fast_math, +- 1)); ++ 1))); + // clang-format on +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); +- return status::unimplemented; +- } + + return status::success; + } +@@ -336,19 +309,15 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, + + // clang-format off + // Validate convolution manually to check for return status +- auto acl_st = arm_compute::NEWinogradConvolutionLayer::validate( ++ ACL_CHECK_VALID(arm_compute::NEWinogradConvolutionLayer::validate( + &acp.src_info, + &acp.wei_info, + acp.with_bias ? 
&acp.bia_info : nullptr, + &acp.dst_info, + acp.padstride_info, + acp.act_info, +- true); // enable_fast_math flag in ACL Winograd ++ true)); // enable_fast_math flag in ACL Winograd + // clang-format on +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); +- return status::unimplemented; +- } + + return status::success; + } +diff --git a/src/cpu/aarch64/acl_eltwise.hpp b/src/cpu/aarch64/acl_eltwise.hpp +index a55b89272c..381368aabb 100644 +--- a/src/cpu/aarch64/acl_eltwise.hpp ++++ b/src/cpu/aarch64/acl_eltwise.hpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -78,7 +78,7 @@ struct acl_eltwise_fwd_t : public primitive_t { + aep_, data_md_, *desc(), *attr()); + if (conf_status != status::success) return status::unimplemented; + +- acl_common_utils::acl_thread_bind(); ++ acl_utils::acl_thread_bind(); + + return status::success; + } +diff --git a/src/cpu/aarch64/acl_eltwise_utils.cpp b/src/cpu/aarch64/acl_eltwise_utils.cpp +index 35e809e042..880b6aeaae 100644 +--- a/src/cpu/aarch64/acl_eltwise_utils.cpp ++++ b/src/cpu/aarch64/acl_eltwise_utils.cpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+@@ -46,7 +46,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, + + const alg_kind_t eltwise_alg = ed.alg_kind; + +- bool activation_supported = acl_common_utils::acl_act_ok(eltwise_alg); ++ bool activation_supported = acl_utils::acl_act_ok(eltwise_alg); + if (!activation_supported) { return status::unimplemented; } + + // batch size +@@ -69,8 +69,8 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, + const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC + : arm_compute::DataLayout::NCHW; + +- auto acl_src_data_t = acl_common_utils::get_acl_data_t(data_d.data_type()); +- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(data_d.data_type()); ++ auto acl_src_data_t = acl_utils::get_acl_data_t(data_d.data_type()); ++ auto acl_dst_data_t = acl_utils::get_acl_data_t(data_d.data_type()); + + // clang-format off + aep.src_info = arm_compute::TensorInfo( +@@ -93,7 +93,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, + aep.dst_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); + } + +- aep.act_info = acl_common_utils::get_acl_act(ed); ++ aep.act_info = acl_utils::get_acl_act(ed); + + return status::success; + } +@@ -105,14 +105,11 @@ status_t init_conf_eltwise(acl_eltwise_conf_t &aep, memory_desc_t &data_md, + CHECK(acl_eltwise_check(aep, data_md, ed, attr)); + + // clang-format off +- auto acl_st = arm_compute::NEActivationLayer::validate( ++ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate( + &aep.src_info, + &aep.dst_info, +- aep.act_info); ++ aep.act_info)); + // clang-format on +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- return status::unimplemented; +- } + + return status::success; + } +diff --git a/src/cpu/aarch64/acl_gemm_convolution.hpp b/src/cpu/aarch64/acl_gemm_convolution.hpp +index 3e7542b6bf..496f501211 100644 +--- a/src/cpu/aarch64/acl_gemm_convolution.hpp ++++ b/src/cpu/aarch64/acl_gemm_convolution.hpp +@@ -1,5 
+1,5 @@ + /******************************************************************************* +-* Copyright 2020-2021 Arm Ltd. and affiliates ++* Copyright 2020-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -108,7 +108,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t { + src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr()); + if (conf_status != status::success) return status::unimplemented; + +- acl_common_utils::acl_thread_bind(); ++ acl_utils::acl_thread_bind(); + + return status::success; + } +@@ -146,7 +146,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t { + // sum+eltwise post-ops + if (eltwise_only || sum_with_eltwise) { + const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; +- eltwise_ok = acl_common_utils::acl_act_ok(act_type); ++ eltwise_ok = acl_utils::acl_act_ok(act_type); + } + + return eltwise_ok || (po.len() == 0); +diff --git a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp +index 0a0021aeee..18e757a2c9 100644 +--- a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp ++++ b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+@@ -109,7 +109,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { + *attr()); + if (conf_status != status::success) return status::unimplemented; + +- acl_common_utils::acl_thread_bind(); ++ acl_utils::acl_thread_bind(); + + return status::success; + } +@@ -134,7 +134,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { + // sum+eltwise post-ops + if (eltwise_only || sum_with_eltwise) { + const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; +- eltwise_ok = acl_common_utils::acl_act_ok(act_type); ++ eltwise_ok = acl_utils::acl_act_ok(act_type); + } + + return eltwise_ok || (po.len() == 0); +diff --git a/src/cpu/aarch64/acl_inner_product.cpp b/src/cpu/aarch64/acl_inner_product.cpp +index 7a316135f8..f355a657c7 100644 +--- a/src/cpu/aarch64/acl_inner_product.cpp ++++ b/src/cpu/aarch64/acl_inner_product.cpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -21,23 +21,18 @@ namespace impl { + namespace cpu { + namespace aarch64 { + +-using namespace dnnl::impl::status; +-using namespace dnnl::impl::memory_tracking::names; +-using namespace dnnl::impl::utils; +- + status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const { + // Lock here is needed because resource_mapper does not support + // concurrent multithreaded access. 
+ std::lock_guard _lock {this->mtx}; + +- status_t status = status::success; + auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC); + auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS); + auto bia_base = CTX_IN_MEM(const data_t *, DNNL_ARG_BIAS); + auto dst_base = CTX_OUT_MEM(data_t *, DNNL_ARG_DST); + +- bool with_bias = pd()->aip_.with_bias; +- bool with_sum = pd()->aip_.with_sum; ++ bool with_bias = pd()->aip.with_bias; ++ bool with_sum = pd()->aip.with_sum; + + // Retrieve primitive resource and configured Compute Library objects + auto *acl_resource +@@ -64,7 +59,7 @@ status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const { + acl_obj.dst_tensor.allocator()->free(); + if (with_bias) { acl_obj.bia_tensor.allocator()->free(); } + +- return status; ++ return status::success; + } + + } // namespace aarch64 +diff --git a/src/cpu/aarch64/acl_inner_product.hpp b/src/cpu/aarch64/acl_inner_product.hpp +index dd742ea0bc..e5a9bdcc8a 100644 +--- a/src/cpu/aarch64/acl_inner_product.hpp ++++ b/src/cpu/aarch64/acl_inner_product.hpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+@@ -17,15 +17,34 @@ + #ifndef CPU_AARCH64_ACL_INNER_PRODUCT_HPP + #define CPU_AARCH64_ACL_INNER_PRODUCT_HPP + ++#include "cpu/aarch64/acl_utils.hpp" + #include "cpu/cpu_inner_product_pd.hpp" + +-#include "cpu/aarch64/acl_inner_product_utils.hpp" +- + namespace dnnl { + namespace impl { + namespace cpu { + namespace aarch64 { + ++struct acl_ip_obj_t { ++ arm_compute::NEFullyConnectedLayer fc; ++ arm_compute::NEArithmeticAddition add; ++ arm_compute::Tensor src_tensor; ++ arm_compute::Tensor wei_tensor; ++ arm_compute::Tensor bia_tensor; ++ arm_compute::Tensor dst_tensor; ++ arm_compute::Tensor dst_acc_tensor; ++}; ++ ++struct acl_ip_conf_t { ++ bool with_bias; ++ bool with_sum; ++ arm_compute::TensorInfo src_info; ++ arm_compute::TensorInfo wei_info; ++ arm_compute::TensorInfo bia_info; ++ arm_compute::TensorInfo dst_info; ++ arm_compute::FullyConnectedLayerInfo fc_info; ++}; ++ + struct acl_ip_resource_t : public resource_t { + acl_ip_resource_t() : acl_ip_obj_(utils::make_unique()) {} + +@@ -71,33 +90,26 @@ struct acl_inner_product_fwd_t : public primitive_t { + struct pd_t : public cpu_inner_product_fwd_pd_t { + using cpu_inner_product_fwd_pd_t::cpu_inner_product_fwd_pd_t; + +- DECLARE_COMMON_PD_T("inner_product:acl", acl_inner_product_fwd_t); ++ DECLARE_COMMON_PD_T("acl", acl_inner_product_fwd_t); + + status_t init(engine_t *engine) { +- using namespace utils; +- + const bool ok = is_fwd() && !has_zero_dim_memory() + && expect_data_types(data_type::f32, data_type::f32, + data_type::f32, data_type::f32, data_type::f32) + && attr()->has_default_values( + primitive_attr_t::skip_mask_t::post_ops, + data_type::f32) +- && (set_default_params() == status::success) +- && post_ops_ok(); ++ && set_default_params() == status::success && post_ops_ok(); + + if (!ok) return status::unimplemented; + +- auto conf_status = acl_inner_product_utils::init_conf_ip(aip_, +- src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr()); +- // conf_status here can be either 
status::success or status::unimplemented +- if (conf_status != status::success) return conf_status; +- +- acl_common_utils::acl_thread_bind(); ++ CHECK(init_conf_ip(aip, src_md_, weights_md_, dst_md_, bias_md_, ++ *desc(), *attr())); + + return status::success; + } + +- acl_ip_conf_t aip_; ++ acl_ip_conf_t aip; + + protected: + bool post_ops_ok() const { +@@ -111,16 +123,149 @@ struct acl_inner_product_fwd_t : public primitive_t { + // Compute Library supports here only one eltwise post-op or sum + if (po.len() == 1 && is_eltwise(0)) { + const auto act_type = po.entry_[0].eltwise.alg; +- eltwise_ok = acl_common_utils::acl_act_ok(act_type); ++ eltwise_ok = acl_utils::acl_act_ok(act_type); + } + + return eltwise_ok || (po.len() == 1 && is_sum(0)) + || (po.len() == 0); + } ++ ++ status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, ++ memory_desc_t &wei_md, memory_desc_t &dst_md, ++ memory_desc_t &bias_md, const inner_product_desc_t &ipd, ++ const primitive_attr_t &attr) { ++ ++ ACL_CHECK_SUPPORT(src_md.ndims != wei_md.ndims, ++ "source and weights dimensions must match"); ++ ++ const int ndims = src_md.ndims; ++ ++ const bool is_2d = (ndims == 2); ++ const bool is_4d = (ndims == 4); ++ ++ ACL_CHECK_SUPPORT( ++ !(is_2d || is_4d), "ACL supports only 2d or 4d cases"); ++ ++ // batch size ++ const int n = src_md.dims[0]; ++ ++ // input and output channels ++ const int ic = src_md.dims[1]; ++ const int oc = dst_md.dims[1]; ++ ++ // source spatial dimensions ++ const int ih = is_4d ? src_md.dims[ndims - 2] : 0; ++ const int iw = is_4d ? src_md.dims[ndims - 1] : 0; ++ ++ // weights spatial dimensions ++ const int kh = is_4d ? wei_md.dims[ndims - 2] : 0; ++ const int kw = is_4d ? 
wei_md.dims[ndims - 1] : 0; ++ ++ // Only NCHW or NHWC derivatives supported by ACL kernels ++ using namespace format_tag; ++ auto src_tag = memory_desc_matches_one_of_tag( ++ src_md, nhwc, nchw, nc, cn); ++ auto wei_tag = memory_desc_matches_one_of_tag( ++ wei_md, ohwi, oihw, oi, io); ++ auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn); ++ ++ ACL_CHECK_SUPPORT( ++ utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag), ++ "unsupported memory layout"); ++ ++ ACL_CHECK_SUPPORT(is_2d && src_tag != dst_tag, ++ "for src and dst layouts must match"); ++ ++ arm_compute::TensorShape src_shape, wei_shape; ++ if (is_2d) { ++ src_shape = (src_tag == nc) ? arm_compute::TensorShape(ic, n) ++ : arm_compute::TensorShape(n, ic); ++ ++ wei_shape = (wei_tag == io) ? arm_compute::TensorShape(oc, ic) ++ : arm_compute::TensorShape(ic, oc); ++ } ++ if (is_4d) { ++ src_shape = (src_tag == nhwc) ++ ? arm_compute::TensorShape(ic, iw, ih, n) ++ : arm_compute::TensorShape(iw, ih, ic, n); ++ ++ // ACL requires the weights to be in 2D flattened shape ++ const int flattened_ic = is_4d ? ic * kh * kw : ic; ++ wei_shape = arm_compute::TensorShape(flattened_ic, oc); ++ } ++ ++ arm_compute::DataLayout src_layout = (src_tag == nhwc) ++ ? arm_compute::DataLayout::NHWC ++ : arm_compute::DataLayout::NCHW; ++ ++ arm_compute::DataLayout wei_layout = (wei_tag == ohwi) ++ ? arm_compute::DataLayout::NHWC ++ : arm_compute::DataLayout::NCHW; ++ ++ aip.src_info = arm_compute::TensorInfo( ++ src_shape, 1, arm_compute::DataType::F32, src_layout); ++ ++ aip.wei_info = arm_compute::TensorInfo( ++ wei_shape, 1, arm_compute::DataType::F32, wei_layout); ++ ++ aip.dst_info ++ = arm_compute::TensorInfo(arm_compute::TensorShape(oc, n), ++ 1, arm_compute::DataType::F32); ++ ++ aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef; ++ aip.bia_info = arm_compute::TensorInfo(aip.with_bias ++ ? 
arm_compute::TensorShape(oc) ++ : arm_compute::TensorShape(), ++ 1, arm_compute::DataType::F32); ++ ++ aip.fc_info.weights_trained_layout = wei_layout; ++ if (is_2d && wei_tag != src_tag) { ++ // weights are already transposed ++ aip.fc_info.transpose_weights = false; ++ } ++ ++ // Either activation or sum is supported as post-op at the moment ++ aip.fc_info.activation_info = acl_utils::get_acl_act(attr); ++ const auto &post_ops = attr.post_ops_; ++ aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum(); ++ ++ // Fast math mode ++ auto math_mode = get_fpmath_mode(); ++ bool is_fastmath_enabled = utils::one_of( ++ math_mode, fpmath_mode::bf16, fpmath_mode::any); ++ aip.fc_info.enable_fast_math = is_fastmath_enabled; ++ ++ // clang-format off ++ // Validate fully connected layer manually to check for return status ++ ACL_CHECK_VALID(arm_compute::NEFullyConnectedLayer::validate( ++ &aip.src_info, ++ &aip.wei_info, ++ aip.with_bias ? &aip.bia_info : nullptr, ++ &aip.dst_info, ++ aip.fc_info)); ++ ++ if (aip.with_sum) { ++ // Validate arithmetic addition manually to check for return status ++ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate( ++ &aip.dst_info, ++ &aip.dst_info, ++ &aip.dst_info, ++ arm_compute::ConvertPolicy::SATURATE)); ++ // clang-format on ++ } ++ ++ return status::success; ++ } + }; // pd_t + + acl_inner_product_fwd_t(const pd_t *apd) : primitive_t(apd) {} + ++ status_t init(engine_t *engine) override { ++ acl_utils::acl_thread_bind(); ++ ++ return status::success; ++ } ++ + status_t create_resource( + engine_t *engine, resource_mapper_t &mapper) const override { + if (mapper.has_resource(this)) return status::success; +@@ -129,7 +274,7 @@ struct acl_inner_product_fwd_t : public primitive_t { + if (!r) return status::out_of_memory; + + // Configure the resource based on information from primitive descriptor +- auto st = r->configure(pd()->aip_); ++ auto st = r->configure(pd()->aip); + if (st == status::success) { 
mapper.add(this, std::move(r)); } + + return st; +diff --git a/src/cpu/aarch64/acl_inner_product_utils.cpp b/src/cpu/aarch64/acl_inner_product_utils.cpp +deleted file mode 100644 +index c8fab86f22..0000000000 +--- a/src/cpu/aarch64/acl_inner_product_utils.cpp ++++ /dev/null +@@ -1,181 +0,0 @@ +-/******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates +-* +-* Licensed under the Apache License, Version 2.0 (the "License"); +-* you may not use this file except in compliance with the License. +-* You may obtain a copy of the License at +-* +-* http://www.apache.org/licenses/LICENSE-2.0 +-* +-* Unless required by applicable law or agreed to in writing, software +-* distributed under the License is distributed on an "AS IS" BASIS, +-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-* See the License for the specific language governing permissions and +-* limitations under the License. +-*******************************************************************************/ +- +-#include "cpu/aarch64/acl_inner_product_utils.hpp" +- +-namespace dnnl { +-namespace impl { +-namespace cpu { +-namespace aarch64 { +- +-namespace acl_inner_product_utils { +- +-using namespace format_tag; +-using namespace utils; +-using namespace status; +- +-status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, +- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md, +- const inner_product_desc_t &ipd, const primitive_attr_t &attr) { +- const memory_desc_wrapper src_d(&src_md); +- const memory_desc_wrapper wei_d(&wei_md); +- const memory_desc_wrapper dst_d(&dst_md); +- const memory_desc_wrapper bia_d(&bias_md); +- +- // Compute Library currently supports forward propagation only +- const prop_kind_t prop_kind = ipd.prop_kind; +- const bool is_fwd = (prop_kind == dnnl_forward_training) +- || (prop_kind == dnnl_forward_inference); +- if (!is_fwd) return status::unimplemented; +- +- 
const int with_groups = wei_d.ndims() == src_d.ndims() + 1; +- const int ndims = src_d.ndims(); +- +- // There are two sub-cases: src & wei tensors are either 2- or 4-dimensional +- const bool is_2d = (ndims == 2) && (wei_d.ndims() == 2); +- const bool is_4d = (ndims == 4) && (wei_d.ndims() == 4); +- +- // Compute Library unsupported shape scenarios +- // FP32 only is supported at the moment +- if (one_of(true, !(is_4d || is_2d), with_groups)) { return unimplemented; } +- +- // batch size +- const int mb = src_d.dims()[0]; +- +- // src/input channels, height, width +- const int ic = src_d.dims()[1]; +- const int ih = is_4d ? src_d.dims()[ndims - 2] : 0; +- const int iw = is_4d ? src_d.dims()[ndims - 1] : 0; +- +- // dst/output channels +- const int oc = dst_d.dims()[1]; +- +- // weights height, width +- const int kh = is_4d ? wei_d.dims()[with_groups + ndims - 2] : 0; +- const int kw = is_4d ? wei_d.dims()[with_groups + ndims - 1] : 0; +- +- aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef; +- +- // Data layout is already defined thus should only be checked +- auto src_tag = memory_desc_matches_one_of_tag(src_md, nhwc, nchw, nc, cn); +- auto wei_tag = memory_desc_matches_one_of_tag(wei_md, ohwi, oihw, oi, io); +- auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn); +- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { +- return status::unimplemented; +- } +- +- arm_compute::TensorShape src_shape {(src_tag == nc) +- ? arm_compute::TensorShape(ic, mb) +- : arm_compute::TensorShape(mb, ic)}; +- if (is_4d) { +- src_shape = (src_tag == nhwc) +- ? arm_compute::TensorShape(ic, iw, ih, mb) +- : arm_compute::TensorShape(iw, ih, ic, mb); +- } +- +- // Compute Library requires the weights to be 2-dimensional for FC layer +- arm_compute::TensorShape wei_shape { +- arm_compute::TensorShape(is_4d ? 
ic * kh * kw : ic, oc)}; +- if (is_2d && wei_tag == io) { +- wei_shape = arm_compute::TensorShape(oc, ic); +- } +- +- arm_compute::DataLayout wei_layout {(wei_tag == ohwi || wei_tag == oi) +- ? arm_compute::DataLayout::NHWC +- : arm_compute::DataLayout::NCHW}; +- +- // clang-format off +- aip.src_info = arm_compute::TensorInfo( +- src_shape, +- 1, +- arm_compute::DataType::F32, +- (src_tag == nhwc || src_tag == nc) ? +- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW); +- +- aip.wei_info = arm_compute::TensorInfo( +- wei_shape, +- 1, +- arm_compute::DataType::F32, +- wei_layout); +- +- aip.dst_info = arm_compute::TensorInfo( +- (dst_tag == nhwc || dst_tag == nc) ? +- arm_compute::TensorShape(oc, mb) : arm_compute::TensorShape(mb, oc), +- 1, +- arm_compute::DataType::F32, +- (dst_tag == nhwc || dst_tag == nc) ? +- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW); +- +- aip.bia_info = arm_compute::TensorInfo( +- aip.with_bias ? +- arm_compute::TensorShape(oc) : arm_compute::TensorShape(), +- 1, +- arm_compute::DataType::F32); +- // clang-format on +- +- aip.fc_info.weights_trained_layout = wei_layout; +- if (is_2d && wei_tag != src_tag) { aip.fc_info.transpose_weights = false; } +- +- // Either activation or sum is supported as post-op at the moment +- aip.fc_info.activation_info = acl_common_utils::get_acl_act(attr); +- const auto &post_ops = attr.post_ops_; +- aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum(); +- +- // Fast math mode +- auto math_mode = get_fpmath_mode(); +- bool is_fastmath_enabled +- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); +- aip.fc_info.enable_fast_math = is_fastmath_enabled; +- +- // clang-format off +- // Validate fully connected layer manually to check for return status +- auto acl_st = arm_compute::NEFullyConnectedLayer::validate( +- &aip.src_info, +- &aip.wei_info, +- aip.with_bias ? 
&aip.bia_info : nullptr, +- &aip.dst_info, +- aip.fc_info); +- // clang-format on +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); +- return status::unimplemented; +- } +- +- if (aip.with_sum) { +- // clang-format off +- // Validate arithmetic addition manually to check for return status +- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate( +- &aip.dst_info, +- &aip.dst_info, +- &aip.dst_info, +- arm_compute::ConvertPolicy::SATURATE); +- +- // clang-format on +- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str()); +- return status::unimplemented; +- } +- } +- +- return status::success; +-} +- +-} // namespace acl_inner_product_utils +- +-} // namespace aarch64 +-} // namespace cpu +-} // namespace impl +-} // namespace dnnl +diff --git a/src/cpu/aarch64/acl_inner_product_utils.hpp b/src/cpu/aarch64/acl_inner_product_utils.hpp +deleted file mode 100644 +index 022d0e3349..0000000000 +--- a/src/cpu/aarch64/acl_inner_product_utils.hpp ++++ /dev/null +@@ -1,62 +0,0 @@ +-/******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates +-* +-* Licensed under the Apache License, Version 2.0 (the "License"); +-* you may not use this file except in compliance with the License. +-* You may obtain a copy of the License at +-* +-* http://www.apache.org/licenses/LICENSE-2.0 +-* +-* Unless required by applicable law or agreed to in writing, software +-* distributed under the License is distributed on an "AS IS" BASIS, +-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-* See the License for the specific language governing permissions and +-* limitations under the License. 
+-*******************************************************************************/ +- +-#ifndef CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP +-#define CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP +- +-#include "cpu/cpu_inner_product_pd.hpp" +- +-#include "cpu/aarch64/acl_utils.hpp" +- +-namespace dnnl { +-namespace impl { +-namespace cpu { +-namespace aarch64 { +- +-struct acl_ip_obj_t { +- arm_compute::NEFullyConnectedLayer fc; +- arm_compute::NEArithmeticAddition add; +- arm_compute::Tensor src_tensor; +- arm_compute::Tensor wei_tensor; +- arm_compute::Tensor bia_tensor; +- arm_compute::Tensor dst_tensor; +- arm_compute::Tensor dst_acc_tensor; +-}; +- +-struct acl_ip_conf_t { +- bool with_bias; +- bool with_sum; +- arm_compute::TensorInfo src_info; +- arm_compute::TensorInfo wei_info; +- arm_compute::TensorInfo bia_info; +- arm_compute::TensorInfo dst_info; +- arm_compute::FullyConnectedLayerInfo fc_info; +-}; +- +-namespace acl_inner_product_utils { +- +-status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, +- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md, +- const inner_product_desc_t &ipd, const primitive_attr_t &attr); +- +-} // namespace acl_inner_product_utils +- +-} // namespace aarch64 +-} // namespace cpu +-} // namespace impl +-} // namespace dnnl +- +-#endif // CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP +diff --git a/src/cpu/aarch64/acl_softmax.hpp b/src/cpu/aarch64/acl_softmax.hpp +index a4bfd0c3bd..4ee7139a93 100644 +--- a/src/cpu/aarch64/acl_softmax.hpp ++++ b/src/cpu/aarch64/acl_softmax.hpp +@@ -129,7 +129,7 @@ struct acl_softmax_fwd_t : public primitive_t { + arm_compute::DataLayout acl_layout = arm_compute::DataLayout::NHWC; + + const arm_compute::DataType acl_data_t +- = acl_common_utils::get_acl_data_t(data_type); ++ = acl_utils::get_acl_data_t(data_type); + + const int threads = dnnl_get_max_threads(); + if (inner_size_ == 1) { +@@ -189,20 +189,15 @@ struct acl_softmax_fwd_t : public primitive_t { + } + + // Validate 
manually to check for return status +- arm_compute::Status acl_st; + if (asp_.is_logsoftmax) { +- acl_st = arm_compute::NELogSoftmaxLayer::validate( +- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis); ++ ACL_CHECK_VALID(arm_compute::NELogSoftmaxLayer::validate( ++ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis)); + } else { +- acl_st = arm_compute::NESoftmaxLayer::validate( +- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis); +- } +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); +- return status::unimplemented; ++ ACL_CHECK_VALID(arm_compute::NESoftmaxLayer::validate( ++ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis)); + } + +- acl_common_utils::acl_thread_bind(); ++ acl_utils::acl_thread_bind(); + + return status::success; + } +diff --git a/src/cpu/aarch64/acl_utils.cpp b/src/cpu/aarch64/acl_utils.cpp +index a69f14b6f9..098217f50e 100644 +--- a/src/cpu/aarch64/acl_utils.cpp ++++ b/src/cpu/aarch64/acl_utils.cpp +@@ -21,7 +21,7 @@ namespace impl { + namespace cpu { + namespace aarch64 { + +-namespace acl_common_utils { ++namespace acl_utils { + + using namespace dnnl::impl::alg_kind; + using namespace data_type; +@@ -247,7 +247,7 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed, + return status::success; + } + +-} // namespace acl_common_utils ++} // namespace acl_utils + + } // namespace aarch64 + } // namespace cpu +diff --git a/src/cpu/aarch64/acl_utils.hpp b/src/cpu/aarch64/acl_utils.hpp +index 565cde66a9..bb8efc998c 100644 +--- a/src/cpu/aarch64/acl_utils.hpp ++++ b/src/cpu/aarch64/acl_utils.hpp +@@ -21,13 +21,10 @@ + + #include "oneapi/dnnl/dnnl_types.h" + +-#include "common/bfloat16.hpp" +-#include "common/c_types_map.hpp" + #include "common/dnnl_thread.hpp" + #include "common/memory_tracking.hpp" + #include "common/primitive.hpp" + #include "common/utils.hpp" +- + #include "cpu/cpu_engine.hpp" + + #include 
"arm_compute/runtime/NEON/NEFunctions.h" +@@ -38,7 +35,7 @@ namespace impl { + namespace cpu { + namespace aarch64 { + +-namespace acl_common_utils { ++namespace acl_utils { + + arm_compute::DataType get_acl_data_t(const dnnl_data_type_t dt); + arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr); +@@ -68,12 +65,33 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed, + const memory_desc_t *d0, const memory_desc_t *d1, + const memory_desc_t *d2); + +-#define MAYBE_REPORT_ACL_ERROR(msg) \ ++// Logs a custom 'info' line describing an unsupported case ++#define LOG_ACL_UNSUPPORTED(msg) \ ++ do { \ ++ if (get_verbose() >= 2) \ ++ printf("onednn_verbose,cpu,acl,unsupported: %s\n", (msg)); \ ++ } while (0) ++ ++// Returns unimplemented if error code x is NOT OK ++#define ACL_CHECK_VALID(x) \ ++ do { \ ++ arm_compute::Status s = x; \ ++ if (s.error_code() != arm_compute::ErrorCode::OK) { \ ++ LOG_ACL_UNSUPPORTED(s.error_description().c_str()); \ ++ return dnnl::impl::status::unimplemented; \ ++ } \ ++ } while (0) ++ ++// Returns unimplemented on condition x == true ++#define ACL_CHECK_SUPPORT(x, msg) \ + do { \ +- if (get_verbose()) printf("onednn_verbose,cpu,error,acl,%s\n", (msg)); \ ++ if (x) { \ ++ LOG_ACL_UNSUPPORTED(msg); \ ++ return dnnl::impl::status::unimplemented; \ ++ } \ + } while (0) + +-} // namespace acl_common_utils ++} // namespace acl_utils + + } // namespace aarch64 + } // namespace cpu +diff --git a/src/cpu/aarch64/acl_winograd_convolution.hpp b/src/cpu/aarch64/acl_winograd_convolution.hpp +index 29e44eb189..223b3bc9b8 100644 +--- a/src/cpu/aarch64/acl_winograd_convolution.hpp ++++ b/src/cpu/aarch64/acl_winograd_convolution.hpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2020-2021 Arm Ltd. and affiliates ++* Copyright 2020-2022 Arm Ltd. 
and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -107,7 +107,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t { + + set_default_alg_kind(alg_kind::convolution_winograd); + +- acl_common_utils::acl_thread_bind(); ++ acl_utils::acl_thread_bind(); + + return status::success; + } +@@ -130,7 +130,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t { + // sum+eltwise post-ops + if (eltwise_only || sum_with_eltwise) { + const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; +- eltwise_ok = acl_common_utils::acl_act_ok(act_type); ++ eltwise_ok = acl_utils::acl_act_ok(act_type); + } + + return eltwise_ok || (po.len() == 0); +diff --git a/src/cpu/aarch64/matmul/acl_matmul.cpp b/src/cpu/aarch64/matmul/acl_matmul.cpp +index 3945fda6fc..6f9bb9b9ad 100644 +--- a/src/cpu/aarch64/matmul/acl_matmul.cpp ++++ b/src/cpu/aarch64/matmul/acl_matmul.cpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -84,4 +84,4 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { + } // namespace aarch64 + } // namespace cpu + } // namespace impl +-} // namespace dnnl +\ No newline at end of file ++} // namespace dnnl +diff --git a/src/cpu/aarch64/matmul/acl_matmul.hpp b/src/cpu/aarch64/matmul/acl_matmul.hpp +index 6ba17e86dd..e69f4d9592 100644 +--- a/src/cpu/aarch64/matmul/acl_matmul.hpp ++++ b/src/cpu/aarch64/matmul/acl_matmul.hpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. 
and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -87,7 +87,7 @@ struct acl_matmul_t : public primitive_t { + if (conf_status != status::success) return status::unimplemented; + // Number of threads in Compute Library is set by OMP_NUM_THREADS + // dnnl_get_max_threads() == OMP_NUM_THREADS +- acl_common_utils::acl_thread_bind(); ++ acl_utils::acl_thread_bind(); + + return status::success; + } +diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp +index 76599d8bb1..ba266b4303 100644 +--- a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp ++++ b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+@@ -22,14 +22,10 @@ namespace dnnl { + namespace impl { + namespace cpu { + namespace aarch64 { +-namespace matmul { + +-using namespace dnnl::impl::status; +-using namespace dnnl::impl::utils; +-using namespace dnnl::impl::cpu::matmul; +-using namespace prop_kind; ++using namespace alg_kind; ++using namespace cpu::matmul; + using namespace format_tag; +-using namespace dnnl::impl::alg_kind; + + namespace acl_matmul_utils { + +@@ -58,7 +54,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, + wei_md, abcd, abdc, abc, acb, ab, ba); + auto dst_tag = memory_desc_matches_one_of_tag( + dst_md, abcd, abdc, abc, acb, ab, ba); +- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { ++ if (utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { + return status::unimplemented; + } + amp.is_transA = helper.transA() == 'T'; +@@ -85,7 +81,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, + // Fast-math mode + auto math_mode = get_fpmath_mode(); + bool is_fastmath_enabled +- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); ++ = utils::one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); + amp.gemm_info.set_fast_math(is_fastmath_enabled); + + // Fused ReLU activation +@@ -95,29 +91,15 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, + amp.alpha = attr.output_scales_.scales_[0]; + + // Validate ACL transpose +- if (amp.is_transA) { +- auto acl_transA_st = arm_compute::NETranspose::validate( +- &amp.src_acc_info, &amp.src_info); +- if (acl_transA_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_transA_st.error_description().c_str()); +- return status::unimplemented; +- } +- } +- if (amp.is_transB) { +- auto acl_transB_st = arm_compute::NETranspose::validate( +- &amp.wei_acc_info, &amp.wei_info); +- if (acl_transB_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_transB_st.error_description().c_str()); +- return status::unimplemented; +- }
+- } ++ if (amp.is_transA) ++ ACL_CHECK_VALID(arm_compute::NETranspose::validate( ++ &amp.src_acc_info, &amp.src_info)); ++ if (amp.is_transB) ++ ACL_CHECK_VALID(arm_compute::NETranspose::validate( ++ &amp.wei_acc_info, &amp.wei_info)); + // Validate ACL GEMM +- auto acl_st = arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info, +- nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info); +- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { +- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); +- return status::unimplemented; +- } ++ ACL_CHECK_VALID(arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info, ++ nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info)); + + return status::success; + } +@@ -175,7 +157,6 @@ bool acl_act_ok(alg_kind_t eltwise_activation) { + + } // namespace acl_matmul_utils + +-} // namespace matmul + } // namespace aarch64 + } // namespace cpu + } // namespace impl +diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp +index 1411dc4f4b..248dbe5a09 100644 +--- a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp ++++ b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp +@@ -1,5 +1,5 @@ + /******************************************************************************* +-* Copyright 2021 Arm Ltd. and affiliates ++* Copyright 2021-2022 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+@@ -25,7 +25,6 @@ namespace dnnl { + namespace impl { + namespace cpu { + namespace aarch64 { +-namespace matmul { + + struct acl_matmul_obj_t { + arm_compute::NEGEMM gemm; +@@ -61,10 +60,9 @@ arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr); + bool acl_act_ok(alg_kind_t eltwise_activation); + } // namespace acl_matmul_utils + +-} // namespace matmul + } // namespace aarch64 + } // namespace cpu + } // namespace impl + } // namespace dnnl + +-#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP +\ No newline at end of file ++#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP +diff --git a/tests/benchdnn/inputs/ip/test_ip_acl b/tests/benchdnn/inputs/ip/test_ip_acl +new file mode 100644 +index 0000000000..a8873c30a8 +--- /dev/null ++++ b/tests/benchdnn/inputs/ip/test_ip_acl +@@ -0,0 +1,26 @@ ++--reset ++ ++# do not test other implementations ++--skip-impl='ref,jit' ++ ++# test format::any ++--batch=shapes_ci ++ ++# only inference, with and without bias ++--dir=FWD_I,FWD_B ++# test all shapes ++--batch=set_all ++ ++# with and without batches ++--mb=0,2 ++ ++# test non-spatial layout combinations ++--stag=ab,ba ++--wtag=ab,ba ++--batch=shapes_0d ++ ++# test spatial layout combinations ++--stag=abx,axb ++--wtag=abx,axb ++# 2d-spatial dimensions ++--batch=shapes_googlenet_v1 diff --git a/onednn.changes b/onednn.changes index 6b834a3..73885c3 100644 --- a/onednn.changes +++ b/onednn.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Tue Sep 20 08:26:43 UTC 2022 - Guillaume GARDET + +- Add patch to fix build with latest Arm Compute Library: + * 1428.patch + * fa93750.patch (dep for 1428.patch) + ------------------------------------------------------------------- Tue Sep 13 05:22:52 UTC 2022 - Paolo Stivanin diff --git a/onednn.spec b/onednn.spec index ae17253..0d5849a 100644 --- a/onednn.spec +++ b/onednn.spec @@ -37,6 +37,10 @@ Summary: Intel Math Kernel Library for Deep Neural Networks License: Apache-2.0 URL: 
https://01.org/onednn Source0: https://github.com/oneapi-src/oneDNN/archive/v%{version}/oneDNN-%{version}.tar.gz +# PATCH-FIX-UPSTREAM - deps for Patch2 +Patch1: fa93750.patch +# PATCH-FIX-UPSTREAM - Fix build with latest ACL - https://github.com/oneapi-src/oneDNN/pull/1428 +Patch2: 1428.patch BuildRequires: chrpath BuildRequires: cmake BuildRequires: doxygen