forked from pool/onednn
Guillaume GARDET
f2a81390aa
- Add patch to fix build with latest Arm Compute Library:
  * 1428.patch
  * fa93750.patch (dep for 1428.patch)

OBS-URL: https://build.opensuse.org/request/show/1005196
OBS-URL: https://build.opensuse.org/package/show/science:machinelearning/onednn?expand=0&rev=18
1223 lines
48 KiB
Diff
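
The patch below does two mechanical things across the aarch64 ACL sources: it renames the acl_common_utils namespace to acl_utils, and it collapses the repeated validate-and-report blocks around Compute Library validate() calls into two macros, ACL_CHECK_VALID and ACL_CHECK_SUPPORT, defined in src/cpu/aarch64/acl_utils.hpp. It also moves the inner product configuration into acl_inner_product.hpp with stricter layout validation, and adds a benchdnn input file for the inner product driver. Below is a minimal, self-contained C++ sketch of the macro pattern; the stub Status type and the int return codes are simplifications standing in for arm_compute::Status and dnnl::impl::status::unimplemented:

    #include <cstdio>

    // Stub standing in for arm_compute::Status (simplified for this sketch).
    enum class ErrorCode { OK, RUNTIME_ERROR };
    struct Status {
        ErrorCode code = ErrorCode::OK;
        const char *desc = "";
    };

    // Log an unsupported case (the real macro gates this on get_verbose() >= 2).
    #define LOG_ACL_UNSUPPORTED(msg) \
        printf("onednn_verbose,cpu,acl,unsupported: %s\n", (msg))

    // Bail out when a Compute Library validate() call reports an error.
    #define ACL_CHECK_VALID(x) \
        do { \
            Status s = (x); \
            if (s.code != ErrorCode::OK) { \
                LOG_ACL_UNSUPPORTED(s.desc); \
                return 1; /* stands in for status::unimplemented */ \
            } \
        } while (0)

    // Bail out when an unsupported condition holds.
    #define ACL_CHECK_SUPPORT(cond, msg) \
        do { \
            if (cond) { \
                LOG_ACL_UNSUPPORTED(msg); \
                return 1; \
            } \
        } while (0)

    // Toy validate() with the same shape as e.g. NEGEMM::validate(...).
    static Status validate_stub(bool ok) {
        return ok ? Status {} : Status {ErrorCode::RUNTIME_ERROR, "unsupported case"};
    }

    static int init_conf(bool supported) {
        ACL_CHECK_SUPPORT(!supported, "ACL supports only 2d or 4d cases");
        ACL_CHECK_VALID(validate_stub(supported)); // one line replaces five
        return 0;
    }

    int main() { return init_conf(true); }

The gain shows at the call sites: each validate() call drops from a five-line error_code()/report/return block to a single macro line, as the hunks below illustrate.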
From fa93750bfb821fe05e3190b36f52b5bd88a57110 Mon Sep 17 00:00:00 2001
From: Diana Bite <diana.bite@arm.com>
Date: Thu, 24 Feb 2022 14:25:49 +0000
Subject: [PATCH] cpu: aarch64: acl: fix inner_prod test failure and improve
 validation

---
src/cpu/aarch64/acl_binary.hpp | 8 +-
src/cpu/aarch64/acl_convolution_utils.cpp | 65 ++-----
src/cpu/aarch64/acl_eltwise.hpp | 4 +-
src/cpu/aarch64/acl_eltwise_utils.cpp | 17 +-
src/cpu/aarch64/acl_gemm_convolution.hpp | 6 +-
.../aarch64/acl_indirect_gemm_convolution.hpp | 6 +-
src/cpu/aarch64/acl_inner_product.cpp | 13 +-
src/cpu/aarch64/acl_inner_product.hpp | 179 +++++++++++++++--
src/cpu/aarch64/acl_inner_product_utils.cpp | 181 ------------------
src/cpu/aarch64/acl_inner_product_utils.hpp | 62 ------
src/cpu/aarch64/acl_softmax.hpp | 17 +-
src/cpu/aarch64/acl_utils.cpp | 4 +-
src/cpu/aarch64/acl_utils.hpp | 32 +++-
src/cpu/aarch64/acl_winograd_convolution.hpp | 6 +-
src/cpu/aarch64/matmul/acl_matmul.cpp | 4 +-
src/cpu/aarch64/matmul/acl_matmul.hpp | 4 +-
src/cpu/aarch64/matmul/acl_matmul_utils.cpp | 45 ++---
src/cpu/aarch64/matmul/acl_matmul_utils.hpp | 6 +-
tests/benchdnn/inputs/ip/test_ip_acl | 26 +++
19 files changed, 281 insertions(+), 404 deletions(-)
delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.cpp
delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.hpp
create mode 100644 tests/benchdnn/inputs/ip/test_ip_acl

diff --git a/src/cpu/aarch64/acl_binary.hpp b/src/cpu/aarch64/acl_binary.hpp
index 77adb45bef..122b094587 100644
--- a/src/cpu/aarch64/acl_binary.hpp
+++ b/src/cpu/aarch64/acl_binary.hpp
@@ -125,7 +125,7 @@ struct acl_binary_t : public primitive_t {

status_t init(engine_t *engine) {

- using namespace acl_common_utils;
+ using namespace acl_utils;

// Only support f32 and s32 for now
data_type_t ddt = dst_md(0)->data_type;
@@ -179,11 +179,7 @@ struct acl_binary_t : public primitive_t {
}

// Call operator specific validate function to check support
- arm_compute::Status acl_st = validate(asp_);
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str());
- return status::unimplemented;
- }
+ ACL_CHECK_VALID(validate(asp_));

// Initialize the ACL threads
acl_thread_bind();
diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp
index ca91de49e3..e072dc5490 100644
--- a/src/cpu/aarch64/acl_convolution_utils.cpp
+++ b/src/cpu/aarch64/acl_convolution_utils.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2020-2021 Arm Ltd. and affiliates
+* Copyright 2020-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -156,10 +156,10 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md,
const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC
: arm_compute::DataLayout::NCHW;

- auto acl_src_data_t = acl_common_utils::get_acl_data_t(src_d.data_type());
- auto acl_wei_data_t = acl_common_utils::get_acl_data_t(wei_d.data_type());
- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(dst_d.data_type());
- auto acl_bia_data_t = acl_common_utils::get_acl_data_t(bia_d.data_type());
+ auto acl_src_data_t = acl_utils::get_acl_data_t(src_d.data_type());
+ auto acl_wei_data_t = acl_utils::get_acl_data_t(wei_d.data_type());
+ auto acl_dst_data_t = acl_utils::get_acl_data_t(dst_d.data_type());
+ auto acl_bia_data_t = acl_utils::get_acl_data_t(bia_d.data_type());

if (acl_bia_data_t == arm_compute::DataType::UNKNOWN)
acl_bia_data_t = arm_compute::DataType::F32;
@@ -212,33 +212,14 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md,
// is_eltwise(true) here stands for eltwise.scale == 1.f check
acp.sum_with_eltwise = (post_ops.len() == 2) && post_ops.entry_[0].is_sum()
&& post_ops.entry_[1].is_eltwise(true);
- acp.act_info = acl_common_utils::get_acl_act(attr);
+ acp.act_info = acl_utils::get_acl_act(attr);

if (acp.sum_with_eltwise) {
- // clang-format off
- // Validate activation layer manually to check for return status
- auto acl_al_st = arm_compute::NEActivationLayer::validate(
- &acp.dst_info,
- &acp.dst_info,
- acp.act_info);
- // clang-format on
- if (acl_al_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_al_st.error_description().c_str());
- return status::unimplemented;
- }
-
- // clang-format off
- // Validate arithmetic addition manually to check for return status
- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate(
- &acp.dst_info,
- &acp.dst_info,
- &acp.dst_info,
- arm_compute::ConvertPolicy::SATURATE);
- // clang-format on
- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str());
- return status::unimplemented;
- }
+ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate( // eltwise
+ &acp.dst_info, &acp.dst_info, acp.act_info));
+ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate( // sum
+ &acp.dst_info, &acp.dst_info, &acp.dst_info,
+ arm_compute::ConvertPolicy::SATURATE));
}

return status::success;
@@ -254,7 +235,7 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md,

// clang-format off
// Validate convolution manually to check for return status
- auto acl_st = arm_compute::NEGEMMConvolutionLayer::validate(
+ ACL_CHECK_VALID(arm_compute::NEGEMMConvolutionLayer::validate(
&acp.src_info,
&acp.wei_info,
acp.with_bias ? &acp.bia_info : nullptr,
@@ -263,12 +244,8 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md,
acp.weights_info,
acp.dilation_info,
acp.act_info,
- acp.fast_math);
+ acp.fast_math));
// clang-format on
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str());
- return status::unimplemented;
- }

return status::success;
}
@@ -289,7 +266,7 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md,

// clang-format off
// NOTE: indirect convolution method supports only nhwc layout.
- auto acl_st = arm_compute::NEGEMMConv2d::validate(
+ ACL_CHECK_VALID(arm_compute::NEGEMMConv2d::validate(
&acp.src_info,
&acp.wei_info,
acp.with_bias ? &acp.bia_info : nullptr,
@@ -298,12 +275,8 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md,
acp.dilation_info,
acp.act_info,
acp.fast_math,
- 1));
+ 1)));
// clang-format on
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str());
- return status::unimplemented;
- }

return status::success;
}
@@ -336,19 +309,15 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md,

// clang-format off
// Validate convolution manually to check for return status
- auto acl_st = arm_compute::NEWinogradConvolutionLayer::validate(
+ ACL_CHECK_VALID(arm_compute::NEWinogradConvolutionLayer::validate(
&acp.src_info,
&acp.wei_info,
acp.with_bias ? &acp.bia_info : nullptr,
&acp.dst_info,
acp.padstride_info,
acp.act_info,
- true); // enable_fast_math flag in ACL Winograd
+ true)); // enable_fast_math flag in ACL Winograd
// clang-format on
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str());
- return status::unimplemented;
- }

return status::success;
}
diff --git a/src/cpu/aarch64/acl_eltwise.hpp b/src/cpu/aarch64/acl_eltwise.hpp
index a55b89272c..381368aabb 100644
--- a/src/cpu/aarch64/acl_eltwise.hpp
+++ b/src/cpu/aarch64/acl_eltwise.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -78,7 +78,7 @@ struct acl_eltwise_fwd_t : public primitive_t {
aep_, data_md_, *desc(), *attr());
if (conf_status != status::success) return status::unimplemented;

- acl_common_utils::acl_thread_bind();
+ acl_utils::acl_thread_bind();

return status::success;
}
diff --git a/src/cpu/aarch64/acl_eltwise_utils.cpp b/src/cpu/aarch64/acl_eltwise_utils.cpp
index 35e809e042..880b6aeaae 100644
--- a/src/cpu/aarch64/acl_eltwise_utils.cpp
+++ b/src/cpu/aarch64/acl_eltwise_utils.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -46,7 +46,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md,

const alg_kind_t eltwise_alg = ed.alg_kind;

- bool activation_supported = acl_common_utils::acl_act_ok(eltwise_alg);
+ bool activation_supported = acl_utils::acl_act_ok(eltwise_alg);
if (!activation_supported) { return status::unimplemented; }

// batch size
@@ -69,8 +69,8 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md,
const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC
: arm_compute::DataLayout::NCHW;

- auto acl_src_data_t = acl_common_utils::get_acl_data_t(data_d.data_type());
- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(data_d.data_type());
+ auto acl_src_data_t = acl_utils::get_acl_data_t(data_d.data_type());
+ auto acl_dst_data_t = acl_utils::get_acl_data_t(data_d.data_type());

// clang-format off
aep.src_info = arm_compute::TensorInfo(
@@ -93,7 +93,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md,
aep.dst_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0));
}

- aep.act_info = acl_common_utils::get_acl_act(ed);
+ aep.act_info = acl_utils::get_acl_act(ed);

return status::success;
}
@@ -105,14 +105,11 @@ status_t init_conf_eltwise(acl_eltwise_conf_t &aep, memory_desc_t &data_md,
CHECK(acl_eltwise_check(aep, data_md, ed, attr));

// clang-format off
- auto acl_st = arm_compute::NEActivationLayer::validate(
+ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate(
&aep.src_info,
&aep.dst_info,
- aep.act_info);
+ aep.act_info));
// clang-format on
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- return status::unimplemented;
- }

return status::success;
}
diff --git a/src/cpu/aarch64/acl_gemm_convolution.hpp b/src/cpu/aarch64/acl_gemm_convolution.hpp
index 3e7542b6bf..496f501211 100644
--- a/src/cpu/aarch64/acl_gemm_convolution.hpp
+++ b/src/cpu/aarch64/acl_gemm_convolution.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2020-2021 Arm Ltd. and affiliates
+* Copyright 2020-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -108,7 +108,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t {
src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr());
if (conf_status != status::success) return status::unimplemented;

- acl_common_utils::acl_thread_bind();
+ acl_utils::acl_thread_bind();

return status::success;
}
@@ -146,7 +146,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t {
// sum+eltwise post-ops
if (eltwise_only || sum_with_eltwise) {
const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg;
- eltwise_ok = acl_common_utils::acl_act_ok(act_type);
+ eltwise_ok = acl_utils::acl_act_ok(act_type);
}

return eltwise_ok || (po.len() == 0);
diff --git a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp
index 0a0021aeee..18e757a2c9 100644
--- a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp
+++ b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -109,7 +109,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t {
*attr());
if (conf_status != status::success) return status::unimplemented;

- acl_common_utils::acl_thread_bind();
+ acl_utils::acl_thread_bind();

return status::success;
}
@@ -134,7 +134,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t {
// sum+eltwise post-ops
if (eltwise_only || sum_with_eltwise) {
const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg;
- eltwise_ok = acl_common_utils::acl_act_ok(act_type);
+ eltwise_ok = acl_utils::acl_act_ok(act_type);
}

return eltwise_ok || (po.len() == 0);
diff --git a/src/cpu/aarch64/acl_inner_product.cpp b/src/cpu/aarch64/acl_inner_product.cpp
index 7a316135f8..f355a657c7 100644
--- a/src/cpu/aarch64/acl_inner_product.cpp
+++ b/src/cpu/aarch64/acl_inner_product.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,23 +21,18 @@ namespace impl {
namespace cpu {
namespace aarch64 {

-using namespace dnnl::impl::status;
-using namespace dnnl::impl::memory_tracking::names;
-using namespace dnnl::impl::utils;
-
status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const {
// Lock here is needed because resource_mapper does not support
// concurrent multithreaded access.
std::lock_guard<std::mutex> _lock {this->mtx};

- status_t status = status::success;
auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC);
auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS);
auto bia_base = CTX_IN_MEM(const data_t *, DNNL_ARG_BIAS);
auto dst_base = CTX_OUT_MEM(data_t *, DNNL_ARG_DST);

- bool with_bias = pd()->aip_.with_bias;
- bool with_sum = pd()->aip_.with_sum;
+ bool with_bias = pd()->aip.with_bias;
+ bool with_sum = pd()->aip.with_sum;

// Retrieve primitive resource and configured Compute Library objects
auto *acl_resource
@@ -64,7 +59,7 @@ status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const {
acl_obj.dst_tensor.allocator()->free();
if (with_bias) { acl_obj.bia_tensor.allocator()->free(); }

- return status;
+ return status::success;
}

} // namespace aarch64
diff --git a/src/cpu/aarch64/acl_inner_product.hpp b/src/cpu/aarch64/acl_inner_product.hpp
index dd742ea0bc..e5a9bdcc8a 100644
--- a/src/cpu/aarch64/acl_inner_product.hpp
+++ b/src/cpu/aarch64/acl_inner_product.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,15 +17,34 @@
#ifndef CPU_AARCH64_ACL_INNER_PRODUCT_HPP
#define CPU_AARCH64_ACL_INNER_PRODUCT_HPP

+#include "cpu/aarch64/acl_utils.hpp"
#include "cpu/cpu_inner_product_pd.hpp"

-#include "cpu/aarch64/acl_inner_product_utils.hpp"
-
namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {

+struct acl_ip_obj_t {
+ arm_compute::NEFullyConnectedLayer fc;
+ arm_compute::NEArithmeticAddition add;
+ arm_compute::Tensor src_tensor;
+ arm_compute::Tensor wei_tensor;
+ arm_compute::Tensor bia_tensor;
+ arm_compute::Tensor dst_tensor;
+ arm_compute::Tensor dst_acc_tensor;
+};
+
+struct acl_ip_conf_t {
+ bool with_bias;
+ bool with_sum;
+ arm_compute::TensorInfo src_info;
+ arm_compute::TensorInfo wei_info;
+ arm_compute::TensorInfo bia_info;
+ arm_compute::TensorInfo dst_info;
+ arm_compute::FullyConnectedLayerInfo fc_info;
+};
+
struct acl_ip_resource_t : public resource_t {
acl_ip_resource_t() : acl_ip_obj_(utils::make_unique<acl_ip_obj_t>()) {}

@@ -71,33 +90,26 @@ struct acl_inner_product_fwd_t : public primitive_t {
struct pd_t : public cpu_inner_product_fwd_pd_t {
using cpu_inner_product_fwd_pd_t::cpu_inner_product_fwd_pd_t;

- DECLARE_COMMON_PD_T("inner_product:acl", acl_inner_product_fwd_t);
+ DECLARE_COMMON_PD_T("acl", acl_inner_product_fwd_t);

status_t init(engine_t *engine) {
- using namespace utils;
-
const bool ok = is_fwd() && !has_zero_dim_memory()
&& expect_data_types(data_type::f32, data_type::f32,
data_type::f32, data_type::f32, data_type::f32)
&& attr()->has_default_values(
primitive_attr_t::skip_mask_t::post_ops,
data_type::f32)
- && (set_default_params() == status::success)
- && post_ops_ok();
+ && set_default_params() == status::success && post_ops_ok();

if (!ok) return status::unimplemented;

- auto conf_status = acl_inner_product_utils::init_conf_ip(aip_,
- src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr());
- // conf_status here can be either status::success or status::unimplemented
- if (conf_status != status::success) return conf_status;
-
- acl_common_utils::acl_thread_bind();
+ CHECK(init_conf_ip(aip, src_md_, weights_md_, dst_md_, bias_md_,
+ *desc(), *attr()));

return status::success;
}

- acl_ip_conf_t aip_;
+ acl_ip_conf_t aip;

protected:
bool post_ops_ok() const {
@@ -111,16 +123,149 @@ struct acl_inner_product_fwd_t : public primitive_t {
// Compute Library supports here only one eltwise post-op or sum
if (po.len() == 1 && is_eltwise(0)) {
const auto act_type = po.entry_[0].eltwise.alg;
- eltwise_ok = acl_common_utils::acl_act_ok(act_type);
+ eltwise_ok = acl_utils::acl_act_ok(act_type);
}

return eltwise_ok || (po.len() == 1 && is_sum(0))
|| (po.len() == 0);
}
+
+ status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md,
+ memory_desc_t &wei_md, memory_desc_t &dst_md,
+ memory_desc_t &bias_md, const inner_product_desc_t &ipd,
+ const primitive_attr_t &attr) {
+
+ ACL_CHECK_SUPPORT(src_md.ndims != wei_md.ndims,
+ "source and weights dimensions must match");
+
+ const int ndims = src_md.ndims;
+
+ const bool is_2d = (ndims == 2);
+ const bool is_4d = (ndims == 4);
+
+ ACL_CHECK_SUPPORT(
+ !(is_2d || is_4d), "ACL supports only 2d or 4d cases");
+
+ // batch size
+ const int n = src_md.dims[0];
+
+ // input and output channels
+ const int ic = src_md.dims[1];
+ const int oc = dst_md.dims[1];
+
+ // source spatial dimensions
+ const int ih = is_4d ? src_md.dims[ndims - 2] : 0;
+ const int iw = is_4d ? src_md.dims[ndims - 1] : 0;
+
+ // weights spatial dimensions
+ const int kh = is_4d ? wei_md.dims[ndims - 2] : 0;
+ const int kw = is_4d ? wei_md.dims[ndims - 1] : 0;
+
+ // Only NCHW or NHWC derivatives supported by ACL kernels
+ using namespace format_tag;
+ auto src_tag = memory_desc_matches_one_of_tag(
+ src_md, nhwc, nchw, nc, cn);
+ auto wei_tag = memory_desc_matches_one_of_tag(
+ wei_md, ohwi, oihw, oi, io);
+ auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn);
+
+ ACL_CHECK_SUPPORT(
+ utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag),
+ "unsupported memory layout");
+
+ ACL_CHECK_SUPPORT(is_2d && src_tag != dst_tag,
+ "for src and dst layouts must match");
+
+ arm_compute::TensorShape src_shape, wei_shape;
+ if (is_2d) {
+ src_shape = (src_tag == nc) ? arm_compute::TensorShape(ic, n)
+ : arm_compute::TensorShape(n, ic);
+
+ wei_shape = (wei_tag == io) ? arm_compute::TensorShape(oc, ic)
+ : arm_compute::TensorShape(ic, oc);
+ }
+ if (is_4d) {
+ src_shape = (src_tag == nhwc)
+ ? arm_compute::TensorShape(ic, iw, ih, n)
+ : arm_compute::TensorShape(iw, ih, ic, n);
+
+ // ACL requires the weights to be in 2D flattened shape
+ const int flattened_ic = is_4d ? ic * kh * kw : ic;
+ wei_shape = arm_compute::TensorShape(flattened_ic, oc);
+ }
+
+ arm_compute::DataLayout src_layout = (src_tag == nhwc)
+ ? arm_compute::DataLayout::NHWC
+ : arm_compute::DataLayout::NCHW;
+
+ arm_compute::DataLayout wei_layout = (wei_tag == ohwi)
+ ? arm_compute::DataLayout::NHWC
+ : arm_compute::DataLayout::NCHW;
+
+ aip.src_info = arm_compute::TensorInfo(
+ src_shape, 1, arm_compute::DataType::F32, src_layout);
+
+ aip.wei_info = arm_compute::TensorInfo(
+ wei_shape, 1, arm_compute::DataType::F32, wei_layout);
+
+ aip.dst_info
+ = arm_compute::TensorInfo(arm_compute::TensorShape(oc, n),
+ 1, arm_compute::DataType::F32);
+
+ aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef;
+ aip.bia_info = arm_compute::TensorInfo(aip.with_bias
+ ? arm_compute::TensorShape(oc)
+ : arm_compute::TensorShape(),
+ 1, arm_compute::DataType::F32);
+
+ aip.fc_info.weights_trained_layout = wei_layout;
+ if (is_2d && wei_tag != src_tag) {
+ // weights are already transposed
+ aip.fc_info.transpose_weights = false;
+ }
+
+ // Either activation or sum is supported as post-op at the moment
+ aip.fc_info.activation_info = acl_utils::get_acl_act(attr);
+ const auto &post_ops = attr.post_ops_;
+ aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum();
+
+ // Fast math mode
+ auto math_mode = get_fpmath_mode();
+ bool is_fastmath_enabled = utils::one_of(
+ math_mode, fpmath_mode::bf16, fpmath_mode::any);
+ aip.fc_info.enable_fast_math = is_fastmath_enabled;
+
+ // clang-format off
+ // Validate fully connected layer manually to check for return status
+ ACL_CHECK_VALID(arm_compute::NEFullyConnectedLayer::validate(
+ &aip.src_info,
+ &aip.wei_info,
+ aip.with_bias ? &aip.bia_info : nullptr,
+ &aip.dst_info,
+ aip.fc_info));
+
+ if (aip.with_sum) {
+ // Validate arithmetic addition manually to check for return status
+ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate(
+ &aip.dst_info,
+ &aip.dst_info,
+ &aip.dst_info,
+ arm_compute::ConvertPolicy::SATURATE));
+ // clang-format on
+ }
+
+ return status::success;
+ }
}; // pd_t

acl_inner_product_fwd_t(const pd_t *apd) : primitive_t(apd) {}

+ status_t init(engine_t *engine) override {
+ acl_utils::acl_thread_bind();
+
+ return status::success;
+ }
+
status_t create_resource(
engine_t *engine, resource_mapper_t &mapper) const override {
if (mapper.has_resource(this)) return status::success;
@@ -129,7 +274,7 @@ struct acl_inner_product_fwd_t : public primitive_t {
if (!r) return status::out_of_memory;

// Configure the resource based on information from primitive descriptor
- auto st = r->configure(pd()->aip_);
+ auto st = r->configure(pd()->aip);
if (st == status::success) { mapper.add(this, std::move(r)); }

return st;
diff --git a/src/cpu/aarch64/acl_inner_product_utils.cpp b/src/cpu/aarch64/acl_inner_product_utils.cpp
deleted file mode 100644
index c8fab86f22..0000000000
--- a/src/cpu/aarch64/acl_inner_product_utils.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "cpu/aarch64/acl_inner_product_utils.hpp"
-
-namespace dnnl {
-namespace impl {
-namespace cpu {
-namespace aarch64 {
-
-namespace acl_inner_product_utils {
-
-using namespace format_tag;
-using namespace utils;
-using namespace status;
-
-status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md,
- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md,
- const inner_product_desc_t &ipd, const primitive_attr_t &attr) {
- const memory_desc_wrapper src_d(&src_md);
- const memory_desc_wrapper wei_d(&wei_md);
- const memory_desc_wrapper dst_d(&dst_md);
- const memory_desc_wrapper bia_d(&bias_md);
-
- // Compute Library currently supports forward propagation only
- const prop_kind_t prop_kind = ipd.prop_kind;
- const bool is_fwd = (prop_kind == dnnl_forward_training)
- || (prop_kind == dnnl_forward_inference);
- if (!is_fwd) return status::unimplemented;
-
- const int with_groups = wei_d.ndims() == src_d.ndims() + 1;
- const int ndims = src_d.ndims();
-
- // There are two sub-cases: src & wei tensors are either 2- or 4-dimensional
- const bool is_2d = (ndims == 2) && (wei_d.ndims() == 2);
- const bool is_4d = (ndims == 4) && (wei_d.ndims() == 4);
-
- // Compute Library unsupported shape scenarios
- // FP32 only is supported at the moment
- if (one_of(true, !(is_4d || is_2d), with_groups)) { return unimplemented; }
-
- // batch size
- const int mb = src_d.dims()[0];
-
- // src/input channels, height, width
- const int ic = src_d.dims()[1];
- const int ih = is_4d ? src_d.dims()[ndims - 2] : 0;
- const int iw = is_4d ? src_d.dims()[ndims - 1] : 0;
-
- // dst/output channels
- const int oc = dst_d.dims()[1];
-
- // weights height, width
- const int kh = is_4d ? wei_d.dims()[with_groups + ndims - 2] : 0;
- const int kw = is_4d ? wei_d.dims()[with_groups + ndims - 1] : 0;
-
- aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef;
-
- // Data layout is already defined thus should only be checked
- auto src_tag = memory_desc_matches_one_of_tag(src_md, nhwc, nchw, nc, cn);
- auto wei_tag = memory_desc_matches_one_of_tag(wei_md, ohwi, oihw, oi, io);
- auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn);
- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) {
- return status::unimplemented;
- }
-
- arm_compute::TensorShape src_shape {(src_tag == nc)
- ? arm_compute::TensorShape(ic, mb)
- : arm_compute::TensorShape(mb, ic)};
- if (is_4d) {
- src_shape = (src_tag == nhwc)
- ? arm_compute::TensorShape(ic, iw, ih, mb)
- : arm_compute::TensorShape(iw, ih, ic, mb);
- }
-
- // Compute Library requires the weights to be 2-dimensional for FC layer
- arm_compute::TensorShape wei_shape {
- arm_compute::TensorShape(is_4d ? ic * kh * kw : ic, oc)};
- if (is_2d && wei_tag == io) {
- wei_shape = arm_compute::TensorShape(oc, ic);
- }
-
- arm_compute::DataLayout wei_layout {(wei_tag == ohwi || wei_tag == oi)
- ? arm_compute::DataLayout::NHWC
- : arm_compute::DataLayout::NCHW};
-
- // clang-format off
- aip.src_info = arm_compute::TensorInfo(
- src_shape,
- 1,
- arm_compute::DataType::F32,
- (src_tag == nhwc || src_tag == nc) ?
- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW);
-
- aip.wei_info = arm_compute::TensorInfo(
- wei_shape,
- 1,
- arm_compute::DataType::F32,
- wei_layout);
-
- aip.dst_info = arm_compute::TensorInfo(
- (dst_tag == nhwc || dst_tag == nc) ?
- arm_compute::TensorShape(oc, mb) : arm_compute::TensorShape(mb, oc),
- 1,
- arm_compute::DataType::F32,
- (dst_tag == nhwc || dst_tag == nc) ?
- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW);
-
- aip.bia_info = arm_compute::TensorInfo(
- aip.with_bias ?
- arm_compute::TensorShape(oc) : arm_compute::TensorShape(),
- 1,
- arm_compute::DataType::F32);
- // clang-format on
-
- aip.fc_info.weights_trained_layout = wei_layout;
- if (is_2d && wei_tag != src_tag) { aip.fc_info.transpose_weights = false; }
-
- // Either activation or sum is supported as post-op at the moment
- aip.fc_info.activation_info = acl_common_utils::get_acl_act(attr);
- const auto &post_ops = attr.post_ops_;
- aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum();
-
- // Fast math mode
- auto math_mode = get_fpmath_mode();
- bool is_fastmath_enabled
- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any);
- aip.fc_info.enable_fast_math = is_fastmath_enabled;
-
- // clang-format off
- // Validate fully connected layer manually to check for return status
- auto acl_st = arm_compute::NEFullyConnectedLayer::validate(
- &aip.src_info,
- &aip.wei_info,
- aip.with_bias ? &aip.bia_info : nullptr,
- &aip.dst_info,
- aip.fc_info);
- // clang-format on
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str());
- return status::unimplemented;
- }
-
- if (aip.with_sum) {
- // clang-format off
- // Validate arithmetic addition manually to check for return status
- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate(
- &aip.dst_info,
- &aip.dst_info,
- &aip.dst_info,
- arm_compute::ConvertPolicy::SATURATE);
-
- // clang-format on
- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str());
- return status::unimplemented;
- }
- }
-
- return status::success;
-}
-
-} // namespace acl_inner_product_utils
-
-} // namespace aarch64
-} // namespace cpu
-} // namespace impl
-} // namespace dnnl
diff --git a/src/cpu/aarch64/acl_inner_product_utils.hpp b/src/cpu/aarch64/acl_inner_product_utils.hpp
deleted file mode 100644
index 022d0e3349..0000000000
--- a/src/cpu/aarch64/acl_inner_product_utils.hpp
+++ /dev/null
@@ -1,62 +0,0 @@
-/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#ifndef CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP
-#define CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP
-
-#include "cpu/cpu_inner_product_pd.hpp"
-
-#include "cpu/aarch64/acl_utils.hpp"
-
-namespace dnnl {
-namespace impl {
-namespace cpu {
-namespace aarch64 {
-
-struct acl_ip_obj_t {
- arm_compute::NEFullyConnectedLayer fc;
- arm_compute::NEArithmeticAddition add;
- arm_compute::Tensor src_tensor;
- arm_compute::Tensor wei_tensor;
- arm_compute::Tensor bia_tensor;
- arm_compute::Tensor dst_tensor;
- arm_compute::Tensor dst_acc_tensor;
-};
-
-struct acl_ip_conf_t {
- bool with_bias;
- bool with_sum;
- arm_compute::TensorInfo src_info;
- arm_compute::TensorInfo wei_info;
- arm_compute::TensorInfo bia_info;
- arm_compute::TensorInfo dst_info;
- arm_compute::FullyConnectedLayerInfo fc_info;
-};
-
-namespace acl_inner_product_utils {
-
-status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md,
- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md,
- const inner_product_desc_t &ipd, const primitive_attr_t &attr);
-
-} // namespace acl_inner_product_utils
-
-} // namespace aarch64
-} // namespace cpu
-} // namespace impl
-} // namespace dnnl
-
-#endif // CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP
diff --git a/src/cpu/aarch64/acl_softmax.hpp b/src/cpu/aarch64/acl_softmax.hpp
index a4bfd0c3bd..4ee7139a93 100644
--- a/src/cpu/aarch64/acl_softmax.hpp
+++ b/src/cpu/aarch64/acl_softmax.hpp
@@ -129,7 +129,7 @@ struct acl_softmax_fwd_t : public primitive_t {
arm_compute::DataLayout acl_layout = arm_compute::DataLayout::NHWC;

const arm_compute::DataType acl_data_t
- = acl_common_utils::get_acl_data_t(data_type);
+ = acl_utils::get_acl_data_t(data_type);

const int threads = dnnl_get_max_threads();
if (inner_size_ == 1) {
@@ -189,20 +189,15 @@ struct acl_softmax_fwd_t : public primitive_t {
}

// Validate manually to check for return status
- arm_compute::Status acl_st;
if (asp_.is_logsoftmax) {
- acl_st = arm_compute::NELogSoftmaxLayer::validate(
- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis);
+ ACL_CHECK_VALID(arm_compute::NELogSoftmaxLayer::validate(
+ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis));
} else {
- acl_st = arm_compute::NESoftmaxLayer::validate(
- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis);
- }
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str());
- return status::unimplemented;
+ ACL_CHECK_VALID(arm_compute::NESoftmaxLayer::validate(
+ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis));
}

- acl_common_utils::acl_thread_bind();
+ acl_utils::acl_thread_bind();

return status::success;
}
diff --git a/src/cpu/aarch64/acl_utils.cpp b/src/cpu/aarch64/acl_utils.cpp
index a69f14b6f9..098217f50e 100644
--- a/src/cpu/aarch64/acl_utils.cpp
+++ b/src/cpu/aarch64/acl_utils.cpp
@@ -21,7 +21,7 @@ namespace impl {
namespace cpu {
namespace aarch64 {

-namespace acl_common_utils {
+namespace acl_utils {

using namespace dnnl::impl::alg_kind;
using namespace data_type;
@@ -247,7 +247,7 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed,
return status::success;
}

-} // namespace acl_common_utils
+} // namespace acl_utils

} // namespace aarch64
} // namespace cpu
diff --git a/src/cpu/aarch64/acl_utils.hpp b/src/cpu/aarch64/acl_utils.hpp
index 565cde66a9..bb8efc998c 100644
--- a/src/cpu/aarch64/acl_utils.hpp
+++ b/src/cpu/aarch64/acl_utils.hpp
@@ -21,13 +21,10 @@

#include "oneapi/dnnl/dnnl_types.h"

-#include "common/bfloat16.hpp"
-#include "common/c_types_map.hpp"
#include "common/dnnl_thread.hpp"
#include "common/memory_tracking.hpp"
#include "common/primitive.hpp"
#include "common/utils.hpp"
-
#include "cpu/cpu_engine.hpp"

#include "arm_compute/runtime/NEON/NEFunctions.h"
@@ -38,7 +35,7 @@ namespace impl {
namespace cpu {
namespace aarch64 {

-namespace acl_common_utils {
+namespace acl_utils {

arm_compute::DataType get_acl_data_t(const dnnl_data_type_t dt);
arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr);
@@ -68,12 +65,33 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed,
const memory_desc_t *d0, const memory_desc_t *d1,
const memory_desc_t *d2);

-#define MAYBE_REPORT_ACL_ERROR(msg) \
+// Logs a custom 'info' line describing an unsupported case
+#define LOG_ACL_UNSUPPORTED(msg) \
+ do { \
+ if (get_verbose() >= 2) \
+ printf("onednn_verbose,cpu,acl,unsupported: %s\n", (msg)); \
+ } while (0)
+
+// Returns unimplemented if error code x is NOT OK
+#define ACL_CHECK_VALID(x) \
+ do { \
+ arm_compute::Status s = x; \
+ if (s.error_code() != arm_compute::ErrorCode::OK) { \
+ LOG_ACL_UNSUPPORTED(s.error_description().c_str()); \
+ return dnnl::impl::status::unimplemented; \
+ } \
+ } while (0)
+
+// Returns unimplemented on condition x == true
+#define ACL_CHECK_SUPPORT(x, msg) \
do { \
- if (get_verbose()) printf("onednn_verbose,cpu,error,acl,%s\n", (msg)); \
+ if (x) { \
+ LOG_ACL_UNSUPPORTED(msg); \
+ return dnnl::impl::status::unimplemented; \
+ } \
} while (0)

-} // namespace acl_common_utils
+} // namespace acl_utils

} // namespace aarch64
} // namespace cpu
diff --git a/src/cpu/aarch64/acl_winograd_convolution.hpp b/src/cpu/aarch64/acl_winograd_convolution.hpp
index 29e44eb189..223b3bc9b8 100644
--- a/src/cpu/aarch64/acl_winograd_convolution.hpp
+++ b/src/cpu/aarch64/acl_winograd_convolution.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2020-2021 Arm Ltd. and affiliates
+* Copyright 2020-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -107,7 +107,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t {

set_default_alg_kind(alg_kind::convolution_winograd);

- acl_common_utils::acl_thread_bind();
+ acl_utils::acl_thread_bind();

return status::success;
}
@@ -130,7 +130,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t {
// sum+eltwise post-ops
if (eltwise_only || sum_with_eltwise) {
const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg;
- eltwise_ok = acl_common_utils::acl_act_ok(act_type);
+ eltwise_ok = acl_utils::acl_act_ok(act_type);
}

return eltwise_ok || (po.len() == 0);
diff --git a/src/cpu/aarch64/matmul/acl_matmul.cpp b/src/cpu/aarch64/matmul/acl_matmul.cpp
index 3945fda6fc..6f9bb9b9ad 100644
--- a/src/cpu/aarch64/matmul/acl_matmul.cpp
+++ b/src/cpu/aarch64/matmul/acl_matmul.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -84,4 +84,4 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const {
} // namespace aarch64
} // namespace cpu
} // namespace impl
-} // namespace dnnl
\ No newline at end of file
+} // namespace dnnl
diff --git a/src/cpu/aarch64/matmul/acl_matmul.hpp b/src/cpu/aarch64/matmul/acl_matmul.hpp
index 6ba17e86dd..e69f4d9592 100644
--- a/src/cpu/aarch64/matmul/acl_matmul.hpp
+++ b/src/cpu/aarch64/matmul/acl_matmul.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -87,7 +87,7 @@ struct acl_matmul_t : public primitive_t {
if (conf_status != status::success) return status::unimplemented;
// Number of threads in Compute Library is set by OMP_NUM_THREADS
// dnnl_get_max_threads() == OMP_NUM_THREADS
- acl_common_utils::acl_thread_bind();
+ acl_utils::acl_thread_bind();

return status::success;
}
diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp
index 76599d8bb1..ba266b4303 100644
--- a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp
+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,14 +22,10 @@ namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
-namespace matmul {

-using namespace dnnl::impl::status;
-using namespace dnnl::impl::utils;
-using namespace dnnl::impl::cpu::matmul;
-using namespace prop_kind;
+using namespace alg_kind;
+using namespace cpu::matmul;
using namespace format_tag;
-using namespace dnnl::impl::alg_kind;

namespace acl_matmul_utils {

@@ -58,7 +54,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md,
wei_md, abcd, abdc, abc, acb, ab, ba);
auto dst_tag = memory_desc_matches_one_of_tag(
dst_md, abcd, abdc, abc, acb, ab, ba);
- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) {
+ if (utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) {
return status::unimplemented;
}
amp.is_transA = helper.transA() == 'T';
@@ -85,7 +81,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md,
// Fast-math mode
auto math_mode = get_fpmath_mode();
bool is_fastmath_enabled
- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any);
+ = utils::one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any);
amp.gemm_info.set_fast_math(is_fastmath_enabled);

// Fused ReLU activation
@@ -95,29 +91,15 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md,
amp.alpha = attr.output_scales_.scales_[0];

// Validate ACL transpose
- if (amp.is_transA) {
- auto acl_transA_st = arm_compute::NETranspose::validate(
- &amp.src_acc_info, &amp.src_info);
- if (acl_transA_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_transA_st.error_description().c_str());
- return status::unimplemented;
- }
- }
- if (amp.is_transB) {
- auto acl_transB_st = arm_compute::NETranspose::validate(
- &amp.wei_acc_info, &amp.wei_info);
- if (acl_transB_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_transB_st.error_description().c_str());
- return status::unimplemented;
- }
- }
+ if (amp.is_transA)
+ ACL_CHECK_VALID(arm_compute::NETranspose::validate(
+ &amp.src_acc_info, &amp.src_info));
+ if (amp.is_transB)
+ ACL_CHECK_VALID(arm_compute::NETranspose::validate(
+ &amp.wei_acc_info, &amp.wei_info));
// Validate ACL GEMM
- auto acl_st = arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info,
- nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info);
- if (acl_st.error_code() != arm_compute::ErrorCode::OK) {
- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str());
- return status::unimplemented;
- }
+ ACL_CHECK_VALID(arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info,
+ nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info));

return status::success;
}
@@ -175,7 +157,6 @@ bool acl_act_ok(alg_kind_t eltwise_activation) {

} // namespace acl_matmul_utils

-} // namespace matmul
} // namespace aarch64
} // namespace cpu
} // namespace impl
diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp
index 1411dc4f4b..248dbe5a09 100644
--- a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp
+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Arm Ltd. and affiliates
+* Copyright 2021-2022 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,7 +25,6 @@ namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
-namespace matmul {

struct acl_matmul_obj_t {
arm_compute::NEGEMM gemm;
@@ -61,10 +60,9 @@ arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr);
bool acl_act_ok(alg_kind_t eltwise_activation);
} // namespace acl_matmul_utils

-} // namespace matmul
} // namespace aarch64
} // namespace cpu
} // namespace impl
} // namespace dnnl

-#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP
\ No newline at end of file
+#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP
diff --git a/tests/benchdnn/inputs/ip/test_ip_acl b/tests/benchdnn/inputs/ip/test_ip_acl
new file mode 100644
index 0000000000..a8873c30a8
--- /dev/null
+++ b/tests/benchdnn/inputs/ip/test_ip_acl
@@ -0,0 +1,26 @@
+--reset
+
+# do not test other implementations
+--skip-impl='ref,jit'
+
+# test format::any
+--batch=shapes_ci
+
+# only inference, with and without bias
+--dir=FWD_I,FWD_B
+# test all shapes
+--batch=set_all
+
+# with and without batches
+--mb=0,2
+
+# test non-spatial layout combinations
+--stag=ab,ba
+--wtag=ab,ba
+--batch=shapes_0d
+
+# test spatial layout combinations
+--stag=abx,axb
+--wtag=abx,axb
+# 2d-spatial dimensions
+--batch=shapes_googlenet_v1
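
A usage note on the new batch file: it feeds benchdnn's inner-product driver. Assuming a oneDNN build with Compute Library support (DNNL_AARCH64_USE_ACL=ON) and the benchdnn directory as working directory, an invocation along these lines should exercise it:

    ./benchdnn --ip --batch=inputs/ip/test_ip_acl

The --skip-impl='ref,jit' line makes benchdnn skip cases that dispatch to the reference or JIT implementations, so only the ACL inner product kernels are tested.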