diff --git a/1428.patch b/1428.patch deleted file mode 100644 index 2021fa9..0000000 --- a/1428.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 24d4c1379d3c4cbf423f979fa25283b1914159c7 Mon Sep 17 00:00:00 2001 -From: Diana Bite -Date: Thu, 21 Jul 2022 12:27:17 +0100 -Subject: [PATCH] cpu: aarch64: acl: disallow large padding in ACL wino to be - consist with oneDNN - --- - src/cpu/aarch64/acl_convolution_utils.cpp | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp -index db3bde6a48..542033df0e 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.cpp -+++ b/src/cpu/aarch64/acl_convolution_utils.cpp -@@ -307,10 +307,17 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, - // General Compute Library checks, memory tags are also set there - CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); - -- const bool wino_shape_ok // unit strides only, no dilations -+ const bool shape_ok -+ // only unit strides allowed - = (acp.padstride_info.stride() == std::pair<unsigned int, unsigned int> {1, 1}) -+ // Note: Compute Library supports arbitrary padding for wino kernels -+ // but we only allow small padding to be consistent with oneDNN -+ && (acp.padstride_info.pad().first <= 1) // padding left/right -+ && (acp.padstride_info.pad().second <= 1) // padding top/bottom -+ // only non-dilated convolutions allowed - && (acp.dilation_info == arm_compute::Size2D(1, 1)); -- if (!wino_shape_ok) return status::unimplemented; -+ -+ ACL_CHECK_SUPPORT(!shape_ok, "shape not supported by winograd kernels"); - - // clang-format off - // Validate convolution manually to check for return status diff --git a/fa93750.patch b/fa93750.patch deleted file mode 100644 index e3a03b0..0000000 --- a/fa93750.patch +++ /dev/null @@ -1,1222 +0,0 @@ -From fa93750bfb821fe05e3190b36f52b5bd88a57110 Mon Sep 17 00:00:00 2001 -From: Diana Bite -Date: Thu, 24 Feb 2022 14:25:49 +0000 -Subject: [PATCH] cpu: aarch64: acl: fix inner_prod test failure and improve - validation - --- - src/cpu/aarch64/acl_binary.hpp | 8 +- - src/cpu/aarch64/acl_convolution_utils.cpp | 65 ++----- - src/cpu/aarch64/acl_eltwise.hpp | 4 +- - src/cpu/aarch64/acl_eltwise_utils.cpp | 17 +- - src/cpu/aarch64/acl_gemm_convolution.hpp | 6 +- - .../aarch64/acl_indirect_gemm_convolution.hpp | 6 +- - src/cpu/aarch64/acl_inner_product.cpp | 13 +- - src/cpu/aarch64/acl_inner_product.hpp | 179 +++++++++++++++-- - src/cpu/aarch64/acl_inner_product_utils.cpp | 181 ------------------ - src/cpu/aarch64/acl_inner_product_utils.hpp | 62 ------ - src/cpu/aarch64/acl_softmax.hpp | 17 +- - src/cpu/aarch64/acl_utils.cpp | 4 +- - src/cpu/aarch64/acl_utils.hpp | 32 +++- - src/cpu/aarch64/acl_winograd_convolution.hpp | 6 +- - src/cpu/aarch64/matmul/acl_matmul.cpp | 4 +- - src/cpu/aarch64/matmul/acl_matmul.hpp | 4 +- - src/cpu/aarch64/matmul/acl_matmul_utils.cpp | 45 ++--- - src/cpu/aarch64/matmul/acl_matmul_utils.hpp | 6 +- - tests/benchdnn/inputs/ip/test_ip_acl | 26 +++ - 19 files changed, 281 insertions(+), 404 deletions(-) - delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.cpp - delete mode 100644 src/cpu/aarch64/acl_inner_product_utils.hpp - create mode 100644 tests/benchdnn/inputs/ip/test_ip_acl - -diff --git a/src/cpu/aarch64/acl_binary.hpp b/src/cpu/aarch64/acl_binary.hpp -index 77adb45bef..122b094587 100644 ---- a/src/cpu/aarch64/acl_binary.hpp -+++ b/src/cpu/aarch64/acl_binary.hpp -@@ -125,7 +125,7 @@ struct acl_binary_t : public primitive_t { - -
status_t init(engine_t *engine) { - -- using namespace acl_common_utils; -+ using namespace acl_utils; - - // Only support f32 and s32 for now - data_type_t ddt = dst_md(0)->data_type; -@@ -179,11 +179,7 @@ struct acl_binary_t : public primitive_t { - } - - // Call operator specific validate function to check support -- arm_compute::Status acl_st = validate(asp_); -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } -+ ACL_CHECK_VALID(validate(asp_)); - - // Initialize the ACL threads - acl_thread_bind(); -diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp -index ca91de49e3..e072dc5490 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.cpp -+++ b/src/cpu/aarch64/acl_convolution_utils.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2020-2021 Arm Ltd. and affiliates -+* Copyright 2020-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -156,10 +156,10 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC - : arm_compute::DataLayout::NCHW; - -- auto acl_src_data_t = acl_common_utils::get_acl_data_t(src_d.data_type()); -- auto acl_wei_data_t = acl_common_utils::get_acl_data_t(wei_d.data_type()); -- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(dst_d.data_type()); -- auto acl_bia_data_t = acl_common_utils::get_acl_data_t(bia_d.data_type()); -+ auto acl_src_data_t = acl_utils::get_acl_data_t(src_d.data_type()); -+ auto acl_wei_data_t = acl_utils::get_acl_data_t(wei_d.data_type()); -+ auto acl_dst_data_t = acl_utils::get_acl_data_t(dst_d.data_type()); -+ auto acl_bia_data_t = acl_utils::get_acl_data_t(bia_d.data_type()); - - if (acl_bia_data_t == arm_compute::DataType::UNKNOWN) - acl_bia_data_t = arm_compute::DataType::F32; -@@ -212,33 +212,14 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - // is_eltwise(true) here stands for eltwise.scale == 1.f check - acp.sum_with_eltwise = (post_ops.len() == 2) && post_ops.entry_[0].is_sum() - && post_ops.entry_[1].is_eltwise(true); -- acp.act_info = acl_common_utils::get_acl_act(attr); -+ acp.act_info = acl_utils::get_acl_act(attr); - - if (acp.sum_with_eltwise) { -- // clang-format off -- // Validate activation layer manually to check for return status -- auto acl_al_st = arm_compute::NEActivationLayer::validate( -- &acp.dst_info, -- &acp.dst_info, -- acp.act_info); -- // clang-format on -- if (acl_al_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_al_st.error_description().c_str()); -- return status::unimplemented; -- } -- -- // clang-format off -- // Validate arithmetic addition manually to check for return status -- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate( -- &acp.dst_info, -- &acp.dst_info, -- &acp.dst_info, -- arm_compute::ConvertPolicy::SATURATE); -- // clang-format on -- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str()); -- return status::unimplemented; -- } -+ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate( // eltwise -+ &acp.dst_info, &acp.dst_info, acp.act_info)); -+ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate( // sum -+ &acp.dst_info, 
&acp.dst_info, &acp.dst_info, -+ arm_compute::ConvertPolicy::SATURATE)); - } - - return status::success; -@@ -254,7 +235,7 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - - // clang-format off - // Validate convolution manually to check for return status -- auto acl_st = arm_compute::NEGEMMConvolutionLayer::validate( -+ ACL_CHECK_VALID(arm_compute::NEGEMMConvolutionLayer::validate( - &acp.src_info, - &acp.wei_info, - acp.with_bias ? &acp.bia_info : nullptr, -@@ -263,12 +244,8 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - acp.weights_info, - acp.dilation_info, - acp.act_info, -- acp.fast_math); -+ acp.fast_math)); - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } - - return status::success; - } -@@ -289,7 +266,7 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - - // clang-format off - // NOTE: indirect convolution method supports only nhwc layout. -- auto acl_st = arm_compute::NEGEMMConv2d::validate( -+ ACL_CHECK_VALID(arm_compute::NEGEMMConv2d::validate( - &acp.src_info, - &acp.wei_info, - acp.with_bias ? &acp.bia_info : nullptr, -@@ -298,12 +275,8 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - acp.dilation_info, - acp.act_info, - acp.fast_math, -- 1)); -+ 1))); - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } - - return status::success; - } -@@ -336,19 +309,15 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, - - // clang-format off - // Validate convolution manually to check for return status -- auto acl_st = arm_compute::NEWinogradConvolutionLayer::validate( -+ ACL_CHECK_VALID(arm_compute::NEWinogradConvolutionLayer::validate( - &acp.src_info, - &acp.wei_info, - acp.with_bias ? &acp.bia_info : nullptr, - &acp.dst_info, - acp.padstride_info, - acp.act_info, -- true); // enable_fast_math flag in ACL Winograd -+ true)); // enable_fast_math flag in ACL Winograd - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_eltwise.hpp b/src/cpu/aarch64/acl_eltwise.hpp -index a55b89272c..381368aabb 100644 ---- a/src/cpu/aarch64/acl_eltwise.hpp -+++ b/src/cpu/aarch64/acl_eltwise.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -78,7 +78,7 @@ struct acl_eltwise_fwd_t : public primitive_t { - aep_, data_md_, *desc(), *attr()); - if (conf_status != status::success) return status::unimplemented; - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_eltwise_utils.cpp b/src/cpu/aarch64/acl_eltwise_utils.cpp -index 35e809e042..880b6aeaae 100644 ---- a/src/cpu/aarch64/acl_eltwise_utils.cpp -+++ b/src/cpu/aarch64/acl_eltwise_utils.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -46,7 +46,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - - const alg_kind_t eltwise_alg = ed.alg_kind; - -- bool activation_supported = acl_common_utils::acl_act_ok(eltwise_alg); -+ bool activation_supported = acl_utils::acl_act_ok(eltwise_alg); - if (!activation_supported) { return status::unimplemented; } - - // batch size -@@ -69,8 +69,8 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC - : arm_compute::DataLayout::NCHW; - -- auto acl_src_data_t = acl_common_utils::get_acl_data_t(data_d.data_type()); -- auto acl_dst_data_t = acl_common_utils::get_acl_data_t(data_d.data_type()); -+ auto acl_src_data_t = acl_utils::get_acl_data_t(data_d.data_type()); -+ auto acl_dst_data_t = acl_utils::get_acl_data_t(data_d.data_type()); - - // clang-format off - aep.src_info = arm_compute::TensorInfo( -@@ -93,7 +93,7 @@ status_t acl_eltwise_check(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - aep.dst_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); - } - -- aep.act_info = acl_common_utils::get_acl_act(ed); -+ aep.act_info = acl_utils::get_acl_act(ed); - - return status::success; - } -@@ -105,14 +105,11 @@ status_t init_conf_eltwise(acl_eltwise_conf_t &aep, memory_desc_t &data_md, - CHECK(acl_eltwise_check(aep, data_md, ed, attr)); - - // clang-format off -- auto acl_st = arm_compute::NEActivationLayer::validate( -+ ACL_CHECK_VALID(arm_compute::NEActivationLayer::validate( - &aep.src_info, - &aep.dst_info, -- aep.act_info); -+ aep.act_info)); - // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- return status::unimplemented; -- } - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_gemm_convolution.hpp b/src/cpu/aarch64/acl_gemm_convolution.hpp -index 3e7542b6bf..496f501211 100644 ---- a/src/cpu/aarch64/acl_gemm_convolution.hpp -+++ b/src/cpu/aarch64/acl_gemm_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2020-2021 Arm Ltd. and affiliates -+* Copyright 2020-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -108,7 +108,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t { - src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr()); - if (conf_status != status::success) return status::unimplemented; - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -@@ -146,7 +146,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t { - // sum+eltwise post-ops - if (eltwise_only || sum_with_eltwise) { - const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 0); -diff --git a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -index 0a0021aeee..18e757a2c9 100644 ---- a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -+++ b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -109,7 +109,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { - *attr()); - if (conf_status != status::success) return status::unimplemented; - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -@@ -134,7 +134,7 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { - // sum+eltwise post-ops - if (eltwise_only || sum_with_eltwise) { - const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 0); -diff --git a/src/cpu/aarch64/acl_inner_product.cpp b/src/cpu/aarch64/acl_inner_product.cpp -index 7a316135f8..f355a657c7 100644 ---- a/src/cpu/aarch64/acl_inner_product.cpp -+++ b/src/cpu/aarch64/acl_inner_product.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -21,23 +21,18 @@ namespace impl { - namespace cpu { - namespace aarch64 { - --using namespace dnnl::impl::status; --using namespace dnnl::impl::memory_tracking::names; --using namespace dnnl::impl::utils; -- - status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const { - // Lock here is needed because resource_mapper does not support - // concurrent multithreaded access. 
- std::lock_guard<std::mutex> _lock {this->mtx}; - -- status_t status = status::success; - auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC); - auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS); - auto bia_base = CTX_IN_MEM(const data_t *, DNNL_ARG_BIAS); - auto dst_base = CTX_OUT_MEM(data_t *, DNNL_ARG_DST); - -- bool with_bias = pd()->aip_.with_bias; -- bool with_sum = pd()->aip_.with_sum; -+ bool with_bias = pd()->aip.with_bias; -+ bool with_sum = pd()->aip.with_sum; - - // Retrieve primitive resource and configured Compute Library objects - auto *acl_resource -@@ -64,7 +59,7 @@ status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const { - acl_obj.dst_tensor.allocator()->free(); - if (with_bias) { acl_obj.bia_tensor.allocator()->free(); } - -- return status; -+ return status::success; - } - - } // namespace aarch64 -diff --git a/src/cpu/aarch64/acl_inner_product.hpp b/src/cpu/aarch64/acl_inner_product.hpp -index dd742ea0bc..e5a9bdcc8a 100644 ---- a/src/cpu/aarch64/acl_inner_product.hpp -+++ b/src/cpu/aarch64/acl_inner_product.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -17,15 +17,34 @@ - #ifndef CPU_AARCH64_ACL_INNER_PRODUCT_HPP - #define CPU_AARCH64_ACL_INNER_PRODUCT_HPP - -+#include "cpu/aarch64/acl_utils.hpp" - #include "cpu/cpu_inner_product_pd.hpp" - --#include "cpu/aarch64/acl_inner_product_utils.hpp" -- - namespace dnnl { - namespace impl { - namespace cpu { - namespace aarch64 { - -+struct acl_ip_obj_t { -+ arm_compute::NEFullyConnectedLayer fc; -+ arm_compute::NEArithmeticAddition add; -+ arm_compute::Tensor src_tensor; -+ arm_compute::Tensor wei_tensor; -+ arm_compute::Tensor bia_tensor; -+ arm_compute::Tensor dst_tensor; -+ arm_compute::Tensor dst_acc_tensor; -+}; -+ -+struct acl_ip_conf_t { -+ bool with_bias; -+ bool with_sum; -+ arm_compute::TensorInfo src_info; -+ arm_compute::TensorInfo wei_info; -+ arm_compute::TensorInfo bia_info; -+ arm_compute::TensorInfo dst_info; -+ arm_compute::FullyConnectedLayerInfo fc_info; -+}; -+ - struct acl_ip_resource_t : public resource_t { - acl_ip_resource_t() : acl_ip_obj_(utils::make_unique<acl_ip_obj_t>()) {} - -@@ -71,33 +90,26 @@ struct acl_inner_product_fwd_t : public primitive_t { - struct pd_t : public cpu_inner_product_fwd_pd_t { - using cpu_inner_product_fwd_pd_t::cpu_inner_product_fwd_pd_t; - -- DECLARE_COMMON_PD_T("inner_product:acl", acl_inner_product_fwd_t); -+ DECLARE_COMMON_PD_T("acl", acl_inner_product_fwd_t); - - status_t init(engine_t *engine) { -- using namespace utils; -- - const bool ok = is_fwd() && !has_zero_dim_memory() - && expect_data_types(data_type::f32, data_type::f32, - data_type::f32, data_type::f32, data_type::f32) - && attr()->has_default_values( - primitive_attr_t::skip_mask_t::post_ops, - data_type::f32) -- && (set_default_params() == status::success) -- && post_ops_ok(); -+ && set_default_params() == status::success && post_ops_ok(); - - if (!ok) return status::unimplemented; - -- auto conf_status = acl_inner_product_utils::init_conf_ip(aip_, -- src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr()); -- // conf_status here can be either status::success or status::unimplemented -- if (conf_status != status::success) return conf_status; -- -- acl_common_utils::acl_thread_bind(); -+
CHECK(init_conf_ip(aip, src_md_, weights_md_, dst_md_, bias_md_, -+ *desc(), *attr())); - - return status::success; - } - -- acl_ip_conf_t aip_; -+ acl_ip_conf_t aip; - - protected: - bool post_ops_ok() const { -@@ -111,16 +123,149 @@ struct acl_inner_product_fwd_t : public primitive_t { - // Compute Library supports here only one eltwise post-op or sum - if (po.len() == 1 && is_eltwise(0)) { - const auto act_type = po.entry_[0].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 1 && is_sum(0)) - || (po.len() == 0); - } -+ -+ status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, -+ memory_desc_t &wei_md, memory_desc_t &dst_md, -+ memory_desc_t &bias_md, const inner_product_desc_t &ipd, -+ const primitive_attr_t &attr) { -+ -+ ACL_CHECK_SUPPORT(src_md.ndims != wei_md.ndims, -+ "source and weights dimensions must match"); -+ -+ const int ndims = src_md.ndims; -+ -+ const bool is_2d = (ndims == 2); -+ const bool is_4d = (ndims == 4); -+ -+ ACL_CHECK_SUPPORT( -+ !(is_2d || is_4d), "ACL supports only 2d or 4d cases"); -+ -+ // batch size -+ const int n = src_md.dims[0]; -+ -+ // input and output channels -+ const int ic = src_md.dims[1]; -+ const int oc = dst_md.dims[1]; -+ -+ // source spatial dimensions -+ const int ih = is_4d ? src_md.dims[ndims - 2] : 0; -+ const int iw = is_4d ? src_md.dims[ndims - 1] : 0; -+ -+ // weights spatial dimensions -+ const int kh = is_4d ? wei_md.dims[ndims - 2] : 0; -+ const int kw = is_4d ? wei_md.dims[ndims - 1] : 0; -+ -+ // Only NCHW or NHWC derivatives supported by ACL kernels -+ using namespace format_tag; -+ auto src_tag = memory_desc_matches_one_of_tag( -+ src_md, nhwc, nchw, nc, cn); -+ auto wei_tag = memory_desc_matches_one_of_tag( -+ wei_md, ohwi, oihw, oi, io); -+ auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn); -+ -+ ACL_CHECK_SUPPORT( -+ utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag), -+ "unsupported memory layout"); -+ -+ ACL_CHECK_SUPPORT(is_2d && src_tag != dst_tag, -+ "for src and dst layouts must match"); -+ -+ arm_compute::TensorShape src_shape, wei_shape; -+ if (is_2d) { -+ src_shape = (src_tag == nc) ? arm_compute::TensorShape(ic, n) -+ : arm_compute::TensorShape(n, ic); -+ -+ wei_shape = (wei_tag == io) ? arm_compute::TensorShape(oc, ic) -+ : arm_compute::TensorShape(ic, oc); -+ } -+ if (is_4d) { -+ src_shape = (src_tag == nhwc) -+ ? arm_compute::TensorShape(ic, iw, ih, n) -+ : arm_compute::TensorShape(iw, ih, ic, n); -+ -+ // ACL requires the weights to be in 2D flattened shape -+ const int flattened_ic = is_4d ? ic * kh * kw : ic; -+ wei_shape = arm_compute::TensorShape(flattened_ic, oc); -+ } -+ -+ arm_compute::DataLayout src_layout = (src_tag == nhwc) -+ ? arm_compute::DataLayout::NHWC -+ : arm_compute::DataLayout::NCHW; -+ -+ arm_compute::DataLayout wei_layout = (wei_tag == ohwi) -+ ? arm_compute::DataLayout::NHWC -+ : arm_compute::DataLayout::NCHW; -+ -+ aip.src_info = arm_compute::TensorInfo( -+ src_shape, 1, arm_compute::DataType::F32, src_layout); -+ -+ aip.wei_info = arm_compute::TensorInfo( -+ wei_shape, 1, arm_compute::DataType::F32, wei_layout); -+ -+ aip.dst_info -+ = arm_compute::TensorInfo(arm_compute::TensorShape(oc, n), -+ 1, arm_compute::DataType::F32); -+ -+ aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef; -+ aip.bia_info = arm_compute::TensorInfo(aip.with_bias -+ ? 
arm_compute::TensorShape(oc) -+ : arm_compute::TensorShape(), -+ 1, arm_compute::DataType::F32); -+ -+ aip.fc_info.weights_trained_layout = wei_layout; -+ if (is_2d && wei_tag != src_tag) { -+ // weights are already transposed -+ aip.fc_info.transpose_weights = false; -+ } -+ -+ // Either activation or sum is supported as post-op at the moment -+ aip.fc_info.activation_info = acl_utils::get_acl_act(attr); -+ const auto &post_ops = attr.post_ops_; -+ aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum(); -+ -+ // Fast math mode -+ auto math_mode = get_fpmath_mode(); -+ bool is_fastmath_enabled = utils::one_of( -+ math_mode, fpmath_mode::bf16, fpmath_mode::any); -+ aip.fc_info.enable_fast_math = is_fastmath_enabled; -+ -+ // clang-format off -+ // Validate fully connected layer manually to check for return status -+ ACL_CHECK_VALID(arm_compute::NEFullyConnectedLayer::validate( -+ &aip.src_info, -+ &aip.wei_info, -+ aip.with_bias ? &aip.bia_info : nullptr, -+ &aip.dst_info, -+ aip.fc_info)); -+ -+ if (aip.with_sum) { -+ // Validate arithmetic addition manually to check for return status -+ ACL_CHECK_VALID(arm_compute::NEArithmeticAddition::validate( -+ &aip.dst_info, -+ &aip.dst_info, -+ &aip.dst_info, -+ arm_compute::ConvertPolicy::SATURATE)); -+ // clang-format on -+ } -+ -+ return status::success; -+ } - }; // pd_t - - acl_inner_product_fwd_t(const pd_t *apd) : primitive_t(apd) {} - -+ status_t init(engine_t *engine) override { -+ acl_utils::acl_thread_bind(); -+ -+ return status::success; -+ } -+ - status_t create_resource( - engine_t *engine, resource_mapper_t &mapper) const override { - if (mapper.has_resource(this)) return status::success; -@@ -129,7 +274,7 @@ struct acl_inner_product_fwd_t : public primitive_t { - if (!r) return status::out_of_memory; - - // Configure the resource based on information from primitive descriptor -- auto st = r->configure(pd()->aip_); -+ auto st = r->configure(pd()->aip); - if (st == status::success) { mapper.add(this, std::move(r)); } - - return st; -diff --git a/src/cpu/aarch64/acl_inner_product_utils.cpp b/src/cpu/aarch64/acl_inner_product_utils.cpp -deleted file mode 100644 -index c8fab86f22..0000000000 ---- a/src/cpu/aarch64/acl_inner_product_utils.cpp -+++ /dev/null -@@ -1,181 +0,0 @@ --/******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates --* --* Licensed under the Apache License, Version 2.0 (the "License"); --* you may not use this file except in compliance with the License. --* You may obtain a copy of the License at --* --* http://www.apache.org/licenses/LICENSE-2.0 --* --* Unless required by applicable law or agreed to in writing, software --* distributed under the License is distributed on an "AS IS" BASIS, --* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --* See the License for the specific language governing permissions and --* limitations under the License. 
--*******************************************************************************/ -- --#include "cpu/aarch64/acl_inner_product_utils.hpp" -- --namespace dnnl { --namespace impl { --namespace cpu { --namespace aarch64 { -- --namespace acl_inner_product_utils { -- --using namespace format_tag; --using namespace utils; --using namespace status; -- --status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, -- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md, -- const inner_product_desc_t &ipd, const primitive_attr_t &attr) { -- const memory_desc_wrapper src_d(&src_md); -- const memory_desc_wrapper wei_d(&wei_md); -- const memory_desc_wrapper dst_d(&dst_md); -- const memory_desc_wrapper bia_d(&bias_md); -- -- // Compute Library currently supports forward propagation only -- const prop_kind_t prop_kind = ipd.prop_kind; -- const bool is_fwd = (prop_kind == dnnl_forward_training) -- || (prop_kind == dnnl_forward_inference); -- if (!is_fwd) return status::unimplemented; -- -- const int with_groups = wei_d.ndims() == src_d.ndims() + 1; -- const int ndims = src_d.ndims(); -- -- // There are two sub-cases: src & wei tensors are either 2- or 4-dimensional -- const bool is_2d = (ndims == 2) && (wei_d.ndims() == 2); -- const bool is_4d = (ndims == 4) && (wei_d.ndims() == 4); -- -- // Compute Library unsupported shape scenarios -- // FP32 only is supported at the moment -- if (one_of(true, !(is_4d || is_2d), with_groups)) { return unimplemented; } -- -- // batch size -- const int mb = src_d.dims()[0]; -- -- // src/input channels, height, width -- const int ic = src_d.dims()[1]; -- const int ih = is_4d ? src_d.dims()[ndims - 2] : 0; -- const int iw = is_4d ? src_d.dims()[ndims - 1] : 0; -- -- // dst/output channels -- const int oc = dst_d.dims()[1]; -- -- // weights height, width -- const int kh = is_4d ? wei_d.dims()[with_groups + ndims - 2] : 0; -- const int kw = is_4d ? wei_d.dims()[with_groups + ndims - 1] : 0; -- -- aip.with_bias = ipd.bias_desc.format_kind != format_kind::undef; -- -- // Data layout is already defined thus should only be checked -- auto src_tag = memory_desc_matches_one_of_tag(src_md, nhwc, nchw, nc, cn); -- auto wei_tag = memory_desc_matches_one_of_tag(wei_md, ohwi, oihw, oi, io); -- auto dst_tag = memory_desc_matches_one_of_tag(dst_md, nc, cn); -- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { -- return status::unimplemented; -- } -- -- arm_compute::TensorShape src_shape {(src_tag == nc) -- ? arm_compute::TensorShape(ic, mb) -- : arm_compute::TensorShape(mb, ic)}; -- if (is_4d) { -- src_shape = (src_tag == nhwc) -- ? arm_compute::TensorShape(ic, iw, ih, mb) -- : arm_compute::TensorShape(iw, ih, ic, mb); -- } -- -- // Compute Library requires the weights to be 2-dimensional for FC layer -- arm_compute::TensorShape wei_shape { -- arm_compute::TensorShape(is_4d ? ic * kh * kw : ic, oc)}; -- if (is_2d && wei_tag == io) { -- wei_shape = arm_compute::TensorShape(oc, ic); -- } -- -- arm_compute::DataLayout wei_layout {(wei_tag == ohwi || wei_tag == oi) -- ? arm_compute::DataLayout::NHWC -- : arm_compute::DataLayout::NCHW}; -- -- // clang-format off -- aip.src_info = arm_compute::TensorInfo( -- src_shape, -- 1, -- arm_compute::DataType::F32, -- (src_tag == nhwc || src_tag == nc) ? -- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW); -- -- aip.wei_info = arm_compute::TensorInfo( -- wei_shape, -- 1, -- arm_compute::DataType::F32, -- wei_layout); -- -- aip.dst_info = arm_compute::TensorInfo( -- (dst_tag == nhwc || dst_tag == nc) ? 
-- arm_compute::TensorShape(oc, mb) : arm_compute::TensorShape(mb, oc), -- 1, -- arm_compute::DataType::F32, -- (dst_tag == nhwc || dst_tag == nc) ? -- arm_compute::DataLayout::NHWC : arm_compute::DataLayout::NCHW); -- -- aip.bia_info = arm_compute::TensorInfo( -- aip.with_bias ? -- arm_compute::TensorShape(oc) : arm_compute::TensorShape(), -- 1, -- arm_compute::DataType::F32); -- // clang-format on -- -- aip.fc_info.weights_trained_layout = wei_layout; -- if (is_2d && wei_tag != src_tag) { aip.fc_info.transpose_weights = false; } -- -- // Either activation or sum is supported as post-op at the moment -- aip.fc_info.activation_info = acl_common_utils::get_acl_act(attr); -- const auto &post_ops = attr.post_ops_; -- aip.with_sum = (post_ops.len() == 1) && post_ops.entry_[0].is_sum(); -- -- // Fast math mode -- auto math_mode = get_fpmath_mode(); -- bool is_fastmath_enabled -- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); -- aip.fc_info.enable_fast_math = is_fastmath_enabled; -- -- // clang-format off -- // Validate fully connected layer manually to check for return status -- auto acl_st = arm_compute::NEFullyConnectedLayer::validate( -- &aip.src_info, -- &aip.wei_info, -- aip.with_bias ? &aip.bia_info : nullptr, -- &aip.dst_info, -- aip.fc_info); -- // clang-format on -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } -- -- if (aip.with_sum) { -- // clang-format off -- // Validate arithmetic addition manually to check for return status -- auto acl_aa_st = arm_compute::NEArithmeticAddition::validate( -- &aip.dst_info, -- &aip.dst_info, -- &aip.dst_info, -- arm_compute::ConvertPolicy::SATURATE); -- -- // clang-format on -- if (acl_aa_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_aa_st.error_description().c_str()); -- return status::unimplemented; -- } -- } -- -- return status::success; --} -- --} // namespace acl_inner_product_utils -- --} // namespace aarch64 --} // namespace cpu --} // namespace impl --} // namespace dnnl -diff --git a/src/cpu/aarch64/acl_inner_product_utils.hpp b/src/cpu/aarch64/acl_inner_product_utils.hpp -deleted file mode 100644 -index 022d0e3349..0000000000 ---- a/src/cpu/aarch64/acl_inner_product_utils.hpp -+++ /dev/null -@@ -1,62 +0,0 @@ --/******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates --* --* Licensed under the Apache License, Version 2.0 (the "License"); --* you may not use this file except in compliance with the License. --* You may obtain a copy of the License at --* --* http://www.apache.org/licenses/LICENSE-2.0 --* --* Unless required by applicable law or agreed to in writing, software --* distributed under the License is distributed on an "AS IS" BASIS, --* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --* See the License for the specific language governing permissions and --* limitations under the License. 
--*******************************************************************************/ -- --#ifndef CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP --#define CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP -- --#include "cpu/cpu_inner_product_pd.hpp" -- --#include "cpu/aarch64/acl_utils.hpp" -- --namespace dnnl { --namespace impl { --namespace cpu { --namespace aarch64 { -- --struct acl_ip_obj_t { -- arm_compute::NEFullyConnectedLayer fc; -- arm_compute::NEArithmeticAddition add; -- arm_compute::Tensor src_tensor; -- arm_compute::Tensor wei_tensor; -- arm_compute::Tensor bia_tensor; -- arm_compute::Tensor dst_tensor; -- arm_compute::Tensor dst_acc_tensor; --}; -- --struct acl_ip_conf_t { -- bool with_bias; -- bool with_sum; -- arm_compute::TensorInfo src_info; -- arm_compute::TensorInfo wei_info; -- arm_compute::TensorInfo bia_info; -- arm_compute::TensorInfo dst_info; -- arm_compute::FullyConnectedLayerInfo fc_info; --}; -- --namespace acl_inner_product_utils { -- --status_t init_conf_ip(acl_ip_conf_t &aip, memory_desc_t &src_md, -- memory_desc_t &wei_md, memory_desc_t &dst_md, memory_desc_t &bias_md, -- const inner_product_desc_t &ipd, const primitive_attr_t &attr); -- --} // namespace acl_inner_product_utils -- --} // namespace aarch64 --} // namespace cpu --} // namespace impl --} // namespace dnnl -- --#endif // CPU_AARCH64_ACL_INNER_PRODUCT_UTILS_HPP -diff --git a/src/cpu/aarch64/acl_softmax.hpp b/src/cpu/aarch64/acl_softmax.hpp -index a4bfd0c3bd..4ee7139a93 100644 ---- a/src/cpu/aarch64/acl_softmax.hpp -+++ b/src/cpu/aarch64/acl_softmax.hpp -@@ -129,7 +129,7 @@ struct acl_softmax_fwd_t : public primitive_t { - arm_compute::DataLayout acl_layout = arm_compute::DataLayout::NHWC; - - const arm_compute::DataType acl_data_t -- = acl_common_utils::get_acl_data_t(data_type); -+ = acl_utils::get_acl_data_t(data_type); - - const int threads = dnnl_get_max_threads(); - if (inner_size_ == 1) { -@@ -189,20 +189,15 @@ struct acl_softmax_fwd_t : public primitive_t { - } - - // Validate manually to check for return status -- arm_compute::Status acl_st; - if (asp_.is_logsoftmax) { -- acl_st = arm_compute::NELogSoftmaxLayer::validate( -- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis); -+ ACL_CHECK_VALID(arm_compute::NELogSoftmaxLayer::validate( -+ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis)); - } else { -- acl_st = arm_compute::NESoftmaxLayer::validate( -- &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis); -- } -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -+ ACL_CHECK_VALID(arm_compute::NESoftmaxLayer::validate( -+ &asp_.src_info, &asp_.dst_info, asp_.beta, asp_.axis)); - } - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -diff --git a/src/cpu/aarch64/acl_utils.cpp b/src/cpu/aarch64/acl_utils.cpp -index a69f14b6f9..098217f50e 100644 ---- a/src/cpu/aarch64/acl_utils.cpp -+++ b/src/cpu/aarch64/acl_utils.cpp -@@ -21,7 +21,7 @@ namespace impl { - namespace cpu { - namespace aarch64 { - --namespace acl_common_utils { -+namespace acl_utils { - - using namespace dnnl::impl::alg_kind; - using namespace data_type; -@@ -247,7 +247,7 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed, - return status::success; - } - --} // namespace acl_common_utils -+} // namespace acl_utils - - } // namespace aarch64 - } // namespace cpu -diff --git a/src/cpu/aarch64/acl_utils.hpp b/src/cpu/aarch64/acl_utils.hpp -index 
565cde66a9..bb8efc998c 100644 ---- a/src/cpu/aarch64/acl_utils.hpp -+++ b/src/cpu/aarch64/acl_utils.hpp -@@ -21,13 +21,10 @@ - - #include "oneapi/dnnl/dnnl_types.h" - --#include "common/bfloat16.hpp" --#include "common/c_types_map.hpp" - #include "common/dnnl_thread.hpp" - #include "common/memory_tracking.hpp" - #include "common/primitive.hpp" - #include "common/utils.hpp" -- - #include "cpu/cpu_engine.hpp" - - #include "arm_compute/runtime/NEON/NEFunctions.h" -@@ -38,7 +35,7 @@ namespace impl { - namespace cpu { - namespace aarch64 { - --namespace acl_common_utils { -+namespace acl_utils { - - arm_compute::DataType get_acl_data_t(const dnnl_data_type_t dt); - arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr); -@@ -68,12 +65,33 @@ status_t permute_common_dense_dimension_to_last(memory_desc_t *d0_permed, - const memory_desc_t *d0, const memory_desc_t *d1, - const memory_desc_t *d2); - --#define MAYBE_REPORT_ACL_ERROR(msg) \ -+// Logs a custom 'info' line describing an unsupported case -+#define LOG_ACL_UNSUPPORTED(msg) \ -+ do { \ -+ if (get_verbose() >= 2) \ -+ printf("onednn_verbose,cpu,acl,unsupported: %s\n", (msg)); \ -+ } while (0) -+ -+// Returns unimplemented if error code x is NOT OK -+#define ACL_CHECK_VALID(x) \ -+ do { \ -+ arm_compute::Status s = x; \ -+ if (s.error_code() != arm_compute::ErrorCode::OK) { \ -+ LOG_ACL_UNSUPPORTED(s.error_description().c_str()); \ -+ return dnnl::impl::status::unimplemented; \ -+ } \ -+ } while (0) -+ -+// Returns unimplemented on condition x == true -+#define ACL_CHECK_SUPPORT(x, msg) \ - do { \ -- if (get_verbose()) printf("onednn_verbose,cpu,error,acl,%s\n", (msg)); \ -+ if (x) { \ -+ LOG_ACL_UNSUPPORTED(msg); \ -+ return dnnl::impl::status::unimplemented; \ -+ } \ - } while (0) - --} // namespace acl_common_utils -+} // namespace acl_utils - - } // namespace aarch64 - } // namespace cpu -diff --git a/src/cpu/aarch64/acl_winograd_convolution.hpp b/src/cpu/aarch64/acl_winograd_convolution.hpp -index 29e44eb189..223b3bc9b8 100644 ---- a/src/cpu/aarch64/acl_winograd_convolution.hpp -+++ b/src/cpu/aarch64/acl_winograd_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2020-2021 Arm Ltd. and affiliates -+* Copyright 2020-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -107,7 +107,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t { - - set_default_alg_kind(alg_kind::convolution_winograd); - -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -@@ -130,7 +130,7 @@ struct acl_wino_convolution_fwd_t : public primitive_t { - // sum+eltwise post-ops - if (eltwise_only || sum_with_eltwise) { - const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; -- eltwise_ok = acl_common_utils::acl_act_ok(act_type); -+ eltwise_ok = acl_utils::acl_act_ok(act_type); - } - - return eltwise_ok || (po.len() == 0); -diff --git a/src/cpu/aarch64/matmul/acl_matmul.cpp b/src/cpu/aarch64/matmul/acl_matmul.cpp -index 3945fda6fc..6f9bb9b9ad 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul.cpp -+++ b/src/cpu/aarch64/matmul/acl_matmul.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. 
and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -84,4 +84,4 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { - } // namespace aarch64 - } // namespace cpu - } // namespace impl --} // namespace dnnl -\ No newline at end of file -+} // namespace dnnl -diff --git a/src/cpu/aarch64/matmul/acl_matmul.hpp b/src/cpu/aarch64/matmul/acl_matmul.hpp -index 6ba17e86dd..e69f4d9592 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul.hpp -+++ b/src/cpu/aarch64/matmul/acl_matmul.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -87,7 +87,7 @@ struct acl_matmul_t : public primitive_t { - if (conf_status != status::success) return status::unimplemented; - // Number of threads in Compute Library is set by OMP_NUM_THREADS - // dnnl_get_max_threads() == OMP_NUM_THREADS -- acl_common_utils::acl_thread_bind(); -+ acl_utils::acl_thread_bind(); - - return status::success; - } -diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -index 76599d8bb1..ba266b4303 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -22,14 +22,10 @@ namespace dnnl { - namespace impl { - namespace cpu { - namespace aarch64 { --namespace matmul { - --using namespace dnnl::impl::status; --using namespace dnnl::impl::utils; --using namespace dnnl::impl::cpu::matmul; --using namespace prop_kind; -+using namespace alg_kind; -+using namespace cpu::matmul; - using namespace format_tag; --using namespace dnnl::impl::alg_kind; - - namespace acl_matmul_utils { - -@@ -58,7 +54,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, - wei_md, abcd, abdc, abc, acb, ab, ba); - auto dst_tag = memory_desc_matches_one_of_tag( - dst_md, abcd, abdc, abc, acb, ab, ba); -- if (one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { -+ if (utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag)) { - return status::unimplemented; - } - amp.is_transA = helper.transA() == 'T'; -@@ -85,7 +81,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, - // Fast-math mode - auto math_mode = get_fpmath_mode(); - bool is_fastmath_enabled -- = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); -+ = utils::one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); - amp.gemm_info.set_fast_math(is_fastmath_enabled); - - // Fused ReLU activation -@@ -95,29 +91,15 @@ status_t init_conf_matmul(acl_matmul_conf_t &amp, memory_desc_t &src_md, - amp.alpha = attr.output_scales_.scales_[0]; - - // Validate ACL transpose -- if (amp.is_transA) { -- auto acl_transA_st = arm_compute::NETranspose::validate( -- &amp.src_acc_info, &amp.src_info); -- if (acl_transA_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_transA_st.error_description().c_str()); -- return status::unimplemented; -- } -- } -- if (amp.is_transB) { -- auto acl_transB_st = arm_compute::NETranspose::validate( -- &amp.wei_acc_info, &amp.wei_info); -- if (acl_transB_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_transB_st.error_description().c_str()); -- return status::unimplemented; -- } -- } -+ if (amp.is_transA) -+ ACL_CHECK_VALID(arm_compute::NETranspose::validate( -+ &amp.src_acc_info, &amp.src_info)); -+ if (amp.is_transB) -+ ACL_CHECK_VALID(arm_compute::NETranspose::validate( -+ &amp.wei_acc_info, &amp.wei_info)); - // Validate ACL GEMM -- auto acl_st = arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info, -- nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info); -- if (acl_st.error_code() != arm_compute::ErrorCode::OK) { -- MAYBE_REPORT_ACL_ERROR(acl_st.error_description().c_str()); -- return status::unimplemented; -- } -+ ACL_CHECK_VALID(arm_compute::NEGEMM::validate(&amp.src_info, &amp.wei_info, -+ nullptr, &amp.dst_info, amp.alpha, 0.0f, amp.gemm_info)); - - return status::success; - } -@@ -175,7 +157,6 @@ bool acl_act_ok(alg_kind_t eltwise_activation) { - - } // namespace acl_matmul_utils - --} // namespace matmul - } // namespace aarch64 - } // namespace cpu - } // namespace impl -diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -index 1411dc4f4b..248dbe5a09 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021 Arm Ltd. and affiliates -+* Copyright 2021-2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
-@@ -25,7 +25,6 @@ namespace dnnl { - namespace impl { - namespace cpu { - namespace aarch64 { --namespace matmul { - - struct acl_matmul_obj_t { - arm_compute::NEGEMM gemm; -@@ -61,10 +60,9 @@ arm_compute::ActivationLayerInfo get_acl_act(const primitive_attr_t &attr); - bool acl_act_ok(alg_kind_t eltwise_activation); - } // namespace acl_matmul_utils - --} // namespace matmul - } // namespace aarch64 - } // namespace cpu - } // namespace impl - } // namespace dnnl - --#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP -\ No newline at end of file -+#endif // CPU_AARCH64_ACL_MATMUL_UTILS_HPP -diff --git a/tests/benchdnn/inputs/ip/test_ip_acl b/tests/benchdnn/inputs/ip/test_ip_acl -new file mode 100644 -index 0000000000..a8873c30a8 ---- /dev/null -+++ b/tests/benchdnn/inputs/ip/test_ip_acl -@@ -0,0 +1,26 @@ -+--reset -+ -+# do not test other implementations -+--skip-impl='ref,jit' -+ -+# test format::any -+--batch=shapes_ci -+ -+# only inference, with and without bias -+--dir=FWD_I,FWD_B -+# test all shapes -+--batch=set_all -+ -+# with and without batches -+--mb=0,2 -+ -+# test non-spatial layout combinations -+--stag=ab,ba -+--wtag=ab,ba -+--batch=shapes_0d -+ -+# test spatial layout combinations -+--stag=abx,axb -+--wtag=abx,axb -+# 2d-spatial dimensions -+--batch=shapes_googlenet_v1 diff --git a/oneDNN-2.6.2.tar.gz b/oneDNN-2.6.2.tar.gz deleted file mode 100644 index cb5bc1f..0000000 --- a/oneDNN-2.6.2.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:baed0a7426189c6e5f1cb242c6d282d3638802cbd05772e1c4810a83bd7bc4df -size 5846494 diff --git a/oneDNN-3.0.1.tar.gz b/oneDNN-3.0.1.tar.gz new file mode 100644 index 0000000..4a68b60 --- /dev/null +++ b/oneDNN-3.0.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7aca425e9895b791a942ae755edc28f1b9f9fe9bf94291c59f33ebcf5859f2c +size 7189577 diff --git a/onednn-fix-gcc13.patch b/onednn-fix-gcc13.patch new file mode 100644 index 0000000..fd1569a --- /dev/null +++ b/onednn-fix-gcc13.patch @@ -0,0 +1,10 @@ +--- oneDNN-3.0.1/src/cpu/aarch64/xbyak_aarch64/xbyak_aarch64/xbyak_aarch64.h.orig 2023-02-24 03:36:27.000000000 +0100 ++++ oneDNN-3.0.1/src/cpu/aarch64/xbyak_aarch64/xbyak_aarch64/xbyak_aarch64.h 2023-03-21 13:44:05.786852048 +0100 +@@ -25,6 +25,7 @@ + #endif + + #include ++#include + #include + #include + #include diff --git a/onednn.changes b/onednn.changes index 73885c3..baf05a5 100644 --- a/onednn.changes +++ b/onednn.changes @@ -1,3 +1,18 @@ +------------------------------------------------------------------- +Tue Mar 21 10:27:40 UTC 2023 - Guillaume GARDET + +- Update to 3.0.1: + * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0.1 +- Skipped 3.0: + * Changes: https://github.com/oneapi-src/oneDNN/releases/tag/v3.0 +- Add patch to fix build with GCC13: + * onednn-fix-gcc13.patch +- Disable Arm Compute library support until fixed upstream + https://github.com/oneapi-src/oneDNN/issues/1599 +- Drop upstream patches: + * 1428.patch + * fa93750.patch + ------------------------------------------------------------------- Tue Sep 20 08:26:43 UTC 2022 - Guillaume GARDET diff --git a/onednn.spec b/onednn.spec index 0d5849a..dd6b5c0 100644 --- a/onednn.spec +++ b/onednn.spec @@ -1,7 +1,7 @@ # # spec file for package onednn # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -24,23 +24,22 @@ %endif %ifarch 
aarch64 -%bcond_without acl +# Disable ACL until fixed upstream - https://github.com/oneapi-src/oneDNN/issues/1599 +%bcond_with acl %else %bcond_with acl %endif -%define libname libdnnl2 +%define libname libdnnl3 Name: onednn -Version: 2.6.2 +Version: 3.0.1 Release: 0 Summary: Intel Math Kernel Library for Deep Neural Networks License: Apache-2.0 URL: https://01.org/onednn Source0: https://github.com/oneapi-src/oneDNN/archive/v%{version}/oneDNN-%{version}.tar.gz -# PATCH-FIX-UPSTREAM - deps for Patch2 -Patch1: fa93750.patch -# PATCH-FIX-UPSTREAM - Fix build with latest ACL - https://github.com/oneapi-src/oneDNN/pull/1428 -Patch2: 1428.patch +# PATCH-FIX-UPSTREAM - https://github.com/oneapi-src/oneDNN/issues/1600 +Patch1: onednn-fix-gcc13.patch BuildRequires: chrpath BuildRequires: cmake BuildRequires: doxygen @@ -49,7 +48,7 @@ BuildRequires: gcc-c++ BuildRequires: graphviz BuildRequires: texlive-dvips-bin %if %{with acl} -BuildRequires: ComputeLibrary-devel +BuildRequires: ComputeLibrary-devel >= 22.08 %endif %if %{with opencl} BuildRequires: opencl-headers