diff --git a/01_install_FindEigen3.patch b/01_install_FindEigen3.patch
deleted file mode 100644
index c1ceba6..0000000
--- a/01_install_FindEigen3.patch
+++ /dev/null
@@ -1,16 +0,0 @@
-Index: eigen-3.4.0/CMakeLists.txt
-===================================================================
---- eigen-3.4.0.orig/CMakeLists.txt
-+++ eigen-3.4.0/CMakeLists.txt
-@@ -466,6 +466,11 @@ install(FILES
- DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel
- )
-
-+install(FILES
-+ cmake/FindEigen3.cmake
-+ DESTINATION ${CMAKE_INSTALL_PREFIX}/share/cmake/Modules
-+ )
-+
- if(EIGEN_BUILD_PKGCONFIG)
- configure_file(eigen3.pc.in eigen3.pc @ONLY)
- install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
diff --git a/_constraints b/_constraints
new file mode 100644
index 0000000..9b40922
--- /dev/null
+++ b/_constraints
@@ -0,0 +1,16 @@
+
+
+
+
+ eigen3:docs
+
+
+
+ 5
+
+
+ 7
+
+
+
+
diff --git a/eigen3-3.3.1-fixcmake.patch b/eigen3-3.3.1-fixcmake.patch
deleted file mode 100644
index 7bc8d5b..0000000
--- a/eigen3-3.3.1-fixcmake.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-Index: eigen-3.4.0/CMakeLists.txt
-===================================================================
---- eigen-3.4.0.orig/CMakeLists.txt
-+++ eigen-3.4.0/CMakeLists.txt
-@@ -598,7 +598,7 @@ set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD
- set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} )
- set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} )
- set ( EIGEN_DEFINITIONS "")
--set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
-+set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} )
- set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
-
- include (CMakePackageConfigHelpers)
diff --git a/eigen3.changes b/eigen3.changes
index dcea8cd..c29e8df 100644
--- a/eigen3.changes
+++ b/eigen3.changes
@@ -1,3 +1,13 @@
+-------------------------------------------------------------------
+Tue Feb 1 23:06:35 UTC 2022 - Stefan Brüns
+
+- Add _constraints for docs(+test) to avoid OOM build failures
+- Drop obsolete/unnecessary patches
+ * eigen3-3.3.1-fixcmake.patch (no longer has any effect)
+ * 01_install_FindEigen3.patch (CMake Config mode is preferred)
+- Fix build for ppc64le (affects test and dependent packages, e.g.
+ arpack-ng), add fix_ppc64le_always_inline_680.patch
+
-------------------------------------------------------------------
Fri Aug 20 01:01:50 UTC 2021 - Atri Bhattacharya
diff --git a/eigen3.spec b/eigen3.spec
index fc0b952..a23bafb 100644
--- a/eigen3.spec
+++ b/eigen3.spec
@@ -1,7 +1,7 @@
#
# spec file
#
-# Copyright (c) 2021 SUSE LLC
+# Copyright (c) 2022 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -41,10 +41,8 @@ URL: http://eigen.tuxfamily.org/
Source0: https://gitlab.com/libeigen/eigen/-/archive/%{version}/%{srcname}-%{version}.tar.bz2
Patch0: 0001-Disable-Altivec-for-ppc64le.patch
Patch1: 0001-Do-stack-allignment-on-ppc.patch
-# PATCH-FIX-OPENSUSE 01_install_FindEigen3.patch asterios.dramis@gmail.com -- Install FindEigen3.cmake
-Patch3: 01_install_FindEigen3.patch
-# PATCH-FIX-OPENSUSE eigen3-3.3.1-fixcmake.patch -- Fix double {prefix} as we use INCLUDE_INSTALL_DIR with {_includedir}
-Patch4: eigen3-3.3.1-fixcmake.patch
+# PATCH-FIX-UPSTREAM -- https://gitlab.com/libeigen/eigen/-/merge_requests/680.patch
+Patch2: fix_ppc64le_always_inline_680.patch
%if %{with tests}
# SECTION Patches to fix tests
# PATCH-FIX-UPSTREAM eigen3-googlehash-detection.patch badshah400@gmail.com -- GoogleHash needs C++11 std to compile test code and be succesfully detected
@@ -123,9 +121,9 @@ echo "HTML_TIMESTAMP = NO" >> doc/Doxyfile.in
-DEIGEN_TEST_OPENMP:Bool=%{?with_tests:ON}%{!?with_tests:OFF}
%if "%{flavor}" == ""
-make %{?_smp_mflags} all %{?with_tests:buildtests}
+%cmake_build all %{?with_tests:buildtests}
%else
-make %{?_smp_mflags} doc
+%cmake_build doc
%endif
rm -f doc/html/*.tgz
@@ -160,7 +158,6 @@ export EIGEN_REPEAT=1
%{_includedir}/eigen3/
%{_datadir}/eigen3/
%{_datadir}/pkgconfig/eigen3.pc
-%{_datadir}/cmake/Modules/FindEigen3.cmake
%endif
diff --git a/fix_ppc64le_always_inline_680.patch b/fix_ppc64le_always_inline_680.patch
new file mode 100644
index 0000000..2dc2df2
--- /dev/null
+++ b/fix_ppc64le_always_inline_680.patch
@@ -0,0 +1,3138 @@
+From 9e3873b1dce3ba65980c7e7b979325dac2fb4bbd Mon Sep 17 00:00:00 2001
+From: Chip-Kerchner
+Date: Wed, 20 Oct 2021 11:06:50 -0500
+Subject: [PATCH 1/2] New branch for inverting rows and depth in non-vectorized
+ portion of packing.
+
+---
+ Eigen/src/Core/arch/AltiVec/Complex.h | 10 +-
+ Eigen/src/Core/arch/AltiVec/MatrixProduct.h | 1546 ++++++++---------
+ .../Core/arch/AltiVec/MatrixProductCommon.h | 206 +--
+ .../src/Core/arch/AltiVec/MatrixProductMMA.h | 335 ++--
+ 4 files changed, 927 insertions(+), 1170 deletions(-)
+
+diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
+index f730ce8d3..4fd923e84 100644
+--- a/Eigen/src/Core/arch/AltiVec/Complex.h
++++ b/Eigen/src/Core/arch/AltiVec/Complex.h
+@@ -129,20 +129,20 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex<
+ template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { pstore((float*)to, from.v); }
+ template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
+
+-EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex* from0, const std::complex* from1)
++EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex& from0, const std::complex& from1)
+ {
+ Packet4f res0, res1;
+ #ifdef __VSX__
+- __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0));
+- __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1));
++ __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (from0));
++ __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (from1));
+ #ifdef _BIG_ENDIAN
+ __asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
+ #else
+ __asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
+ #endif
+ #else
+- *reinterpret_cast *>(&res0) = *from0;
+- *reinterpret_cast *>(&res1) = *from1;
++ *reinterpret_cast *>(&res0) = from0;
++ *reinterpret_cast *>(&res1) = from1;
+ res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI);
+ #endif
+ return Packet2cf(res0);
+diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
+index 1d67d60d0..bd5da3623 100644
+--- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
++++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
+@@ -166,24 +166,23 @@ EIGEN_STRONG_INLINE void symm_pack_complex_rhs_helper(std::complex* bloc
+
+ rir += vectorDelta;
+ }
+- if (j < cols)
++
++ for(; j < cols; j++)
+ {
+- rii = rir + ((cols - j) * rows);
++ rii = rir + rows;
+
+ for(Index i = k2; i < depth; i++)
+ {
+- Index k = j;
+- for(; k < cols; k++)
+- {
+- std::complex v = getAdjointVal(i, k, rhs);
++ std::complex v = getAdjointVal(i, j, rhs);
+
+- blockBf[rir] = v.real();
+- blockBf[rii] = v.imag();
++ blockBf[rir] = v.real();
++ blockBf[rii] = v.imag();
+
+- rir += 1;
+- rii += 1;
+- }
++ rir += 1;
++ rii += 1;
+ }
++
++ rir += rows;
+ }
+ }
+
+@@ -262,19 +261,15 @@ EIGEN_STRONG_INLINE void symm_pack_rhs_helper(Scalar* blockB, const Scalar* _rhs
+ }
+ }
+
+- if (j < cols)
++ for(; j < cols; j++)
+ {
+ for(Index i = k2; i < depth; i++)
+ {
+- Index k = j;
+- for(; k < cols; k++)
+- {
+- if(k <= i)
+- blockB[ri] = rhs(i, k);
+- else
+- blockB[ri] = rhs(k, i);
+- ri += 1;
+- }
++ if(j <= i)
++ blockB[ri] = rhs(i, j);
++ else
++ blockB[ri] = rhs(j, i);
++ ri += 1;
+ }
+ }
+ }
+@@ -408,22 +403,18 @@ struct symm_pack_lhs
+ * and offset and behaves accordingly.
+ **/
+
+-template
+-EIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock& block)
+-{
+- const Index size = 16 / sizeof(Scalar);
+- pstore(to + (0 * size), block.packet[0]);
+- pstore(to + (1 * size), block.packet[1]);
+- pstore(to + (2 * size), block.packet[2]);
+- pstore(to + (3 * size), block.packet[3]);
+-}
+-
+-template
+-EIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock& block)
++template
++EIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock& block)
+ {
+ const Index size = 16 / sizeof(Scalar);
+ pstore(to + (0 * size), block.packet[0]);
+ pstore(to + (1 * size), block.packet[1]);
++ if (N > 2) {
++ pstore(to + (2 * size), block.packet[2]);
++ }
++ if (N > 3) {
++ pstore(to + (3 * size), block.packet[3]);
++ }
+ }
+
+ // General template for lhs & rhs complex packing.
+@@ -449,9 +440,9 @@ struct dhs_cpack {
+ PacketBlock cblock;
+
+ if (UseLhs) {
+- bload(cblock, lhs, j, i);
++ bload(cblock, lhs, j, i);
+ } else {
+- bload(cblock, lhs, i, j);
++ bload(cblock, lhs, i, j);
+ }
+
+ blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETREAL32);
+@@ -478,8 +469,8 @@ struct dhs_cpack {
+ ptranspose(blocki);
+ }
+
+- storeBlock(blockAt + rir, blockr);
+- storeBlock(blockAt + rii, blocki);
++ storeBlock(blockAt + rir, blockr);
++ storeBlock(blockAt + rii, blocki);
+
+ rir += 4*vectorSize;
+ rii += 4*vectorSize;
+@@ -499,21 +490,12 @@ struct dhs_cpack {
+ cblock.packet[1] = lhs.template loadPacket(i, j + 2);
+ }
+ } else {
+- std::complex lhs0, lhs1;
+ if (UseLhs) {
+- lhs0 = lhs(j + 0, i);
+- lhs1 = lhs(j + 1, i);
+- cblock.packet[0] = pload2(&lhs0, &lhs1);
+- lhs0 = lhs(j + 2, i);
+- lhs1 = lhs(j + 3, i);
+- cblock.packet[1] = pload2(&lhs0, &lhs1);
++ cblock.packet[0] = pload2(lhs(j + 0, i), lhs(j + 1, i));
++ cblock.packet[1] = pload2(lhs(j + 2, i), lhs(j + 3, i));
+ } else {
+- lhs0 = lhs(i, j + 0);
+- lhs1 = lhs(i, j + 1);
+- cblock.packet[0] = pload2(&lhs0, &lhs1);
+- lhs0 = lhs(i, j + 2);
+- lhs1 = lhs(i, j + 3);
+- cblock.packet[1] = pload2(&lhs0, &lhs1);
++ cblock.packet[0] = pload2(lhs(i, j + 0), lhs(i, j + 1));
++ cblock.packet[1] = pload2(lhs(i, j + 2), lhs(i, j + 3));
+ }
+ }
+
+@@ -535,34 +517,50 @@ struct dhs_cpack {
+ rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta);
+ }
+
+- if (j < rows)
++ if (!UseLhs)
+ {
+- if(PanelMode) rir += (offset*(rows - j - vectorSize));
+- rii = rir + (((PanelMode) ? stride : depth) * (rows - j));
++ if(PanelMode) rir -= (offset*(vectorSize - 1));
+
+- for(Index i = 0; i < depth; i++)
++ for(; j < rows; j++)
+ {
+- Index k = j;
+- for(; k < rows; k++)
++ rii = rir + ((PanelMode) ? stride : depth);
++
++ for(Index i = 0; i < depth; i++)
+ {
+- if (UseLhs) {
++ blockAt[rir] = lhs(i, j).real();
++
++ if(Conjugate)
++ blockAt[rii] = -lhs(i, j).imag();
++ else
++ blockAt[rii] = lhs(i, j).imag();
++
++ rir += 1;
++ rii += 1;
++ }
++
++ rir += ((PanelMode) ? (2*stride - depth) : depth);
++ }
++ } else {
++ if (j < rows)
++ {
++ if(PanelMode) rir += (offset*(rows - j - vectorSize));
++ rii = rir + (((PanelMode) ? stride : depth) * (rows - j));
++
++ for(Index i = 0; i < depth; i++)
++ {
++ Index k = j;
++ for(; k < rows; k++)
++ {
+ blockAt[rir] = lhs(k, i).real();
+
+ if(Conjugate)
+ blockAt[rii] = -lhs(k, i).imag();
+ else
+ blockAt[rii] = lhs(k, i).imag();
+- } else {
+- blockAt[rir] = lhs(i, k).real();
+
+- if(Conjugate)
+- blockAt[rii] = -lhs(i, k).imag();
+- else
+- blockAt[rii] = lhs(i, k).imag();
++ rir += 1;
++ rii += 1;
+ }
+-
+- rir += 1;
+- rii += 1;
+ }
+ }
+ }
+@@ -588,16 +586,16 @@ struct dhs_pack{
+ PacketBlock block;
+
+ if (UseLhs) {
+- bload(block, lhs, j, i);
++ bload(block, lhs, j, i);
+ } else {
+- bload(block, lhs, i, j);
++ bload(block, lhs, i, j);
+ }
+ if(((StorageOrder == RowMajor) && UseLhs) || ((StorageOrder == ColMajor) && !UseLhs))
+ {
+ ptranspose(block);
+ }
+
+- storeBlock(blockA + ri, block);
++ storeBlock(blockA + ri, block);
+
+ ri += 4*vectorSize;
+ }
+@@ -632,21 +630,33 @@ struct dhs_pack{
+ if(PanelMode) ri += vectorSize*(stride - offset - depth);
+ }
+
+- if (j < rows)
++ if (!UseLhs)
+ {
+- if(PanelMode) ri += offset*(rows - j);
++ if(PanelMode) ri += offset;
+
+- for(Index i = 0; i < depth; i++)
++ for(; j < rows; j++)
+ {
+- Index k = j;
+- for(; k < rows; k++)
++ for(Index i = 0; i < depth; i++)
+ {
+- if (UseLhs) {
++ blockA[ri] = lhs(i, j);
++ ri += 1;
++ }
++
++ if(PanelMode) ri += stride - depth;
++ }
++ } else {
++ if (j < rows)
++ {
++ if(PanelMode) ri += offset*(rows - j);
++
++ for(Index i = 0; i < depth; i++)
++ {
++ Index k = j;
++ for(; k < rows; k++)
++ {
+ blockA[ri] = lhs(k, i);
+- } else {
+- blockA[ri] = lhs(i, k);
++ ri += 1;
+ }
+- ri += 1;
+ }
+ }
+ }
+@@ -682,7 +692,7 @@ struct dhs_pack(j, i + 1);
+ }
+
+- storeBlock(blockA + ri, block);
++ storeBlock(blockA + ri, block);
+
+ ri += 2*vectorSize;
+ }
+@@ -759,7 +769,7 @@ struct dhs_pack(i + 1, j + 0); //[b1 b2]
+ block.packet[3] = rhs.template loadPacket(i + 1, j + 2); //[b3 b4]
+
+- storeBlock(blockB + ri, block);
++ storeBlock(blockB + ri, block);
+ }
+
+ ri += 4*vectorSize;
+@@ -790,19 +800,17 @@ struct dhs_pack(blockAt + rir, blockr);
+- storeBlock(blockAt + rii, blocki);
++ storeBlock(blockAt + rir, blockr);
++ storeBlock(blockAt + rii, blocki);
+
+ rir += 2*vectorSize;
+ rii += 2*vectorSize;
+@@ -943,7 +951,7 @@ struct dhs_cpack cblock;
+ PacketBlock blockr, blocki;
+
+- bload(cblock, rhs, i, j);
++ bload(cblock, rhs, i, j);
+
+ blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
+ blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64);
+@@ -957,8 +965,8 @@ struct dhs_cpack(blockBt + rir, blockr);
+- storeBlock(blockBt + rii, blocki);
++ storeBlock(blockBt + rir, blockr);
++ storeBlock(blockBt + rii, blocki);
+
+ rir += 2*vectorSize;
+ rii += 2*vectorSize;
+@@ -967,27 +975,26 @@ struct dhs_cpack
+-EIGEN_ALWAYS_INLINE void pger_common(PacketBlock* acc, const Packet& lhsV, const Packet* rhsV)
+-{
+- if(NegativeAccumulate)
+- {
+- acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]);
+- acc->packet[1] = vec_nmsub(lhsV, rhsV[1], acc->packet[1]);
+- acc->packet[2] = vec_nmsub(lhsV, rhsV[2], acc->packet[2]);
+- acc->packet[3] = vec_nmsub(lhsV, rhsV[3], acc->packet[3]);
+- } else {
+- acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]);
+- acc->packet[1] = vec_madd(lhsV, rhsV[1], acc->packet[1]);
+- acc->packet[2] = vec_madd(lhsV, rhsV[2], acc->packet[2]);
+- acc->packet[3] = vec_madd(lhsV, rhsV[3], acc->packet[3]);
+- }
+-}
+-
+-template
+-EIGEN_ALWAYS_INLINE void pger_common(PacketBlock* acc, const Packet& lhsV, const Packet* rhsV)
++template
++EIGEN_ALWAYS_INLINE void pger_common(PacketBlock* acc, const Packet& lhsV, const Packet* rhsV)
+ {
+ if(NegativeAccumulate)
+ {
+ acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]);
++ if (N > 1) {
++ acc->packet[1] = vec_nmsub(lhsV, rhsV[1], acc->packet[1]);
++ }
++ if (N > 2) {
++ acc->packet[2] = vec_nmsub(lhsV, rhsV[2], acc->packet[2]);
++ }
++ if (N > 3) {
++ acc->packet[3] = vec_nmsub(lhsV, rhsV[3], acc->packet[3]);
++ }
+ } else {
+ acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]);
++ if (N > 1) {
++ acc->packet[1] = vec_madd(lhsV, rhsV[1], acc->packet[1]);
++ }
++ if (N > 2) {
++ acc->packet[2] = vec_madd(lhsV, rhsV[2], acc->packet[2]);
++ }
++ if (N > 3) {
++ acc->packet[3] = vec_madd(lhsV, rhsV[3], acc->packet[3]);
++ }
+ }
+ }
+
+@@ -1030,11 +1038,11 @@ EIGEN_ALWAYS_INLINE void pger(PacketBlock* acc, const Scalar* lhs, con
+ {
+ Packet lhsV = pload(lhs);
+
+- pger_common(acc, lhsV, rhsV);
++ pger_common(acc, lhsV, rhsV);
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs, Packet &lhsV, Index remaining_rows)
++template
++EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs, Packet &lhsV)
+ {
+ #ifdef _ARCH_PWR9
+ lhsV = vec_xl_len((Scalar *)lhs, remaining_rows * sizeof(Scalar));
+@@ -1046,32 +1054,32 @@ EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs, Packet &lhsV, In
+ #endif
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void pger(PacketBlock* acc, const Scalar* lhs, const Packet* rhsV, Index remaining_rows)
++template
++EIGEN_ALWAYS_INLINE void pger(PacketBlock* acc, const Scalar* lhs, const Packet* rhsV)
+ {
+ Packet lhsV;
+- loadPacketRemaining(lhs, lhsV, remaining_rows);
++ loadPacketRemaining(lhs, lhsV);
+
+- pger_common(acc, lhsV, rhsV);
++ pger_common(acc, lhsV, rhsV);
+ }
+
+ // 512-bits rank1-update of complex acc. It takes decoupled accumulators as entries. It also takes cares of mixed types real * complex and complex * real.
+ template
+ EIGEN_ALWAYS_INLINE void pgerc_common(PacketBlock* accReal, PacketBlock* accImag, const Packet &lhsV, const Packet &lhsVi, const Packet* rhsV, const Packet* rhsVi)
+ {
+- pger_common(accReal, lhsV, rhsV);
++ pger_common(accReal, lhsV, rhsV);
+ if(LhsIsReal)
+ {
+- pger_common(accImag, lhsV, rhsVi);
++ pger_common(accImag, lhsV, rhsVi);
+ EIGEN_UNUSED_VARIABLE(lhsVi);
+ } else {
+ if (!RhsIsReal) {
+- pger_common(accReal, lhsVi, rhsVi);
+- pger_common(accImag, lhsV, rhsVi);
++ pger_common(accReal, lhsVi, rhsVi);
++ pger_common(accImag, lhsV, rhsVi);
+ } else {
+ EIGEN_UNUSED_VARIABLE(rhsVi);
+ }
+- pger_common(accImag, lhsVi, rhsV);
++ pger_common(accImag, lhsVi, rhsV);
+ }
+ }
+
+@@ -1086,8 +1094,8 @@ EIGEN_ALWAYS_INLINE void pgerc(PacketBlock* accReal, PacketBlock(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, Packet &lhsV, Packet &lhsVi, Index remaining_rows)
++template
++EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, Packet &lhsV, Packet &lhsVi)
+ {
+ #ifdef _ARCH_PWR9
+ lhsV = vec_xl_len((Scalar *)lhs_ptr, remaining_rows * sizeof(Scalar));
+@@ -1103,11 +1111,11 @@ EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs_ptr, const Scalar
+ #endif
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void pgerc(PacketBlock* accReal, PacketBlock* accImag, const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, const Packet* rhsV, const Packet* rhsVi, Index remaining_rows)
++template
++EIGEN_ALWAYS_INLINE void pgerc(PacketBlock* accReal, PacketBlock* accImag, const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, const Packet* rhsV, const Packet* rhsVi)
+ {
+ Packet lhsV, lhsVi;
+- loadPacketRemaining(lhs_ptr, lhs_ptr_imag, lhsV, lhsVi, remaining_rows);
++ loadPacketRemaining(lhs_ptr, lhs_ptr_imag, lhsV, lhsVi);
+
+ pgerc_common(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);
+ }
+@@ -1119,132 +1127,142 @@ EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs)
+ }
+
+ // Zero the accumulator on PacketBlock.
+-template
+-EIGEN_ALWAYS_INLINE void bsetzero(PacketBlock& acc)
+-{
+- acc.packet[0] = pset1((Scalar)0);
+- acc.packet[1] = pset1((Scalar)0);
+- acc.packet[2] = pset1((Scalar)0);
+- acc.packet[3] = pset1((Scalar)0);
+-}
+-
+-template
+-EIGEN_ALWAYS_INLINE void bsetzero(PacketBlock& acc)
++template
++EIGEN_ALWAYS_INLINE void bsetzero(PacketBlock& acc)
+ {
+ acc.packet[0] = pset1((Scalar)0);
++ if (N > 1) {
++ acc.packet[1] = pset1((Scalar)0);
++ }
++ if (N > 2) {
++ acc.packet[2] = pset1((Scalar)0);
++ }
++ if (N > 3) {
++ acc.packet[3] = pset1((Scalar)0);
++ }
+ }
+
+ // Scale the PacketBlock vectors by alpha.
+-template
+-EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha)
+-{
+- acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]);
+- acc.packet[1] = pmadd(pAlpha, accZ.packet[1], acc.packet[1]);
+- acc.packet[2] = pmadd(pAlpha, accZ.packet[2], acc.packet[2]);
+- acc.packet[3] = pmadd(pAlpha, accZ.packet[3], acc.packet[3]);
+-}
+-
+-template
+-EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha)
++template
++EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha)
+ {
+ acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]);
++ if (N > 1) {
++ acc.packet[1] = pmadd(pAlpha, accZ.packet[1], acc.packet[1]);
++ }
++ if (N > 2) {
++ acc.packet[2] = pmadd(pAlpha, accZ.packet[2], acc.packet[2]);
++ }
++ if (N > 3) {
++ acc.packet[3] = pmadd(pAlpha, accZ.packet[3], acc.packet[3]);
++ }
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha)
+-{
+- acc.packet[0] = pmul(accZ.packet[0], pAlpha);
+- acc.packet[1] = pmul(accZ.packet[1], pAlpha);
+- acc.packet[2] = pmul(accZ.packet[2], pAlpha);
+- acc.packet[3] = pmul(accZ.packet[3], pAlpha);
+-}
+-
+-template
+-EIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha)
++template
++EIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha)
+ {
+ acc.packet[0] = pmul(accZ.packet[0], pAlpha);
++ if (N > 1) {
++ acc.packet[1] = pmul(accZ.packet[1], pAlpha);
++ }
++ if (N > 2) {
++ acc.packet[2] = pmul(accZ.packet[2], pAlpha);
++ }
++ if (N > 3) {
++ acc.packet[3] = pmul(accZ.packet[3], pAlpha);
++ }
+ }
+
+ // Complex version of PacketBlock scaling.
+ template
+ EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag)
+ {
+- bscalec_common(cReal, aReal, bReal);
++ bscalec_common(cReal, aReal, bReal);
+
+- bscalec_common(cImag, aImag, bReal);
++ bscalec_common(cImag, aImag, bReal);
+
+- pger_common(&cReal, bImag, aImag.packet);
++ pger_common(&cReal, bImag, aImag.packet);
+
+- pger_common(&cImag, bImag, aReal.packet);
++ pger_common(&cImag, bImag, aReal.packet);
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void band(PacketBlock& acc, const Packet& pMask)
++template
++EIGEN_ALWAYS_INLINE void band(PacketBlock& acc, const Packet& pMask)
+ {
+ acc.packet[0] = pand(acc.packet[0], pMask);
+- acc.packet[1] = pand(acc.packet[1], pMask);
+- acc.packet[2] = pand(acc.packet[2], pMask);
+- acc.packet[3] = pand(acc.packet[3], pMask);
++ if (N > 1) {
++ acc.packet[1] = pand(acc.packet[1], pMask);
++ }
++ if (N > 2) {
++ acc.packet[2] = pand(acc.packet[2], pMask);
++ }
++ if (N > 3) {
++ acc.packet[3] = pand(acc.packet[3], pMask);
++ }
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag, const Packet& pMask)
++template
++EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag, const Packet& pMask)
+ {
+- band(aReal, pMask);
+- band(aImag, pMask);
++ band(aReal, pMask);
++ band(aImag, pMask);
+
+- bscalec(aReal, aImag, bReal, bImag, cReal, cImag);
++ bscalec(aReal, aImag, bReal, bImag, cReal, cImag);
+ }
+
+ // Load a PacketBlock, the N parameters make tunning gemm easier so we can add more accumulators as needed.
+-template
+-EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col)
+-{
+- if (StorageOrder == RowMajor) {
+- acc.packet[0] = res.template loadPacket(row + 0, col + N*accCols);
+- acc.packet[1] = res.template loadPacket(row + 1, col + N*accCols);
+- acc.packet[2] = res.template loadPacket(row + 2, col + N*accCols);
+- acc.packet[3] = res.template loadPacket(row + 3, col + N*accCols);
+- } else {
+- acc.packet[0] = res.template loadPacket(row + N*accCols, col + 0);
+- acc.packet[1] = res.template loadPacket(row + N*accCols, col + 1);
+- acc.packet[2] = res.template loadPacket(row + N*accCols, col + 2);
+- acc.packet[3] = res.template loadPacket(row + N*accCols, col + 3);
+- }
+-}
+-
+-// An overload of bload when you have a PacketBLock with 8 vectors.
+-template
+-EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col)
++template
++EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col)
+ {
+ if (StorageOrder == RowMajor) {
+- acc.packet[0] = res.template loadPacket(row + 0, col + N*accCols);
+- acc.packet[1] = res.template loadPacket(row + 1, col + N*accCols);
+- acc.packet[2] = res.template loadPacket(row + 2, col + N*accCols);
+- acc.packet[3] = res.template loadPacket(row + 3, col + N*accCols);
+- acc.packet[4] = res.template loadPacket(row + 0, col + (N+1)*accCols);
+- acc.packet[5] = res.template loadPacket(row + 1, col + (N+1)*accCols);
+- acc.packet[6] = res.template loadPacket(row + 2, col + (N+1)*accCols);
+- acc.packet[7] = res.template loadPacket(row + 3, col + (N+1)*accCols);
++ acc.packet[0] = res.template loadPacket(row + 0, col);
++ if (N > 1) {
++ acc.packet[1] = res.template loadPacket(row + 1, col);
++ }
++ if (N > 2) {
++ acc.packet[2] = res.template loadPacket(row + 2, col);
++ }
++ if (N > 3) {
++ acc.packet[3] = res.template loadPacket(row + 3, col);
++ }
++ if (Complex) {
++ acc.packet[0+N] = res.template loadPacket(row + 0, col + accCols);
++ if (N > 1) {
++ acc.packet[1+N] = res.template loadPacket(row + 1, col + accCols);
++ }
++ if (N > 2) {
++ acc.packet[2+N] = res.template loadPacket(row + 2, col + accCols);
++ }
++ if (N > 3) {
++ acc.packet[3+N] = res.template loadPacket(row + 3, col + accCols);
++ }
++ }
+ } else {
+- acc.packet[0] = res.template loadPacket(row + N*accCols, col + 0);
+- acc.packet[1] = res.template loadPacket(row + N*accCols, col + 1);
+- acc.packet[2] = res.template loadPacket(row + N*accCols, col + 2);
+- acc.packet[3] = res.template loadPacket(row + N*accCols, col + 3);
+- acc.packet[4] = res.template loadPacket(row + (N+1)*accCols, col + 0);
+- acc.packet[5] = res.template loadPacket(row + (N+1)*accCols, col + 1);
+- acc.packet[6] = res.template loadPacket(row + (N+1)*accCols, col + 2);
+- acc.packet[7] = res.template loadPacket(row + (N+1)*accCols, col + 3);
++ acc.packet[0] = res.template loadPacket(row, col + 0);
++ if (N > 1) {
++ acc.packet[1] = res.template loadPacket(row, col + 1);
++ }
++ if (N > 2) {
++ acc.packet[2] = res.template loadPacket(row, col + 2);
++ }
++ if (N > 3) {
++ acc.packet[3] = res.template loadPacket(row, col + 3);
++ }
++ if (Complex) {
++ acc.packet[0+N] = res.template loadPacket(row + accCols, col + 0);
++ if (N > 1) {
++ acc.packet[1+N] = res.template loadPacket(row + accCols, col + 1);
++ }
++ if (N > 2) {
++ acc.packet[2+N] = res.template loadPacket(row + accCols, col + 2);
++ }
++ if (N > 3) {
++ acc.packet[3+N] = res.template loadPacket(row + accCols, col + 3);
++ }
++ }
+ }
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col)
+-{
+- acc.packet[0] = res.template loadPacket(row + N*accCols, col + 0);
+- acc.packet[1] = res.template loadPacket(row + (N+1)*accCols, col + 0);
+-}
+-
+ const static Packet4i mask41 = { -1, 0, 0, 0 };
+ const static Packet4i mask42 = { -1, -1, 0, 0 };
+ const static Packet4i mask43 = { -1, -1, -1, 0 };
+@@ -1275,22 +1293,44 @@ EIGEN_ALWAYS_INLINE Packet2d bmask(const int remaining_rows)
+ }
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha, const Packet& pMask)
++template
++EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha, const Packet& pMask)
+ {
+- band(accZ, pMask);
++ band(accZ, pMask);
+
+- bscale(acc, accZ, pAlpha);
++ bscale(acc, accZ, pAlpha);
+ }
+
+-template
+-EIGEN_ALWAYS_INLINE void pbroadcast4_old(const __UNPACK_TYPE__(Packet)* a, Packet& a0, Packet& a1, Packet& a2, Packet& a3)
++template