openblas/openblas-ppc64be_up2_p8.patch
Ismail Dönmez 4f9678748e - Update to version 0.3.14
common:
  * Fixed a race condition on thread shutdown in non-OpenMP builds
  * Fixed custom BUFFERSIZE option getting ignored in gmake builds
  * Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms
  * Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
  * Improved performance of OMATCOPY_RT across all platforms
  * Changed perl scripts to use env instead of a hardcoded /usr/bin/perl
  * Fixed potential misreading of the GCC compiler version in the build scripts
  * Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
  * Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335)
  RISC-V:
  * Fixed compilation on RISC-V (missing entry in getarch)
  POWER:
  * Fixed compilation for DYNAMIC_ARCH with clang and with older gcc versions
  * Added support for compilation on FreeBSD/ppc64le
  * Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
  * Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
  * Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
  * Improved SCOPY and CCOPY performance on POWER10
  * Improved SGEMM and DGEMM performance on POWER10
  * Added support for compilation with the NVIDIA HPC compiler
  x86_64:
  * Added an optimized bfloat16 GEMM kernel for Cooperlake
  * Added CPUID autodetection for Intel Rocket Lake and Tiger Lake CPUs
  * Improved the performance of SASUM, DASUM, SROT, DROT on AMD Ryzen CPUs
  * Added support for compilation with the NAG Fortran compiler
  * Fixed recognition of the AMD AOCC compiler
  * Fixed compilation for DYNAMIC_ARCH with clang on Windows
  * Added support for running the BLAS/CBLAS tests on Windows

OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=120
2021-03-18 08:47:05 +00:00

86 lines
2.5 KiB
Diff

From: Michel Normand <normand@linux.vnet.ibm.com>
Subject: openblas ppc64be up2 p8
Date: Wed, 03 Feb 2021 15:39:25 +0100
openblas ppc64be up2 p8: restrict ppc64 big-endian (BE) builds to kernels up to POWER8
because:
* the openblas build has failed for ppc64 (BE) in openSUSE
since version 0.3.12
* ppc64 (BE) is not supported by IBM after POWER8.
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
---
driver/others/dynamic_power.c | 11 +++++++++++
1 file changed, 11 insertions(+)
Index: OpenBLAS-0.3.14/driver/others/dynamic_power.c
===================================================================
--- OpenBLAS-0.3.14.orig/driver/others/dynamic_power.c
+++ OpenBLAS-0.3.14/driver/others/dynamic_power.c
@@ -3,6 +3,7 @@
extern gotoblas_t gotoblas_POWER6;
extern gotoblas_t gotoblas_POWER8;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
extern gotoblas_t gotoblas_POWER9;
#endif
@@ -13,6 +14,7 @@ extern gotoblas_t gotoblas_POWER9;
#ifdef HAVE_P10_SUPPORT
extern gotoblas_t gotoblas_POWER10;
#endif
+#endif
extern void openblas_warning(int verbose, const char *msg);
@@ -31,12 +33,14 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_POWER6) return corename[1];
#endif
if (gotoblas == &gotoblas_POWER8) return corename[2];
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
if (gotoblas == &gotoblas_POWER9) return corename[3];
#endif
#ifdef HAVE_P10_SUPPORT
if (gotoblas == &gotoblas_POWER10) return corename[4];
#endif
+#endif
return corename[0];
}
@@ -200,6 +204,10 @@ static gotoblas_t *get_coretype(void) {
#endif
if (__builtin_cpu_is("power8"))
return &gotoblas_POWER8;
+ /* Fall back to the POWER8 implementation for big endian */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return &gotoblas_POWER8;
+#else
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
if (__builtin_cpu_is("power9"))
return &gotoblas_POWER9;
@@ -213,6 +221,7 @@ static gotoblas_t *get_coretype(void) {
if (__builtin_cpu_is("power10"))
return &gotoblas_POWER9;
#endif
+#endif
return NULL;
}
@@ -237,12 +246,14 @@ static gotoblas_t *force_coretype(char *
case 1: return (&gotoblas_POWER6);
#endif
case 2: return (&gotoblas_POWER8);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
case 3: return (&gotoblas_POWER9);
#endif
#ifdef HAVE_P10_SUPPORT
case 4: return (&gotoblas_POWER10);
#endif
+#endif
default: return NULL;
}
snprintf(message, 128, "Core not found: %s\n", coretype);