- Update to version 0.3.14
common: * Fixed a race condition on thread shutdown in non-OpenMP builds * Fixed custom BUFFERSIZE option getting ignored in gmake builds * Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms * Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT * Improved performance of OMATCOPY_RT across all platforms * Changed perl scripts to use env instead of a hardcoded /usr/bin/perl * Fixed potential misreading of the GCC compiler version in the build scripts * Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477) * Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335) RISC V: * Fixed compilation on RISCV (missing entry in getarch) POWER: * Fixed compilation for DYNAMIC_ARCH with clang and with older gcc versions * Added support for compilation on FreeBSD/ppc64le * Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL * Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM * Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10 * Improved SCOPY and CCOPY performance on POWER10 * Improved SGEMM and DGEMM performance on POWER10 * Added support for compilation with the NVIDIA HPC compiler x86_64: * Added an optimized bfloat16 GEMM kernel for Cooperlake * Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus * Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus * Added support for compilation with the NAG Fortran compiler * Fixed recognition of the AMD AOCC compiler * Fixed compilation for DYNAMIC_ARCH with clang on Windows * Added support for running the BLAS/CBLAS tests on Windows OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=120
This commit is contained in:
parent
52ad410a6c
commit
4f9678748e
@ -1,26 +0,0 @@
|
||||
From 63fa3c3f8f869c585d8c5aef6f580a967b64405c Mon Sep 17 00:00:00 2001
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed, 20 Jan 2021 15:41:04 +0100
|
||||
Subject: [PATCH 1/2] Require gcc 11 for builtin_cpu_is(power10)
|
||||
|
||||
fixes #3074
|
||||
---
|
||||
driver/others/dynamic_power.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c
|
||||
index f9feeb6e..18f16f83 100644
|
||||
--- a/driver/others/dynamic_power.c
|
||||
+++ b/driver/others/dynamic_power.c
|
||||
@@ -202,7 +202,7 @@ static gotoblas_t *get_coretype(void) {
|
||||
return &gotoblas_POWER10;
|
||||
#endif
|
||||
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
|
||||
-#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
+#if (!defined __GNUC__) || ( __GNUC__ >= 11)
|
||||
if (__builtin_cpu_is("power10"))
|
||||
return &gotoblas_POWER9;
|
||||
#endif
|
||||
--
|
||||
2.26.2
|
||||
|
@ -1,26 +0,0 @@
|
||||
From b94dab5250469d4d30d1a21bf0e0b78eea3cf286 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed, 20 Jan 2021 21:34:36 +0100
|
||||
Subject: [PATCH 2/2] patch to support power10 in builtin_cpu_is was backported
|
||||
to gcc 10.2, so allow that as well
|
||||
|
||||
---
|
||||
driver/others/dynamic_power.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c
|
||||
index 18f16f83..b8e5840a 100644
|
||||
--- a/driver/others/dynamic_power.c
|
||||
+++ b/driver/others/dynamic_power.c
|
||||
@@ -202,7 +202,7 @@ static gotoblas_t *get_coretype(void) {
|
||||
return &gotoblas_POWER10;
|
||||
#endif
|
||||
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
|
||||
-#if (!defined __GNUC__) || ( __GNUC__ >= 11)
|
||||
+#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
|
||||
if (__builtin_cpu_is("power10"))
|
||||
return &gotoblas_POWER9;
|
||||
#endif
|
||||
--
|
||||
2.26.2
|
||||
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:79197543b17cc314b7e43f7a33148c308b0807cd6381ee77f77e15acf3e6459e
|
||||
size 12437434
|
3
OpenBLAS-0.3.14.tar.gz
Normal file
3
OpenBLAS-0.3.14.tar.gz
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d381935d26f9cae8e4bbd7d7f278435adf8e3a90920edf284bb9ad789ee9ad60
|
||||
size 12460465
|
@ -1,7 +1,7 @@
|
||||
Index: OpenBLAS-0.3.11/exports/Makefile
|
||||
Index: OpenBLAS-0.3.14/exports/Makefile
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.11.orig/exports/Makefile
|
||||
+++ OpenBLAS-0.3.11/exports/Makefile
|
||||
--- OpenBLAS-0.3.14.orig/exports/Makefile
|
||||
+++ OpenBLAS-0.3.14/exports/Makefile
|
||||
@@ -179,6 +179,7 @@ else ifeq ($(F_COMPILER), FLANG)
|
||||
else
|
||||
ifneq ($(C_COMPILER), LSB)
|
||||
|
@ -11,36 +11,13 @@ because:
|
||||
|
||||
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
---
|
||||
Makefile.system | 4 ++++
|
||||
driver/others/dynamic_power.c | 11 +++++++++++
|
||||
2 files changed, 15 insertions(+)
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
Index: OpenBLAS-0.3.13/Makefile.system
|
||||
Index: OpenBLAS-0.3.14/driver/others/dynamic_power.c
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.13.orig/Makefile.system
|
||||
+++ OpenBLAS-0.3.13/Makefile.system
|
||||
@@ -665,6 +665,9 @@ endif # ARCH zarch
|
||||
ifeq ($(ARCH), power)
|
||||
DYNAMIC_CORE = POWER6
|
||||
DYNAMIC_CORE += POWER8
|
||||
+ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
||||
+$(info, OpenBLAS: for big endian limit to POWER8 kernels.)
|
||||
+else
|
||||
ifneq ($(C_COMPILER), GCC)
|
||||
DYNAMIC_CORE += POWER9
|
||||
DYNAMIC_CORE += POWER10
|
||||
@@ -690,6 +693,7 @@ $(info, OpenBLAS: Your gcc version is to
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
+endif
|
||||
|
||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||
ifndef DYNAMIC_CORE
|
||||
Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.13.orig/driver/others/dynamic_power.c
|
||||
+++ OpenBLAS-0.3.13/driver/others/dynamic_power.c
|
||||
--- OpenBLAS-0.3.14.orig/driver/others/dynamic_power.c
|
||||
+++ OpenBLAS-0.3.14/driver/others/dynamic_power.c
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
extern gotoblas_t gotoblas_POWER6;
|
||||
@ -57,9 +34,9 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c
|
||||
|
||||
extern void openblas_warning(int verbose, const char *msg);
|
||||
|
||||
@@ -29,12 +31,14 @@ static char *corename[] = {
|
||||
char *gotoblas_corename(void) {
|
||||
@@ -31,12 +33,14 @@ char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
||||
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
@ -72,8 +49,8 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c
|
||||
return corename[0];
|
||||
}
|
||||
|
||||
@@ -44,6 +48,10 @@ static gotoblas_t *get_coretype(void) {
|
||||
return &gotoblas_POWER6;
|
||||
@@ -200,6 +204,10 @@ static gotoblas_t *get_coretype(void) {
|
||||
#endif
|
||||
if (__builtin_cpu_is("power8"))
|
||||
return &gotoblas_POWER8;
|
||||
+ /* Fall back to the POWER8 implementation for big endian */
|
||||
@ -83,7 +60,7 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
if (__builtin_cpu_is("power9"))
|
||||
return &gotoblas_POWER9;
|
||||
@@ -57,6 +65,7 @@ static gotoblas_t *get_coretype(void) {
|
||||
@@ -213,6 +221,7 @@ static gotoblas_t *get_coretype(void) {
|
||||
if (__builtin_cpu_is("power10"))
|
||||
return &gotoblas_POWER9;
|
||||
#endif
|
||||
@ -91,9 +68,9 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -79,12 +88,14 @@ static gotoblas_t *force_coretype(char *
|
||||
{
|
||||
@@ -237,12 +246,14 @@ static gotoblas_t *force_coretype(char *
|
||||
case 1: return (&gotoblas_POWER6);
|
||||
#endif
|
||||
case 2: return (&gotoblas_POWER8);
|
||||
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
|
@ -1,8 +1,8 @@
|
||||
Index: OpenBLAS-0.3.11/c_check
|
||||
Index: OpenBLAS-0.3.14/c_check
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.11.orig/c_check
|
||||
+++ OpenBLAS-0.3.11/c_check
|
||||
@@ -11,7 +11,7 @@ $hostarch = "x86_64" if ($hostarch eq "a
|
||||
--- OpenBLAS-0.3.14.orig/c_check
|
||||
+++ OpenBLAS-0.3.14/c_check
|
||||
@@ -12,7 +12,7 @@ $hostarch = "x86_64" if ($hostarch eq "a
|
||||
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
||||
@ -11,10 +11,10 @@ Index: OpenBLAS-0.3.11/c_check
|
||||
|
||||
#$tmpf = new File::Temp( UNLINK => 1 );
|
||||
$binary = $ENV{"BINARY"};
|
||||
Index: OpenBLAS-0.3.11/ctest.c
|
||||
Index: OpenBLAS-0.3.14/ctest.c
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.11.orig/ctest.c
|
||||
+++ OpenBLAS-0.3.11/ctest.c
|
||||
--- OpenBLAS-0.3.14.orig/ctest.c
|
||||
+++ OpenBLAS-0.3.14/ctest.c
|
||||
@@ -117,7 +117,7 @@ ARCH_X86_64
|
||||
ARCH_POWER
|
||||
#endif
|
||||
@ -24,11 +24,11 @@ Index: OpenBLAS-0.3.11/ctest.c
|
||||
ARCH_ZARCH
|
||||
#endif
|
||||
|
||||
Index: OpenBLAS-0.3.11/getarch.c
|
||||
Index: OpenBLAS-0.3.14/getarch.c
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.11.orig/getarch.c
|
||||
+++ OpenBLAS-0.3.11/getarch.c
|
||||
@@ -1266,7 +1266,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
--- OpenBLAS-0.3.14.orig/getarch.c
|
||||
+++ OpenBLAS-0.3.14/getarch.c
|
||||
@@ -1333,7 +1333,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
|
@ -1,3 +1,55 @@
|
||||
-------------------------------------------------------------------
|
||||
Thu Mar 18 08:05:58 UTC 2021 - Ismail Dönmez <idonmez@suse.com>
|
||||
|
||||
- Update to version 0.3.14
|
||||
common:
|
||||
* Fixed a race condition on thread shutdown in non-OpenMP builds
|
||||
* Fixed custom BUFFERSIZE option getting ignored in gmake builds
|
||||
* Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms
|
||||
* Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
|
||||
* Improved performance of OMATCOPY_RT across all platforms
|
||||
* Changed perl scripts to use env instead of a hardcoded /usr/bin/perl
|
||||
* Fixed potential misreading of the GCC compiler version in the build scripts
|
||||
* Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
|
||||
* Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335)
|
||||
|
||||
RISC V:
|
||||
* Fixed compilation on RISCV (missing entry in getarch)
|
||||
|
||||
POWER:
|
||||
* Fixed compilation for DYNAMIC_ARCH with clang and with older gcc versions
|
||||
* Added support for compilation on FreeBSD/ppc64le
|
||||
* Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
|
||||
* Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
|
||||
* Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
|
||||
* Improved SCOPY and CCOPY performance on POWER10
|
||||
* Improved SGEMM and DGEMM performance on POWER10
|
||||
* Added support for compilation with the NVIDIA HPC compiler
|
||||
|
||||
x86_64:
|
||||
* Added an optimized bfloat16 GEMM kernel for Cooperlake
|
||||
* Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus
|
||||
* Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus
|
||||
* Added support for compilation with the NAG Fortran compiler
|
||||
* Fixed recognition of the AMD AOCC compiler
|
||||
* Fixed compilation for DYNAMIC_ARCH with clang on Windows
|
||||
* Added support for running the BLAS/CBLAS tests on Windows
|
||||
* Fixed signatures of the tls callback functions for Windows x64
|
||||
* Fixed various issues with fma intrinsics support handling
|
||||
|
||||
ARM:
|
||||
* Support compilation for embedded Cortex M4 targets via a new option EMBEDDED
|
||||
|
||||
ARM64:
|
||||
* Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf
|
||||
* Added support for the DYNAMIC_LIST option
|
||||
* Added support for compilation with the NVIDIA HPC compiler
|
||||
* Added support for compiling with the NAG Fortran compiler
|
||||
|
||||
- Remove 0001-Require-gcc-11-for-builtin_cpu_is-power10.patch
|
||||
0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch
|
||||
Upstream fixed in a different way.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Feb 4 11:49:11 UTC 2021 - Michel Normand <normand@linux.vnet.ibm.com>
|
||||
|
||||
|
@ -18,8 +18,8 @@
|
||||
|
||||
%global flavor @BUILD_FLAVOR@%{nil}
|
||||
|
||||
%define _vers 0_3_13
|
||||
%define vers 0.3.13
|
||||
%define _vers 0_3_14
|
||||
%define vers 0.3.14
|
||||
%define pname openblas
|
||||
|
||||
%bcond_with ringdisabled
|
||||
@ -167,9 +167,6 @@ URL: http://www.openblas.net
|
||||
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz
|
||||
Source1: README.SUSE
|
||||
Source2: README.HPC.SUSE
|
||||
# Temporarily - delete with next version update
|
||||
Patch1: 0001-Require-gcc-11-for-builtin_cpu_is-power10.patch
|
||||
Patch2: 0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch
|
||||
# PATCH-FIX-UPSTREAM openblas-noexecstack.patch
|
||||
Patch101: openblas-noexecstack.patch
|
||||
# PATCH port
|
||||
@ -182,7 +179,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: update-alternatives
|
||||
Requires(post): update-alternatives
|
||||
Requires(preun): update-alternatives
|
||||
Requires(preun):update-alternatives
|
||||
%else
|
||||
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
|
||||
BuildRequires: lua-lmod
|
||||
@ -198,7 +195,7 @@ Summary: An optimized BLAS library based on GotoBLAS2, %{flavor} version
|
||||
Group: System/Libraries
|
||||
%if %{without hpc}
|
||||
Requires(post): update-alternatives
|
||||
Requires(preun): update-alternatives
|
||||
Requires(preun):update-alternatives
|
||||
%if "%flavor" == "serial"
|
||||
Obsoletes: lib%{pname}%{so_v} < %{version}
|
||||
Provides: lib%{pname}%{so_v} = %{version}
|
||||
@ -284,8 +281,6 @@ This package contains headers for OpenBLAS.
|
||||
%prep
|
||||
|
||||
%setup -q -n OpenBLAS-%{version}
|
||||
%patch1 -p1
|
||||
%patch2 -p1
|
||||
%patch101 -p1
|
||||
%patch102 -p1
|
||||
%patch103 -p1
|
||||
|
Loading…
x
Reference in New Issue
Block a user