From 4f9678748ec962d1d72c7756cb0fd60cfb13ca68df282aeed43ff7a49aed95cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ismail=20D=C3=B6nmez?= Date: Thu, 18 Mar 2021 08:47:05 +0000 Subject: [PATCH] - Update to version 0.3.14 common: * Fixed a race condition on thread shutdown in non-OpenMP builds * Fixed custom BUFFERSIZE option getting ignored in gmake builds * Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms * Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT * Improved performance of OMATCOPY_RT across all platforms * Changed perl scripts to use env instead of a hardcoded /usr/bin/perl * Fixed potential misreading of the GCC compiler version in the build scripts * Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477) * Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335) RISC V: * Fixed compilation on RISCV (missing entry in getarch) POWER: * Fixed compilation for DYNAMIC_ARCH with clang and with older gcc versions * Added support for compilation on FreeBSD/ppc64le * Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL * Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM * Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10 * Improved SCOPY and CCOPY performance on POWER10 * Improved SGEMM and DGEMM performance on POWER10 * Added support for compilation with the NVIDIA HPC compiler x86_64: * Added an optimized bfloat16 GEMM kernel for Cooperlake * Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus * Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus * Added support for compilation with the NAG Fortran compiler * Fixed recognition of the AMD AOCC compiler * Fixed compilation for DYNAMIC_ARCH with clang on Windows * Added support for running the BLAS/CBLAS tests on Windows OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=120 --- ...re-gcc-11-for-builtin_cpu_is-power10.patch | 26 ---------- ...-power10-in-builtin_cpu_is-was-backp.patch | 26 ---------- OpenBLAS-0.3.13.tar.gz | 3 -- OpenBLAS-0.3.14.tar.gz | 3 ++ openblas-noexecstack.patch | 6 +-- openblas-ppc64be_up2_p8.patch | 45 ++++------------ openblas-s390.patch | 22 ++++---- openblas.changes | 52 +++++++++++++++++++ openblas.spec | 23 ++++---- 9 files changed, 89 insertions(+), 117 deletions(-) delete mode 100644 0001-Require-gcc-11-for-builtin_cpu_is-power10.patch delete mode 100644 0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch delete mode 100644 OpenBLAS-0.3.13.tar.gz create mode 100644 OpenBLAS-0.3.14.tar.gz diff --git a/0001-Require-gcc-11-for-builtin_cpu_is-power10.patch b/0001-Require-gcc-11-for-builtin_cpu_is-power10.patch deleted file mode 100644 index 8e7c200..0000000 --- a/0001-Require-gcc-11-for-builtin_cpu_is-power10.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 63fa3c3f8f869c585d8c5aef6f580a967b64405c Mon Sep 17 00:00:00 2001 -From: Martin Kroeker -Date: Wed, 20 Jan 2021 15:41:04 +0100 -Subject: [PATCH 1/2] Require gcc 11 for builtin_cpu_is(power10) - -fixes #3074 ---- - driver/others/dynamic_power.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c -index f9feeb6e..18f16f83 100644 ---- a/driver/others/dynamic_power.c -+++ b/driver/others/dynamic_power.c -@@ -202,7 +202,7 @@ static gotoblas_t *get_coretype(void) { - return &gotoblas_POWER10; - #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ --#if (!defined __GNUC__) || ( __GNUC__ >= 6) -+#if (!defined __GNUC__) || ( __GNUC__ >= 11) - if (__builtin_cpu_is("power10")) - return &gotoblas_POWER9; - #endif --- -2.26.2 - diff --git a/0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch b/0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch deleted file mode 100644 index bbeb11e..0000000 --- a/0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch +++ /dev/null @@ -1,26 +0,0 @@ -From b94dab5250469d4d30d1a21bf0e0b78eea3cf286 Mon Sep 17 00:00:00 2001 -From: Martin Kroeker -Date: Wed, 20 Jan 2021 21:34:36 +0100 -Subject: [PATCH 2/2] patch to support power10 in builtin_cpu_is was backported - to gcc 10.2, so allow that as wel - ---- - driver/others/dynamic_power.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c -index 18f16f83..b8e5840a 100644 ---- a/driver/others/dynamic_power.c -+++ b/driver/others/dynamic_power.c -@@ -202,7 +202,7 @@ static gotoblas_t *get_coretype(void) { - return &gotoblas_POWER10; - #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ --#if (!defined __GNUC__) || ( __GNUC__ >= 11) -+#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) - return &gotoblas_POWER9; - #endif --- -2.26.2 - diff --git a/OpenBLAS-0.3.13.tar.gz b/OpenBLAS-0.3.13.tar.gz deleted file mode 100644 index 5206b49..0000000 --- a/OpenBLAS-0.3.13.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79197543b17cc314b7e43f7a33148c308b0807cd6381ee77f77e15acf3e6459e -size 12437434 diff --git a/OpenBLAS-0.3.14.tar.gz b/OpenBLAS-0.3.14.tar.gz new file mode 100644 index 0000000..3ab6983 --- /dev/null +++ b/OpenBLAS-0.3.14.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d381935d26f9cae8e4bbd7d7f278435adf8e3a90920edf284bb9ad789ee9ad60 +size 12460465 diff --git a/openblas-noexecstack.patch b/openblas-noexecstack.patch index e6a675a..3261bab 100644 --- a/openblas-noexecstack.patch +++ b/openblas-noexecstack.patch @@ -1,7 +1,7 @@ -Index: OpenBLAS-0.3.11/exports/Makefile +Index: OpenBLAS-0.3.14/exports/Makefile =================================================================== ---- OpenBLAS-0.3.11.orig/exports/Makefile -+++ OpenBLAS-0.3.11/exports/Makefile +--- OpenBLAS-0.3.14.orig/exports/Makefile ++++ OpenBLAS-0.3.14/exports/Makefile @@ -179,6 +179,7 @@ else ifeq ($(F_COMPILER), FLANG) else ifneq ($(C_COMPILER), LSB) diff --git a/openblas-ppc64be_up2_p8.patch b/openblas-ppc64be_up2_p8.patch index c3cc832..05efbd1 100644 --- a/openblas-ppc64be_up2_p8.patch +++ b/openblas-ppc64be_up2_p8.patch @@ -11,36 +11,13 @@ because: Signed-off-by: Michel Normand --- - Makefile.system | 4 ++++ driver/others/dynamic_power.c | 11 +++++++++++ - 2 files changed, 15 insertions(+) + 1 files changed, 11 insertions(+) -Index: OpenBLAS-0.3.13/Makefile.system +Index: OpenBLAS-0.3.14/driver/others/dynamic_power.c =================================================================== ---- OpenBLAS-0.3.13.orig/Makefile.system -+++ OpenBLAS-0.3.13/Makefile.system -@@ -665,6 +665,9 @@ endif # ARCH zarch - ifeq ($(ARCH), power) - DYNAMIC_CORE = POWER6 - DYNAMIC_CORE += POWER8 -+ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) -+$(info, OpenBLAS: for big endian limit to POWER8 kernels.) -+else - ifneq ($(C_COMPILER), GCC) - DYNAMIC_CORE += POWER9 - DYNAMIC_CORE += POWER10 -@@ -690,6 +693,7 @@ $(info, OpenBLAS: Your gcc version is to - endif - endif - endif -+endif - - # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty - ifndef DYNAMIC_CORE -Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c -=================================================================== ---- OpenBLAS-0.3.13.orig/driver/others/dynamic_power.c -+++ OpenBLAS-0.3.13/driver/others/dynamic_power.c +--- OpenBLAS-0.3.14.orig/driver/others/dynamic_power.c ++++ OpenBLAS-0.3.14/driver/others/dynamic_power.c @@ -3,6 +3,7 @@ extern gotoblas_t gotoblas_POWER6; @@ -57,9 +34,9 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c extern void openblas_warning(int verbose, const char *msg); -@@ -29,12 +31,14 @@ static char *corename[] = { - char *gotoblas_corename(void) { +@@ -31,12 +33,14 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_POWER6) return corename[1]; + #endif if (gotoblas == &gotoblas_POWER8) return corename[2]; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #if (!defined __GNUC__) || ( __GNUC__ >= 6) @@ -72,8 +49,8 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c return corename[0]; } -@@ -44,6 +48,10 @@ static gotoblas_t *get_coretype(void) { - return &gotoblas_POWER6; +@@ -200,6 +204,10 @@ static gotoblas_t *get_coretype(void) { + #endif if (__builtin_cpu_is("power8")) return &gotoblas_POWER8; + /* Fall back to the POWER8 implementation for big endian */ @@ -83,7 +60,7 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c #if (!defined __GNUC__) || ( __GNUC__ >= 6) if (__builtin_cpu_is("power9")) return &gotoblas_POWER9; -@@ -57,6 +65,7 @@ static gotoblas_t *get_coretype(void) { +@@ -213,6 +221,7 @@ static gotoblas_t *get_coretype(void) { if (__builtin_cpu_is("power10")) return &gotoblas_POWER9; #endif @@ -91,9 +68,9 @@ Index: OpenBLAS-0.3.13/driver/others/dynamic_power.c return NULL; } -@@ -79,12 +88,14 @@ static gotoblas_t *force_coretype(char * - { +@@ -237,12 +246,14 @@ static gotoblas_t *force_coretype(char * case 1: return (&gotoblas_POWER6); + #endif case 2: return (&gotoblas_POWER8); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #if (!defined __GNUC__) || ( __GNUC__ >= 6) diff --git a/openblas-s390.patch b/openblas-s390.patch index 9df7c32..2def1b0 100644 --- a/openblas-s390.patch +++ b/openblas-s390.patch @@ -1,8 +1,8 @@ -Index: OpenBLAS-0.3.11/c_check +Index: OpenBLAS-0.3.14/c_check =================================================================== ---- OpenBLAS-0.3.11.orig/c_check -+++ OpenBLAS-0.3.11/c_check -@@ -11,7 +11,7 @@ $hostarch = "x86_64" if ($hostarch eq "a +--- OpenBLAS-0.3.14.orig/c_check ++++ OpenBLAS-0.3.14/c_check +@@ -12,7 +12,7 @@ $hostarch = "x86_64" if ($hostarch eq "a $hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); $hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); @@ -11,10 +11,10 @@ Index: OpenBLAS-0.3.11/c_check #$tmpf = new File::Temp( UNLINK => 1 ); $binary = $ENV{"BINARY"}; -Index: OpenBLAS-0.3.11/ctest.c +Index: OpenBLAS-0.3.14/ctest.c =================================================================== ---- OpenBLAS-0.3.11.orig/ctest.c -+++ OpenBLAS-0.3.11/ctest.c +--- OpenBLAS-0.3.14.orig/ctest.c ++++ OpenBLAS-0.3.14/ctest.c @@ -117,7 +117,7 @@ ARCH_X86_64 ARCH_POWER #endif @@ -24,11 +24,11 @@ Index: OpenBLAS-0.3.11/ctest.c ARCH_ZARCH #endif -Index: OpenBLAS-0.3.11/getarch.c +Index: OpenBLAS-0.3.14/getarch.c =================================================================== ---- OpenBLAS-0.3.11.orig/getarch.c -+++ OpenBLAS-0.3.11/getarch.c -@@ -1266,7 +1266,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF +--- OpenBLAS-0.3.14.orig/getarch.c ++++ OpenBLAS-0.3.14/getarch.c +@@ -1333,7 +1333,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF #define OPENBLAS_SUPPORTED #endif diff --git a/openblas.changes b/openblas.changes index 6e3b973..8384fc8 100644 --- a/openblas.changes +++ b/openblas.changes @@ -1,3 +1,55 @@ +------------------------------------------------------------------- +Thu Mar 18 08:05:58 UTC 2021 - Ismail Dönmez + +- Update to version 0.3.14 + common: + * Fixed a race condition on thread shutdown in non-OpenMP builds + * Fixed custom BUFFERSIZE option getting ignored in gmake builds + * Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms + * Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT + * Improved performance of OMATCOPY_RT across all platforms + * Changed perl scripts to use env instead of a hardcoded /usr/bin/perl + * Fixed potential misreading of the GCC compiler version in the build scripts + * Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477) + * Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335) + + RISC V: + * Fixed compilation on RISCV (missing entry in getarch) + + POWER: + * Fixed compilation for DYNAMIC_ARCH with clang and with older gcc versions + * Added support for compilation on FreeBSD/ppc64le + * Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL + * Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM + * Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10 + * Improved SCOPY and CCOPY performance on POWER10 + * Improved SGEMM and DGEMM performance on POWER10 + * Added support for compilation with the NVIDIA HPC compiler + + x86_64: + * Added an optimized bfloat16 GEMM kernel for Cooperlake + * Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus + * Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus + * Added support for compilation with the NAG Fortran compiler + * Fixed recognition of the AMD AOCC compiler + * Fixed compilation for DYNAMIC_ARCH with clang on Windows + * Added support for running the BLAS/CBLAS tests on Windows + * Fixed signatures of the tls callback functions for Windows x64 + * Fixed various issues with fma intrinsics support handling + + ARM: + * Support compilation for embedded Cortex M4 targets via a new option EMBEDDED + + ARM64: + * Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf + * Added support for the DYNAMIC_LIST option + * Added support for compilation with the NVIDIA HPC compiler + * Added support for compiling with the NAG Fortran compiler + +- Remove 0001-Require-gcc-11-for-builtin_cpu_is-power10.patch + 0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch + Upstream fixed in a different way. + ------------------------------------------------------------------- Thu Feb 4 11:49:11 UTC 2021 - Michel Normand diff --git a/openblas.spec b/openblas.spec index 1cd4f6c..6957731 100644 --- a/openblas.spec +++ b/openblas.spec @@ -18,8 +18,8 @@ %global flavor @BUILD_FLAVOR@%{nil} -%define _vers 0_3_13 -%define vers 0.3.13 +%define _vers 0_3_14 +%define vers 0.3.14 %define pname openblas %bcond_with ringdisabled @@ -37,10 +37,10 @@ ExclusiveArch: do_not_build # we build devel packages only from one flavor %define build_devel 1 %{bcond_with hpc} -%endif +%endif %if "%flavor" == "pthreads" -%define build_flags USE_THREAD=1 USE_OPENMP=0 +%define build_flags USE_THREAD=1 USE_OPENMP=0 %ifarch %ix86 x86_64 %define openblas_so_prio 50 %else @@ -56,7 +56,7 @@ ExclusiveArch: do_not_build %define openblas_so_prio 50 %endif %{bcond_with hpc} -%endif +%endif %if "%flavor" == "gnu-hpc" %define compiler_family gnu @@ -167,9 +167,6 @@ URL: http://www.openblas.net Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz Source1: README.SUSE Source2: README.HPC.SUSE -# Temporarily - delete with next version update -Patch1: 0001-Require-gcc-11-for-builtin_cpu_is-power10.patch -Patch2: 0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch # PATCH-FIX-UPSTREAM openblas-noexecstack.patch Patch101: openblas-noexecstack.patch # PATCH port @@ -182,7 +179,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: gcc-fortran BuildRequires: update-alternatives Requires(post): update-alternatives -Requires(preun): update-alternatives +Requires(preun):update-alternatives %else BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel BuildRequires: lua-lmod @@ -198,7 +195,7 @@ Summary: An optimized BLAS library based on GotoBLAS2, %{flavor} version Group: System/Libraries %if %{without hpc} Requires(post): update-alternatives -Requires(preun): update-alternatives +Requires(preun):update-alternatives %if "%flavor" == "serial" Obsoletes: lib%{pname}%{so_v} < %{version} Provides: lib%{pname}%{so_v} = %{version} @@ -284,8 +281,6 @@ This package contains headers for OpenBLAS. %prep %setup -q -n OpenBLAS-%{version} -%patch1 -p1 -%patch2 -p1 %patch101 -p1 %patch102 -p1 %patch103 -p1 @@ -319,9 +314,9 @@ cp %{SOURCE2} . %define openblas_opt BUILD_BFLOAT16=1 %endif %ifarch aarch64 -%global openblas_target %openblas_target TARGET=ARMV8 +%global openblas_target %openblas_target TARGET=ARMV8 %define openblas_opt BUILD_BFLOAT16=1 -%endif +%endif %ifarch s390 s390x %global openblas_target %openblas_target TARGET=ZARCH_GENERIC %endif