From 4d274741c510a161343973d6d485b505b9f285faa8ed95be55ab7dbee744f013 Mon Sep 17 00:00:00 2001 From: Egbert Eich Date: Wed, 30 Mar 2022 09:28:45 +0000 Subject: [PATCH] Accepting request 965613 from home:eeich:branches:science_alt - Update to v0.3.20: * general: some code cleanup, with added casts etc. fixed obtaining the cpu count with OpenMP and OMP_PROC_BIND unset fixed pivot index calculation by ?LASWP for negative increments other than one fixed input argument check in LAPACK ? GEQRT2 improved the check for a Fortran compiler in CMAKE builds disabled building OpenBLAS' optimized versions of LAPACK complex SPMV, SPR,SYMV,SYR with NO_LAPACK=1 fixed building of LAPACK on certain distributed filesystems with parallel gmake fixed building the shared library on MacOS with classic flang (v0.3.19) reverted unsafe TRSV/ZRSV optimizations introduced in 0.3.16 fixed a potential thread race in the thread buffer reallocation routines that were introduced in 0.3.18 fixed miscounting of thread pool size on Linux with OMP_PROC_BIND=TRUE fixed CBLAS interfaces for CSROT/ZSROT and CROTG/ZROTG made automatic library suffix for CMAKE builds with INTERFACE64 available to CBLAS-only builds (v0.3.18) when the build-time number of preconfigured threads is exceeded at runtime (by an external program calling BLAS functions from a larger number of threads), OpenBLAS will now allocate an auxiliary control structure for up to 512 additional threads instead of aborting added support for Loongson's LoongArch64 cpu architecture fixed building OpenBLAS with CMAKE and -DBUILD_BFLOAT16=ON added support for building OpenBLAS as a CMAKE subproject OBS-URL: https://build.opensuse.org/request/show/965613 OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=129 --- ...iguration-when-building-DYNAMIC_ARCH.patch | 185 ++++++++++++++++ ...te-preprocessed-output-from-setparam.patch | 31 +++ ...t-to-check-host-CPU-if-TARGET-is-set.patch | 78 +++++++ 
...-others-parameter.c-in-DYNAMIC_BUILD.patch | 33 +++ ...m_r-as-parameter.c-doesn-t-get-build.patch | 51 +++++ OpenBLAS-0.3.17.tar.gz | 3 - OpenBLAS-0.3.20.tar.gz | 3 + _multibuild | 2 +- openblas.changes | 118 ++++++++++ openblas.spec | 203 +++++++++--------- 10 files changed, 600 insertions(+), 107 deletions(-) create mode 100644 Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch create mode 100644 Create-preprocessed-output-from-setparam.patch create mode 100644 Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch create mode 100644 Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch create mode 100644 For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch delete mode 100644 OpenBLAS-0.3.17.tar.gz create mode 100644 OpenBLAS-0.3.20.tar.gz diff --git a/Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch b/Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch new file mode 100644 index 0000000..5cc77fd --- /dev/null +++ b/Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch @@ -0,0 +1,185 @@ +From: Egbert Eich +Date: Tue Mar 1 10:04:01 2022 +0100 +Subject: Create independent kernel Makfile & configuration when building DYNAMIC_ARCH +Patch-mainline: Not yet +Git-commit: c8b95480ec9fd2cab0321da61a7711415be6e9a9 +References: + +- For 'classic' builds, generate separate config_kernel_.h, + Makfile_.conf and getarch- files/binaries +- For cmake builds, generate separate getarch- binaries +for better debugging. 
+ +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + Makefile | 4 ++-- + Makefile.prebuild | 26 +++++++++++++++++--------- + Makefile.system | 4 ++++ + cmake/prebuild.cmake | 6 +++++- + common.h | 4 ++++ + getarch_2nd.c | 4 ++++ + kernel/Makefile | 2 +- + 7 files changed, 37 insertions(+), 13 deletions(-) +diff --git a/Makefile b/Makefile +index 1bb3f6b..2a1639c 100644 +--- a/Makefile ++++ b/Makefile +@@ -387,11 +387,11 @@ clean :: + @$(MAKE) -C kernel clean + #endif + @$(MAKE) -C reference clean +- @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0 ++ @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch-* getarch_2nd getarch_2nd-* *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0 + ifeq ($(OSNAME), Darwin) + @rm -rf getarch.dSYM getarch_2nd.dSYM + endif +- @rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib ++ @rm -f Makefile.conf config.h Makefile_kernel*.conf config_kernel*.h st* *.dylib + @rm -f cblas.tmp cblas.tmp2 + @touch $(NETLIB_LAPACK_DIR)/make.inc + @$(MAKE) -C $(NETLIB_LAPACK_DIR) clean +diff --git a/Makefile.prebuild b/Makefile.prebuild +index 399db95..c9eb8e0 100644 +--- a/Makefile.prebuild ++++ b/Makefile.prebuild +@@ -8,11 +8,19 @@ override HOST_CFLAGS += -DDYNAMIC_ARCH + endif + + ifdef TARGET_CORE ++ifdef DYNAMIC_ARCH ++TARGET_MAKE = Makefile_kernel_$(TARGET_CORE).conf ++TARGET_CONF = config_kernel_$(TARGET_CORE).h ++GETARCH_FLAVOR = -$(TARGET_CORE) ++else + TARGET_MAKE = Makefile_kernel.conf + TARGET_CONF = config_kernel.h ++GETARCH_FLAVOR = -$(TARGET_CORE) ++endif + else + TARGET_MAKE = Makefile.conf + TARGET_CONF = config.h ++GETARCH_FLAVOR = + endif + + # CPUIDEMU = ../../cpuid/table.o +@@ -49,11 +57,11 @@ ifeq ($(TARGET), 
C910V) + TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v + endif + +-all: getarch_2nd +- ./getarch_2nd 0 >> $(TARGET_MAKE) +- ./getarch_2nd 1 >> $(TARGET_CONF) ++all: getarch_2nd$(GETARCH_FLAVOR) ++ ./getarch_2nd$(GETARCH_FLAVOR) 0 >> $(TARGET_MAKE) ++ ./getarch_2nd$(GETARCH_FLAVOR) 1 >> $(TARGET_CONF) + +-config.h : c_check f_check getarch ++config.h : c_check f_check getarch$(GETARCH_FLAVOR) + perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS) + ifneq ($(ONLY_CBLAS), 1) + perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) +@@ -66,18 +74,18 @@ else + echo "#define BUNDERSCORE _" >> $(TARGET_CONF) + echo "#define NEEDBUNDERSCORE 1" >> $(TARGET_CONF) + endif +- ./getarch 0 >> $(TARGET_MAKE) +- ./getarch 1 >> $(TARGET_CONF) ++ ./getarch$(GETARCH_FLAVOR) 0 >> $(TARGET_MAKE) ++ ./getarch$(GETARCH_FLAVOR) 1 >> $(TARGET_CONF) + + +-getarch : getarch.c cpuid.S dummy $(CPUIDEMU) ++getarch$(GETARCH_FLAVOR) : getarch.c cpuid.S dummy $(CPUIDEMU) + $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU) + +-getarch_2nd : getarch_2nd.c config.h dummy ++getarch_2nd$(GETARCH_FLAVOR) : getarch_2nd.c config.h dummy + ifndef TARGET_CORE + $(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c + else +- $(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c ++ $(HOSTCC) -I. 
$(HOST_CFLAGS) -DBUILD_KERNEL -DKERNEL_CONFIG=\"$(TARGET_CONF)\" -o $(@F) getarch_2nd.c + endif + + dummy: +diff --git a/Makefile.system b/Makefile.system +index 1057255..5f66d30 100644 +--- a/Makefile.system ++++ b/Makefile.system +@@ -298,8 +298,12 @@ HAVE_SSE5= + HAVE_AVX= + HAVE_AVX2= + HAVE_FMA3= ++ifeq ($(DYNAMIC_ARCH), 1) ++include $(TOPDIR)/Makefile_kernel_$(TARGET_CORE).conf ++else + include $(TOPDIR)/Makefile_kernel.conf + endif ++endif + + endif + +diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake +index 4ef0ce9..2f5d472 100644 +--- a/cmake/prebuild.cmake ++++ b/cmake/prebuild.cmake +@@ -629,7 +629,11 @@ else(NOT CMAKE_CROSSCOMPILING) + endif () + + set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") +- set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") ++ if (DEFINED TARGET_CORE) ++ set(GETARCH_BIN "getarch-${TARGET_CORE}${CMAKE_EXECUTABLE_SUFFIX}") ++ else () ++ set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") ++ endif () + file(MAKE_DIRECTORY ${GETARCH_DIR}) + configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY) + if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") +diff --git a/common.h b/common.h +index 00d1d0b..d563e3b 100644 +--- a/common.h ++++ b/common.h +@@ -57,7 +57,11 @@ extern "C" { + #endif + + #ifdef BUILD_KERNEL ++#ifdef KERNEL_CONFIG ++#include KERNEL_CONFIG ++#else + #include "config_kernel.h" ++#endif + #else + #include "config.h" + #endif +diff --git a/getarch_2nd.c b/getarch_2nd.c +index dd1f830..2abeee6 100644 +--- a/getarch_2nd.c ++++ b/getarch_2nd.c +@@ -2,8 +2,12 @@ + #ifndef BUILD_KERNEL + #include "config.h" + #else ++#ifdef KERNEL_CONFIG ++#include KERNEL_CONFIG ++#else + #include "config_kernel.h" + #endif ++#endif + #if (defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)) && defined(__64BIT__) + typedef long long BLASLONG; + typedef unsigned long long BLASULONG; +diff --git a/kernel/Makefile 
b/kernel/Makefile +index cbe4cde..94a718b 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -124,7 +124,7 @@ COMMONOBJS += lsame.$(SUFFIX) scabs1.$(SUFFIX) dcabs1.$(SUFFIX) + + ifeq ($(DYNAMIC_ARCH), 1) + SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX) +-CCOMMON_OPT += -DTS=$(TSUFFIX) ++CCOMMON_OPT += -DTS=$(TSUFFIX) -DKERNEL_CONFIG=\"config_kernel$(TSUFFIX).h\" + endif + + KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h diff --git a/Create-preprocessed-output-from-setparam.patch b/Create-preprocessed-output-from-setparam.patch new file mode 100644 index 0000000..e5da8d0 --- /dev/null +++ b/Create-preprocessed-output-from-setparam.patch @@ -0,0 +1,31 @@ +From: Egbert Eich +Date: Mon Feb 28 10:33:05 2022 +0100 +Subject: Create preprocessed output from setparam. +Patch-mainline: Not yet +Git-commit: e5b7694b3f37f44145ce6343ca29bd97327f82bf +References: + +Signed-off-by: Egbert Eich +--- + kernel/Makefile | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) +diff --git a/kernel/Makefile b/kernel/Makefile +index cbe4cde..8bc6883 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -161,7 +161,14 @@ qconjg.$(SUFFIX): $(KERNELDIR)/qconjg.S + lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) + $(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) + +-setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h ++setparam$(TSUFFIX).E: setparam$(TSUFFIX).c kernel$(TSUFFIX).h ++ifeq ($(USE_GEMM3M), 1) ++ $(CC) -E $(CFLAGS) -DUSE_GEMM3M $< > $@ ++else ++ $(CC) -E $(CFLAGS) $< > $@ ++endif ++ ++setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h setparam$(TSUFFIX).E + ifeq ($(USE_GEMM3M), 1) + $(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@ + else diff --git a/Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch b/Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch new file mode 100644 index 0000000..cac835e --- /dev/null +++ b/Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch @@ -0,0 +1,78 @@ +From: Egbert Eich 
+Date: Tue Mar 1 18:18:54 2022 +0100 +Subject: Do not attempt to check host CPU if TARGET is set. +Patch-mainline: Not yet +Git-commit: 1cf40c7eb77076aa5ae9641bd0fd328ce2bc5e00 +References: + +This weird 'autodetection' breaks DYNAMIC arch (or makes the build +unreproducible) and will most likely not work with cross compiling. + +Presently, this is only relevant for SkylakeX, Cooperlake and +Sapphire Rapids cores, that's what has been implemented. Going +forward, other cores would have to be added here as well (in sync +with Makefile.$(arch)) as this needs to be kept in sync with changes +to getarch. +It would be better to remove this hack (and the counterpart in +getarch) entirely. + +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + Makefile.system | 14 +++++++++++--- + cmake/system.cmake | 4 ++-- + getarch.c | 2 +- + 3 files changed, 14 insertions(+), 6 deletions(-) +diff --git a/Makefile.system b/Makefile.system +index 5f66d30..855a734 100644 +--- a/Makefile.system ++++ b/Makefile.system +@@ -197,9 +197,17 @@ endif + + # On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch. + ifeq ($(HOSTARCH), x86_64) +-ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),) +-GETARCH_FLAGS += -march=native +-endif ++ ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),) ++ ifdef TARGET ++GETARCH_NOPROBE=1 ++ endif ++ ifdef TARGET_CORE ++GETARCH_NOPROBE=1 ++ endif ++ ifndef GETARCH_NOPROBE ++GETARCH_FLAGS += -march=native -DAUTOPROBE ++ endif ++ endif + endif + + ifdef INTERFACE64 +diff --git a/cmake/system.cmake b/cmake/system.cmake +index e0e92bd..27c4539 100644 +--- a/cmake/system.cmake ++++ b/cmake/system.cmake +@@ -55,8 +55,8 @@ if (DEFINED TARGET) + endif () + + # On x86_64 build getarch with march=native. This is required to detect AVX512 support in getarch. 
+-if (X86_64 AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "PGI") +- set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native") ++if (X86_64 AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT DEFINED TARGET) ++ set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native -DAUTOPROBE") + endif () + + # On x86 no AVX support is available +diff --git a/getarch.c b/getarch.c +index 59ac1f6..6c5be9a 100644 +--- a/getarch.c ++++ b/getarch.c +@@ -94,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include + #endif + +-#if defined(__x86_64__) || defined(_M_X64) ++#if defined (AUTOPROBE) && (defined(__x86_64__) || defined(_M_X64)) + #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6)) + #else + #ifndef NO_AVX512 diff --git a/Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch b/Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch new file mode 100644 index 0000000..6190a6b --- /dev/null +++ b/Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch @@ -0,0 +1,33 @@ +From: Egbert Eich +Date: Sun Mar 13 10:57:59 2022 +0100 +Subject: Do not include symbols defined in driver/others/parameter.c in DYNAMIC_BUILD +Patch-mainline: Not yet +Git-repo: https://github.com/xianyi/OpenBLAS +Git-commit: 53cd07b0201c94ea50a499867382dcf39d1b8766 +References: + +driver/others/parameter.c does not get build during DYNAMIC_BUILD, thus, +do not declare its symbols. This will make the build fail early and in +an obvious way if functions are trying to use these symbols. 
+ +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + common_macro.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) +diff --git a/common_macro.h b/common_macro.h +index 9826f180..d2fa822c 100644 +--- a/common_macro.h ++++ b/common_macro.h +@@ -2610,8 +2610,9 @@ + #endif + + #ifndef ASSEMBLER +-#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\ +-|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) ++#if !defined(DYNAMIC_ARCH) \ ++ && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ ++ || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) + extern BLASLONG gemm_offset_a; + extern BLASLONG gemm_offset_b; + extern BLASLONG sbgemm_p; diff --git a/For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch b/For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch new file mode 100644 index 0000000..749646f --- /dev/null +++ b/For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch @@ -0,0 +1,51 @@ +From: Egbert Eich +Date: Tue Mar 1 19:27:47 2022 +0100 +Subject: For DYNAMIC_ARCH don't use sbgemm_r as parameter.c doesn't get build +Patch-mainline: Not yet +Git-commit: dce6d9a5fb5e1af31aedcdc0fec1d6393bae395f +References: + +Presently, DYNAMIC_ARCH doesn't build if the build includes Intel +Cooperlake or Sapphire Rapids cores. This is because their init +function reference sbgemm_r which is defined in +driver/other/parameter.c. +This file is not built when using DYNAMIC_ARCH. +The value is the one that blas_set_parameter() would set on build +without DYNAMIC_ARCH. +There seems to be some duplication between blas_set_parameter() and +the kernel specific init_parameter() calls. Some consolidation would +be in order here. 
+ +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + param.h | 8 ++++++++ + 1 file changed, 8 insertions(+) +diff --git a/param.h b/param.h +index 8649e44..e8fdfae 100644 +--- a/param.h ++++ b/param.h +@@ -1803,7 +1803,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #define SBGEMM_DEFAULT_UNROLL_M 32 + #define SBGEMM_DEFAULT_P 256 + #define SBGEMM_DEFAULT_Q 1024 ++#ifndef DYNAMIC_ARCH + #define SBGEMM_DEFAULT_R sbgemm_r ++#else ++#define SBGEMM_DEFAULT_R 43280 ++#endif + + #ifdef ARCH_X86 + +@@ -1933,7 +1937,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #define SBGEMM_DEFAULT_UNROLL_M 16 + #define SBGEMM_DEFAULT_P 384 + #define SBGEMM_DEFAULT_Q 768 ++#ifndef DYNAMIC_ARCH + #define SBGEMM_DEFAULT_R sbgemm_r ++#else ++#define SBGEMM_DEFAULT_R 43280 ++#endif + + #ifdef ARCH_X86 + diff --git a/OpenBLAS-0.3.17.tar.gz b/OpenBLAS-0.3.17.tar.gz deleted file mode 100644 index f6c8c3f..0000000 --- a/OpenBLAS-0.3.17.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:df2934fa33d04fd84d839ca698280df55c690c86a5a1133b3f7266fce1de279f -size 12513037 diff --git a/OpenBLAS-0.3.20.tar.gz b/OpenBLAS-0.3.20.tar.gz new file mode 100644 index 0000000..f1ac2f6 --- /dev/null +++ b/OpenBLAS-0.3.20.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8495c9affc536253648e942908e88e097f2ec7753ede55aca52e5dead3029e3c +size 12742441 diff --git a/_multibuild b/_multibuild index 74856ab..961f079 100644 --- a/_multibuild +++ b/_multibuild @@ -1,7 +1,7 @@ serial - openmp pthreads + openmp gnu-hpc gnu-hpc-pthreads diff --git a/openblas.changes b/openblas.changes index 30bfaf8..5dca180 100644 --- a/openblas.changes +++ b/openblas.changes @@ -1,3 +1,121 @@ +------------------------------------------------------------------- +Fri Feb 25 20:10:04 UTC 2022 - Egbert Eich + +- Update to v0.3.20: + * general: + some code cleanup, with added casts etc. 
+ fixed obtaining the cpu count with OpenMP and OMP_PROC_BIND unset + fixed pivot index calculation by ?LASWP for negative increments other + than one + fixed input argument check in LAPACK ? GEQRT2 + improved the check for a Fortran compiler in CMAKE builds + disabled building OpenBLAS' optimized versions of LAPACK complex SPMV, + SPR,SYMV,SYR with NO_LAPACK=1 + fixed building of LAPACK on certain distributed filesystems with parallel + gmake + fixed building the shared library on MacOS with classic flang + (v0.3.19) + reverted unsafe TRSV/ZRSV optimizations introduced in 0.3.16 + fixed a potential thread race in the thread buffer reallocation routines + that were introduced in 0.3.18 + fixed miscounting of thread pool size on Linux with OMP_PROC_BIND=TRUE + fixed CBLAS interfaces for CSROT/ZSROT and CROTG/ZROTG + made automatic library suffix for CMAKE builds with INTERFACE64 available + to CBLAS-only builds + (v0.3.18) + when the build-time number of preconfigured threads is exceeded + at runtime (by an external program calling BLAS functions from + a larger number of threads), OpenBLAS will now allocate an + auxiliary control structure for up to 512 additional threads + instead of aborting + added support for Loongson's LoongArch64 cpu architecture + fixed building OpenBLAS with CMAKE and -DBUILD_BFLOAT16=ON + added support for building OpenBLAS as a CMAKE subproject + added support for building for Windows/ARM64 targets with clang + improved support for building with the IBM xlf compiler + imported Reference-LAPACK PR 625 (out-of-bounds access in ?LARRV) + imported Reference-LAPACK PR 597 for testsuite compatibility with + LLVM's libomp + * x86_64: + fixed cross-compilation with CMAKE for CORE2 target + fixed miscompilation of AVX512 code in DYNAMIC_ARCH builds + added support for the "incidental" AVX512 hardware in Alder Lake when + enabled in BIOS + (v0.3.19) + DYNAMIC_ARCH builds now fall back to the cpu with most similar capabilities + when an unknown 
CPUID is encountered, instead of defaulting to Prescott + added cpu detection for Intel Alder Lake + added cpu detection for Intel Sapphire Rapids + added an optimized SBGEMM kernel for Sapphire Rapids + fixed DYNAMIC_ARCH builds on OSX with CMAKE + worked around DYNAMIC_ARCH builds made on Sandybridge failing on SkylakeX + fixed missing thread initialization for static builds on Windows/MSVC + fixed an excessive read in ZSYMV + (v0.3.18) + added SkylakeX S/DGEMM kernels for small problem sizes (MNK<=1000000) + added optimized SBGEMM for Intel Cooper Lake + reinstated the performance patch for AVX512 SGEMV_T with a proper fix + added a workaround for a gcc11 tree-vectorizer bug that caused spurious + failures in the test programs for complex BLAS3 when compiling at -O3 + (the default for cmake "release" builds) + added support for runtime cpu count detection under Haiku OS + worked around a long-standing miscompilation issue of the Haswell DGEMV_T + kernel with gcc that could produce NaN output in some corner cases + * Power: + added support for POWER10 in big-endian mode + added support for building with CMAKE + added optimized SGEMM and DGEMM kernels for small matrix sizes + (v0.3.18) + improved performance of DASUM on POWER10 + * ARMV8: + added SVE-enabled CGEMM and ZGEMM kernels for ARMV8SVE and A64FX + added support for Neoverse N2 and V1 cpus + (v0.3.19) + added basic support and cputype detection for Fujitsu A64FX + added a generic ARMV8SVE target + added SVE-enabled SGEMM and DGEMM kernels for ARMV8SVE and A64FX + added optimized CGEMM and ZGEMM kernels for Cortex A53 and A55 cpus + fixed cpuid detection for Apple M1 and improved performance + improved compiler flag setting in CMAKE builds + (v0.3.18) + fixed crashes (use of reserved register x18) on Apple M1 under OSX + fixed building with gcc releases earlier than 5.1 +- Fix out of bounds read in ?llarv + LAPACK Reference: PR 625 + CVE-2021-4048, bsc#1196513 +- Limit parallel builds according to available 
memory. + Do NOT use %%_smp_mflags with top level 'make', set MAKE_NB_JOBS + instead and let the build do the work. + Also change -flto=auto to -flto=1: spawning even more parallel builds + on top of parallel build threads will wreak havoc. +- Move calls to 'update-alternatives --remove' to %%postun instead + of %%preun as suggested by rpmlint. +- Since we build with DYNAMIC_ARCH, create separate config files for + the different target kernels to help debugging + Add Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch +- Remove compiler feature detection when not using auto-detection. + Add Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch +- Do not depend on variables which are not available when building + DYNAMIC_ARCH. + Add For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch +- Do not include symbols defined in driver/others/parameter.c in + DYNAMIC_BUILD to generate more conclusive error messages earlier. + Add Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch +- Install lapack and blas libraries to an openblas-flavor + specific subdirectory of %%_libdir and set up the alternatives + to point to this directory. Set the system-wide BLAS/LAPACK + default directory to %%_libdir/openblas-default. + This way, the blas/lapack libraries will remain consistent + and from the same source. The user is able to override this + easily by setting the LD_LIBRARY_PATH to include the preferred + BLAS/LAPACK implementation (boo#1177260). +- Consolidate packages 'openblas-devel' and 'openblas-devel-headers' + into 'openblas-common-devel' (these are built for the serial + flavor only). +- Fix the openblas default flavor selection: + # /usr/sbin/update-alternatives --config libopenblas.so.0 +- Add cmake and pkgconfig files. 
+ ------------------------------------------------------------------- Sun Feb 13 16:02:01 UTC 2022 - Egbert Eich diff --git a/openblas.spec b/openblas.spec index 0388e2e..411778e 100644 --- a/openblas.spec +++ b/openblas.spec @@ -1,5 +1,5 @@ # -# spec file for package openblas +# spec file # # Copyright (c) 2022 SUSE LLC # @@ -18,8 +18,8 @@ %global flavor @BUILD_FLAVOR@%{nil} -%define _vers 0_3_17 -%define vers 0.3.17 +%define _vers 0_3_20 +%define vers 0.3.20 %define pname openblas %bcond_with ringdisabled @@ -135,7 +135,7 @@ ExclusiveArch: do_not_build %define so_v 0 %define p_prefix %_prefix %define p_includedir %_includedir/%pname -%define p_libdir %_libdir +%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}} %define p_cmakedir %{p_libdir}/cmake/%{pname} %define num_threads 64 @@ -167,6 +167,10 @@ URL: http://www.openblas.net Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz Source1: README.SUSE Source2: README.HPC.SUSE +Patch1: Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch +Patch2: Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch +Patch3: For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch +Patch4: Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch # PATCH-FIX-UPSTREAM openblas-noexecstack.patch Patch101: openblas-noexecstack.patch # PATCH port @@ -175,6 +179,8 @@ Patch103: openblas-ppc64be_up2_p8.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build +#BuildRequires: cmake +BuildRequires: memory-constraints %if %{without hpc} BuildRequires: gcc-fortran BuildRequires: update-alternatives @@ -212,12 +218,6 @@ Obsoletes: lib%{pname}o0 %hpc_requires %endif -%if %{without hpc} -%define libname %name -%else -%define libname %pname -%endif - %description -n lib%{name}%{?so_v} OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. 
@@ -228,7 +228,7 @@ Summary: Development libraries for OpenBLAS, %{flavor} version Group: Development/Libraries/C and C++ Requires: lib%{name}%{?so_v} = %{version} %if %{without hpc} -Requires: %{pname}-devel-headers = %{version} +Requires: %{pname}-common-devel = %{version} %else %hpc_requires_devel %endif @@ -243,37 +243,18 @@ This package contains the development libraries for serial OpenBLAS version. %package devel-static Summary: Static version of OpenBLAS Group: Development/Libraries/C and C++ -%if %{without hpc} -Requires: %{pname}-devel = %{version} -%else Requires: lib%{name}-devel = %{version} -%endif %description devel-static OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. This package contains the static libraries. -%package -n %{pname}-devel +%package -n %{pname}-common-devel Summary: Development headers and libraries for OpenBLAS Group: Development/Libraries/C and C++ -Requires: %{pname}-devel-headers = %{version} -%ifarch %ix86 x86_64 -Requires: lib%{pname}_pthreads-devel = %{version} -%else -Requires: lib%{pname}_openmp-devel = %{version} -%endif -%description -n %{pname}-devel -OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. - -%package -n %{pname}-devel-headers -Summary: Development headers for OpenBLAS -Group: Development/Libraries/C and C++ -Conflicts: %{pname}-devel < %{version} -BuildArch: noarch - -%description -n %{pname}-devel-headers +%description -n %{pname}-common-devel OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. This package contains headers for OpenBLAS. @@ -281,9 +262,7 @@ This package contains headers for OpenBLAS. %prep %setup -q -n OpenBLAS-%{version} -%patch101 -p1 -%patch102 -p1 -%patch103 -p1 +%autopatch -p1 %ifarch s390 sed -i -e "s@m32@m31@" Makefile.system %endif @@ -295,8 +274,11 @@ cp %{SOURCE2} . 
%endif %build + +# Limit lto jobs to 1 - -flto=auto together with make -j +# would cause a huge number of build jobs spawned in parallel %if "%{?_lto_cflags}" != "" -%global _lto_cflags %{_lto_cflags} -ffat-lto-objects +%global _lto_cflags -flto=1 -ffat-lto-objects %endif # disable lto for ppc64le, boo#1181733 @@ -310,6 +292,9 @@ cp %{SOURCE2} . %endif # Use DYNAMIC_ARCH everywhere - not sure about PPC? +# Use DYNAMIC_ARCH to build for multiple targets, use TARGET to specify +# the CPU model assumed for the common code. It should be set to the +# oldest CPU model one expects to encounter. %global openblas_target DYNAMIC_ARCH=1 # We specify TARGET= to avoid compile-time CPU-detection (boo#1100677) %ifarch %ix86 x86_64 @@ -337,17 +322,27 @@ cp %{SOURCE2} . # ../kernel/power/sasum_microk_power8.c:41:3: error: '__vector' undeclared (first use in this function); did you mean '__cpow'? # TODO why is it required ? (and not for ppc64le) %ifarch ppc64 -%define addopt -mvsx +%global addopt -mvsx %endif +%global addopt %{?addopt} -fno-strict-aliasing + # Make serial, threaded and OpenMP versions -make %{?_smp_mflags} %{?openblas_target} %{?build_flags} \ - %{?openblas_opt} COMMON_OPT="%{optflags} %{?addopt}" \ - NUM_THREADS=%{num_threads} V=1 \ - OPENBLAS_LIBRARY_DIR=%{p_libdir} \ - OPENBLAS_INCLUDE_DIR=%{hpc_includedir} \ - OPENBLAS_CMAKE_DIR=%{p_cmakedir} \ - PREFIX=%{p_prefix} \ - %{!?with_hpc:LIBNAMESUFFIX=%flavor FC=gfortran CC=gcc} + +# Calculate process limits +%limit_build -m 1500 +[[ -n $_threads ]] && jobs=$_threads +[[ -z $jobs ]] && jobs=1 +# NEVER use %%_smp_mflags with top level make: +# set MAKE_NB_JOBS instead and let the build do the work! 
+make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \ + %{?openblas_opt} \ + COMMON_OPT="%{optflags} %{?addopt}" \ + NUM_THREADS=%{num_threads} V=1 \ + OPENBLAS_LIBRARY_DIR=%{p_libdir} \ + OPENBLAS_INCLUDE_DIR=%{p_includedir} \ + OPENBLAS_CMAKE_DIR=%{p_cmakedir} \ + PREFIX=%{p_prefix} \ + %{!?with_hpc:LIBNAMESUFFIX=%flavor FC=gfortran CC=gcc} %install %if %{with hpc} @@ -373,43 +368,39 @@ make %{?_smp_mflags} %{?openblas_target} %{?build_flags} \ %if 0%{!?build_devel:1} # We need the includes only once rm -rf %{buildroot}%{p_includedir}/ -rm -rf %{buildroot}%{p_libdir}/cmake/ -%else +%endif + # Fix cmake config file sed -i 's|%{buildroot}||g' %{buildroot}%{p_cmakedir}/*.cmake -sed -i 's|_serial||g' %{buildroot}%{p_cmakedir}/*.cmake -%endif +sed -i 's|_%{flavor}||g' %{buildroot}%{p_cmakedir}/*.cmake # Put libraries in correct location rm -rf %{buildroot}%{p_libdir}/lib%{name}* # Install the serial library -install -D -p -m 755 lib%{name}.so %{buildroot}%{p_libdir}/lib%{name}.so.0 -install -D -p -m 644 lib%{name}.a %{buildroot}%{p_libdir}/lib%{name}.a +install -D -p -m 755 lib%{name}.so %{buildroot}%{p_libdir}/lib%{pname}.so.0 +install -D -p -m 644 lib%{name}.a %{buildroot}%{p_libdir}/lib%{pname}.a # Fix source permissions (also applies to LAPACK) find -name \*.f -exec chmod 644 {} + -# Remove pkgconfig file, it can't be configured for different library suffixes we use and, as such, is useless -rm -fr %{buildroot}%{p_libdir}/pkgconfig/ - # Dummy target for update-alternatives install -d %{buildroot}/%{_sysconfdir}/alternatives -ln -s lib%{libname}.so.0 %{buildroot}/%{p_libdir}/lib%{pname}.so.0 -ln -s lib%{pname}.so.0 %{buildroot}/%{p_libdir}/libblas.so.3 -ln -s lib%{pname}.so.0 %{buildroot}/%{p_libdir}/libcblas.so.3 -ln -s lib%{pname}.so.0 %{buildroot}/%{p_libdir}/liblapack.so.3 - -ln -s lib%{pname}.so.0 %{buildroot}/%{_sysconfdir}/alternatives/libblas.so.3 -ln -s lib%{pname}.so.0 %{buildroot}/%{_sysconfdir}/alternatives/libcblas.so.3 -ln -s lib%{pname}.so.0 
%{buildroot}/%{_sysconfdir}/alternatives/liblapack.so.3 - -# Fix symlinks -pushd %{buildroot}%{p_libdir} +ln -sf %{_sysconfdir}/alternatives/libblas.so.3 %{buildroot}/%{_libdir}/libblas.so.3 +ln -sf %{_sysconfdir}/alternatives/libcblas.so.3 %{buildroot}/%{_libdir}/libcblas.so.3 +ln -sf %{_sysconfdir}/alternatives/liblapack.so.3 %{buildroot}/%{_libdir}/liblapack.so.3 +ln -sf %{_sysconfdir}/alternatives/liblapacke.so.3 %{buildroot}/%{_libdir}/liblapacke.so.3 +ln -sf %{_sysconfdir}/alternatives/openblas-default %{buildroot}/%{_libdir}/openblas-default +ln -s lib%{pname}.so.%{so_v} %{buildroot}%{p_libdir}/lib%{pname}.so +ln -s %{_libdir}/openblas-default %{buildroot}%{_sysconfdir}/alternatives/openblas-default +ln -s openblas-default/lib%{pname}.so.%{so_v} %{buildroot}%{_libdir}/lib%{pname}.so.%{so_v} %if 0%{?build_devel} -ln -sf lib%{pname}.so.0 lib%{pname}.so +ln -s lib%{pname}.so.%{so_v} %{buildroot}%{_libdir}/lib%{pname}.so +install -d %{buildroot}%{_libdir}/pkgconfig/ +ln -s %{_libdir}/openblas-default/pkgconfig/openblas.pc %{buildroot}%{_libdir}/pkgconfig/ +install -d %{buildroot}/%{_libdir}/cmake +ln -s %{_libdir}/openblas-default/cmake/openblas %{buildroot}/%{_libdir}/cmake/ %endif -ln -sf lib%{name}.so.0 lib%{name}.so %else # with hpc @@ -460,30 +451,34 @@ EOF %post -n lib%{name}%{so_v} %{_sbindir}/update-alternatives --install \ - %{p_libdir}/libblas.so.3 libblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} 20 -%{_sbindir}/update-alternatives --install \ - %{p_libdir}/libcblas.so.3 libcblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} 20 -%{_sbindir}/update-alternatives --install \ - %{p_libdir}/liblapack.so.3 liblapack.so.3 %{p_libdir}/lib%{name}.so.%{so_v} 20 -%{_sbindir}/update-alternatives --install \ - %{p_libdir}/lib%{pname}.so.%{so_v} lib%{name}.so.%{so_v} %{p_libdir}/lib%{name}.so.%{so_v} %openblas_so_prio + %{_libdir}/openblas-default openblas-default %{p_libdir} %openblas_so_prio +for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do + 
%{_sbindir}/update-alternatives --install \ + %{_libdir}/${lib} ${lib} %{_libdir}/lib%{pname}.so.%{so_v} 20 +done /sbin/ldconfig -%preun -n lib%{name}%{so_v} -if [ "$1" = 0 ] ; then - %{_sbindir}/update-alternatives --remove libblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} - %{_sbindir}/update-alternatives --remove libcblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} - %{_sbindir}/update-alternatives --remove liblapack.so.3 %{p_libdir}/lib%{name}.so.%{so_v} - %{_sbindir}/update-alternatives --remove lib%{name}.so.0 %{p_libdir}/lib%{name}.so.%{so_v} +%postun -n lib%{name}%{so_v} +if [ ! -f %{p_libdir}/lib%{pname}.so.%{so_v} ]; then + for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do + %{_sbindir}/update-alternatives --remove ${lib} %{_libdir}/lib%{pname}.so.%{so_v} + done fi - -%postun -n lib%{name}%{so_v} -p /sbin/ldconfig +if [ ! -d %{p_libdir} ]; then + %{_sbindir}/update-alternatives --remove openblas-default %{p_libdir} +fi +/sbin/ldconfig %posttrans -n lib%{name}%{so_v} if [ "$1" = 0 ] ; then - if ! [ -f %{p_libdir}/lib%{name}.so.%{so_v} ] ; then - %{_sbindir}/update-alternatives --auto lib%{pname}.so.%{so_v} + if [ ! -d %{_libdir}/openblas-default ] ; then + %{_sbindir}/update-alternatives --auto openblas-default fi + for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do + if ! 
[ -f %{_libdir}/${lib} ] ; then + %{_sbindir}/update-alternatives --auto ${lib} + fi + done fi %else @@ -495,16 +490,20 @@ fi %files -n lib%{name}%{?so_v} %defattr(-,root,root,-) -%{p_libdir}/lib%{libname}.so.0 +%{p_libdir}/lib%{pname}.so.0 %if %{without hpc} -%ghost %{p_libdir}/lib%{pname}.so.%{so_v} -%ghost %{p_libdir}/libblas.so.3 -%ghost %{p_libdir}/libcblas.so.3 -%ghost %{p_libdir}/liblapack.so.3 -%ghost %{_sysconfdir}/alternatives/lib%{pname}.so.%{so_v} +%dir %{p_libdir} +%{_libdir}/openblas-default +%{_libdir}/lib%{pname}.so.%{so_v} +%ghost %{_libdir}/libblas.so.3 +%ghost %{_libdir}/libcblas.so.3 +%ghost %{_libdir}/liblapack.so.3 +%ghost %{_libdir}/liblapacke.so.3 +%ghost %{_sysconfdir}/alternatives/openblas-default %ghost %{_sysconfdir}/alternatives/libblas.so.3 %ghost %{_sysconfdir}/alternatives/libcblas.so.3 %ghost %{_sysconfdir}/alternatives/liblapack.so.3 +%ghost %{_sysconfdir}/alternatives/liblapacke.so.3 %else %hpc_dirs %{p_libdir}/libopenblas*r*.so @@ -512,33 +511,31 @@ fi %endif %files -n lib%{name}-devel -%defattr(-,root,root,-) -%{p_libdir}/lib%{libname}.so +%{p_libdir}/lib%{pname}.so +%{p_cmakedir}/ %if %{with hpc} %license LICENSE %doc Changelog.txt GotoBLAS* README.md README.HPC.SUSE %hpc_pkgconfig_file -%{p_cmakedir}/ %{p_includedir}/ +%else +%dir %{p_libdir}/cmake +%dir %{p_libdir}/pkgconfig +%{p_libdir}/pkgconfig %endif %files devel-static -%defattr(-,root,root,-) -#%%{p_libdir}/lib%{libname}.a %{p_libdir}/libopenblas*.a %if 0%{?build_devel} -%files -n %{pname}-devel -%defattr(-,root,root,-) +%files -n %{pname}-common-devel %license LICENSE %doc Changelog.txt GotoBLAS* README.md README.SUSE -%{p_libdir}/libopenblas.so -%dir %{p_libdir}/cmake -%{p_cmakedir}/ - -%files -n %{pname}-devel-headers -%defattr(-,root,root,-) +%{_libdir}/lib%{pname}.so %{p_includedir}/ +%{_libdir}/pkgconfig/openblas.pc +%dir %{_libdir}/cmake +%{_libdir}/cmake/openblas %endif %changelog