diff --git a/Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch b/Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch new file mode 100644 index 0000000..5cc77fd --- /dev/null +++ b/Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch @@ -0,0 +1,185 @@ +From: Egbert Eich +Date: Tue Mar 1 10:04:01 2022 +0100 +Subject: Create independent kernel Makfile & configuration when building DYNAMIC_ARCH +Patch-mainline: Not yet +Git-commit: c8b95480ec9fd2cab0321da61a7711415be6e9a9 +References: + +- For 'classic' builds, generate separate config_kernel_.h, + Makfile_.conf and getarch- files/binaries +- For cmake builds, generate separate getarch- binaries +for better debugging. + +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + Makefile | 4 ++-- + Makefile.prebuild | 26 +++++++++++++++++--------- + Makefile.system | 4 ++++ + cmake/prebuild.cmake | 6 +++++- + common.h | 4 ++++ + getarch_2nd.c | 4 ++++ + kernel/Makefile | 2 +- + 7 files changed, 37 insertions(+), 13 deletions(-) +diff --git a/Makefile b/Makefile +index 1bb3f6b..2a1639c 100644 +--- a/Makefile ++++ b/Makefile +@@ -387,11 +387,11 @@ clean :: + @$(MAKE) -C kernel clean + #endif + @$(MAKE) -C reference clean +- @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0 ++ @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch-* getarch_2nd getarch_2nd-* *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0 + ifeq ($(OSNAME), Darwin) + @rm -rf getarch.dSYM getarch_2nd.dSYM + endif +- @rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib ++ @rm -f Makefile.conf config.h Makefile_kernel*.conf config_kernel*.h st* *.dylib + @rm -f 
cblas.tmp cblas.tmp2 + @touch $(NETLIB_LAPACK_DIR)/make.inc + @$(MAKE) -C $(NETLIB_LAPACK_DIR) clean +diff --git a/Makefile.prebuild b/Makefile.prebuild +index 399db95..c9eb8e0 100644 +--- a/Makefile.prebuild ++++ b/Makefile.prebuild +@@ -8,11 +8,19 @@ override HOST_CFLAGS += -DDYNAMIC_ARCH + endif + + ifdef TARGET_CORE ++ifdef DYNAMIC_ARCH ++TARGET_MAKE = Makefile_kernel_$(TARGET_CORE).conf ++TARGET_CONF = config_kernel_$(TARGET_CORE).h ++GETARCH_FLAVOR = -$(TARGET_CORE) ++else + TARGET_MAKE = Makefile_kernel.conf + TARGET_CONF = config_kernel.h ++GETARCH_FLAVOR = -$(TARGET_CORE) ++endif + else + TARGET_MAKE = Makefile.conf + TARGET_CONF = config.h ++GETARCH_FLAVOR = + endif + + # CPUIDEMU = ../../cpuid/table.o +@@ -49,11 +57,11 @@ ifeq ($(TARGET), C910V) + TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v + endif + +-all: getarch_2nd +- ./getarch_2nd 0 >> $(TARGET_MAKE) +- ./getarch_2nd 1 >> $(TARGET_CONF) ++all: getarch_2nd$(GETARCH_FLAVOR) ++ ./getarch_2nd$(GETARCH_FLAVOR) 0 >> $(TARGET_MAKE) ++ ./getarch_2nd$(GETARCH_FLAVOR) 1 >> $(TARGET_CONF) + +-config.h : c_check f_check getarch ++config.h : c_check f_check getarch$(GETARCH_FLAVOR) + perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS) + ifneq ($(ONLY_CBLAS), 1) + perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) +@@ -66,18 +74,18 @@ else + echo "#define BUNDERSCORE _" >> $(TARGET_CONF) + echo "#define NEEDBUNDERSCORE 1" >> $(TARGET_CONF) + endif +- ./getarch 0 >> $(TARGET_MAKE) +- ./getarch 1 >> $(TARGET_CONF) ++ ./getarch$(GETARCH_FLAVOR) 0 >> $(TARGET_MAKE) ++ ./getarch$(GETARCH_FLAVOR) 1 >> $(TARGET_CONF) + + +-getarch : getarch.c cpuid.S dummy $(CPUIDEMU) ++getarch$(GETARCH_FLAVOR) : getarch.c cpuid.S dummy $(CPUIDEMU) + $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU) + +-getarch_2nd : getarch_2nd.c config.h dummy ++getarch_2nd$(GETARCH_FLAVOR) : getarch_2nd.c config.h dummy + ifndef TARGET_CORE + $(HOSTCC) -I. 
$(HOST_CFLAGS) -o $(@F) getarch_2nd.c + else +- $(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c ++ $(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -DKERNEL_CONFIG=\"$(TARGET_CONF)\" -o $(@F) getarch_2nd.c + endif + + dummy: +diff --git a/Makefile.system b/Makefile.system +index 1057255..5f66d30 100644 +--- a/Makefile.system ++++ b/Makefile.system +@@ -298,8 +298,12 @@ HAVE_SSE5= + HAVE_AVX= + HAVE_AVX2= + HAVE_FMA3= ++ifeq ($(DYNAMIC_ARCH), 1) ++include $(TOPDIR)/Makefile_kernel_$(TARGET_CORE).conf ++else + include $(TOPDIR)/Makefile_kernel.conf + endif ++endif + + endif + +diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake +index 4ef0ce9..2f5d472 100644 +--- a/cmake/prebuild.cmake ++++ b/cmake/prebuild.cmake +@@ -629,7 +629,11 @@ else(NOT CMAKE_CROSSCOMPILING) + endif () + + set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") +- set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") ++ if (DEFINED TARGET_CORE) ++ set(GETARCH_BIN "getarch-${TARGET_CORE}${CMAKE_EXECUTABLE_SUFFIX}") ++ else () ++ set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") ++ endif () + file(MAKE_DIRECTORY ${GETARCH_DIR}) + configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY) + if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") +diff --git a/common.h b/common.h +index 00d1d0b..d563e3b 100644 +--- a/common.h ++++ b/common.h +@@ -57,7 +57,11 @@ extern "C" { + #endif + + #ifdef BUILD_KERNEL ++#ifdef KERNEL_CONFIG ++#include KERNEL_CONFIG ++#else + #include "config_kernel.h" ++#endif + #else + #include "config.h" + #endif +diff --git a/getarch_2nd.c b/getarch_2nd.c +index dd1f830..2abeee6 100644 +--- a/getarch_2nd.c ++++ b/getarch_2nd.c +@@ -2,8 +2,12 @@ + #ifndef BUILD_KERNEL + #include "config.h" + #else ++#ifdef KERNEL_CONFIG ++#include KERNEL_CONFIG ++#else + #include "config_kernel.h" + #endif ++#endif + #if (defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)) && 
defined(__64BIT__) + typedef long long BLASLONG; + typedef unsigned long long BLASULONG; +diff --git a/kernel/Makefile b/kernel/Makefile +index cbe4cde..94a718b 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -124,7 +124,7 @@ COMMONOBJS += lsame.$(SUFFIX) scabs1.$(SUFFIX) dcabs1.$(SUFFIX) + + ifeq ($(DYNAMIC_ARCH), 1) + SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX) +-CCOMMON_OPT += -DTS=$(TSUFFIX) ++CCOMMON_OPT += -DTS=$(TSUFFIX) -DKERNEL_CONFIG=\"config_kernel$(TSUFFIX).h\" + endif + + KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h diff --git a/Create-preprocessed-output-from-setparam.patch b/Create-preprocessed-output-from-setparam.patch new file mode 100644 index 0000000..e5da8d0 --- /dev/null +++ b/Create-preprocessed-output-from-setparam.patch @@ -0,0 +1,31 @@ +From: Egbert Eich +Date: Mon Feb 28 10:33:05 2022 +0100 +Subject: Create preprocessed output from setparam. +Patch-mainline: Not yet +Git-commit: e5b7694b3f37f44145ce6343ca29bd97327f82bf +References: + +Signed-off-by: Egbert Eich +--- + kernel/Makefile | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) +diff --git a/kernel/Makefile b/kernel/Makefile +index cbe4cde..8bc6883 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -161,7 +161,14 @@ qconjg.$(SUFFIX): $(KERNELDIR)/qconjg.S + lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) + $(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) + +-setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h ++setparam$(TSUFFIX).E: setparam$(TSUFFIX).c kernel$(TSUFFIX).h ++ifeq ($(USE_GEMM3M), 1) ++ $(CC) -E $(CFLAGS) -DUSE_GEMM3M $< > $@ ++else ++ $(CC) -E $(CFLAGS) $< > $@ ++endif ++ ++setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h setparam$(TSUFFIX).E + ifeq ($(USE_GEMM3M), 1) + $(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@ + else diff --git a/Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch b/Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch new file mode 100644 index 
0000000..cac835e --- /dev/null +++ b/Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch @@ -0,0 +1,78 @@ +From: Egbert Eich +Date: Tue Mar 1 18:18:54 2022 +0100 +Subject: Do not attempt to check host CPU if TARGET is set. +Patch-mainline: Not yet +Git-commit: 1cf40c7eb77076aa5ae9641bd0fd328ce2bc5e00 +References: + +This wired 'autodetection' breaks DYNAMIC arch (or makes the build +unreproducible) and will most likely not work with cross compiling. + +Presently, this is only relevant for SkylakeX, Cooperlake and +Sapphire Rapids cores, that's what has been implemented. Going +forward, other cores would have to be added here as well (in sync +with Markfile.$(arch) as this needs to be kept in sync with changes +to getarch. +It would be better to remove this hack (and the counterpart in +getarch) entirely. + +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + Makefile.system | 14 +++++++++++--- + cmake/system.cmake | 4 ++-- + getarch.c | 2 +- + 3 files changed, 14 insertions(+), 6 deletions(-) +diff --git a/Makefile.system b/Makefile.system +index 5f66d30..855a734 100644 +--- a/Makefile.system ++++ b/Makefile.system +@@ -197,9 +197,17 @@ endif + + # On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch. + ifeq ($(HOSTARCH), x86_64) +-ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),) +-GETARCH_FLAGS += -march=native +-endif ++ ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),) ++ ifdef TARGET ++GETARCH_NOPROBE=1 ++ endif ++ ifdef TARGET_CORE ++GETARCH_NOPROBE=1 ++ endif ++ ifndef GETARCH_NOPROBE ++GETARCH_FLAGS += -march=native -DAUTOPROBE ++ endif ++ endif + endif + + ifdef INTERFACE64 +diff --git a/cmake/system.cmake b/cmake/system.cmake +index e0e92bd..27c4539 100644 +--- a/cmake/system.cmake ++++ b/cmake/system.cmake +@@ -55,8 +55,8 @@ if (DEFINED TARGET) + endif () + + # On x86_64 build getarch with march=native. 
This is required to detect AVX512 support in getarch. +-if (X86_64 AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "PGI") +- set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native") ++if (X86_64 AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT DEFINED TARGET) ++ set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native -DAUTOPROBE") + endif () + + # On x86 no AVX support is available +diff --git a/getarch.c b/getarch.c +index 59ac1f6..6c5be9a 100644 +--- a/getarch.c ++++ b/getarch.c +@@ -94,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include + #endif + +-#if defined(__x86_64__) || defined(_M_X64) ++#if defined (AUTOPROBE) && (defined(__x86_64__) || defined(_M_X64)) + #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6)) + #else + #ifndef NO_AVX512 diff --git a/Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch b/Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch new file mode 100644 index 0000000..6190a6b --- /dev/null +++ b/Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch @@ -0,0 +1,33 @@ +From: Egbert Eich +Date: Sun Mar 13 10:57:59 2022 +0100 +Subject: Do not include symbols defined in driver/others/parameter.c in DYNAMIC_BUILD +Patch-mainline: Not yet +Git-repo: https://github.com/xianyi/OpenBLAS +Git-commit: 53cd07b0201c94ea50a499867382dcf39d1b8766 +References: + +driver/others/parameter.c does not get build during DYNAMIC_BUILD, thus, +do not declare its symbols. This will make the build fail early and in +an obvious way if functions are trying to use these symbols. 
+ +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + common_macro.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) +diff --git a/common_macro.h b/common_macro.h +index 9826f180..d2fa822c 100644 +--- a/common_macro.h ++++ b/common_macro.h +@@ -2610,8 +2610,9 @@ + #endif + + #ifndef ASSEMBLER +-#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\ +-|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) ++#if !defined(DYNAMIC_ARCH) \ ++ && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ ++ || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) + extern BLASLONG gemm_offset_a; + extern BLASLONG gemm_offset_b; + extern BLASLONG sbgemm_p; diff --git a/For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch b/For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch new file mode 100644 index 0000000..749646f --- /dev/null +++ b/For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch @@ -0,0 +1,51 @@ +From: Egbert Eich +Date: Tue Mar 1 19:27:47 2022 +0100 +Subject: For DYNAMIC_ARCH don't use sbgemm_r as parameter.c doesn't get build +Patch-mainline: Not yet +Git-commit: dce6d9a5fb5e1af31aedcdc0fec1d6393bae395f +References: + +Presently, DYNAMIC_ARCH doesn't build if the build includes Intel +Cooperlake or Sapphire Rapids cores. This is because their init +function reference sbgemm_r which is defined in +driver/other/parameter.c. +This file is not built when using DYNAMIC_ARCH. +The value is the one that blas_set_parameter() would set on build +without DYNAMIC_ARCH. +There seems to be some duplication between blas_set_parameter() and +the kernel specific init_parameter() calls. Some consolidation would +be in order here. 
+ +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + param.h | 8 ++++++++ + 1 file changed, 8 insertions(+) +diff --git a/param.h b/param.h +index 8649e44..e8fdfae 100644 +--- a/param.h ++++ b/param.h +@@ -1803,7 +1803,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #define SBGEMM_DEFAULT_UNROLL_M 32 + #define SBGEMM_DEFAULT_P 256 + #define SBGEMM_DEFAULT_Q 1024 ++#ifndef DYNAMIC_ARCH + #define SBGEMM_DEFAULT_R sbgemm_r ++#else ++#define SBGEMM_DEFAULT_R 43280 ++#endif + + #ifdef ARCH_X86 + +@@ -1933,7 +1937,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #define SBGEMM_DEFAULT_UNROLL_M 16 + #define SBGEMM_DEFAULT_P 384 + #define SBGEMM_DEFAULT_Q 768 ++#ifndef DYNAMIC_ARCH + #define SBGEMM_DEFAULT_R sbgemm_r ++#else ++#define SBGEMM_DEFAULT_R 43280 ++#endif + + #ifdef ARCH_X86 + diff --git a/OpenBLAS-0.3.17.tar.gz b/OpenBLAS-0.3.17.tar.gz deleted file mode 100644 index f6c8c3f..0000000 --- a/OpenBLAS-0.3.17.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:df2934fa33d04fd84d839ca698280df55c690c86a5a1133b3f7266fce1de279f -size 12513037 diff --git a/OpenBLAS-0.3.20.tar.gz b/OpenBLAS-0.3.20.tar.gz new file mode 100644 index 0000000..f1ac2f6 --- /dev/null +++ b/OpenBLAS-0.3.20.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8495c9affc536253648e942908e88e097f2ec7753ede55aca52e5dead3029e3c +size 12742441 diff --git a/_multibuild b/_multibuild index 74856ab..961f079 100644 --- a/_multibuild +++ b/_multibuild @@ -1,7 +1,7 @@ serial - openmp pthreads + openmp gnu-hpc gnu-hpc-pthreads diff --git a/openblas.changes b/openblas.changes index 30bfaf8..5dca180 100644 --- a/openblas.changes +++ b/openblas.changes @@ -1,3 +1,121 @@ +------------------------------------------------------------------- +Fri Feb 25 20:10:04 UTC 2022 - Egbert Eich + +- Update to v0.3.20: + * general: + some code cleanup, with added casts etc. 
+ fixed obtaining the cpu count with OpenMP and OMP_PROC_BIND unset + fixed pivot index calculation by ?LASWP for negative increments other + than one + fixed input argument check in LAPACK ? GEQRT2 + improved the check for a Fortran compiler in CMAKE builds + disabled building OpenBLAS' optimized versions of LAPACK complex SPMV, + SPR,SYMV,SYR with NO_LAPACK=1 + fixed building of LAPACK on certain distributed filesystems with parallel + gmake + fixed building the shared library on MacOS with classic flang + (v0.3.19) + reverted unsafe TRSV/ZRSV optimizations introduced in 0.3.16 + fixed a potential thread race in the thread buffer reallocation routines + that were introduced in 0.3.18 + fixed miscounting of thread pool size on Linux with OMP_PROC_BIND=TRUE + fixed CBLAS interfaces for CSROT/ZSROT and CROTG/ZROTG + made automatic library suffix for CMAKE builds with INTERFACE64 available + to CBLAS-only builds + (v0.3.18) + when the build-time number of preconfigured threads is exceeded + at runtime (by an external program calling BLAS functions from + a larger number of threads), OpenBLAS will now allocate an + auxiliary control structure for up to 512 additional threads + instead of aborting + added support for Loongson's LoongArch64 cpu architecture + fixed building OpenBLAS with CMAKE and -DBUILD_BFLOAT16=ON + added support for building OpenBLAS as a CMAKE subproject + added support for building for Windows/ARM64 targets with clang + improved support for building with the IBM xlf compiler + imported Reference-LAPACK PR 625 (out-of-bounds access in ?LARRV) + imported Reference-LAPACK PR 597 for testsuite compatibility with + LLVM's libomp + * x86_64: + fixed cross-compilation with CMAKE for CORE2 target + fixed miscompilation of AVX512 code in DYNAMIC_ARCH builds + added support for the "incidental" AVX512 hardware in Alder Lake when + enabled in BIOS + (v0.3.19) + DYNAMIC_ARCH builds now fall back to the cpu with most similar capabilities + when an unknown 
CPUID is encountered, instead of defaulting to Prescott + added cpu detection for Intel Alder Lake + added cpu detection for Intel Sapphire Rapids + added an optimized SBGEMM kernel for Sapphire Rapids + fixed DYNAMIC_ARCH builds on OSX with CMAKE + worked around DYNAMIC_ARCH builds made on Sandybridge failing on SkylakeX + fixed missing thread initialization for static builds on Windows/MSVC + fixed an excessive read in ZSYMV + (v0.3.18) + added SkylakeX S/DGEMM kernels for small problem sizes (MNK<=1000000) + added optimized SBGEMM for Intel Cooper Lake + reinstated the performance patch for AVX512 SGEMV_T with a proper fix + added a workaround for a gcc11 tree-vectorizer bug that caused spurious + failures in the test programs for complex BLAS3 when compiling at -O3 + (the default for cmake "release" builds) + added support for runtime cpu count detection under Haiku OS + worked around a long-standing miscompilation issue of the Haswell DGEMV_T + kernel with gcc that could produce NaN output in some corner cases + * Power: + added support for POWER10 in big-endian mode + added support for building with CMAKE + added optimized SGEMM and DGEMM kernels for small matrix sizes + (v0.3.18) + improved performance of DASUM on POWER10 + * ARMV8: + added SVE-enabled CGEMM and ZGEMM kernels for ARMV8SVE and A64FX + added support for Neoverse N2 and V1 cpus + (v0.3.19) + added basic support and cputype detection for Fujitsu A64FX + added a generic ARMV8SVE target + added SVE-enabled SGEMM and DGEMM kernels for ARMV8SVE and A64FX + added optimized CGEMM and ZGEMM kernels for Cortex A53 and A55 cpus + fixed cpuid detection for Apple M1 and improved performance + improved compiler flag setting in CMAKE builds + (v0.3.18) + fixed crashes (use of reserved register x18) on Apple M1 under OSX + fixed building with gcc releases earlier than 5.1 +- Fix out of bounds read in ?LARRV + LAPACK Reference: PR 625 + CVE-2021-4048, bsc#1196513 +- Limit parallel builds according to available 
memory. + Do NOT use %%_smp_mflags with top level 'make', set MAKE_NB_JOBS + instead and let the build do the work. + Also change -flto=auto to -flto=1: spawning even more parallel builds + on top of parallel build threads will wreak havoc. +- Move calls to 'update-alternatives --remove' to %%postun instead + of %%preun as suggested by rpmlint. +- Since we build with DYNAMIC_ARCH, create separate config files for + the different target kernels to help debugging + Add Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch +- Remove compiler feature detection when not using auto-detection. + Add Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch +- Do not depend on variables which are not available when building + DYNAMIC_ARCH. + Add For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch +- Do not include symbols defined in driver/others/parameter.c in + DYNAMIC_BUILD to generate more conclusive error messages earlier. + Add Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch +- Install lapack and blas libraries to an openblas-flavor + specific subdirectory of %%_libdir and set up the alternatives + to point to this directory. Set the system-wide BLAS/LAPACK + default directory to %%_libdir/openblas-default. + This way, the blas/lapack libraries will remain consistent + and from the same source. The user is able to override this + easily by setting the LD_LIBRARY_PATH to include the preferred + BLAS/LAPACK implementation (boo#1177260). +- Consolidate packages 'openblas-devel' and 'openblas-devel-headers' + into 'openblas-common-devel' (these are built for the serial + flavor only). +- Fix the openblas default flavor selection: + # /usr/sbin/update-alternatives --config libopenblas.so.0 +- Add cmake and pkgconfig files. 
+ ------------------------------------------------------------------- Sun Feb 13 16:02:01 UTC 2022 - Egbert Eich diff --git a/openblas.spec b/openblas.spec index 0388e2e..411778e 100644 --- a/openblas.spec +++ b/openblas.spec @@ -1,5 +1,5 @@ # -# spec file for package openblas +# spec file # # Copyright (c) 2022 SUSE LLC # @@ -18,8 +18,8 @@ %global flavor @BUILD_FLAVOR@%{nil} -%define _vers 0_3_17 -%define vers 0.3.17 +%define _vers 0_3_20 +%define vers 0.3.20 %define pname openblas %bcond_with ringdisabled @@ -135,7 +135,7 @@ ExclusiveArch: do_not_build %define so_v 0 %define p_prefix %_prefix %define p_includedir %_includedir/%pname -%define p_libdir %_libdir +%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}} %define p_cmakedir %{p_libdir}/cmake/%{pname} %define num_threads 64 @@ -167,6 +167,10 @@ URL: http://www.openblas.net Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz Source1: README.SUSE Source2: README.HPC.SUSE +Patch1: Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch +Patch2: Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch +Patch3: For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch +Patch4: Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch # PATCH-FIX-UPSTREAM openblas-noexecstack.patch Patch101: openblas-noexecstack.patch # PATCH port @@ -175,6 +179,8 @@ Patch103: openblas-ppc64be_up2_p8.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build +#BuildRequires: cmake +BuildRequires: memory-constraints %if %{without hpc} BuildRequires: gcc-fortran BuildRequires: update-alternatives @@ -212,12 +218,6 @@ Obsoletes: lib%{pname}o0 %hpc_requires %endif -%if %{without hpc} -%define libname %name -%else -%define libname %pname -%endif - %description -n lib%{name}%{?so_v} OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. 
@@ -228,7 +228,7 @@ Summary: Development libraries for OpenBLAS, %{flavor} version Group: Development/Libraries/C and C++ Requires: lib%{name}%{?so_v} = %{version} %if %{without hpc} -Requires: %{pname}-devel-headers = %{version} +Requires: %{pname}-common-devel = %{version} %else %hpc_requires_devel %endif @@ -243,37 +243,18 @@ This package contains the development libraries for serial OpenBLAS version. %package devel-static Summary: Static version of OpenBLAS Group: Development/Libraries/C and C++ -%if %{without hpc} -Requires: %{pname}-devel = %{version} -%else Requires: lib%{name}-devel = %{version} -%endif %description devel-static OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. This package contains the static libraries. -%package -n %{pname}-devel +%package -n %{pname}-common-devel Summary: Development headers and libraries for OpenBLAS Group: Development/Libraries/C and C++ -Requires: %{pname}-devel-headers = %{version} -%ifarch %ix86 x86_64 -Requires: lib%{pname}_pthreads-devel = %{version} -%else -Requires: lib%{pname}_openmp-devel = %{version} -%endif -%description -n %{pname}-devel -OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. - -%package -n %{pname}-devel-headers -Summary: Development headers for OpenBLAS -Group: Development/Libraries/C and C++ -Conflicts: %{pname}-devel < %{version} -BuildArch: noarch - -%description -n %{pname}-devel-headers +%description -n %{pname}-common-devel OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. This package contains headers for OpenBLAS. @@ -281,9 +262,7 @@ This package contains headers for OpenBLAS. %prep %setup -q -n OpenBLAS-%{version} -%patch101 -p1 -%patch102 -p1 -%patch103 -p1 +%autopatch -p1 %ifarch s390 sed -i -e "s@m32@m31@" Makefile.system %endif @@ -295,8 +274,11 @@ cp %{SOURCE2} . 
%endif %build + +# Limit lto jobs to 1 - -flto=auto together with make -j +# would cause a huge number of build jobs spawned in parallel %if "%{?_lto_cflags}" != "" -%global _lto_cflags %{_lto_cflags} -ffat-lto-objects +%global _lto_cflags -flto=1 -ffat-lto-objects %endif # disable lto for ppc64le, boo#1181733 @@ -310,6 +292,9 @@ cp %{SOURCE2} . %endif # Use DYNAMIC_ARCH everywhere - not sure about PPC? +# Use DYNAMIC_ARCH to build for multiple targets, use TARGET to specify +# the CPU model assumed for the common code. It should be set to the +# oldest CPU model one expects to encounter. %global openblas_target DYNAMIC_ARCH=1 # We specify TARGET= to avoid compile-time CPU-detection (boo#1100677) %ifarch %ix86 x86_64 @@ -337,17 +322,27 @@ cp %{SOURCE2} . # ../kernel/power/sasum_microk_power8.c:41:3: error: '__vector' undeclared (first use in this function); did you mean '__cpow'? # TODO why is it required ? (and not for ppc64le) %ifarch ppc64 -%define addopt -mvsx +%global addopt -mvsx %endif +%global addopt %{?addopt} -fno-strict-aliasing + # Make serial, threaded and OpenMP versions -make %{?_smp_mflags} %{?openblas_target} %{?build_flags} \ - %{?openblas_opt} COMMON_OPT="%{optflags} %{?addopt}" \ - NUM_THREADS=%{num_threads} V=1 \ - OPENBLAS_LIBRARY_DIR=%{p_libdir} \ - OPENBLAS_INCLUDE_DIR=%{hpc_includedir} \ - OPENBLAS_CMAKE_DIR=%{p_cmakedir} \ - PREFIX=%{p_prefix} \ - %{!?with_hpc:LIBNAMESUFFIX=%flavor FC=gfortran CC=gcc} + +# Calculate process limits +%limit_build -m 1500 +[[ -n $_threads ]] && jobs=$_threads +[[ -z $jobs ]] && jobs=1 +# NEVER use %%_smp_mflags with top level make: +# set MAKE_NB_JOBS instead and let the build do the work! 
+make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \ + %{?openblas_opt} \ + COMMON_OPT="%{optflags} %{?addopt}" \ + NUM_THREADS=%{num_threads} V=1 \ + OPENBLAS_LIBRARY_DIR=%{p_libdir} \ + OPENBLAS_INCLUDE_DIR=%{p_includedir} \ + OPENBLAS_CMAKE_DIR=%{p_cmakedir} \ + PREFIX=%{p_prefix} \ + %{!?with_hpc:LIBNAMESUFFIX=%flavor FC=gfortran CC=gcc} %install %if %{with hpc} @@ -373,43 +368,39 @@ make %{?_smp_mflags} %{?openblas_target} %{?build_flags} \ %if 0%{!?build_devel:1} # We need the includes only once rm -rf %{buildroot}%{p_includedir}/ -rm -rf %{buildroot}%{p_libdir}/cmake/ -%else +%endif + # Fix cmake config file sed -i 's|%{buildroot}||g' %{buildroot}%{p_cmakedir}/*.cmake -sed -i 's|_serial||g' %{buildroot}%{p_cmakedir}/*.cmake -%endif +sed -i 's|_%{flavor}||g' %{buildroot}%{p_cmakedir}/*.cmake # Put libraries in correct location rm -rf %{buildroot}%{p_libdir}/lib%{name}* # Install the serial library -install -D -p -m 755 lib%{name}.so %{buildroot}%{p_libdir}/lib%{name}.so.0 -install -D -p -m 644 lib%{name}.a %{buildroot}%{p_libdir}/lib%{name}.a +install -D -p -m 755 lib%{name}.so %{buildroot}%{p_libdir}/lib%{pname}.so.0 +install -D -p -m 644 lib%{name}.a %{buildroot}%{p_libdir}/lib%{pname}.a # Fix source permissions (also applies to LAPACK) find -name \*.f -exec chmod 644 {} + -# Remove pkgconfig file, it can't be configured for different library suffixes we use and, as such, is useless -rm -fr %{buildroot}%{p_libdir}/pkgconfig/ - # Dummy target for update-alternatives install -d %{buildroot}/%{_sysconfdir}/alternatives -ln -s lib%{libname}.so.0 %{buildroot}/%{p_libdir}/lib%{pname}.so.0 -ln -s lib%{pname}.so.0 %{buildroot}/%{p_libdir}/libblas.so.3 -ln -s lib%{pname}.so.0 %{buildroot}/%{p_libdir}/libcblas.so.3 -ln -s lib%{pname}.so.0 %{buildroot}/%{p_libdir}/liblapack.so.3 - -ln -s lib%{pname}.so.0 %{buildroot}/%{_sysconfdir}/alternatives/libblas.so.3 -ln -s lib%{pname}.so.0 %{buildroot}/%{_sysconfdir}/alternatives/libcblas.so.3 -ln -s lib%{pname}.so.0 
%{buildroot}/%{_sysconfdir}/alternatives/liblapack.so.3 - -# Fix symlinks -pushd %{buildroot}%{p_libdir} +ln -sf %{_sysconfdir}/alternatives/libblas.so.3 %{buildroot}/%{_libdir}/libblas.so.3 +ln -sf %{_sysconfdir}/alternatives/libcblas.so.3 %{buildroot}/%{_libdir}/libcblas.so.3 +ln -sf %{_sysconfdir}/alternatives/liblapack.so.3 %{buildroot}/%{_libdir}/liblapack.so.3 +ln -sf %{_sysconfdir}/alternatives/liblapacke.so.3 %{buildroot}/%{_libdir}/liblapacke.so.3 +ln -sf %{_sysconfdir}/alternatives/openblas-default %{buildroot}/%{_libdir}/openblas-default +ln -s lib%{pname}.so.%{so_v} %{buildroot}%{p_libdir}/lib%{pname}.so +ln -s %{_libdir}/openblas-default %{buildroot}%{_sysconfdir}/alternatives/openblas-default +ln -s openblas-default/lib%{pname}.so.%{so_v} %{buildroot}%{_libdir}/lib%{pname}.so.%{so_v} %if 0%{?build_devel} -ln -sf lib%{pname}.so.0 lib%{pname}.so +ln -s lib%{pname}.so.%{so_v} %{buildroot}%{_libdir}/lib%{pname}.so +install -d %{buildroot}%{_libdir}/pkgconfig/ +ln -s %{_libdir}/openblas-default/pkgconfig/openblas.pc %{buildroot}%{_libdir}/pkgconfig/ +install -d %{buildroot}/%{_libdir}/cmake +ln -s %{_libdir}/openblas-default/cmake/openblas %{buildroot}/%{_libdir}/cmake/ %endif -ln -sf lib%{name}.so.0 lib%{name}.so %else # with hpc @@ -460,30 +451,34 @@ EOF %post -n lib%{name}%{so_v} %{_sbindir}/update-alternatives --install \ - %{p_libdir}/libblas.so.3 libblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} 20 -%{_sbindir}/update-alternatives --install \ - %{p_libdir}/libcblas.so.3 libcblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} 20 -%{_sbindir}/update-alternatives --install \ - %{p_libdir}/liblapack.so.3 liblapack.so.3 %{p_libdir}/lib%{name}.so.%{so_v} 20 -%{_sbindir}/update-alternatives --install \ - %{p_libdir}/lib%{pname}.so.%{so_v} lib%{name}.so.%{so_v} %{p_libdir}/lib%{name}.so.%{so_v} %openblas_so_prio + %{_libdir}/openblas-default openblas-default %{p_libdir} %openblas_so_prio +for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do + 
%{_sbindir}/update-alternatives --install \ + %{_libdir}/${lib} ${lib} %{_libdir}/lib%{pname}.so.%{so_v} 20 +done /sbin/ldconfig -%preun -n lib%{name}%{so_v} -if [ "$1" = 0 ] ; then - %{_sbindir}/update-alternatives --remove libblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} - %{_sbindir}/update-alternatives --remove libcblas.so.3 %{p_libdir}/lib%{name}.so.%{so_v} - %{_sbindir}/update-alternatives --remove liblapack.so.3 %{p_libdir}/lib%{name}.so.%{so_v} - %{_sbindir}/update-alternatives --remove lib%{name}.so.0 %{p_libdir}/lib%{name}.so.%{so_v} +%postun -n lib%{name}%{so_v} +if [ ! -f %{p_libdir}/lib%{pname}.so.%{so_v} ]; then + for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do + %{_sbindir}/update-alternatives --remove ${lib} %{_libdir}/lib%{pname}.so.%{so_v} + done fi - -%postun -n lib%{name}%{so_v} -p /sbin/ldconfig +if [ ! -d %{p_libdir} ]; then + %{_sbindir}/update-alternatives --remove openblas-default %{p_libdir} +fi +/sbin/ldconfig %posttrans -n lib%{name}%{so_v} if [ "$1" = 0 ] ; then - if ! [ -f %{p_libdir}/lib%{name}.so.%{so_v} ] ; then - %{_sbindir}/update-alternatives --auto lib%{pname}.so.%{so_v} + if [ ! -d %{_libdir}/openblas-default ] ; then + %{_sbindir}/update-alternatives --auto openblas-default fi + for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do + if ! 
[ -f %{_libdir}/${lib} ] ; then + %{_sbindir}/update-alternatives --auto ${lib} + fi + done fi %else @@ -495,16 +490,20 @@ fi %files -n lib%{name}%{?so_v} %defattr(-,root,root,-) -%{p_libdir}/lib%{libname}.so.0 +%{p_libdir}/lib%{pname}.so.0 %if %{without hpc} -%ghost %{p_libdir}/lib%{pname}.so.%{so_v} -%ghost %{p_libdir}/libblas.so.3 -%ghost %{p_libdir}/libcblas.so.3 -%ghost %{p_libdir}/liblapack.so.3 -%ghost %{_sysconfdir}/alternatives/lib%{pname}.so.%{so_v} +%dir %{p_libdir} +%{_libdir}/openblas-default +%{_libdir}/lib%{pname}.so.%{so_v} +%ghost %{_libdir}/libblas.so.3 +%ghost %{_libdir}/libcblas.so.3 +%ghost %{_libdir}/liblapack.so.3 +%ghost %{_libdir}/liblapacke.so.3 +%ghost %{_sysconfdir}/alternatives/openblas-default %ghost %{_sysconfdir}/alternatives/libblas.so.3 %ghost %{_sysconfdir}/alternatives/libcblas.so.3 %ghost %{_sysconfdir}/alternatives/liblapack.so.3 +%ghost %{_sysconfdir}/alternatives/liblapacke.so.3 %else %hpc_dirs %{p_libdir}/libopenblas*r*.so @@ -512,33 +511,31 @@ fi %endif %files -n lib%{name}-devel -%defattr(-,root,root,-) -%{p_libdir}/lib%{libname}.so +%{p_libdir}/lib%{pname}.so +%{p_cmakedir}/ %if %{with hpc} %license LICENSE %doc Changelog.txt GotoBLAS* README.md README.HPC.SUSE %hpc_pkgconfig_file -%{p_cmakedir}/ %{p_includedir}/ +%else +%dir %{p_libdir}/cmake +%dir %{p_libdir}/pkgconfig +%{p_libdir}/pkgconfig %endif %files devel-static -%defattr(-,root,root,-) -#%%{p_libdir}/lib%{libname}.a %{p_libdir}/libopenblas*.a %if 0%{?build_devel} -%files -n %{pname}-devel -%defattr(-,root,root,-) +%files -n %{pname}-common-devel %license LICENSE %doc Changelog.txt GotoBLAS* README.md README.SUSE -%{p_libdir}/libopenblas.so -%dir %{p_libdir}/cmake -%{p_cmakedir}/ - -%files -n %{pname}-devel-headers -%defattr(-,root,root,-) +%{_libdir}/lib%{pname}.so %{p_includedir}/ +%{_libdir}/pkgconfig/openblas.pc +%dir %{_libdir}/cmake +%{_libdir}/cmake/openblas %endif %changelog