From 7fca305fb4b78c1626549e8ba33ec720fb1f51a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Sat, 10 Feb 2024 20:13:50 +0100 Subject: [PATCH] Sync from SUSE:ALP:Source:Standard:1.0 openblas revision 349162a71c86e82656561391561088ca --- .gitattributes | 23 + Handle-s390-correctly.patch | 39 + Link-library-with-z-noexecstack.patch | 24 + OpenBLAS-0.3.25.tar.gz | 3 + README.HPC.SUSE | 23 + README.SUSE | 25 + _constraints | 8 + _multibuild | 7 + openblas-ppc64be_up2_p8.patch | 114 ++ openblas.changes | 1656 +++++++++++++++++++++++++ openblas.rpmlintrc | 3 + openblas.spec | 664 ++++++++++ 12 files changed, 2589 insertions(+) create mode 100644 .gitattributes create mode 100644 Handle-s390-correctly.patch create mode 100644 Link-library-with-z-noexecstack.patch create mode 100644 OpenBLAS-0.3.25.tar.gz create mode 100644 README.HPC.SUSE create mode 100644 README.SUSE create mode 100644 _constraints create mode 100644 _multibuild create mode 100644 openblas-ppc64be_up2_p8.patch create mode 100644 openblas.changes create mode 100644 openblas.rpmlintrc create mode 100644 openblas.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..fecc750 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs 
merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/Handle-s390-correctly.patch b/Handle-s390-correctly.patch new file mode 100644 index 0000000..9ec84b1 --- /dev/null +++ b/Handle-s390-correctly.patch @@ -0,0 +1,39 @@ +From: Egbert Eich +Date: Wed Nov 30 20:14:53 2022 +0100 +Subject: Handle s390 correctly +Patch-mainline: Not yet +Git-commit: f1761f16899756e4da71df35b82772bcbcc33460 +References: + +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + c_check | 2 +- + ctest.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) +diff --git a/c_check b/c_check +index 01d4f4a7..7db8bb42 100755 +--- a/c_check ++++ b/c_check +@@ -11,7 +11,7 @@ case "$hostarch" in + arm*) [ "$hostarch" = "arm64" ] || hostarch='arm' ;; + aarch64) hostarch=arm64 ;; + powerpc*|ppc*) hostarch=power ;; +- s390x) hostarch=zarch ;; ++ s390*) hostarch=zarch ;; + esac + + makefile="$1" +diff --git a/ctest.c b/ctest.c +index df628b1d..f09571b1 100644 +--- a/ctest.c ++++ b/ctest.c +@@ -121,7 +121,7 @@ ARCH_X86_64 + ARCH_POWER + #endif + +-#if defined(__s390x__) || defined(__zarch__) ++#if defined(__s390x__) || defined(__s390__) || defined(__zarch__) + ARCH_ZARCH + #endif + diff --git a/Link-library-with-z-noexecstack.patch b/Link-library-with-z-noexecstack.patch new file mode 100644 index 0000000..0579573 --- /dev/null +++ b/Link-library-with-z-noexecstack.patch @@ -0,0 +1,24 @@ +From: Egbert Eich +Date: Wed Nov 30 20:16:21 2022 +0100 +Subject: Link library with -z,noexecstack +Patch-mainline: Not yet +Git-commit: adddc0eadc81bcd29c48594793cb33eac0edb572 +References: + +Signed-off-by: Egbert Eich +Signed-off-by: Egbert Eich +--- + exports/Makefile | 1 + + 1 file changed, 1 insertion(+) +Index: OpenBLAS-0.3.25/exports/Makefile +=================================================================== 
+--- OpenBLAS-0.3.25.orig/exports/Makefile ++++ OpenBLAS-0.3.25/exports/Makefile +@@ -193,6 +193,7 @@ else ifeq ($(F_COMPILER), FLANG) + else + ifneq ($(C_COMPILER), LSB) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ ++ -Wl,-z,noexecstack \ + -Wl,--whole-archive $< -Wl,--no-whole-archive \ + -Wl,-soname,$(INTERNALNAME) $(EXTRALIB) + $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. diff --git a/OpenBLAS-0.3.25.tar.gz b/OpenBLAS-0.3.25.tar.gz new file mode 100644 index 0000000..a2ceb58 --- /dev/null +++ b/OpenBLAS-0.3.25.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c25cb30c4bb23eddca05d7d0a85997b8db6144f5464ba7f8c09ce91e2f35543 +size 24073168 diff --git a/README.HPC.SUSE b/README.HPC.SUSE new file mode 100644 index 0000000..0d5c7b5 --- /dev/null +++ b/README.HPC.SUSE @@ -0,0 +1,23 @@ +openSUSE specific packaging +=========================== + +OpenBLAS provides optimized implementations of BLAS and LAPACK. +openSUSE provides two variants: + * With OpenMP support + * With threading support +The serial variant has been dropped. To run a program +which requires the serial version (ie. because it is +multi-threaded itself), either specify the environment +variable OMP_NUM_THREADS=1 or place a call to + void openblas_set_num_threads(int num_threads); +in your program to limit the number of threads this library +uses to 1. + +On x86 systems OpenBLAS uses dynamic architectures support, +so it contains all CPU-related optimizations. + +How to switch between the various BLAS/LAPACK implementations +============================================================= + +The openmp and threaded variants may be installed in parallel. +To select which one to use please use the 'modules' command. 
diff --git a/README.SUSE b/README.SUSE new file mode 100644 index 0000000..42862b9 --- /dev/null +++ b/README.SUSE @@ -0,0 +1,25 @@ +openSUSE specific packaging +=========================== + +OpenBLAS provides optimized implementations of BLAS and LAPACK. +openSUSE provides three variants: + * Serial library (libopenblas_serial0) + * With OpenMP support (libopenblas_openmp0) + * With threading support (libopenblas_pthreads0) + +By default openSUSE uses the pthreads version on x86 systems and OpenMP +for other architectures. On x86 systems OpenBLAS uses dynamic +architectures support, so it contains all CPU-related optimizations. + +How to switch between the various BLAS/LAPACK implementations +============================================================= + +BLAS: + sudo /usr/sbin/update-alternatives --config libblas.so.3 + +LAPACK: + sudo /usr/sbin/update-alternatives --config liblapack.so.3 + +More information is available at: + + https://en.opensuse.org/openSUSE:Science_Linear_algebra_libraries diff --git a/_constraints b/_constraints new file mode 100644 index 0000000..b7991e6 --- /dev/null +++ b/_constraints @@ -0,0 +1,8 @@ + + + + + 7 + + + diff --git a/_multibuild b/_multibuild new file mode 100644 index 0000000..6ebffad --- /dev/null +++ b/_multibuild @@ -0,0 +1,7 @@ + + serial + pthreads + openmp + gnu-hpc + gnu-hpc-pthreads + diff --git a/openblas-ppc64be_up2_p8.patch b/openblas-ppc64be_up2_p8.patch new file mode 100644 index 0000000..cb5fea2 --- /dev/null +++ b/openblas-ppc64be_up2_p8.patch @@ -0,0 +1,114 @@ +From: Michel Normand +Subject: openblas ppc64be up2 p8 +Date: Wed, 03 Feb 2021 15:39:25 +0100 + +openblas ppc64be up2 p8 + +because: +* openblas build failed for ppc64 (BE) in openSUSE + since version 0.3.12 +* ppc64 (BE) not supported by IBM after P8. 
+ +Signed-off-by: Michel Normand +--- + Makefile.system | 10 +++++++--- + driver/others/dynamic_power.c | 11 +++++++++++ + 2 files changed, 18 insertions(+), 3 deletions(-) + +Index: OpenBLAS-0.3.25/driver/others/dynamic_power.c +=================================================================== +--- OpenBLAS-0.3.25.orig/driver/others/dynamic_power.c ++++ OpenBLAS-0.3.25/driver/others/dynamic_power.c +@@ -3,12 +3,14 @@ + + extern gotoblas_t gotoblas_POWER6; + extern gotoblas_t gotoblas_POWER8; ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) + extern gotoblas_t gotoblas_POWER9; + #endif + #ifdef HAVE_P10_SUPPORT + extern gotoblas_t gotoblas_POWER10; + #endif ++#endif + + extern void openblas_warning(int verbose, const char *msg); + +@@ -28,11 +30,13 @@ char *gotoblas_corename(void) { + #endif + if (gotoblas == &gotoblas_POWER8) return corename[2]; + #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + if (gotoblas == &gotoblas_POWER9) return corename[3]; + #endif + #ifdef HAVE_P10_SUPPORT + if (gotoblas == &gotoblas_POWER10) return corename[4]; + #endif ++#endif + return corename[0]; + } + +@@ -243,6 +247,10 @@ static gotoblas_t *get_coretype(void) { + #endif + if (__builtin_cpu_is("power8")) + return &gotoblas_POWER8; ++ /* Fall back to the POWER8 implementation for big endian */ ++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ return &gotoblas_POWER8; ++#else + #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) + if (__builtin_cpu_is("power9")) + return &gotoblas_POWER9; +@@ -260,6 +268,7 @@ static gotoblas_t *get_coretype(void) { + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; + #endif ++#endif + return NULL; + } + +@@ -284,12 +293,14 @@ static gotoblas_t *force_coretype(char * + case 1: return (&gotoblas_POWER6); + #endif + case 2: return (&gotoblas_POWER8); ++#if __BYTE_ORDER__ == 
__ORDER_LITTLE_ENDIAN__ + #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) + case 3: return (&gotoblas_POWER9); + #endif + #ifdef HAVE_P10_SUPPORT + case 4: return (&gotoblas_POWER10); + #endif ++#endif + default: return NULL; + } + snprintf(message, 128, "Core not found: %s\n", coretype); +Index: OpenBLAS-0.3.25/Makefile.system +=================================================================== +--- OpenBLAS-0.3.25.orig/Makefile.system ++++ OpenBLAS-0.3.25/Makefile.system +@@ -748,6 +748,9 @@ ifeq ($(ARCH), power) + ifneq ($(C_COMPILER), PGI) + DYNAMIC_CORE = POWER6 + DYNAMIC_CORE += POWER8 ++ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) ++$(info, OpenBLAS: for big endian limit to POWER8 kernels.) ++else + ifneq ($(C_COMPILER), GCC) + DYNAMIC_CORE += POWER9 + DYNAMIC_CORE += POWER10 +@@ -776,11 +779,12 @@ else + $(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.) + endif + endif +-else ++endif # __ORDER_BIG_ENDIAN__ ++else # C_COMPILER PGI + DYNAMIC_CORE = POWER8 + DYNAMIC_CORE += POWER9 +-endif +-endif ++endif # C_COMPILER PGI ++endif # ARCH power + + # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty + ifndef DYNAMIC_CORE diff --git a/openblas.changes b/openblas.changes new file mode 100644 index 0000000..64fe8c0 --- /dev/null +++ b/openblas.changes @@ -0,0 +1,1656 @@ +------------------------------------------------------------------- +Wed Dec 20 12:02:55 UTC 2023 - Giacomo Comes + +- add Requires(pre/post): coreutils to the sub-packages that use + commands like: ln, dirname, mktemp, etc in the pre/post scriptlets + +------------------------------------------------------------------- +Wed Nov 29 05:43:18 UTC 2023 - Atri Bhattacharya + +- Update to version 0.3.25: + * General: + - improved the error message shown on exceeding the maximum + thread count + - improved the code to add supplementary thread buffers in + case of overflow + - fixed a potential division by zero in ?ROTG + - 
improved the ?MATCOPY functions to accept zero-sized rows or + columns + - corrected empty prototypes in function declarations + - cleaned up unused declarations in the f2c-converted versions + of the LAPACK sources + - fixed compilation with the Cray CCE Compiler suite + - improved link line rewriting to avoid mixed libgomp/libomp + builds with clang&gfortran + - worked around OPENMP builds with LLVM14's libomp hanging on + FreeBSD + - improved the Makefiles to require less option duplication on + "make install" + - imported the following changes from the upcoming release + 3.12 of Reference-LAPACK: LAPACK PR 900, LAPACK PR 904, + LAPACK PR 907, LAPACK PR 909, LAPACK PR 926, LAPACK PR 927, + LAPACK PR 928 & 930 + * x86-64: + - fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 + cpus + - fixed capability-based fallback selection for unknown cpus + in DYNAMIC_ARCH + - added AVX512 optimizations for ?ASUM on Sapphire Rapids and + Cooper Lake + * ARM64: + - fixed building on Apple with homebrew gcc + - fixed building with XCODE 15 + - fixed building on A64FX and Cortex A710/X1/X2 + - increased the default buffer size for recent ARM server cpus + * POWER: + - fixed building with the IBM xlf 16.1.1 compiler + - fixed building with IBM XL C + - added support for DYNAMIC_ARCH builds with clang + - fixed union declaration in the BFLOAT16 test case + - enable optimizations for the AIX assembler on POWER10 + * LOONGARCH64: + - added an optimized SGEMV kernel + - added an optimized DTRSM kernel +- Minor rebase of openblas-ppc64be_up2_p8.patch to apply cleanly. +- Drop upstreamed patches: + * Use-blasint-for-INTERFACE64-compatibility.patch + * remove-spurious-loops.patch + +------------------------------------------------------------------- +Fri Oct 27 11:26:28 UTC 2023 - Stefan Brüns + +- Propagate the correct CPU count to the pkgconfig file, see + gh#OpenMathLib/OpenBLAS#4275. 
+ +------------------------------------------------------------------- +Tue Oct 17 02:18:53 UTC 2023 - Bernhard Wiedemann + +- Delete build machine cpu count + +------------------------------------------------------------------- +Fri Feb 24 09:57:34 UTC 2023 - Egbert Eich + +- Recreate old library scheme for existing products: + It turned out the new scheme on existing systems has + been causing package breakages. +- Do not generate baselibs.conf for HPC builds. +- Add support for gcc11 & 12. +- For SLE/Leap on x86_64 and s390x do not mix compiler versions + as this will make the gfortran ABI version inconsistent. Instead + use the stock compiler and set the list of kernels for x86_64 + cores explicitly as Cooperlake requires compiler intrinsics + which are not provided by gcc 7. +- Require at least 7G of disk space for building. + +------------------------------------------------------------------- +Tue Feb 14 18:14:06 UTC 2023 - Egbert Eich + +- Make sure pre-existing (arch-independent) update-alternatives + are wiped before registering new ones. + Since update-alternatives has no reliable way to check if + a certain 'generic name' exists, brute-force it and ignore + any error (boo#1208248). +- Remove totally pointless - ie. never executed - %%posttrans + script. +- Restore generic link for update-alternatives. This is usually + set by the update-alternatives and it is '%ghost'ed but rpmlint + complains. +- Add rpmlintrc rules to avoid false positives from consistently + guessing the update-alternatives generic name wrong. +- Make arch dependent generic names conditional. + +------------------------------------------------------------------- +Fri Feb 3 07:52:04 UTC 2023 - Egbert Eich + +- Do not set LIBNAMESUFFIX to mark different flavors as this causes + the SONAME to be different so that different flavors of OpenBLAS + cannot serve as plugin replacements of each other (boo#1177260). +- Fix a fallout of making alternatives directory arch dependent. 
+- Remove unneeded links that will be created by update-alternatives. + Create remaining links %post scripts properly %ghost-ing the files. + +------------------------------------------------------------------- +Wed Jan 25 21:13:49 UTC 2023 - Egbert Eich + +- Make library links in the alternatives directory arch dependent. + This avoids conflicts when both 32-bit and 64-bit versions are + installed (boo#1207563). + +------------------------------------------------------------------- +Sun Dec 25 00:19:21 UTC 2022 - Stefan Brüns + +- Fix aarch64 builds with GCC < 9 (i.e. Leap/SLE 15.x), disable + NEOVERSEN2 target. See gh#xianyi/OpenBLAS#3874. + +------------------------------------------------------------------- +Wed Nov 30 19:24:20 UTC 2022 - Egbert Eich + +- Update to v0.3.21: + * general: + - Updated the included LAPACK to Reference-LAPACK release 3.10.1 + - when no Fortran compiler is available, OpenBLAS builds will now automatically + - function LAPACKE_lsame is now annotated with the GCC attribute "const" to aid static analyzers + - added USE_TLS to the list of options reported by the openblas_get_config() function + - added SYMBOLPREFIX/SYMBOLSUFFIX handling for LAPACK 3.10.0 functions added in 0.3.20 + - reverted OpenMP threadpool behaviour in the exec_blas call to its state before 0.3.11, that is + the threadpool will no longer grow or shrink on demand as the overhead for this is too big at least with + GNU OpenMP. 
The adaptive behaviour introduced in 0.3.11 can still be requested at runtime by setting + the environment variable OMP_ADAPTIVE + - worked around spurious STFSM/CTFSM errors reported by the LAPACK testsuite + * x86_64: + - fixed determination of compiler support for AVX512 and removed the 0.3.19 + - workaround for building SKYLAKEX kernels on Sandybridge hardware + - fixed compilation for the SKYLAKEX target with gcc 6 + - fixed compilation of the SkyLakeX small matrix GEMM kernels with LLVM or ICC + - added support for the Zhaoxin/Centaur KH40000 cpu + - fixed a potential crash in the ZSYMV kernel used for all targets except generic + * POWER: + - worked around an overflow error in the POWER6 DNRM2 kernel + - fixed compilation on PPC440 + - fixed a performance regression in the level1 BLAS on POWER10 + - fixed the POWER10 ZGEMM kernel + - fixed singlethreaded builds for POWER10 + - fixed compilation of the POWER10 DGEMV kernel with older gcc versions + - enabled compilation of the BFLOAT16 kernels by default + - enabled the small matrix kernels by default for DYNAMIC_ARCH builds + - added a workaround for a miscompilation of the CDOT and ZDOT kernels by GCC 12 +- Obsolete: + * Define-sbgemm_r-to-fix-DYNAMIC_ARCH-builds.patch + * Remove-extraneous-and-wrong-definition-of-sbgemm_r-on-x86_64.patch + * Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch + * Utilize-compiler-AVX512-capability-info-from-c_check-when-building-getarch.patch + * Revert-AVX512-capability-check-from-PR-1980-moved-to-build.patch + * Fix-checks-for-AVX512-and-atomics.patch + * Use-CC-and-full-command-line-instead-of-hard-coding-gcc-for-AVX512-checking.patch +- Updated/renamed: + * openblas-noexecstack.patch to Link-library-with-z-noexecstack.patch + * openblas-s390.patch to Handle-s390-correctly.patch +- Added (see https://github.com/xianyi/OpenBLAS/issues/3738): + * remove-spurious-loops.patch + * Use-blasint-for-INTERFACE64-compatibility.patch + 
+------------------------------------------------------------------- +Wed Sep 14 06:08:40 UTC 2022 - Egbert Eich + +- The toolchain and 'make' in TW have been updated to handle parallel + makes spawned by gcc (for LTO optimization) properly. Thus, remove + restrictions. +- Disable lto when linking test programs to speed up building. + +------------------------------------------------------------------- +Thu May 5 11:56:20 UTC 2022 - Atri Bhattacharya + +- Dynamically generate baselibs.conf for openblas flavors; this + will fix the unresolvables down the chain for arpack-ng 32-bit + bi-arch packages. + +------------------------------------------------------------------- +Fri Apr 29 16:36:57 UTC 2022 - Egbert Eich + +- For non-HPC builds create links (bsc#1198885): + %_lib/libopenblas_.so[.0] -> + %_lib/openblas-/libopenblas.so[.0] + +------------------------------------------------------------------- +Sat Apr 9 06:13:09 UTC 2022 - Egbert Eich + +- Fix issues in update paths from earlier versions introduced by + recent structural changes (bsc#1198264): + - Add Obsoletes for old package names + - Handle the change from directories to soft links properly + +------------------------------------------------------------------- +Wed Apr 6 06:57:55 UTC 2022 - Egbert Eich + +- Also build for s390x using latest gcc as requested by IBM + (jsc#SLE-18143, bsc#1197721). + +------------------------------------------------------------------- +Mon Apr 4 08:57:16 UTC 2022 - Egbert Eich + +- Build HPC packages with gcc- >= 10 on Leap/SLE. + +------------------------------------------------------------------- +Wed Mar 30 12:00:30 UTC 2022 - Egbert Eich + +- Build PPC64LE libraries with the latest gcc available to + take advantage of instruction sets in later CPUs used in + the CPU specific kernels (jsc#SLE-18143, bsc#1197721). + For fortran use the stock compiler to avoid compatibility + issues between different versions of libfortran. + This is relevant for Leap/SLE only. 
It may be dropped once + gcc < 10 is no longer supported. +- Do the same for x86_64 on SLE to make sure Cooperlake support + is built properly. +- Remove: + * Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch + * Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch + * For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch + Instead, add from upstream: + * Define-sbgemm_r-to-fix-DYNAMIC_ARCH-builds.patch + * Remove-extraneous-and-wrong-definition-of-sbgemm_r-on-x86_64.patch + * Fix-checks-for-AVX512-and-atomics.patch + * Revert-AVX512-capability-check-from-PR-1980-moved-to-build.patch + * Use-CC-and-full-command-line-instead-of-hard-coding-gcc-for-AVX512-checking.patch + * Utilize-compiler-AVX512-capability-info-from-c_check-when-building-getarch.patch + +------------------------------------------------------------------- +Fri Feb 25 20:10:04 UTC 2022 - Egbert Eich + +- Update to v0.3.20: + * general: + some code cleanup, with added casts etc. + fixed obtaining the cpu count with OpenMP and OMP_PROC_BIND unset + fixed pivot index calculation by ?LASWP for negative increments other + than one + fixed input argument check in LAPACK ? 
GEQRT2 + improved the check for a Fortran compiler in CMAKE builds + disabled building OpenBLAS' optimized versions of LAPACK complex SPMV, + SPR,SYMV,SYR with NO_LAPACK=1 + fixed building of LAPACK on certain distributed filesystems with parallel + gmake + fixed building the shared library on MacOS with classic flang + (v0.3.19) + reverted unsafe TRSV/ZRSV optimizations introduced in 0.3.16 + fixed a potential thread race in the thread buffer reallocation routines + that were introduced in 0.3.18 + fixed miscounting of thread pool size on Linux with OMP_PROC_BIND=TRUE + fixed CBLAS interfaces for CSROT/ZSROT and CROTG/ZROTG + made automatic library suffix for CMAKE builds with INTERFACE64 available + to CBLAS-only builds + (v0.3.18) + when the build-time number of preconfigured threads is exceeded + at runtime (by an external program calling BLAS functions from + a larger number of threads), OpenBLAS will now allocate an + auxiliary control structure for up to 512 additional threads + instead of aborting + added support for Loongson's LoongArch64 cpu architecture + fixed building OpenBLAS with CMAKE and -DBUILD_BFLOAT16=ON + added support for building OpenBLAS as a CMAKE subproject + added support for building for Windows/ARM64 targets with clang + improved support for building with the IBM xlf compiler + imported Reference-LAPACK PR 625 (out-of-bounds access in ?LARRV) + imported Reference-LAPACK PR 597 for testsuite compatibility with + LLVM's libomp + * x86_64: + fixed cross-compilation with CMAKE for CORE2 target + fixed miscompilation of AVX512 code in DYNAMIC_ARCH builds + added support for the "incidental" AVX512 hardware in Alder Lake when + enabled in BIOS + (v0.3.19) + DYNAMIC_ARCH builds now fall back to the cpu with most similar capabilities + when an unknown CPUID is encountered, instead of defaulting to Prescott + added cpu detection for Intel Alder Lake + added cpu detection for Intel Sapphire Rapids + added an optimized SBGEMM kernel for Sapphire 
Rapids + fixed DYNAMIC_ARCH builds on OSX with CMAKE + worked around DYNAMIC_ARCH builds made on Sandybridge failing on SkylakeX + fixed missing thread initialization for static builds on Windows/MSVC + fixed an excessive read in ZSYMV + (v0.3.18) + added SkylakeX S/DGEMM kernels for small problem sizes (MNK<=1000000) + added optimized SBGEMM for Intel Cooper Lake + reinstated the performance patch for AVX512 SGEMV_T with a proper fix + added a workaround for a gcc11 tree-vectorizer bug that caused spurious + failures in the test programs for complex BLAS3 when compiling at -O3 + (the default for cmake "release" builds) + added support for runtime cpu count detection under Haiku OS + worked around a long-standing miscompilation issue of the Haswell DGEMV_T + kernel with gcc that could produce NaN output in some corner cases + * Power: + added support for POWER10 in big-endian mode + added support for building with CMAKE + added optimized SGEMM and DGEMM kernels for small matrix sizes + (v0.3.18) + improved performance of DASUM on POWER10 + * ARMV8: + added SVE-enabled CGEMM and ZGEMM kernels for ARMV8SVE and A64FX + added support for Neoverse N2 and V1 cpus + (v0.3.19) + added basic support and cputype detection for Fujitsu A64FX + added a generic ARMV8SVE target + added SVE-enabled SGEMM and DGEMM kernels for ARMV8SVE and A64FX + added optimized CGEMM and ZGEMM kernels for Cortex A53 and A55 cpus + fixed cpuid detection for Apple M1 and improved performance + improved compiler flag setting in CMAKE builds + (v0.3.18) + fixed crashes (use of reserved register x18) on Apple M1 under OSX + fixed building with gcc releases earlier than 5.1 +- Fix out of bounds read in ?llarv + LAPACK Reference: PR 625 + CVE-2021-4048, bsc#1196513 +- Limit parallel builds according to available memory. + Do NOT use %%_smp_mflags with top level 'make', set MAKE_NB_JOBS + instead and let the build do the work. 
+ Also change -flto=auto to -flto=1: spawning even more parallel builds + on top of parallel build threads will wreak havoc. +- Move calls to 'update-alternatives --remove' to %%postun instead + of %%preun as suggested by rpmlint. +- Since we build with DYNAMIC_ARCH, create separate config files for + the different target kernels to help debugging + Add Create-independent-kernel-Makfile-configuration-when-building-DYNAMIC_ARCH.patch +- Remove compiler feature detection when not using auto-detection. + Add Do-not-attempt-to-check-host-CPU-if-TARGET-is-set.patch +- Do not depend on variables which are not available when building + DYNAMIC_ARCH. + Add For-DYNAMIC_ARCH-don-t-use-sbgemm_r-as-parameter.c-doesn-t-get-build.patch +- Do not include symbols defined in driver/others/parameter.c in + DYNAMIC_BUILD to generate more conclusive error messages earlier. + Add Do-not-include-symbols-defined-in-driver-others-parameter.c-in-DYNAMIC_BUILD.patch +- Install lapack and blas libraries to an openblas-flavor + specific subdirectory of %%_libdir and set up the alternatives + to point to this directory. Set the system-wide BLAS/LAPACK + default directory to %%_libdir/openblas-default. + This way, the blas/lapack libraries will remain consistent + and from the same source. The user is able to override this + easily by setting the LD_LIBRARY_PATH to include the preferred + BLAS/LAPACK implementation (boo#1177260). +- Consolidate packages 'openblas-devel' and 'openblas-devel-headers' + into 'openblas-common-devel' (these are built for the serial + flavor only). + 'openblas-common-devel' will provide the removed 'openblas-devel-headers' + while the arch specific 'preferred' flavor will provide the removed + 'openblas-devel'. +- Fix the openblas default flavor selection: + # /usr/sbin/update-alternatives --config libopenblas.so.0 +- Add cmake and pkgconfig files. 
+ +------------------------------------------------------------------- +Sun Feb 13 16:02:01 UTC 2022 - Egbert Eich + +- Fixed bsc#1195232 for good: found and removed offending entry. + This reintroduces part of: + Thu Jul 8 12:35:35 UTC 2021 - Dominique Leuenberger + + - Do not create dummy symlinks on $self in /etc/alternatives: those + files are packaged as %ghost and any real file existence only + confuses brp-checks, as it detects circular symlinks. + +------------------------------------------------------------------- +Sun Jan 30 12:06:43 UTC 2022 - Egbert Eich + +- Partly revert: + Thu Jul 8 12:35:35 UTC 2021 - Dominique Leuenberger + + - Do not create dummy symlinks on $self in /etc/alternatives: those + files are packaged as %ghost and any real file existence only + confuses brp-checks, as it detects circular symlinks. + for all suse_versions < current Factory in an attempt to fix bsc#1195232. + +------------------------------------------------------------------- +Mon Jul 26 08:20:32 UTC 2021 - Andreas Schwab + +- Use RISCV64_GENERIC for riscv64 +- Add -ffat-lto-objects to get proper static archives + +------------------------------------------------------------------- +Thu Jul 22 08:11:06 UTC 2021 - Ismail Dönmez + +- Update to version 0.3.17 + - Fixes regressions introduced in 0.3.16 + See https://github.com/xianyi/OpenBLAS/releases/tag/v0.3.17 for + the complete changelog. + +------------------------------------------------------------------- +Tue Jul 13 11:03:36 UTC 2021 - Ismail Dönmez + +- Update to version 0.3.16 + Please see https://github.com/xianyi/OpenBLAS/releases/tag/v0.3.15 + and https://github.com/xianyi/OpenBLAS/releases/tag/v0.3.16 + for the complete list of changes. A complete changelog is + also available in the installed Changelog.txt . 
+ +------------------------------------------------------------------- +Thu Jul 8 12:35:35 UTC 2021 - Dominique Leuenberger + +- Do not create dummy symlinks on $self in /etc/alternatives: those + files are packaged as %ghost and any real file existence only + confuses brp-checks, as it detects circular symlinks. + +------------------------------------------------------------------- +Thu Mar 18 12:22:57 UTC 2021 - Michel Normand + +- Update openblas-ppc64be_up2_p8.patch trimmed by previous sr + (still need changes in Makefile.system) + +------------------------------------------------------------------- +Thu Mar 18 08:05:58 UTC 2021 - Ismail Dönmez + +- Update to version 0.3.14 + common: + * Fixed a race condition on thread shutdown in non-OpenMP builds + * Fixed custom BUFFERSIZE option getting ignored in gmake builds + * Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms + * Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT + * Improved performance of OMATCOPY_RT across all platforms + * Changed perl scripts to use env instead of a hardcoded /usr/bin/perl + * Fixed potential misreading of the GCC compiler version in the build scripts + * Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477) + * Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335) + + RISC V: + * Fixed compilation on RISCV (missing entry in getarch) + + POWER: + * Fixed compilation for DYNAMIC_ARCH with clang and with older gcc versions + * Added support for compilation on FreeBSD/ppc64le + * Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL + * Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM + * Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10 + * Improved SCOPY and CCOPY performance on POWER10 + * Improved SGEMM and DGEMM performance on POWER10 + * Added support for compilation with the NVIDIA HPC compiler + + x86_64: + * Added an optimized bfloat16 GEMM kernel for 
Cooperlake + * Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus + * Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus + * Added support for compilation with the NAG Fortran compiler + * Fixed recognition of the AMD AOCC compiler + * Fixed compilation for DYNAMIC_ARCH with clang on Windows + * Added support for running the BLAS/CBLAS tests on Windows + * Fixed signatures of the tls callback functions for Windows x64 + * Fixed various issues with fma intrinsics support handling + + ARM: + * Support compilation for embedded Cortex M4 targets via a new option EMBEDDED + + ARM64: + * Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf + * Added support for the DYNAMIC_LIST option + * Added support for compilation with the NVIDIA HPC compiler + * Added support for compiling with the NAG Fortran compiler + +- Remove 0001-Require-gcc-11-for-builtin_cpu_is-power10.patch + 0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch + Upstream fixed in a different way. + +------------------------------------------------------------------- +Thu Feb 4 11:49:11 UTC 2021 - Michel Normand + +- Disable lto for ppc64le to avoid build failure (bsc#1181733) +- Add openblas-ppc64be_up2_p8.patch to avoid ppc64 (BE) build failure + Do not set BUILD_BFLOAT16 for ppc64 (BE) (same bug nb) + +------------------------------------------------------------------- +Tue Feb 2 21:30:18 UTC 2021 - Egbert Eich + +- BUILD_BFLOAT16=1 is not supported in s390(x) (bsc#1181522) +- Add: + * 0001-Require-gcc-11-for-builtin_cpu_is-power10.patch + * 0002-patch-to-support-power10-in-builtin_cpu_is-was-backp.patch: + Only gcc11 has builtin_cpu_is(power10) - fix build issue for ppc64 + (bsc#1181522). 
+ +------------------------------------------------------------------- +Thu Dec 17 07:12:02 UTC 2020 - Ismail Dönmez + +- Update to version 0.3.13 + common: + * Added a generic bfloat16 SBGEMV kernel + * Fixed a potentially severe memory leak after fork in OpenMP builds + that was introduced in 0.3.12 + * Added detection of the Fujitsu Fortran compiler + * Added detection of the (e)gfortran compiler on OpenBSD + * Added support for overriding the default name of the library independently + from symbol suffixing in the gmake builds (already supported in cmake) + + RISC V: + * Added a RISC V port optimized for C910V + + POWER: + * Added optimized POWER10 kernels for SAXPY, CAXPY, SDOT, DDOT and DGEMV_N + * Improved DGEMM performance on POWER10 + * Improved STRSM and DTRSM performance on POWER9 and POWER10 + * Fixed segmentation faults in DYNAMIC_ARCH builds + * Fixed compilation with the PGI compiler + + x86: + * Fixed compilation of kernels that require SSE2 intrinsics since 0.3.12 + + x86_64: + * Added an optimized bfloat16 SBGEMV kernel for SkylakeX and Cooperlake + * Improved the performance of SASUM and DASUM kernels through parallelization + * Improved the performance of SROT and DROT kernels + * Improved the performance of multithreaded xSYRK + * Fixed OpenMP builds that use the LLVM Clang compiler together with GNU gfortran + (where linking of both the LLVM libomp and GNU libgomp could lead to lockups or + wrong results) + * Fixed miscompilations by old gcc 4.6 + * Fixed misdetection of AVX2 capability in some Sandybridge cpus + * Fixed lockups in builds combining DYNAMIC_ARCH with TARGET=GENERIC on OpenBSD + + ARM64: + * Fixed segmentation faults in DYNAMIC_ARCH builds + + MIPS: + * Improved kernels for Loongson 3R3 ("3A") and 3R4 ("3B") models, including MSA + * Fixed bugs in the MSA kernels for CGEMM, CTRMM, CGEMV and ZGEMV + * Added handling of zero increments in the MSA kernels for SSWAP and DSWAP + * Added DYNAMIC_ARCH support for MIPS64 (currently
Loongson3R3/3R4 only) + + SPARC: + * Fixed building 32 and 64 bit SPARC kernels with the SolarisStudio compilers + +------------------------------------------------------------------- +Wed Dec 16 16:27:22 UTC 2020 - Dominique Leuenberger + +- Fix invalid symlinks (boo#1179764). + +------------------------------------------------------------------- +Sat Oct 24 16:27:35 UTC 2020 - Ismail Dönmez + +- Update to version 0.3.12 + common: + * Fixed missing BLAS/LAPACK functions (inadvertently dropped during + the build system restructuring to support selective compilation) + * Fixed argument conversion macro in LAPACKE_zgesvdq (LAPACK #458) + + power: + * Added optimized SCOPY/CCOPY kernels for POWER10 + * Increased and unified the default size of the GEMM buffer + * Fixed building for POWER10 in DYNAMIC_ARCH mode + * POWER10 compatibility test now checks binutils version as well + * Cleaned up compiler warnings + + x86_64: + * Corrected compiler version checks for AVX2 compatibility + * Added compiler option -mavx2 for building with flang + * Fixed direct SGEMM pathway for small matrix sizes (broken by + the code refactoring in 0.3.11) + * Fixed unhandled partial register clobbers in several kernels + for AXPY,DOT,GEMV_N and GEMV_T flagged by gcc10 tree-vectorizer + + armv8: + * Improved Apple Vortex support to include cross-compiling + +- Drop fix-build.patch, merged upstream. 
+ +------------------------------------------------------------------- +Wed Oct 21 09:18:18 UTC 2020 - Ismail Dönmez + +- Update _constraints to use 12GB RAM on x86_64 + +------------------------------------------------------------------- +Wed Oct 21 05:17:45 UTC 2020 - Ismail Dönmez + +- Update to version 0.3.11 + common: + * Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper + limit for placing temporary arrays on the stack) to be compatible + with a stack size of 1mb (as imposed by the JAVA runtime library) + * Added mixed-precision dot function SBDOT and utility functions + shstobf16, shdtobf16, sbf16tos and dbf16tod to convert between + single or double precision float arrays and bfloat16 arrays + * Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions + in lapack.h + * Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2 + (causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263) + * Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415) + * Fixed several bugs in the LAPACK testsuite + * Improved performance of TRMM and TRSM for certain problem sizes + * Fixed infinite recursions and workspace miscalculations in ReLAPACK + * CMAKE builds no longer require pkg-config for creating the .pc file + * Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as + enabling these options + * Fixed detection of gfortran when invoked through an mpi wrapper + * Improve thread reinitialization performance with OpenMP after a fork + * Added support for building only the subset of the library required + for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE + * Optional function name prefixes and suffixes are now correctly + reflected in the generated cblas.h + * Added CMAKE build support for the LAPACK and multithreading tests + + power: + * Added optimized support for POWER10 + * Added support for compiling for POWER8 in 32bit mode + * Added support for compilation with LLVM/clang + * Added support for 
compilation with NVIDIA/PGI compilers + * Fixed building on big-endian POWER8 + * Fixed miscompilation of ZDOTC by gcc10 + * Fixed alignment errors in the POWER8 SAXPY kernel + * Improved CPU detection on AIX + * Supported building with older compilers on POWER9 + + x86_64: + * Added support for Intel Cooperlake + * Added autodetection of AMD Renoir/Matisse/Zen3 cpus + * Added autodetection of Intel Comet Lake cpus + * Reimplemented ?sum, ?dot and daxpy using universal intrinsics + * Reset the fpu state before using the fpu on Windows as a workaround + for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004) + * Fixed potentially undefined behaviour in the dot and gemv_t kernels + * Fixed a potential segmentation fault in DYNAMIC_ARCH builds + * Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers + + armv7: + * Fixed cpu detection on BSD-like systems + + armv8: + * Added preliminary support for Apple Vortex cpus + * Added support for the Cavium ThunderX3T110 cpu + * Fixed cpu detection on BSD-like systems + * Fixed compilation in -std=C18 mode + + IBM Z: + * Added support for compiling with the clang compiler + * Improved GEMM performance on Z14 + +- Enable bfloat16 support via BUILD_BFLOAT16=1 +- Add fix-build.patch to fix build with -Werror=return-type + +------------------------------------------------------------------- +Sat Oct 3 07:30:06 UTC 2020 - Egbert Eich + +- Set DYNAMIC_ARCH everywhere, use a base CPU model for non-dynamic + bits to have a reproducible base line: + x86_64: CORE2 + aarch64: ARMV8 + ppc: POWER8 + s390: ZARCH_GENERIC +- Remove workaround for build failure on aarch64 (boo#1128794). + +------------------------------------------------------------------- +Thu Sep 24 10:45:45 UTC 2020 - Egbert Eich + +- For s390/s390x add TARGET=ZARCH_GENERIC (jsc#SLE-13773).
+ +------------------------------------------------------------------- +Wed Aug 12 02:36:15 UTC 2020 - Bernhard Wiedemann + +- Avoid compile-time CPU-detection (boo#1100677) + +------------------------------------------------------------------- +Thu Jul 23 16:25:56 UTC 2020 - Egbert Eich + +- Add build support for gcc10 to HPC build (bsc#1174439). + +------------------------------------------------------------------- +Mon Jun 15 05:13:19 UTC 2020 - Ismail Dönmez + +- Update to version 0.3.10 + common: + * Improved thread locking behaviour in blas_server and parallel getrf + * Imported bugfix 394 from LAPACK (spurious reference to "XERBL" + due to overlong lines) + * Imported bugfix 403 from LAPACK (compile option "recursive" required + for correctness with Intel and PGI) + * Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB) + * Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP) + * Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that + could lead to crashes at large matrix sizes + * Restored internal soname in dynamic libraries on FreeBSD and Dragonfly + * Added API (openblas_setaffinity) to set thread affinity + programmatically on Linux + * Added initial infrastructure for half-precision floating point + (bfloat16) support with a generic implementation of SHGEMM + * Added CMAKE build system support for building the cblas_Xgemm3m + functions + * Fixed CMAKE support for building in a path with embedded spaces + * Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC + * Fixed GCC version detection in the Makefiles + * Allowed overriding the names of AR, AS and LD in Makefile builds + + POWER: + * fixed big-endian POWER8 ELFv2 builds on FreeBSD + * Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9 + * Fixed CMAKE build support for POWER9 + * fixed a potential race condition in the thread buffer allocation + * Worked around LAPACK test failures on PPC G4 + + MIPS: + * fixed a potential race condition in 
the thread buffer allocation + * Added support for MIPS 24K/24KE family based on P5600 kernels + + MIPS64: + * fixed a potential race condition in the thread buffer allocation + * Added TARGET=GENERIC + + ARMV7: + * fixed a race condition in the thread buffer allocation + + ARMV8: + * Fixed a race condition in the thread buffer allocation + * Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA + * Improved performance of the ThunderX2 DAXPY kernel + * Added an optimized SGEMM kernel for Cortex A53 + * Fixed Makefile support for INTERFACE64 (8-byte integer) + + x86_64: + * Fixed a syntax error in the CMAKE setup for SkylakeX + * Improved performance of STRSM on Haswell, SkylakeX and Ryzen + * Improved SGEMM performance on SGEMM for workloads with ldc a + multiple of 1024 + * Improved DGEMM performance on Skylake X + * Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH + builds created on SkylakeX + * Removed data alignment requirement in the SSE2 copy kernels + that could cause spurious crashes + * Added a workaround for an optimizer bug in AppleClang 11.0.3 + * Fixed LAPACK-TEST failures with Intel Fortran + * Fixed compilation and LAPACK test results with recent Flang + and AMD AOCC + * Fixed DYNAMIC_ARCH builds with CMAKE on OS X + * Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max, + * cblas_?sum, cblas_?gemm3m in the shared library on OS X + * Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes + show the name of an older generation chip supported by the same kernels) + + IBM Z: + * Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14 + +- Refresh openblas-noexecstack.patch and openblas-s390.patch + +------------------------------------------------------------------- +Mon Mar 2 08:19:23 UTC 2020 - Martin Liška + +- Update to version 0.3.9 + common: + * Fixed a miscompilation of the GETRF functions with CMAKE + The size of the memory buffer used for splitting GEMM tasks across + multiple threads can now 
be configured in the build system. + Imported bugfix 390 from LAPACK (missing NaN propagation in xCOMBSSQ) + POWER: + * fixed several compilation problems related to endianness and + ELF version support on POWER8 and POWER9. + * fixed misuse of the absolute value IAMIN/IAMAX in place of IMIN/IMAX + * fixed a race condition in the level3 blas code + MIPS64: + * fixed misuse of the absolute value IAMIN/IAMAX in place of IMIN/IMAX + ARMV7: + * fixed a race condition in the level3 blas code + * fixed a compilation problem on Android + ARMV8: + * Added support for Ampere EMAG8180 + * Added support for Neoverse N1 + improved performance of the blas_lock function + fixed a race condition in the level3 blas code + * Fixed a performance regression on TSV110 servers + x86_64: + * Fixed a long-standing error with undeclared register clobbers in + the DSCAL microkernel for Haswell,SkylakeX and Zen exposed by gcc9.2 + * Fixed a long-standing bug in the SSE implementation of the IAMAX functions + * Fixed a cmake build failure with DYNAMIC_ARCH on x86_64 + * Fixed an oversight in the cpu detection code for Intel Goldmont+, + Cannon Lake and Ice Lake + * Fixed compile failure on OSX when the compiler name contains a dash + (e.g. gcc-9) + * Fixed compilation with MinGW on SkylakeX + * Improved speed of the AVX512 GEMM3M code, added an AVX512 kernel for + * STRMM and improved performance of the AVX2 GEMM kernels + IBM Z: + * fixed compilation of the DYNAMIC_ARCH code + +------------------------------------------------------------------- +Wed Feb 26 12:22:00 UTC 2020 - Egbert Eich + +- Add support for gcc8/9 building (jsc#SLE-8604). 
+ +------------------------------------------------------------------- +Mon Feb 10 07:01:49 UTC 2020 - Ismail Dönmez + +- Update to version 0.3.8 + common: + * LAPACK has been updated to 3.9.0 (plus patches up to January 2nd, 2020) + * CMAKE support has been improved in several areas including cross-compilation + * A thread race condition in the GEMM3M kernels was resolved + * The "generic" (plain C) gemm beta kernel used by many targets has been sped up + * An optimized version of the LAPACK trtrs functions has been added + * An incompatibility between the LAPACK tests and the OpenBLAS implementation of XERBLA + was resolved, removing the numerous warnings about wrong error exits in the former + * Support for NetBSD has been added + * Support for compilation with g95 and non-GNU versions of ld has been improved + * Compilation with (upcoming) gcc 10 is now supported + power: + * Worked around miscompilation of several POWER8 and POWER9 kernels by + older versions of gcc + * Added support for big-endian POWER8 and for compilation on AIX + * Corrected bugs in the big-endian support for PPC440 and PPC970 + * DYNAMIC_ARCH support is now available in CMAKE builds as well + armv8: + * Performance of DGEMM_BETA and SGEMM_NCOPY has been improved + * Compilation for 32bit works again + * Performance of the RPCC function has been improved + * Improved performance on small systems + * DYNAMIC_ARCH support is now available in CMAKE builds as well + * Cross-compilation from OSX to IOS was simplified + x86-64: + * A new AVX512 DGEMM kernel was added and the AVX512 SGEMM kernel was + significantly improved + * Optimized AVX512 kernels for CGEMM and ZGEMM have been added + * AVX2 kernels for STRMM, SGEMM, and CGEMM have been significantly + sped up and optimized CGEMM3M and ZGEMM3M kernels have been added + * Added support for QEMU virtual cpus + * A compilation problem with PGI and SUN compilers was fixed + * Intel "Goldmont plus" is now autodetected + * A potential crash on
program exit on MS Windows has been fixed + IBM Z: + * Z15 is now supported as Z14 + * DYNAMIC_ARCH is now available on ZARCH as well +- Remove now merged gcc10-Support-two-digit-version-numbers-in-gcc-version-che.patch + +------------------------------------------------------------------- +Sun Dec 8 09:03:24 UTC 2019 - Martin Liška + +- Add gcc10-Support-two-digit-version-numbers-in-gcc-version-che.patch + in order to fix boo#1158727. + +------------------------------------------------------------------- +Mon Aug 12 06:22:47 UTC 2019 - Martin Liška + +- Update to version 0.3.7 (jsc#SLE-8492) + common: + * having the gmake special variables TARGET_ARCH or TARGET_MACH + defined no longer causes build failures in ctest or utest + * defining NO_AFFINITY or USE_TLS to zero in gmake builds + no longer has the same effect as setting them to one + * a new test program was added to allow checking the library for thread safety + * a new option USE_LOCKING was added to ensure thread safety + when OpenBLAS itself is built without multithreading but + * will be called from multiple threads. 
+ * a build failure on Linux with glibc versions earlier than 2.5 was fixed + * a runtime error with CPU enumeration (and NO_AFFINITY not set) + on glibc 2.6 was fixed + * NO_AFFINITY was added to the CMAKE options + (and defaults to being active on Linux, as in the gmake builds) + x86_64 + * the build-time logic for detection of AVX512 availability + in the processor and compiler was fixed + * gmake builds on OSX now set the internal name + of the library to libopenblas.0.dylib (consistent with CMAKE) + * the Haswell DGEMM kernel received a significant speedup + through improved prefetch and load instructions + * performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 + was markedly increased by avoiding vpermpd instructions + * the SKYLAKEX (AVX512) DGEMM helper functions have now been + disabled to fix remaining errors in DGEMM, DSYMM and DTRMM + POWER: + * added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970 + * added optimized kernels for POWER9 single and double precision complex BLAS3 + * added optimized kernels for POWER9 SGEMM and STRMM + ARMV7: + * fixed the softfp implementations of xAMAX and IxAMAX + * removed the predefined -march= flags on both ARMV5 + and ARMV6 as they were appropriate for only a subset of platforms +- Remove not needed fix-arm64-cpuid-return.patch. 
+ +------------------------------------------------------------------- +Tue Apr 30 07:42:06 UTC 2019 - Martin Liška + +- Update to version 0.3.6 (boo#1122186) + common: + * the build tools now check that a given cpu TARGET is actually valid + * the build-time check of system features (c_check) has been made + less dependent on particular perl features (this should mainly + benefit building on Windows) + * several problems with ReLAPACK and its integration were fixed, + including INTERFACE64 support and building a shared library + * building with CMAKE on BSD systems was improved + * a non-absolute SUM function was added based on the + existing optimized code for ASUM + * CBLAS interfaces to the IxMIN and IxMAX functions were added + * a name clash between LAPACKE and BOOST headers was resolved + * CMAKE builds with OpenMP failed to include the appropriate getrf_parallel + kernels + * a crash on thread (key) deletion with the USE_TLS=1 memory management + option was fixed + * restored several earlier fixes, in particular for OpenMP performance, + building on BSD, and calling fork on CYGWIN, which had inadvertently + been dropped in the 0.3.3 rewrite of the memory management code. 
+ POWER: + * single precision BLAS1/2 functions have received optimized POWER8 kernels + * POWER9 is now a separate target, with an optimized DGEMM/DTRMM kernel + * building on PPC970 systems under OSX Leopard or Tiger is now supported + * out-of-bounds memory accesses in the gemm_beta microkernels were fixed + * building a shared library on AIX is now supported for POWER6 + * DYNAMIC_ARCH support has been added for POWER6 and newer + ARMV7: + * corrected xDOT behaviour with zero INC_X or INC_Y + * a bug in the IMIN implementation made it return the result of IMAX + ARMV8: + * added support for HiSilicon TSV110 cpus + * the CMAKE build system now recognizes 32bit userspace on 64bit hardware + * cross-compilation with CMAKE now works again + * a bug in the IMIN implementation made it return the result of IMAX + * ARMV8 builds with the BINARY=32 option are now automatically handled as ARMV7 + x86_64: + * the AVX512 DGEMM kernel has been disabled again due to unsolved problems + * building with old versions of MSVC was fixed + * it is now possible to build a static library on Windows with CMAKE + * accessing environment variables on CYGWIN at run time was fixed + * the CMAKE build system now recognizes 32bit userspace on 64bit hardware + * Intel "Denverton" atom and Hygon "Dhyana" zen CPUs are now autodetected + * building for DYNAMIC_ARCH with a DYNAMIC_LIST of targets is now supported + with CMAKE as well + * building for DYNAMIC_ARCH with GENERIC as the default target is now supported + * a buffer overflow in the SSE GEMM kernel for Intel Nano targets was fixed + * assembly bugs involving undeclared modification of input operands were fixed + in the AXPY, DOT, GEMV, GER, SCAL, SYMV and TRSM microkernels for Nehalem, + Sandybridge, Haswell, Bulldozer and Piledriver. These would typically cause + test failures or segfaults when compiled with recent versions of gcc from 8 onward. 
+ * a similar bug was fixed in the blas_quickdivide code used to split workloads + in most functions + * a bug in the IxMIN implementation for the GENERIC target made it return the result of IxMAX + * fixed building on SkylakeX systems when either the compiler or the (emulated) operating + environment does not support AVX512 + * improved GEMM performance on ZEN targets + x86: + * build failures caused by the recently added checks for AVX512 were fixed + * an inline assembly bug involving undeclared modification of an input argument was + fixed in the blas_quickdivide code used to split workloads in most functions + * a bug in the IMIN implementation for the GENERIC target made it return the result of IMAX + MIPS32: + * a bug in the IMIN implementation made it return the result of IMAX + IBM Z: + * optimized microkernels for single precision BLAS1/2 functions have been added for Z13 and Z14 + - Rebase openblas-noexecstack.patch and openblas-s390.patch patches. + - Remove 0001-Add-a-register-to-the-clobber-list-for-the-mul-instr.patch. + +------------------------------------------------------------------- +Sun Mar 10 06:06:05 UTC 2019 - Stefan Brüns + +- Reduce _constraints to a reasonable size, the old constraints + were probably necessary pre multibuild. +- Enable DYNAMIC_ARCH for aarch64, available since 0.3.4 + +------------------------------------------------------------------- +Thu Feb 14 18:22:49 UTC 2019 - Egbert Eich + +- Add OPENBLAS_INC and OPENBLAS_DIR to HPC environment (bsc#1125547). + +------------------------------------------------------------------- +Tue Feb 12 19:33:00 UTC 2019 - Stefan Brüns + +- Fix https://github.com/xianyi/OpenBLAS/issues/2014 + Add 0001-Add-a-register-to-the-clobber-list-for-the-mul-instr.patch + +------------------------------------------------------------------- +Mon Jan 7 10:15:03 UTC 2019 - Ismail Dönmez + +- Update to version 0.3.5 + common: + * Loop unrolling in TRMV has been enabled again.
+ * A domain error in the thread workload distribution for SYRK + has been fixed. + * gmake builds will now automatically add -fPIC to the build + options if the platform requires it. + * A pthreads key leakage (and associated crash on dlclose) in + the USE_TLS codepath was fixed. + * Building of the utest cases on systems that do not provide + an implementation of complex.h was fixed. + x86_64: + * The SkylakeX code was changed to compile on OSX. + * Unwanted application of the -march=skylake-avx512 option + to the common code parts of a DYNAMIC_ARCH build was fixed. + * Improved performance of SGEMM for small workloads on Skylake X. + * Performance of SGEMM and DGEMM was improved on Haswell. + armv8: + * A configuration error that broke the CNRM2 kernel was corrected. + * Compilation of the GEMM kernels with CMAKE was fixed. + * DYNAMIC_ARCH builds are now available with CMAKE as well. + * Using CMAKE for cross-compilation to the new cpu TARGETs + introduced in 0.3.4 now works. + power: + * A problem in cpu autodetection for AIX has been corrected. + +------------------------------------------------------------------- +Fri Dec 7 12:29:27 UTC 2018 - Ismail Dönmez + +- Update to version 0.3.4 + common: + * The new, experimental thread-local memory allocation had + inadvertently been left enabled for gmake builds in 0.3.3 + despite the announcement. It is now disabled by default, + and single-threaded builds will keep using the old + allocator even if the USE_TLS option is turned on. + * OpenBLAS will now provide enough buffer space for at least + 50 threads by default. + * The output of openblas_get_config() now contains the version + number. + * A serious thread safety bug in GEMV operation with small M and + large N size has been fixed.
+ * The code will now automatically call blas_thread_init after + a fork if needed before handling a call to + openblas_set_num_threads + * Accesses to parallelized level3 functions from multiple + callers are now serialized to avoid thread races + (unless using OpenMP). + * This should provide better performance than the + known-threadsafe (but non-default) + USE_SIMPLE_THREADED_LEVEL3 option. + * When building LAPACK with gfortran, -frecursive is now + (again) enabled by default to ensure correct behaviour. + * The OpenBLAS version cblas.h now supports both CBLAS_ORDER + and CBLAS_LAYOUT as the name of the matrix row/column order + option. + * Externally set LDFLAGS are now passed through to the final + compile/link + * steps to facilitate setting platform-specific linker flags. + * A potential race condition during the build of LAPACK + (that would usually manifest itself as a failure to build + TESTING/MATGEN) has been fixed. + * xHEMV has been changed to stay single-threaded for small + input sizes where the overhead of multithreading exceeds + any possible gains + * CSWAP and ZSWAP have been limited to a single thread + except on ARMV8 or ThunderX hardware with sizable input. + * Linker flags for the PGI compiler have been updated + * Behaviour of AXPY with zero increments is now handled + in the C interface, correcting the result on at least + Intel Atom. + * The result matrix from calling SGELSS with an all-zero + input matrix is now zeroed completely. + x86_64: + * Autodetection of AMD Ryzen2 has been fixed (again). + * CMAKE builds now support labeling of an INTERFACE64=1 + build of the library with the _64 suffix. + * AVX512 version of DGEMM has been added and the + AVX512 SGEMM kernel has been sped up by rewriting + with C intrinsics + * Fixed compilation on RHEL5/CENTOS5 + (issue with typename __WAIT_STATUS) + armv8: + * DYNAMIC_ARCH support is now available for 64bit ARM + * cross-compiling for ARMV8 under iOS now works.
+ * cpu-specific code has been rearranged to make better + use of both hardware commonalities and model-specific + compiler optimizations. + * XGENE1 has been removed as a TARGET, superseded by the + improved generic ARMV8 support. + armv7: + * Older assembly mnemonics have been converted to UAL + form to allow building with clang 7.0 + +------------------------------------------------------------------- +Tue Oct 9 19:00:49 UTC 2018 - Dmitry Roshchin + +- Update to version 0.3.3 + common: + * thread memory allocation has been switched back to the method + used before version 0.3.1 due to unexpected problems caused by + the new code under some circumstances. + * LAPACK PR272 has been integrated, which fixes spurious errors + in DSYEVR and related functions caused by missing conversion + from ILAENV to ILAENV_2STAGE in several _2stage routines. + x86_64 + * added AVX512 implementations of SDOT, DDOT, SAXPY, DAXPY, + DSCAL, DGEMVN and DSYMVL + * added a workaround for a cygwin issue that prevented compilation + of AVX512 code + + +------------------------------------------------------------------- +Fri Aug 17 12:56:04 UTC 2018 - idonmez@suse.com + +- Update to version 0.3.2 + common: + * Fixes for regressions caused by the rewrite of the thread + initialization code in 0.3.1 + x86_64: + * Added autodetection of AMD Ryzen 2 + * Fixed build with older versions of MSVC + power: + * Fixed cpu autodetection for the BSDs + mips64: + * Fixed utest errors in AXPY, DSDOT, ROT and SWAP +- Version 0.3.1 + common: + * Rewritten thread initialization code with significantly + reduced overhead + * Added CBLAS interfaces to the IxAMIN BLAS extension functions + * Fixed the lapack-test target + * CMAKE builds now create an OpenBLASConfig.cmake file + * ZAXPY now uses a single thread for small input sizes + * The LAPACK code was updated from Reference-LAPACK/lapack#253 + power: + * Corrected CROT and ZROT behaviour with zero INC_X + armv7: + * Corrected xDOT behaviour with zero INC_X 
or INC_Y + x86_64: + * Retired some older targets of DYNAMIC_ARCH builds to a + new option DYNAMIC_OLDER, this affects PENRYN,DUNNINGTON, + OPTERON,OPTERON_SSE3,BOBCAT,ATOM and NANO (which will still + be supported via the slower PRESCOTT kernels when this option + is not set) + * Added an option DYNAMIC_LIST that (used in conjunction with + DYNAMIC_ARCH) allows to specify the list of x86_64 targets to + include. Any target not on the list will be supported by + the Sandybridge or Nehalem kernels if available, or by Prescott. + * Improved SWITCH_RATIO on Haswell for increased GEMM throughput + * Added initial support for Intel Skylake X, including an AVX512 + SGEMM kernel + * Added autodetection of Intel Cannon Lake series as Skylake X + * Added a default L2 cache size for hypervisors that return zero + here (Chromebook) + * Fixed a name clash with recent Windows10 headers that broke the + build with (at least) recent mingw from MSYS2 + * Fixed a link error in mixed clang/gfortran builds with OpenMP + * Updated the OSX deployment target to 10.8 + * Switched on parallel make for builds on MS Windows by default + x86: + * Fixed SSWAP and DSWAP behaviour with zero INC_X and INC_Y +- Version 0.3.0 + common: + * Fixed some more thread race and locking bugs + * Added preliminary support for calling an OpenMP build of the + library from multiple threads + * Removed performance impact of thread locks added in 0.2.20 + on OpenMP code + * General code cleanup + * Optimized DSDOT implementation + * Improved thread distribution for GEMM + * Corrected IMATCOPY/OMATCOPY implementation + * Fixed out-of-bounds accesses in the multithreaded xBMV/xPMV + and SYMV implementations + * Cmake build improvements + * pkgconfig file now contains build options + * openblas_get_config() now reports USE_OPENMP and NUM_THREADS + settings used for the build + * Corrections and improvements for systems with more than 64 cpus + * LAPACK code updated to 3.8.0 including later fixes + * Added 
ReLAPACK, a recursive implementation of several LAPACK functions + * Rewrote ROTMG to handle cases that the netlib code failed to address + * Disabled (broken) multithreading code for xTRMV + * corrected prototypes of complex CBLAS functions to make our + cblas.h match the generally accepted standard + * Shared memory access failures on startup are now handled more gracefully + * Restored utests from earlier releases (and made them pass on all + affected systems) + sparc: + * several fixes for cpu autodetection + arm: + * Added support for CortexA53 and A72 + * Added autodetection for ThunderX2T99 + * Made most optimized kernels the default for generic ARMv8 targets + x86_64: + * Parallelized DDOT kernel for Haswell + * Changed alignment directives in assembly kernels to boost performance on OSX + * Fixed register handling in the GEMV microkernels (bug exposed by gcc7) + * Added support for building on OpenBSD and Dragonfly + * Updated compiler options to work with Intel release 2018 + * Support fully optimized build with clang/flang on Microsoft Windows + * Fixed building on AIX + ibm z: + * added optimized BLAS 1/2 functions + mips: + * Fixed cpu autodetection helper code + * Added mips32 1004K cpu (Mediatek MT7621 and similar SoC) + * Added mips64 I6500 cpu +- Remove c_xerbla_no-void-return.patch: fixed upstream. + +------------------------------------------------------------------- +Tue Jan 30 18:19:33 CET 2018 - ro@suse.de + +- add openblas-s390.patch to build on s390 (bsc#1079513). + +------------------------------------------------------------------- +Fri Jan 5 18:27:17 UTC 2018 - eich@suse.com + +- Switch from gcc6 to gcc7 as additional compiler flavor for HPC on SLES. +- Fix library package requires - use HPC macro (boo#1074890). +- Fix unexpanded rpm macro in environment module file for HPC (boo#1074897). 
+ +------------------------------------------------------------------- +Mon Nov 27 11:55:04 UTC 2017 - normand@linux.vnet.ibm.com + +- Add -mvsx option for ppc64 archi (not required for ppc64le) + to avoid ./kernel/power/sasum_microk_power8.c:41:3: error: + '__vector' undeclared (first use in this function); ... + +------------------------------------------------------------------- +Tue Oct 17 13:38:47 UTC 2017 - eich@suse.com + +- Add magic to limit the number of flavors built in the + OBS to non-HPC ones. + +------------------------------------------------------------------- +Thu Oct 12 10:01:10 UTC 2017 - eich@suse.com + +- Generate baselib.conf dynamically and only for the non-HPC + builds: this avoids issues with the source validator. + +------------------------------------------------------------------- +Fri Sep 8 14:30:29 UTC 2017 - eich@suse.com + +- Convert openblas to multibuild. +- Add HPC build using environment modules. + (FATE#321708). +- fix-arm64-cpuid-return.patch + Fix CPUID detection on ARM (From OHPC). 
+ +------------------------------------------------------------------- +Wed Aug 9 19:45:54 UTC 2017 - dmitry_r@opensuse.org + +- Remove migration %post scripts for old library names + +------------------------------------------------------------------- +Sat Jul 29 16:08:38 UTC 2017 - badshah400@gmail.com + +- Update to version 0.2.20: + * common: + - Improved CMake support + - Fixed several thread race and locking bugs + - Fixed default LAPACK optimization level + - Updated LAPACK to 3.7.0 + - Added ReLAPACK (https://github.com/HPAC/ReLAPACK), make + BUILD_RELAPACK=1 + * POWER: + - Optimizations for Power9 + - Fixed several Power8 assembly bugs + * ARM: + - New optimized Vulcan and ThunderX2T99 targets + - Support for ARMV7 SOFT_FP ABI (make ARM_SOFTFP_ABI=1) + - Detect all cpu cores including offline ones + - Fix compilation with CLANG + - Support building a shared library for Android + * MIPS: + - Fixed several threading issues + - Fix compilation with CLANG + * x86_64: + - Detect Intel Bay Trail and Apollo Lake + - Detect Intel Sky Lake and Kaby Lake + - Detect Intel Knights Landing + - Detect AMD A8, A10, A12 and Ryzen + - Support 64bit builds with Visual Studio + - Fix building with Intel and PGI compilers + - Fix building with MINGW and TDM-GCC + - Fix cmake builds for Haswell and related cpus + - Fix building for Sandybridge with CLANG 3.9 + - Add support for the FLANG compiler + * IBM Z: + - New target z13 with BLAS3 optimizations +- Drop 0001-Fix-power8-asm.patch; fixed upstream. +- Minor rebase of c_xerbla_no-void-return.patch and + openblas-noexecstack.patch for updated version. +- Remove installed pkgconfig file as it is not adapted to the + library names we use. 
+ +------------------------------------------------------------------- +Thu May 18 09:33:23 UTC 2017 - meissner@suse.com + +- 0001-Fix-power8-asm.patch: fixed power8 assembly (bsc#1039397) + +------------------------------------------------------------------- +Wed Sep 7 15:58:36 UTC 2016 - idonmez@suse.com + +- Update to version 0.2.19 + POWER: + * Optimize BLAS on Power8 + * Fixed Julia+OpenBLAS bugs on Power8 + MIPS: + * Optimize BLAS on MIPS P5600 and I6400 + ARM: + * Improved on ARM Cortex-A57 + +------------------------------------------------------------------- +Wed Apr 13 08:12:19 UTC 2016 - dmitry_r@opensuse.org + +- Update to version 0.2.18 + ARM: + * Provide DGEMM 8x4 kernel for Cortex-A57 + POWER: + * Optimize S and C BLAS3 on Power8 + * Optimize BLAS2/1 on Power8 + +------------------------------------------------------------------- +Mon Mar 21 21:15:39 UTC 2016 - dmitry_r@opensuse.org + +- Update to version 0.2.17 + * Enable BUILD_LAPACK_DEPRECATED=1 by default. + +------------------------------------------------------------------- +Wed Mar 16 19:35:53 UTC 2016 - idonmez@suse.com + +- Update to version 0.2.16 + * Upgrade LAPACK to 3.6.0 version. + * Disable multi-threading for small size swap and ger. + * Improve small zger, zgemv, ztrmv using stack allocation. + * Let openblas_get_num_threads return the number of active threads. + * Fix LAPACK Dormbr, Dormlq bug. + * Avoid potential getenv segfault. + * Import LAPACK svn bugfix #142-#147,#150-#155 + x86/x86_64: + * Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller. + * Detect Intel Avoton. + * Detect AMD Trinity, Richland, E2-3200. + * Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller + * Fix bug with scipy linalg test. + ARM: + * Support and optimize Cortex-A57 AArch64. + * Update ARMV6 kernels. + * Improve DGEMM for ARM Cortex-A57. + POWER: + * Fix detection of POWER architecture. + * Optimize D and Z BLAS3 functions for Power8. +- Remove openblas-libs.patch, not needed. 
+ +------------------------------------------------------------------- +Tue Oct 27 21:11:50 UTC 2015 - dmitry_r@opensuse.org + +- Update to version 0.2.15 + * Enable MAX_STACK_ALLOC flags by default. + * Improve ger and gemv for small matrices. + * Improve gemv parallel with small m and large n case. + * Improve ?imatcopy when lda==ldb + * Add vecLib benchmarks + * Fix LAPACK lantr for row major matrices + * Fix LAPACKE lansy + * Import bug fixes for LAPACKE s/dormlq, c/zunmlq + * Raise the signal when pthread_create fails + * Drop obsolete openblas-arm64-build.patch + x86/x86-64: + * Support pure C generic kernels for x86/x86-64. + * Support Intel Broadwell and Skylake by Haswell kernels. + * Support AMD Excavator by Steamroller kernels. + * Optimize s/d/c/zdot for Intel SandyBridge and Haswell. + * Optimize s/d/c/zdot for AMD Piledriver and Steamroller. + * Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell. + * Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller. + * Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge. + * Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller. + * Optimize s/dger for Intel SandyBridge. + * Optimize s/dsymv for Intel SandyBridge. + * Optimize ssymv for Intel Haswell. + * Optimize dgemv for Intel Nehalem and Haswell. + * Optimize dtrmm for Intel Haswell. + ARM: + * Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard) + * Fix lock, rpcc bugs + POWER: + * Support ppc64le platform (ELF ABI v2) + * Support POWER7/8 by POWER6 kernels. + +------------------------------------------------------------------- +Wed Jul 29 21:13:47 UTC 2015 - dmitry_r@opensuse.org + +- Change library name suffix + * drop openblas-soname.patch +- Add RPM %post script for manual BLAS/LAPACK update-alternatives + configuration update +- Use update-alternatives mechanism for OpenBLAS variants (serial, + openmp, pthreads). pthreads variant is default for x86 and x86_64, + OpenMP for other architectures. 
+- Fix build on ARM64 + * openblas-arm64-build.patch +- Add update-alternatives mechanism for CBLAS +- Provide cmake module +- Delete info about host cpu from openblas_config.h for dynamic arch +- Add update-alternatives to 'preun' and 'post' requires list for + libraries +- Add README.SUSE + +------------------------------------------------------------------- +Wed Mar 25 08:05:20 UTC 2015 - dmitry_r@opensuse.org + +- Update to version 0.2.14 + * Improve ger and gemv for small matrices by stack allocation. + e.g. make -DMAX_STACK_ALLOC=2048 + * Introduce openblas_get_num_threads and openblas_get_num_procs. + * Add ATLAS-style ?geadd function. + * Fix c/zsyr bug with negative incx. + * Fix race condition during shutdown causing a crash in + gotoblas_set_affinity(). + x86/x86-64: + * Support AMD Steamroller. + ARM: + * Add Cortex-A9 and Cortex-A15 targets. + +------------------------------------------------------------------- +Wed Dec 3 16:06:49 UTC 2014 - dmitry_r@opensuse.org + +- Update to version 0.2.13 + * Add SYMBOLPREFIX and SYMBOLSUFFIX makefile options + for adding a prefix or suffix to all exported symbol names + in the shared library. + * Remove openblas-0.1.0-soname.patch + * Add openblas-soname.patch + * Rebase openblas-noexecstack.patch + x86/x86-64: + * Add generic kernel files for x86-64. make TARGET=GENERIC + * Fix a bug of sgemm kernel on Intel Sandy Bridge. + * Fix c_check bug on some amd64 systems. + ARM: + * Support APM's X-Gene 1 AArch64 processors. + * Optimize trmm and sgemm. + +------------------------------------------------------------------- +Fri Oct 17 13:09:58 UTC 2014 - dmitry_r@opensuse.org + +- Update to version 0.2.12 + * Added CBLAS interface for ?omatcopy and ?imatcopy. + * Enable ?gemm3m functions. + * Added benchmark for ?gemm3m. + * Optimized multithreading lower limits. + * Disabled SYMM3M and HEMM3M functions because of segment violations. + x86/x86-64: + * Improved axpy and symv performance on AMD Bulldozer. 
+ * Improved gemv performance on modern Intel and AMD CPUs. + +------------------------------------------------------------------- +Mon Aug 18 12:43:10 UTC 2014 - dmitry_r@opensuse.org + +- Update to version 0.2.11 + * Added some benchmark codes. + x86/x86-64: + * Improved s/c/zgemm performance for Intel Haswell. + * Improved s/d/c/zgemv performance. + * Support the big numa machine.(EXPERIMENT) + ARM: + * Fix detection when cpuinfo uses "Processor". + +------------------------------------------------------------------- +Thu Jul 17 20:44:58 UTC 2014 - dmitry_r@opensuse.org + +- Update to version 0.2.10 + * Added BLAS extensions as following. + s/d/c/zaxpby, s/d/c/zimatcopy, s/d/c/zomatcopy. + * Added OPENBLAS_CORETYPE environment for dynamic_arch. (a86d34) + * Support outputting the CPU corename on runtime.(#407) + * Patched LAPACK to fix bug 114, 117, 118. + (http://www.netlib.org/lapack/bug_list.html) + * Disabled ?gemm3m for a work-around fix. (#400) + * Fixed lots of bugs for optimized kernels on sandybridge,Haswell, + bulldozer, and piledriver. + * Remove obsolete openblas-0.2.9-gcc-warnings.patch + +------------------------------------------------------------------- +Tue Jun 10 14:34:02 UTC 2014 - dmitry_r@opensuse.org + +- Update to version 0.2.9 + * Update LAPACK to 3.5.0 version + * Fixed compatibility issues with Clang and Pathscale compilers. + * Added OPENBLAS_VERBOSE environment variable.(#338) + * Make OpenBLAS thread-pool resilient to fork via pthread_atfork. + (#294) + * Rewrote rotmg + * Fixed sdsdot bug. + * Improved the result for LAPACK testing. (#372) + x86/x86-64: + * Optimization on Intel Haswell. + * Enable optimization kernels on AMD Bulldozer and Piledriver. + * Detect Intel Haswell for new Macbook. + * To improve LAPACK testing, we fallback some kernels. (#372) + https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List + ARM: + * Support ARMv6 and ARMv7 ISA. + * Optimization on ARM Cortex-A9. 
+- Update patches: + * openblas-0.2.8-libs.patch + * openblas-0.2.8-noexecstack.patch + to + * openblas-libs.patch + * openblas-noexecstack.patch +- Fix gcc warnings (#385) + * openblas-0.2.9-gcc-warnings.patch + +------------------------------------------------------------------- +Sat Apr 12 09:02:16 UTC 2014 - dmitry_r@opensuse.org + +- Remove files with problematic licenses + +------------------------------------------------------------------- +Fri Apr 4 20:32:24 UTC 2014 - dmitry_r@opensuse.org + +- Update to version 0.2.8 + * Add executable stack markings. + * Respect user's LDFLAGS + * Rollback bulldozer and piledriver kernels to barcelona kernels + * update openblas-0.2.6-libs.patch + * update c_xerbla_no-void-return.patch + * update openblas-0.2.7-noexecstack.patch + +------------------------------------------------------------------- +Fri Jul 26 20:31:17 UTC 2013 - scorot@free.fr + +- version 0.2.7 + * Support LSB (Linux Standard Base) 4.1. + e.g. make CC=lsbcc + * Include LAPACK 3.4.2 source codes to the repo. + Avoid downloading at compile time. + * Add NO_PARALLEL_MAKE flag to disable parallel make. + * Create openblas_get_parallel to retrieve information which + parallelization model is used by OpenBLAS. (Thank + grisuthedragon) + * Detect LLVM/Clang compiler. + * A workaround for dtrti_U single thread bug. Replace it with + LAPACK codes. (#191) + * Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on + AMD Bulldozer. (Thank Werner Saar) + * Add Intel Haswell support (using Sandybridge optimizations). + (Thank Dan Luu) + * Add AMD Piledriver support (using Bulldozer optimizations). + * Fix the computational error in zgemm avx kernel on + Sandybridge. (#237) + * Fix the overflow bug in gemv. + * Fix the overflow bug in multi-threaded BLAS3, getrf when + NUM_THREADS is very large.(#214, #221, #246). 
+- rebase patch noexecstack.patch +- remove lapack source tarball since lapack sources are included + in openblas sources +- increase NUM_THREAD from 32 to 64 + +------------------------------------------------------------------- +Sat Mar 2 16:08:16 UTC 2013 - scorot@free.fr + +- version 0.2.6 + * Improved OpenMP performance slightly. (d744c9) + * Improved cblas.h compatibility with Intel MKL.(#185) + * Fixed the overflowing bug in single thread cholesky + factorization. + * Fixed the overflowing buffer bug of multithreading hbmv and + sbmv.(#174) + * Added AMD Bulldozer x86-64 S/DGEMM AVX kernels. (Thank + Werner Saar) We will tune the performance in future. + * Auto-detect Intel Xeon E7540. + * Fixed the overflowing buffer bug of gemv. (#173) + * Fixed the bug of s/cdot about invalid reading NAN on + x86_64. (#189) +- rebase patch0 openblas-0.2.6-libs.patch + +------------------------------------------------------------------- +Sun Feb 17 14:10:55 UTC 2013 - jengelh@inai.de + +- Remove redundant cleaning commands +- Do not create .so.0.2.5. SO versions are not package release + numbers. + +------------------------------------------------------------------- +Mon Jan 21 20:19:13 UTC 2013 - scorot@free.fr + +- use Requires(post) and Requires(preun) instead of PreReq +- add patch markups in spec file + +------------------------------------------------------------------- +Tue Jan 15 20:42:00 UTC 2013 - scorot@free.fr + +- add update-alternatives support to allow easy switching between + the different blas and lapack implementations + +------------------------------------------------------------------- +Fri Nov 30 20:46:47 UTC 2012 - scorot@free.fr + +- version 0.2.5 + * Export LAPACK 3.4.2 symbols in shared library. 
(#147) + * Restore the original CPU affinity when calling + openblas_set_num_threads(1) (#153) + * Fixed a SEGFAULT bug in dgemv_t when m is very large.(#154) + +------------------------------------------------------------------- +Mon Oct 8 19:12:49 UTC 2012 - scorot@free.fr + +- version 0.2.4 + * Upgraded LAPACK to 3.4.2 version. (#145) + * f77blas.h:compatibility for compilers without C99 complex + number support. (#141) + * Added NO_AVX flag. Check OS supporting AVX on runtime. (#139) + +------------------------------------------------------------------- +Mon Aug 20 21:30:03 UTC 2012 - scorot@free.fr + +- version 0.2.3 + * Fixed LAPACK unstable bug about ?laswp. (#130) + * Fixed the shared library bug about unloading the library on + Linux (#132). + +------------------------------------------------------------------- +Sun Jul 8 20:24:03 UTC 2012 - scorot@free.fr + +- version 0.2.2 + * Support Intel Sandy Bridge 22nm desktop/mobile CPU + +------------------------------------------------------------------- +Mon Jul 2 20:45:57 UTC 2012 - scorot@free.fr + +- version 0.2.1 + * Fixed the SEGFAULT bug about hyper-threading + * Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes + * Removed the limitation (64) of numbers of CPU cores. + Now, it supports 256 cores at max. + * Supported clang compiler. + * Fixed some build bugs on FreeBSD + * Optimized Level-3 BLAS on Intel Sandy Bridge x86-64 by AVX + instructions. + * Support AMD Bobcat by using GotoBLAS2 AMD Barcelona codes. 
+- update patch3 + +------------------------------------------------------------------- +Wed May 2 21:16:16 UTC 2012 - scorot@free.fr + +- update patch0 + +------------------------------------------------------------------- +Wed May 2 20:45:18 UTC 2012 - scorot@free.fr + +- again fix remaining library file name error in spec file + +------------------------------------------------------------------- +Wed May 2 20:18:48 UTC 2012 - scorot@free.fr + +- fix wrong library file name version + +------------------------------------------------------------------- +Wed May 2 20:05:55 UTC 2012 - scorot@free.fr + +- Update to version 0.1.1 + * Upgraded LAPACK to 3.4.1 version. (Thank Zaheer Chothia) + * Supported LAPACKE, a C interface to LAPACK. (Thank Zaheer Chothia) + * Fixed the build bug (MD5 and download) on Mac OSX. + * Auto download CUnit 2.1.2-2 from SF.net with UTEST_CHECK=1. +x86/x86_64: + * Auto-detect Intel Sandy Bridge Core i7-3xxx & Xeon E7 Westmere-EX. + * Test alpha=Nan in dscale. + * Fixed a SEGFAULT bug in samax on x86 windows. 
+ +------------------------------------------------------------------- +Wed Apr 25 21:46:07 UTC 2012 - scorot@free.fr + +- version 0.1.0 +- update openblas-0.1.0-soname.patch +- add openblas-0.1.0-noexecstack.patch +- spec file cleanup + +------------------------------------------------------------------- +Mon Mar 12 22:19:17 UTC 2012 - scorot@free.fr + +- version 0.1alpha2.5 + diff --git a/openblas.rpmlintrc b/openblas.rpmlintrc new file mode 100644 index 0000000..b05c0ae --- /dev/null +++ b/openblas.rpmlintrc @@ -0,0 +1,3 @@ +# rpmlint get update-alternatives generic names wrong consistently +addFilter(".* alternative-link-missing.*") +addFilter(".* alternative-generic-name-missing .*\$\{lib\}.*") diff --git a/openblas.spec b/openblas.spec new file mode 100644 index 0000000..a76e66e --- /dev/null +++ b/openblas.spec @@ -0,0 +1,664 @@ +# +# spec file +# +# Copyright (c) 2023 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%global flavor @BUILD_FLAVOR@%{nil} + +%define _vers 0_3_25 +%define vers 0.3.25 +%define so_v 0 +%define pname openblas + +%bcond_with ringdisabled + +%if 0%{?suse_version} > 1500 +%define a_x _%{_arch} +%endif + +%if "%flavor" == "" +%define package_name %{pname} +ExclusiveArch: do_not_build +%endif + +%global build_flags USE_THREAD=1 USE_OPENMP=1 + +%if "%flavor" == "serial" +%define build_flags USE_THREAD=0 USE_OPENMP=0 +%define openblas_so_prio 20 +# we build devel packages only from one flavor +%define build_devel 1 +%{bcond_with hpc} +%endif + +%if "%flavor" == "pthreads" +%define build_flags USE_THREAD=1 USE_OPENMP=0 + %ifarch %ix86 x86_64 + %define arch_flavor 1 + %define openblas_so_prio 50 + %else + %define openblas_so_prio 20 + %endif +%{bcond_with hpc} +%endif + +%if "%flavor" == "openmp" + %ifarch %ix86 x86_64 + %define openblas_so_prio 20 + %else + %define arch_flavor 1 + %define openblas_so_prio 50 + %endif +%{bcond_with hpc} +%endif + +%if "%flavor" == "gnu-hpc" +%define compiler_family gnu +%undefine c_f_ver +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu-hpc-pthreads" +%define compiler_family gnu +%undefine c_f_ver +%define ext pthreads +%define build_flags USE_THREAD=1 USE_OPENMP=0 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu7-hpc" +%define compiler_family gnu +%define c_f_ver 7 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu7-hpc-pthreads" +%define compiler_family gnu +%define c_f_ver 7 +%define ext pthreads +%define build_flags USE_THREAD=1 USE_OPENMP=0 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu8-hpc" +%define compiler_family gnu +%define c_f_ver 8 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu8-hpc-pthreads" +%define compiler_family gnu +%define c_f_ver 8 +%define ext pthreads +%define build_flags USE_THREAD=1 USE_OPENMP=0 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu9-hpc" +%define compiler_family gnu +%define 
c_f_ver 9 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu9-hpc-pthreads" +%define compiler_family gnu +%define c_f_ver 9 +%define ext pthreads +%define build_flags USE_THREAD=1 USE_OPENMP=0 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu10-hpc" +%define compiler_family gnu +%define c_f_ver 10 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu10-hpc-pthreads" +%define compiler_family gnu +%define c_f_ver 10 +%define ext pthreads +%define build_flags USE_THREAD=1 USE_OPENMP=0 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu11-hpc" +%define compiler_family gnu +%define c_f_ver 11 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu11-hpc-pthreads" +%define compiler_family gnu +%define c_f_ver 11 +%define ext pthreads +%define build_flags USE_THREAD=1 USE_OPENMP=0 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu12-hpc" +%define compiler_family gnu +%define c_f_ver 12 +%{bcond_without hpc} +%endif + +%if "%flavor" == "gnu12-hpc-pthreads" +%define compiler_family gnu +%define c_f_ver 12 +%define ext pthreads +%define build_flags USE_THREAD=1 USE_OPENMP=0 +%{bcond_without hpc} +%endif + +%ifarch ppc64le +%if 0%{?c_f_ver} > 9 +%else +%if 0%{?sle_version} == 150500 +%define cc_v 12 +%endif +%if 0%{?sle_version} == 150400 +%define cc_v 11 +%endif +%if 0%{?sle_version} == 150300 +%define cc_v 10 +%endif +%endif +%endif +%ifarch x86_64 + %if 0%{?sle_version} && 0%{?c_f_ver} < 11 + %define dynamic_list DYNAMIC_LIST="PRESCOTT CORE2 NEHALEM BARCELONA SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR HASWELL ZEN SKYLAKEX" + %endif +%endif + +%if %{without hpc} +%define so_a %{so_v} +%if 0%{!?package_name:1} +%define package_name %{pname}_%{flavor} +%endif +%define p_prefix %_prefix +%define p_includedir %_includedir/%pname +%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}} +%define p_cmakedir %{p_libdir}/cmake/%{pname} +%define num_threads 64 + +%else +%define so_a %{nil} +# Magic for OBS Staging. 
Only build the flavors required by +# other packages in the ring. +%if %{with ringdisabled} +ExclusiveArch: do_not_build +%endif + +%define package_name %{hpc_package_name %_vers} + +%define p_prefix %hpc_prefix +%define p_includedir %hpc_includedir +%define p_libdir %hpc_libdir +%define p_cmakedir %{hpc_libdir}/cmake +%define num_threads 256 + +%{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}} +%endif + +Name: %{package_name} +Version: %vers +Release: 0 +Summary: An optimized BLAS library based on GotoBLAS2 +License: BSD-3-Clause +Group: Productivity/Scientific/Math +URL: http://www.openblas.net +Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz +Source1: README.SUSE +Source2: README.HPC.SUSE +Source3: openblas.rpmlintrc +Patch101: Link-library-with-z-noexecstack.patch +# PATCH port +Patch102: Handle-s390-correctly.patch +Patch103: openblas-ppc64be_up2_p8.patch + +#BuildRequires: cmake +BuildRequires: memory-constraints +%if 0%{?cc_v:1} +BuildRequires: gcc%{?cc_v}-fortran +%endif +%if %{without hpc} +BuildRequires: gcc-fortran +BuildRequires: update-alternatives +Requires(post): update-alternatives +Requires(preun):update-alternatives +%else +BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel +BuildRequires: lua-lmod +BuildRequires: suse-hpc +%global dep_summary %{summary} +%endif + +%description +OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. 
+ +%package -n lib%{name}%{so_a} +Summary: An optimized BLAS library based on GotoBLAS2, %{flavor} version +Group: System/Libraries +%if %{without hpc} +Requires(post): update-alternatives +Requires(post): coreutils +Requires(preun):update-alternatives + %if "%flavor" == "serial" +Obsoletes: lib%{pname}%{so_v} < %{version} +Provides: lib%{pname}%{so_v} = %{version} + %else +Obsoletes: lib%{pname}0 + %endif + %if "%flavor" == "pthreads" +Obsoletes: lib%{pname}p0 + %endif + %if "%flavor" == "openmp" +Obsoletes: lib%{pname}o0 + %endif +%else # with hpc +%hpc_requires +%endif + +%description -n lib%{name}%{so_a} +OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. + +%{?with_hpc:%{hpc_master_package -l -L}} + +%package -n lib%{name}-devel +Summary: Development libraries for OpenBLAS, %{flavor} version +Group: Development/Libraries/C and C++ +Requires: lib%{name}%{so_a} = %{version} +%if %{without hpc} +Requires: %{pname}-common-devel = %{version} +%if 0%{?arch_flavor} +Provides: %{pname}-devel = %version +Provides: %{pname}-devel(default) = %version +%else +Provides: %{pname}-devel(other) = %version +%endif +%else +%hpc_requires_devel +%endif + +%description -n lib%{name}-devel +OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. + +This package contains the development libraries for serial OpenBLAS version. + +%{?with_hpc:%{hpc_master_package -l -L devel}} + +%package devel-static +Summary: Static version of OpenBLAS +Group: Development/Libraries/C and C++ +Requires: lib%{name}-devel = %{version} + +%description devel-static +OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. + +This package contains the static libraries. 
+ +%package -n %{pname}-common-devel +Summary: Development headers and libraries for OpenBLAS +Group: Development/Libraries/C and C++ +Requires: (%{pname}-devel(default) or %{pname}-devel(other)) +Requires(pre): coreutils +Requires(post): coreutils +Obsoletes: %{pname}-devel < %version +Obsoletes: %{pname}-devel-headers < %version +Provides: %{pname}-devel-headers = %version +Provides: pkgconfig(openblas) = %version + +%description -n %{pname}-common-devel +OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. + +This package contains headers for OpenBLAS. + +%prep + +%setup -q -n OpenBLAS-%{version} +%autopatch -p1 +%ifarch s390 +sed -i -e "s@m32@m31@" Makefile.system +%endif +sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \ + test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile + +%if %{without hpc} +cp %{SOURCE1} . +%else +cp %{SOURCE2} . +%endif + +%if %{without hpc} +# create baselibs.conf based on flavor +cat > %{_sourcedir}/baselibs.conf < + requires "lib%{name}%{?so_a}- = " +EOF +%endif + +%build + +# For static libraries use -ffat-lto-objects to make sure the 'regular' +# assembler code is generated as well as the intermediate code will be +# stripped during pre-packaging post-processing. Also, set ldflags_tests +# to speed up building of tests. +%if "%{?_lto_cflags}" != "" +%global _lto_cflags %{_lto_cflags} -ffat-lto-objects +%global ldflags_tests -fno-lto +%endif + +# disable lto for ppc64le, boo#1181733 +%ifarch ppc64le +%define _lto_cflags %{nil} +%endif + +%if %{with hpc} +%hpc_debug +%hpc_setup_compiler +%endif + +# Use DYNAMIC_ARCH everywhere - not sure about PPC? +# Use DYNAMIC_ARCH to build for multiple targets, use TARGET to specify +# the CPU model assumed for the common code. It should be set to the +# oldest CPU model one expects to encounter. 
+%global openblas_target DYNAMIC_ARCH=1 +# We specify TARGET= to avoid compile-time CPU-detection (boo#1100677) +%ifarch %ix86 x86_64 +%global openblas_target %openblas_target TARGET=CORE2 +%define openblas_opt BUILD_BFLOAT16=1 +%endif +%ifarch aarch64 +%global openblas_target %openblas_target TARGET=ARMV8 +%if 0%{?suse_version} < 1550 +# Only enable targets without sve when using GCC < 9 +%global openblas_target %openblas_target DYNAMIC_LIST="ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 CORTEXA55 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 NEOVERSEV1 THUNDERX3T110" +%endif +%define openblas_opt BUILD_BFLOAT16=1 +%endif +%ifarch s390 s390x +%global openblas_target %openblas_target TARGET=ZARCH_GENERIC +%endif +%ifarch ppc64le +%global openblas_target %openblas_target TARGET=POWER8 +%define openblas_opt BUILD_BFLOAT16=1 +%endif +%ifarch ppc64 +%global openblas_target %openblas_target TARGET=POWER8 +%endif +%ifarch riscv64 +%global openblas_target %openblas_target TARGET=RISCV64_GENERIC +%endif +# force -mvsx for ppc64 to avoid build failure: +# ../kernel/power/sasum_microk_power8.c:41:3: error: '__vector' undeclared (first use in this function); did you mean '__cpow'? +# TODO why is it required ? (and not for ppc64le) +%ifarch ppc64 +%global addopt -mvsx +%endif +%global addopt %{?addopt} -fno-strict-aliasing + +# Make serial, threaded and OpenMP versions + +# Calculate process limits +%limit_build -m 1500 +[[ -n $_threads ]] && jobs=$_threads +[[ -z $jobs ]] && jobs=1 +# NEVER use %%_smp_mflags with top level make: +# set MAKE_NB_JOBS instead and let the build do the work! 
+# Do not use LIBNAMESUFFIX for new builds as it will not allow +# the different flavors to be plugin replacements of each other +%if 0%{?suse_version} <= 1500 && %{without hpc} +%define libnamesuffix LIBNAMESUFFIX=%flavor +%endif +make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \ + %{?openblas_opt} \ + COMMON_OPT="%{optflags} %{?addopt}" \ + NUM_THREADS=%{num_threads} V=1 \ + OPENBLAS_LIBRARY_DIR=%{p_libdir} \ + OPENBLAS_INCLUDE_DIR=%{p_includedir} \ + OPENBLAS_CMAKE_DIR=%{p_cmakedir} \ + PREFIX=%{p_prefix} \ + %{?dynamic_list} \ + %{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \ + %{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \ + %{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} CEXTRALIB=""}} + +%install +%if %{with hpc} +%hpc_setup_compiler +%endif + +# Install library and headers +# Pass NUM_THREADS again, as it is not propagated from the build step +# https://github.com/OpenMathLib/OpenBLAS/issues/4275 +%make_install %{?build_flags} \ + NUM_THREADS=%{num_threads} \ + OPENBLAS_LIBRARY_DIR=%{p_libdir} \ + OPENBLAS_INCLUDE_DIR=%{p_includedir} \ + OPENBLAS_CMAKE_DIR=%{p_cmakedir} \ + %{?libnamesuffix} \ + PREFIX=%{p_prefix} + +# Delete info about OBS host cpu +%ifarch %ix86 x86_64 + sed -i '/#define OPENBLAS_NEEDBUNDERSCORE/,/#define OPENBLAS_VERSION/{//!d}' \ + %{buildroot}%{p_includedir}/openblas_config.h +%endif + +%if %{without hpc} + +%if 0%{!?build_devel:1} +# We need the includes only once +rm -rf %{buildroot}%{p_includedir}/ +%endif + +# Fix cmake config file +sed -i 's|%{buildroot}||g' %{buildroot}%{p_cmakedir}/*.cmake +sed -i 's|_%{flavor}||g' %{buildroot}%{p_cmakedir}/*.cmake + +# Remove library type specific so. This is solved differently. +# Needed when not using LIBNAMESUFFIX. 
+%if 0%{!?libnamesuffix:1} +rm -f %{buildroot}%{p_libdir}/lib%{pname}*-r%{version}.so +rm -f %{buildroot}%{p_libdir}/lib%{pname}*-r%{version}.a +rm -f %{buildroot}%{p_libdir}/lib%{pname}.so +%endif +# Instead set up new 'devel'-link for flavor: +ln -s lib%{pname}.so.%{so_v} %{buildroot}%{p_libdir}/lib%{pname}.so + +# Put libraries in correct location +rm -rf %{buildroot}%{p_libdir}/lib%{name}* + +# Install library +%define orgname %{?libnamesuffix:%{name}}%{!?libnamesuffix:%{pname}} +install -D -p -m 755 lib%{orgname}.so %{buildroot}%{p_libdir}/lib%{pname}.so.%{so_v} +install -D -p -m 644 lib%{orgname}.a %{buildroot}%{p_libdir}/lib%{pname}.a + +# Fix source permissions (also applies to LAPACK) +find -name \*.f -exec chmod 644 {} + + +# update-alternatives strategy in %%post: +# update-alternatives: +# /usr/lib64/libblas.so. -> /etc/alternatives/libblas.so._ -> /usr/lib64/openblas-/libblas.so. +# /usr/lib64/openblas-default -> /etc/alternatives/openblas-default_ -> /usr/lib64/openblas- +# directly - default shared lib in default location +# /usr/lib64/libopenblas.so. -> /usr/lib64/openblas_default/libopenblas.so. +# /usr/lib64/libopenblas.so -> libopenblas.so. 
+ +install -d %{buildroot}/%{_sysconfdir}/alternatives +for link in openblas-default libblas.so.3 liblapack.so.3 libcblas.so.3 liblapacke.so.3; do + ln -s %{_sysconfdir}/alternatives/${link}%{?a_x} %{buildroot}/%{_libdir}/${link} +done + +%if 0%{?build_devel} +install -d %{buildroot}%{_libdir}/pkgconfig/ +ln -s %{_sysconfdir}/alternatives/openblas-default%{?a_x}/pkgconfig/openblas.pc %{buildroot}%{_libdir}/pkgconfig/ +install -d %{buildroot}/%{_libdir}/cmake +ln -s %{_sysconfdir}/alternatives/openblas-default%{?a_x}/cmake/openblas %{buildroot}/%{_libdir}/cmake/ +%endif + +# Compatibility Links +%if 0%{?libnamesuffix:1} +ln -s openblas-%{flavor}/lib%{pname}.so.%{so_v} %{buildroot}%{_libdir}/lib%{name}.so.%{so_v} +ln -s openblas-%{flavor}/lib%{pname}.so %{buildroot}%{_libdir}/lib%{name}.so +%endif +%else # with hpc + +# HPC module file +%hpc_write_modules_files +#%%Module1.0##################################################################### + +proc ModulesHelp { } { + +puts stderr " " +puts stderr "This module loads the %{pname} library built with the %{compiler_family} compiler toolchain." 
+puts stderr "\nVersion %{version}\n"
+
+}
+module-whatis "Name: %{hpc_upcase %pname} built with %{compiler_family} toolchain"
+module-whatis "Version: %{version}"
+module-whatis "Category: runtime library"
+module-whatis "Description: %{dep_summary}"
+module-whatis "%{url}"
+
+set version %{version}
+
+prepend-path LD_LIBRARY_PATH %{p_libdir}
+
+setenv %{hpc_upcase %pname}_DIR %{hpc_prefix}
+
+if {[file isdirectory %{hpc_includedir}]} {
+prepend-path LIBRARY_PATH %{p_libdir}
+prepend-path CPATH %{p_includedir}
+prepend-path C_INCLUDE_PATH %{p_includedir}
+prepend-path CPLUS_INCLUDE_PATH %{p_includedir}
+prepend-path INCLUDE %{p_includedir}
+%hpc_modulefile_add_pkgconfig_path
+
+setenv %{hpc_upcase %pname}_DIR %{hpc_prefix}
+setenv %{hpc_upcase %pname}_LIB %{p_libdir}
+setenv %{hpc_upcase %pname}_INC %{p_includedir}
+
+}
+
+family "openblas"
+EOF
+%{hpc_write_pkgconfig -l %{pname}}
+
+%endif # with hpc
+
+%if %{without hpc}
+
+# Ensure that a real directory left behind by older versions is properly replaced by the symlink
+%pre -n %{pname}-common-devel
+d=%{_libdir}/cmake/openblas
+[ -d $d -a ! -L $d -a "$(rpm -q --qf '%%{NAME}' -f $d 2>/dev/null)" = "openblas-devel" ] \
+    && { n=$(mktemp -d $(dirname $d)/tmpd-XXXXX); mv $d $n; rm -rf $n; } || true
+
+%post -n lib%{name}%{so_v}
+# There's no way to determine if a setting exists, so just remove it and ignore errors
+%{?a_x:%{_sbindir}/update-alternatives --remove-all openblas-default 2>/dev/null || true}
+%{_sbindir}/update-alternatives --install \
+    %{_libdir}/openblas-default openblas-default%{?a_x} %{p_libdir} %openblas_so_prio
+# Cannot package this link - brp-25-symlink doesn't recognize links created by update-alternatives
+ln -sf openblas-default/lib%{pname}.so.%{so_v} %{_libdir}/lib%{pname}.so.%{so_v}
+for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do
+  %{?a_x:%{_sbindir}/update-alternatives --remove-all ${lib} 2>/dev/null || true}
+  %{_sbindir}/update-alternatives --install \
+    %{_libdir}/${lib} ${lib}%{?a_x} %{p_libdir}/lib%{pname}.so.%{so_v} 20
+done
+/sbin/ldconfig
+
+%post -n %{pname}-common-devel
+ln -sf lib%{pname}.so.%{so_v} %{_libdir}/lib%{pname}.so
+
+%postun -n lib%{name}%{so_v}
+if [ ! -f %{p_libdir}/lib%{pname}.so.%{so_v} ]; then
+  for lib in libblas.so.3 libcblas.so.3 liblapack.so.3 liblapacke.so.3; do
+    %{_sbindir}/update-alternatives --remove ${lib}%{?a_x} %{p_libdir}/lib%{pname}.so.%{so_v} # must be the exact path registered by --install
+  done
+fi
+if [ ! -d %{p_libdir} ]; then
+  %{_sbindir}/update-alternatives --remove openblas-default%{?a_x} %{p_libdir}
+fi
+/sbin/ldconfig
+
+%else
+
+%postun -n lib%{name}
+%hpc_module_delete_if_default
+
+%endif
+
+%files -n lib%{name}%{so_a}
+%defattr(-,root,root,-)
+%{p_libdir}/lib%{pname}.so.%{so_v}
+%if %{without hpc}
+%dir %{p_libdir}
+%{?libnamesuffix:%{_libdir}/lib%{name}.so.%{so_v}}
+# Created by %%post
+%ghost %{_libdir}/lib%{pname}.so.%{so_v}
+%ghost %{_libdir}/openblas-default
+%ghost %{_libdir}/libblas.so.3
+%ghost %{_libdir}/libcblas.so.3
+%ghost %{_libdir}/liblapack.so.3
+%ghost %{_libdir}/liblapacke.so.3
+%ghost %{_sysconfdir}/alternatives/openblas-default%{?a_x}
+%ghost %{_sysconfdir}/alternatives/libblas.so.3%{?a_x}
+%ghost %{_sysconfdir}/alternatives/libcblas.so.3%{?a_x}
+%ghost %{_sysconfdir}/alternatives/liblapack.so.3%{?a_x}
+%ghost %{_sysconfdir}/alternatives/liblapacke.so.3%{?a_x}
+%else
+%hpc_dirs
+%{p_libdir}/libopenblas*r*.so
+%hpc_modules_files
+%endif
+
+%files -n lib%{name}-devel
+%{p_libdir}/lib%{pname}.so
+%{?libnamesuffix:%{_libdir}/lib%{name}.so}
+%{p_cmakedir}/
+%if %{with hpc}
+%license LICENSE
+%doc Changelog.txt GotoBLAS* README.md README.HPC.SUSE
+%hpc_pkgconfig_file
+%{p_includedir}/
+%else
+%dir %{p_libdir}/cmake
+%dir %{p_libdir}/pkgconfig
+%{p_libdir}/pkgconfig
+%endif
+
+%files devel-static
+%{p_libdir}/libopenblas*.a
+
+%if 0%{?build_devel}
+%files -n %{pname}-common-devel
+%license LICENSE
+%doc Changelog.txt GotoBLAS* README.md README.SUSE
+# created by %%post
+%ghost %{_libdir}/lib%{pname}.so
+%{p_includedir}/
+%{_libdir}/pkgconfig/openblas.pc
+%dir %{_libdir}/cmake
+%{_libdir}/cmake/openblas
+%endif
+
+%changelog