Sync from SUSE:SLFO:1.1 openblas revision dc6596ee0fb6e8f1f8b4d3d608ae91c7

2025-04-15 22:35:50 +02:00
8 changed files with 65 additions and 812 deletions
--- a/Link-library-with-z-noexecstack.patch
+++ b/Link-library-with-z-noexecstack.patch
@@ -0,0 +1,24 @@
+From: Egbert Eich <eich@suse.com>
+Date: Wed Nov 30 20:16:21 2022 +0100
+Subject: Link library with -z,noexecstack
+Patch-mainline: Not yet
+Git-commit: adddc0eadc81bcd29c48594793cb33eac0edb572
+References: 
+
+Signed-off-by: Egbert Eich <eich@suse.com>
+Signed-off-by: Egbert Eich <eich@suse.de>
+---
+ exports/Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+Index: OpenBLAS-0.3.25/exports/Makefile
+===================================================================
+--- OpenBLAS-0.3.25.orig/exports/Makefile
+++ OpenBLAS-0.3.25/exports/Makefile
+@@ -193,6 +193,7 @@ else ifeq ($(F_COMPILER), FLANG)
+ else
+ ifneq ($(C_COMPILER), LSB)
+ 	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
+	-Wl,-z,noexecstack \
+ 	-Wl,--whole-archive $< -Wl,--no-whole-archive \
+ 	-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
+ 	$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
--- a/OpenBLAS-0.3.25.tar.gz
+++ b/OpenBLAS-0.3.25.tar.gz
--- a/OpenBLAS-0.3.29.tar.gz
+++ b/OpenBLAS-0.3.29.tar.gz
--- a/Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
+++ b/Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
@@ -1,139 +0,0 @@
-From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date: Wed Feb 12 09:04:22 2025 +0100
-Subject: Restore the non-vectorized code from before PR4880 for POWER8
-Patch-mainline: Not yet
-Git-repo: https://github.com/xianyi/OpenBLAS
-Git-commit: 98b5ef929cfc98f2f3c236966830276c255118d2
-References: bsc#1239134
-
-
-Signed-off-by: Egbert Eich <eich@suse.de>
---
- kernel/power/sgemv_t.c | 23 +++++++++++++++++++----
- 1 file changed, 19 insertions(+), 4 deletions(-)
-
-diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c
-index e133c815c..ed0a24230 100644
--- a/kernel/power/sgemv_t.c
-+++ b/kernel/power/sgemv_t.c
-@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
-             temp7 += v_x[i] * va7[i]; 
-         }
-     
-  
-+ #if defined(POWER8)
-+    y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
-+    y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
-+    y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
-+    y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
-+
-+    y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
-+    y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
-+    y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
-+    y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
-+ #else
-     register __vector float t0, t1, t2, t3;
-     register __vector float a = { alpha, alpha, alpha, alpha };
-      __vector float *v_y = (__vector float*) y;
-@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
- 
-     v_y[0] += a * temp0;
-     v_y[1] += a * temp4;
-
-+#endif
- }
-  
- 
-@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
-         temp2 += v_x[i] * va2[i];
-         temp3 += v_x[i] * va3[i]; 
-     }
- 
-+ #if defined(POWER8)
-+    y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
-+    y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
-+    y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
-+    y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
-+ #else
-     register __vector float t0, t1, t2, t3;
-     register __vector float a = { alpha, alpha, alpha, alpha };
-      __vector float *v_y = (__vector float*) y;
-@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
-     temp0 += temp1 + temp2 + temp3;
- 
-     v_y[0] += a * temp0;
-
-+#endif
- }
-  
- 
-From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date: Wed Feb 12 09:07:20 2025 +0100
-Subject: Restore the non-vectorized code from before PR4880 for POWER8
-Patch-mainline: Not yet
-Git-repo: https://github.com/xianyi/OpenBLAS
-Git-commit: 81eed868b68c72ea1868663902f0904dc1b22326
-References: bsc#1239134
-
-
-Signed-off-by: Egbert Eich <eich@suse.de>
---
- kernel/power/sgemv_t_8.c | 24 ++++++++++++++++++++----
- 1 file changed, 20 insertions(+), 4 deletions(-)
-
-diff --git a/kernel/power/sgemv_t_8.c b/kernel/power/sgemv_t_8.c
-index f21f6eb7d..b30bb1137 100644
--- a/kernel/power/sgemv_t_8.c
-+++ b/kernel/power/sgemv_t_8.c
-@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
-             temp7 += vx1* va7_1 + vx2 * va7_2;  
-         }
-     
-  
-+  #if defined(POWER8)
-+    y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
-+    y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
-+    y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
-+    y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
-+
-+    y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
-+    y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
-+    y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
-+    y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
-+ #else
-     register __vector float t0, t1, t2, t3;
-     register __vector float a = { alpha, alpha, alpha, alpha };
-      __vector float *v_y = (__vector float*) y;
-@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
- 
-     v_y[0] += a * temp0;
-     v_y[1] += a * temp4;
-
-+#endif
- }
-  
- 
-@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
-         temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
-         temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1]; 
-     }
- 
-+
-+ #if defined(POWER8)
-+    y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
-+    y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
-+    y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
-+    y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
-+ #else
-     register __vector float t0, t1, t2, t3;
-     register __vector float a = { alpha, alpha, alpha, alpha };
-      __vector float *v_y = (__vector float*) y;
-@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
-     temp0 += temp1 + temp2 + temp3;
- 
-     v_y[0] += a * temp0;
-
-+#endif
- }
-  
- 
--- a/2
+++ b/2
@@ -2,4 +2,6 @@
  <package>serial</package>
  <package>pthreads</package>
  <package>openmp</package>
+  <package>gnu-hpc</package>
+  <package>gnu-hpc-pthreads</package>
 </multibuild>
--- a/openblas.changes
+++ b/openblas.changes
@@ -1,350 +1,9 @@
-------------------------------------------------------------------
-Mon Mar 17 08:51:26 UTC 2025 - Egbert Eich <eich@suse.com>
-
- Disable sgemmt and dgemmt tests in the test suite on power
-  when gcc-13 is used. It is known (bsc#1239134) that some
-  of these tests fail on this architecture when OpenBLAS
-  is being built with the said compiler version ever since
-  these tests were introduced.
-  This will essentially restore the situation of the
-  version prior to the addition of these tests (0.3.26) where
-  one was unaware of the problem.
-  This is only a temporary measure and will be removed once
-  the issue with gcc-13 has been resolved.
- Remove: Link-library-with-z-noexecstack.patch
-  since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
-  now.
-
-------------------------------------------------------------------
-Fri Mar 14 09:24:18 UTC 2025 - Egbert Eich <eich@suse.com>
-
- Use upstream patch for bsc#1239134 which is more friendly to the
-  non-affected power9 and power10 sub-architectures:
-  Replace:
-  Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
-  by:
-  Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
-
-------------------------------------------------------------------
-Sat Mar  8 13:23:53 UTC 2025 - Egbert Eich <eich@suse.com>
-
- Revert  commit ba47c7f4f301aad100ed166de338b86e01da8465 to
-  prevent failures on Power8 (bsc#1239134)
-  * Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
- Add a script to run tests.
- Add bisect support.
-
-------------------------------------------------------------------
-Wed Mar  5 15:47:13 UTC 2025 - Egbert Eich <eich@suse.com>
-
- Update to version 0.3.29 (jsc#PED-9676):
-  General:
-  * Fixed a potential NULL pointer dereference in multithreaded builds.
-  * Added function aliases for `GEMMT` using its new name `GEMMTR`
-    adopted by Reference-BLAS.
-  * Fixed the behavior of the recently added `CBLAS_?GEMMT` functions
-    with row-major data.
-  * Improved thread scaling of multithreaded `SBGEMV`.
-  * Improved thread scaling of multithreaded `TRTRI`.
-  * Fixed compilation of the CBLAS testsuite with gcc14 (and no
-    Fortran compiler).
-  * Fixed placement of the `-fopenmp` flag and libsuffix in the
-    generated pkgconfig file.
-  * Improved the `CMakeConfig` file generated by the Makefile build.
-  * Fixed const-correctness of `cblas_?geadd` in `cblas.h`.
-  * Fixed a potential inaccuracy in multithreaded BLAS3 calls.
-  * Fixed empty implementations of `get`/`set_affinity` that print a
-    warning in OpenMP builds.
-  * Fixed function signatures for TRTRS in the converted C version of
-    LAPACK.
-  * Fixed omission of several single-precision LAPACK symbols in the
-    shared library.
-  * Improved build instructions for the provided "pybench" benchmarks.
-  * Improved documentation, including descriptions of environment
-    variables that affect build and runtime behavior.
-  * Added a separate "make install_tests" target for use with
-    cross-compilations.
-  * Integrated improvements and corrections from Reference-LAPACK:
-    - removed a comparison in LAPACKE `?tpmqrt` that is always false.
-    - fixed the leading dimension for B in tests for GGEV.
-    - replaced the `?LARFT` functions with a recursive implementation.
-  arm64:
-  * Fixed a long-standing bug in the (generic) `c`/`zgemm_beta` kernel
-    that could lead to reads and writes outside the array bounds in some
-    circumstances.
-  * Rewrote cpu autodetection to scan all cores and return the highest
-    performing type.
-  * Improved the DGEMM performance for SVE targets and small matrix sizes.
-  * improved dimension criteria for forwarding from `GEMM` to `GEMV`
-    kernels.
-  * Added SVE kernels for `ROT` and `SWAP`.
-  * Improved SVE kernels for `SGEMV` and `DGEMV` on `A64FX` and
-    `NEOVERSEV1`.
-  * Fixed NRM2 implementations for generic SVE targets and the Neoverse N2.
-  x86_64:
-  * Fixed a wrong storage size in the SBGEMV kernel for Cooper Lake.
-  * Added cpu autodetection for Intel Granite Rapids.
-  * Added cpu autodetection for AMD Ryzen 5 series.
-  * Added optimized `SOMATCOPY_CT` for AVX-capable targets.
-  * fixed the fallback implementation of `GEMM3M` in GENERIC builds.
-  Power:
-  * Fixed multithreaded `SBGEMM`.
-  * Fixed a CMake build problem on POWER10.
-  * Improved the performance of SGEMV.
-  * Added vectorized implementations of `SBGEMV` and support for
-    forwarding 1xN `SBGEMM` to them.
-  * Fixed illegal instructions and potential memory overflow in SGEMM
-    on PPCG4.
-  * Fixed handling of NaN and Inf arguments in `SSCAL` and `DSCAL` on
-    PPC440,G4 and 970.
-  * Added improved `CGEMM` and `ZGEMM` kernels for POWER10.
-  Riscv64:
-  * Removed thread yielding overhead caused by `sched_yield`.
-  * Replaced some non-standard intrinsics with their official names.
-  * Fixed and sped up the implementations of `CGEMM`/`ZGEMM` `TCOPY`
-    for vector lengths 128 and 256.
-  * Improved the performance of `SNRM2`/`DNRM2` for RVV1.0 targets.
-  * Added optimized `?OMATCOPY_CN` kernels for RVV1.0 targets.
- Add test package.
- Add flags: `-Wa,--noexecstack -Wl,-z,noexecstack` to make sure
-  stack is not executable. This works around problems in assembler
-  code for z.
- Make stack of empty cpuid.S non-executable as well.
-
-------------------------------------------------------------------
-Wed Mar  5 14:17:26 UTC 2025 - Egbert Eich <eich@suse.com>
-
- Set gcc versions for ppc64le (bsc#1239702)
-  * on SLE-15-SP6: v13
-  * on SLE-15-SP7: v14
-
-------------------------------------------------------------------
-Mon Feb  3 14:43:29 UTC 2025 - Andreas Schwab <schwab@suse.de>
-
- Disable LTO on riscv64 due to GCC#110812
-
-------------------------------------------------------------------
-Thu Jan  2 15:15:51 UTC 2025 - Egbert Eich <eich@suse.com>
-
- Update to version 0.3.28 (jsc#PED-9676):
-  * General:
-    + Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
-      for allocating huge memory pages as buffers on suitable systems.
-    + Changed the unfinished implementation of `GEMM3M` for the generic
-      target on all architectures to at least forward to regular GEMM.
-    + Improved multithreaded `GEMM` performance for large non-skinny
-      matrices.
-    + Improved BLAS3 performance on larger multicore systems through
-      improved parallelism.
-    + Improved performance of the initial memory allocation by reducing
-      locking overhead.
-    + Improved performance of `GBMV` at small problem sizes by introducing
-      a size barrier for the switch to multithreading.
-    + Added an implementation of the `CBLAS_GEMM_BATCH` extension.
-    + Fixed corner cases involving the handling of NAN and INFINITY
-      arguments in `?SCAL` on all architectures.
-    + Fixed NAN handling and potential accuracy issues in compilations
-      with Intel ICX by supplying a suitable fp-model option by default.
-    + It is now possible to register a callback function that replaces
-      the built-in support for multithreading with an external backend
-      like TBB (`openblas_set_threads_callback_function`).
-    + Fixed potential duplication of suffixes in shared library naming.
-    + Improved C compiler detection by the build system to tolerate
-      more naming variants for gcc builds.
-    + Fixed an unnecessary dependency of the utest on CBLAS.
-    + Fixed spurious error reports from the BLAS extensions `utest`.
-    + Fixed unwanted invocation of the `GEMM3M` tests in cross-
-      compilation.
-    + Fixed a flaw in the makefile build that could lead to the
-      pkgconfig file containing an entry of `UNKNOWN` for the target
-      cpu after installing.
-    + Integrated fixes from the Reference-LAPACK project:
-      - Fixed uninitialized variables in the LAPACK tests for `?QP3RK`.
-      - Fixed potential bounds error in `?UNHR_COL`/`?ORHR_COL`.
-      - Fixed potential infinite loop in the LAPACK testsuite.
-      - Make the variable type used for hidden length arguments
-        configurable.
-    + Fixed `SYTRD` workspace computation and various typos.
-    + Prevent compiler use of FMA that could increase numerical
-      error in `?GEEVX`.
-  * x86-64:
-    + Fixed a potential thread buffer overrun in `SBSTOBF16` on small
-      systems.
-    + Fixed an accuracy issue in `ZSCAL` introduced in 0.3.26.
-    + Added support for Intel Emerald Rapids and Meteor Lake CPUs.
-    + Added autodetection support for the Zhaoxin KX-7000 CPU.
-    + Fixed autodetection of Intel Prescott (probably broken
-      since 0.3.19).
-    + Fixed compilation of the converter-generated C versions
-      of the LAPACK sources with gcc-14.
-    + Added support for supplying the L2 cache size via an
-      environment variable (`OPENBLAS_L2_SIZE`) in case it is not
-      correctly reported (as in some VM configurations).
-    + Improved the error message shown when thread creation fails
-    on startup.
-  * arm64:
-    + Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
-      1xN or Mx1 matrix to the corresponding `GEMV` kernel.
-    + Added optimized `SGEMV` and `DGEMV` kernels for A64FX.
-    + Added optimized SVE kernels for small-matrix `GEMM`.
-    + Added A64FX to the CPU list for DYNAMIC_ARCH.
-    + Fixed building with support for CPU affinity.
-    + Worked around accuracy problems with `C/ZNRM2` on NeoverseN1
-      targets.
-    + Improved GEMM performance on Neoverse V1.
-    + Fixed compilation for `NEOVERSEN2` with older compilers.
-    + Fixed potential miscompilation of the SVE `SDOT` and `DDOT`
-      kernels.
-    + Fixed potential miscompilation of the non-SVE `CDOT` and
-      `ZDOT` kernels.
-    + Fixed a potential overflow when using very large user-defined
-      `BUFFERSIZE`.
-  * Power:
-    + Added a fast path forwarding `SGEMM` and `DGEMM` calls with a 1xN
-    or Mx1 matrix to the corresponding `GEMV` kernel.
-    + Significantly improved performance of `SBGEMM` on POWER10.
-    + Fixed compilation with OpenMP and the XLF compiler.
-    + Fixed building of parts of the LAPACK testsuite with XLF.
-    + Fixed CSWAP/ZSWAP on big-endian POWER10 targets.
-    + Fixed a performance regression in SAXPY on POWER10 with OpenXL.
-    + Fixed a potential overflow when using very large user-defined
-      `BUFFERSIZE`.
-    + Fixed an accuracy issue in the POWER6 kernels for `GEMM` and
-      `GEMV`.
-  * RISCV64:
-    + Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
-      1xN or Mx1 matrix to the corresponding GEMV kernel.
-    + Added `DYNAMIC_ARCH` support (comprising `GENERIC_RISCV64` and
-      the two RVV 1.0 targets with vector length of 128 and 256).
-    + Worked around the `ZVL128B` kernels for `AXPBY` mishandling the
-      special case of zero Y increment.
- Obsoleted: no-static.patch.
-
-------------------------------------------------------------------
-Mon Jul 29 09:21:41 UTC 2024 - Egbert Eich <eich@suse.com>
-
- Duplicate all options passed to `make` also to `make install`:
-  The openblas build output suggests this: 'Note that any flags
-  passed to make during build should also be passed to make install
-  to circumvent any install errors'.
-  This also makes sure that the minimum CPU requirement set in
-  the pkgconfig file is the same one as used for building.
-  This helps to maintain a reproducible build (boo#1228177).
-
-------------------------------------------------------------------
-Thu Jun 13 07:32:23 UTC 2024 - Andreas Schwab <schwab@suse.de>
-
- no-static.patch: do not link statically
-
-------------------------------------------------------------------
-Sun Jun  9 07:07:51 UTC 2024 - Egbert Eich <eich@suse.com>
-
- Update to version 0.3.27 (boo#1225869):
-  General:
-  * Added initial (generic) support for the `CSKY` architecture.
-  * Capped the maximum number of threads used in `GEMM`, `GETRF`
-    and `POTRF` to avoid creating underutilized or idle threads.
-  * Sped up multithreaded `POTRF` on all platforms.
-  * Added extension `openblas_set_num_threads_local()` that returns
-    the previous thread count.
-  * Re-evaluated the `SGEMV` and `DGEMV` load thresholds to avoid
-    activating multithreading for too small workloads.
-  * Improved the fallback code used when the precompiled number of
-    threads is exceeded,  and made it callable multiple times
-    during the lifetime of an instance.
-  * Added CBLAS interfaces for the BLAS extensions `?AMIN`,`?AMAX`,
-    `CAXPYC` and `ZAXPYC`.
-  * Fixed a potential buffer overflow in the interface to the
-    `GEMMT` kernels.
-  * Fixed use of incompatible pointer types in `GEMMT` and
-    `C`/`ZAXPBY` as flagged by GCC-14.
-  * Fixed unwanted case sensitivity of the character parameters in
-    `?TRTRS`; sped up the OpenMP thread management code.
-  * Fixed sizing of logical variables in `INTERFACE64` builds of
-    the C version of LAPACK.
-  * Fixed inclusion of new LAPACK and LAPACKE functions from
-    LAPACK 3.11 in the shared library.
-  * Modified the error thresholds for `SGS`/`DGS` functions in
-    the LAPACK testsuite to suppress spurious errors.
-  * Added support for calling ?NRM2 with a negative increment value
-    on all architectures.
-  * Fixed handling of the `OPENBLAS_LOOPS` variable in several
-    benchmarks.
-  * Integrated fixes from the Reference-LAPACK project:
-    Increased accuracy in `C`/`ZLARFGP` (Reference-LAPACK PR 981).
-  x86:
-  * Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
-  x86-64:
-  * Removed all instances of `sched_yield()` on Linux and BSD.
-  * Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
-  * Added compiler checks for `AVX512BF16` compatibility.
-  * Fixed cpu handling fallbacks for Sapphire Rapids with disabled
-    AVX2 in `DYNAMIC_ARCH` mode.
-  * Fixed extensions `SCSUM` and `DZSUM`.
-  * Improved `GEMM` performance for ZEN targets.
-  arm64:
-  * Added initial support for the Cortex-A76 cpu.
-  * Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
-  * Fixed default compiler options for gcc (-march and -mtune).
-  * Added support for the NeoverseV2 cpu in `DYNAMIC_ARCH` builds.
-  * Corrected `SCSUM` kernels (erroneously duplicating `SCASUM`
-    behaviour).
-  * Added SVE-enabled kernels for `CSUM`/`ZSUM`.
-  * Worked around an inaccuracy in the `NRM2` kernels for NeoverseN1.
-  power:
-  * Improved performance of `SGEMM` on POWER8/9/10.
-  * Improved performance of `DGEMM` on POWER10.
-  zarch:
-  * Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
-  * Fixed calculation of `?SUM` on Z13.
- LIBNAMESUFFIX semantics have changed: no separator will be added.
-  Adjusted in spec file.
-
 -------------------------------------------------------------------
 Thu Feb 15 08:27:33 UTC 2024 - Egbert Eich <eich@suse.com>

 - Remove DYNAMIC_LIST for aarch64 for older gcc versions: This has
  been fixed upstream.

-------------------------------------------------------------------
-Wed Jan 17 08:47:55 UTC 2024 - Egbert Eich <eich@suse.com>
-
- Update to version 0.3.26:
-  * General:
-    - Added type declarations for complex variables to the
-      MSVC-specific parts of the LAPACK header.
-    - Significantly sped up `?GESV` for small problem sizes by
-      introducing a lower bound for multithreading.
-    - Imported additions and corrections from the Reference-LAPACK
-      project:
-      + Added new LAPACK functions for truncated `QR` with pivoting
-        (Reference-LAPACK PRs 891&941).
-      + Handle miscalculation of minimum work array size in corner
-      	cases (Reference-LAPACK PR 942).
-      + Fixed use of uninitialized variables in `?GEDMD` and
-      	improved inline documentation.
-      + Fixed use of uninitialized variables (and consequential
-      	failures) in `?BBCSD`.
-      + Added tests for the recently introduced Dynamic Mode
-      	Decomposition functions.
-      + Fixed several memory leaks in the LAPACK testsuite.
-  * x86-64:
-    - Fixed computation of `CASUM` on SkylakeX and newer targets in
-      the special case that AVX512 is not supported by the compiler
-      or operating environment.
-    - Fixed potential undefined behaviour in the `CASUM`/`ZASUM`
-      kernels for AVX512 targets.
-    - worked around a problem in the pre-AVX kernels for `GEMV`
-  * arm64:
-    - Sped up `SGEMM` and `DGEMM` on Neoverse V1 and N1.
-    - Sped up `?DOT` on SVE-capable targets.
-    - Reduced the number of targets in `DYNAMIC_ARCH` builds by
-      eliminating functionally equivalent ones.
-  * POWER:
-    - Improved the SGEMM kernel for POWER10.
-    - Fixed compilation with (very) old versions of gcc.
-    - Added autodetection of the POWERPC 7400 subtype.
-
 -------------------------------------------------------------------
 Wed Dec 20 12:02:55 UTC 2023 - Giacomo Comes <gcomes.obs@gmail.com>

@@ -360,188 +19,44 @@ Wed Nov 29 05:43:18 UTC 2023 - Atri Bhattacharya <badshah400@gmail.com>
      thread count
    - improved the code to add supplementary thread buffers in
      case of overflow
-    - fixed a potential division by zero in `?ROTG`
-    - improved the `?MATCOPY` functions to accept zero-sized rows or
+    - fixed a potential division by zero in ?ROTG
+    - improved the ?MATCOPY functions to accept zero-sized rows or
      columns
    - corrected empty prototypes in function declarations
    - cleaned up unused declarations in the f2c-converted versions
      of the LAPACK sources
+    - fixed compilation with the Cray CCE Compiler suite
    - improved link line rewriting to avoid mixed libgomp/libomp
      builds with clang&gfortran
+    - worked around OPENMP builds with LLVM14's libomp hanging on
+      FreeBSD
+    - improved the Makefiles to require less option duplication on
+      "make install"
    - imported the following changes from the upcoming release
      3.12 of Reference-LAPACK: LAPACK PR 900, LAPACK PR 904,
      LAPACK PR 907, LAPACK PR 909, LAPACK PR 926, LAPACK PR 927,
      LAPACK PR 928 & 930
  * x86-64:
+    - fixed compile-time autodetection of AMD Ryzen3 and Ryzen4
+      cpus
    - fixed capability-based fallback selection for unknown cpus
-      in `DYNAMIC_ARCH`
-    - added AVX512 optimizations for `?ASUM` on Intel Sapphire Rapids and
+      in DYNAMIC_ARCH
+    - added AVX512 optimizations for ?ASUM on Sapphire Rapids and
      Cooper Lake
  * ARM64:
+    - fixed building on Apple with homebrew gcc
    - fixed building with XCODE 15
    - fixed building on A64FX and Cortex A710/X1/X2
-    - increased the default buffer size for recent arm server cpus
+    - increased the default buffer size for recent ARM server cpus
  * POWER:
-    - added support for `DYNAMIC_ARCH` builds with clang
-    - fixed union declaration in the `BFLOAT16` test case
- Changes in version 0.3.24
-  * General:
-    - Declared the arguments of `cblas_xerbla` as `const`
-      (in accordance with the reference implementation
-      and others, the previous discrepancy appears to have dated
-      back to GotoBLAS)
-    - fixed the implementation of `?GEMMT` that was added in 0.3.23
-    - made cpu-specific `SWITCH_RATIO` parameters for GEMM
-      available to `DYNAMIC_ARCH` builds
-    - fixed missing `SSYCONVF` function in the shared library
-    - fixed parallel build logic used with gmake
-    - fixed several issues with the handling of runtime limits on
-      the number of OPENMP threads
-    - corrected the error code returned by `SGEADD`/`DGEADD` when
-      LDA is too small
-    - corrected the error code returned by `IMATCOPY` when LDB
-      is too small
-    - updated `?NRM2` to support negative increment values (as
-      introduced in release 3.10.0 of the Reference BLAS)
-    - updated `?ROTG` to use the safe scaling algorithm introduced
-      in release 3.10.0 of the Reference BLAS
-    - fixed OpenMP builds with CLANG for the case where libomp is
-      not in a standard location
-    - fixed a potential overwrite of unrelated memory during
-      thread initialisation on startup
-    - fixed a potential integer overflow in the multithreading
-      threshold for `?SYMM`/`?SYRK`
-    - fixed build of the LAPACKE interfaces for the LAPACK 3.11.0
-      `?TRSYL` functions added in 0.3.22
-    - applied additions and corrections from the development
-      branch of Reference-LAPACK:
-      - fixed actual arguments passed to a number of LAPACK
-        functions (from Reference-LAPACK PR 885)
-      - fixed workspace query results in LAPACK `?SYTRF`/`?TRECV3`
-        (from Reference-LAPACK PR 883)
-      - fixed derivation of the UPLO parameter in `LAPACKE_?larfb`
-        (from Reference-LAPACK PR 878)
-      - fixed a crash in LAPACK `?GELSDD` on `NRHS=0` (from
-        Reference-LAPACK PR 876)
-      - added new LAPACK utility functions `CRSCL` and `ZRSCL`
-        (from Reference-LAPACK PR 839)
-      - corrected the order of eigenvalues for 2x2 matrices in
-       `?STEMR` (Reference-LAPACK PR 867)
-      - removed spurious reference to OpenMP variables outside
-        OpenMP contexts (Reference-LAPACK PR 860)
-      - updated file comments on use of `LAMBDA` variable in
-        LAPACK (Reference-LAPACK PR 852)
-      - fixed documentation of LAPACK `SLASD0`/`DLASD0`
-        (Reference-LAPACK PR 855)
-      - fixed confusing use of "minor" in LAPACK documentation
-        (Reference-LAPACK PR 849)
-      - added new LAPACK functions ?GEDMD for dynamic mode
-        decomposition (Reference-LAPACK PR 736)
-      - fixed potential stack overflows in the `EIG` part of the
-        LAPACK testsuite (Reference-LAPACK PR 854)
-      - applied small improvements to the variants of
-        Cholesky and QR functions (Reference-LAPACK PR 847)
-      - removed unused variables from LAPACK `?BDSQR`
-        (Reference-LAPACK PR 832)
-      - fixed a potential crash on allocation failure in LAPACKE
-        `SGEESX`/`DGEESX` (Reference-LAPACK PR 836)
-      - added a quick return from `SLARUV`/`DLARUV` for N < 1
-        (Reference-LAPACK PR 837)
-      - updated function descriptions in LAPACK `?GEGS`/`?GEGV`
-        (Reference-LAPACK PR 831)
-      - improved algorithm description in `?GELSY`
-        (Reference-LAPACK PR 833)
-      - fixed scaling in LAPACK `STGSNA`/`DTGSNA`
-        (Reference-LAPACK PR 830)
-      - fixed crash in `LAPACKE_?geqrt` with row-major data
-        (Reference-LAPACK PR 768)
-      - added LAPACKE interfaces for `C/ZUNHR_COL` and
-        `S/DORHR_COL` (Reference-LAPACK PR 827)
-      - added error exit tests for `SYSV`/`SYTD2`/`GEHD2` to
-        the testsuite (Reference-LAPACK PR 795)
-      - fixed typos in LAPACK source and comments
-        (Reference-LAPACK PRs 809,811,812,814,820)
-      - adopt refactored `?GEBAL` implementation
-        (Reference-LAPACK PR 808)
-  * x86_64:
-    - added cpu model autodetection for Intel Alder Lake N
-    - added activation of the AMX tile to the Sapphire Rapids
-      `SBGEMM` kernel
-    - worked around miscompilations of GEMV/SYMV kernels by
-      gcc's tree-vectorizer
-    - fixed runtime detection of Cooperlake and Sapphire Rapids
-      in `DYNAMIC_ARCH`
-    - fixed feature-based cputype fallback in `DYNAMIC_ARCH`
-    - corrected `ZAXPY` result on old pre-AVX hardware for the
-      `INCX=0` case
-    - fixed a potential use of uninitialized variables in ZTRSM
-  * ARMV8:
-    - implemented SWITCH_RATIO parameter for improved GEMM
-      performance on Neoverse
-    - activated SVE SGEMM and DGEMM kernels for Neoverse V1
-    - improved performance of the SVE CGEMM and ZGEMM kernels
-      on Neoverse V1
-    - improved kernel selection for the ARMV8SVE target and added
-      it to `DYNAMIC_ARCH`
-    - fixed runtime check for SVE availability in `DYNAMIC_ARCH`
-      builds to take OS or container restrictions into account
-    - fixed a potential use of uninitialized variables in ZTRSM
-  * POWER:
-    - fixed compiler warnings in the POWER10 SBGEMM kernel
- Changes in version 0.3.23
-  * General:
-    - fixed a serious regression in `GETRF`/`GETF2` and
-      `ZGETRF`/`ZGETF2` where subnormal but nonzero data elements
-      triggered the singularity flag
-    - fixed a long-standing bug in `CSPR`/`ZSPR` in single-threaded
-      operation
-      for cases where elements of the X vector are real numbers (or
-      complex with only the real part zero)
-  * x86_64:
-    - added further CPUID values for Intel Raptor Lake
- Changes in version 0.3.22
-  * General:
-    - Updated the included LAPACK to Reference-LAPACK release 3.11.0
-      plus post-release corrections and improvements
-    - Added a threshold for multithreading in `SYMM`, `SYMV` and
-      `SYR2K`
-    - Increased the threshold for multithreading in `SYRK`
-    - OpenBLAS no longer decreases the global `OMP_NUM_THREADS`
-      when it exceeds the maximum thread count the library was
-      compiled for.
-    - fixed `?GETF2` potentially returning `NaN` with tiny matrix
-      elements
-    - fixed `openblas_set_num_threads` to work in `USE_OPENMP`
-      builds.
-    - fixed cpu core counting in `USE_OPENMP` builds returning the
-      number of OMP "places" rather than cores
-    - fixed stride calculation in the optimized small-matrix path of
-      complex `SYR`
-    - fixed building of Reference-LAPACK with recent gfortran
-    - added new environment variable `OPENBLAS_DEFAULT_NUM_THREADS`
-    - added a GEMV-based implementation of `GEMMT`
-  * x86_64:
-    - added autodetection of Intel Raptor Lake cpu models
-    - added SSCAL microkernels for Haswell and newer targets
-    - improved the performance of the Haswell DSCAL microkernel
-    - added CSCAL and ZSCAL microkernels for SkylakeX targets
-    - fixed detection of gfortran and Cray CCE compilers
-    - fixed runtime selection of COOPERLAKE in `DYNAMIC_ARCH` builds
-    - worked around gcc/llvm using risky FMA operations in
-      CSCAL/ZSCAL
-  * ARMV8:
-    - fixed cross-compilation to CortexA53 with CMAKE
-    - fixed compilation with CMAKE and "Arm Compiler for Linux 22.1"
-    - added cpu autodetection for Cortex X3 and A715
-    - fixed conditional compilation of SVE-capable targets in
-      `DYNAMIC_ARCH`
-    - sped up SVE kernels by removing unnecessary prefetches
-    - improved the GEMM performance of Neoverse V1
-    - added SVE kernels for SDOT and DDOT
-    - added an SBGEMM kernel for Neoverse N2
-    - improved cpu-specific compiler option selection for
-      Neoverse cpus
-    - added support for setting `CONSISTENT_FPCSR`
+    - fixed building with the IBM xlf 16.1.1 compiler
+    - fixed building with IBM XL C
+    - added support for DYNAMIC_ARCH builds with clang
+    - fixed union declaration in the BFLOAT16 test case
+    - enable optimizations for the AIX assembler on POWER10
+  * LOONGARCH64:
+    - added an optimized SGEMV kernel
+    - added an optimized DTRSM kernel
 - Minor rebase of openblas-ppc64be_up2_p8.patch to apply cleanly.
 - Drop upstreamed patches:
  * Use-blasint-for-INTERFACE64-compatibility.patch
--- a/openblas.spec
+++ b/openblas.spec
@@ -1,7 +1,7 @@
 #
-# spec file for package openblas
+# spec file
 #
-# Copyright (c) 2025 SUSE LLC
+# Copyright (c) 2023 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -18,9 +18,8 @@

 %global flavor @BUILD_FLAVOR@%{nil}

-%undefine sha1
-%define _vers 0_3_29
-%define vers 0.3.29
+%define _vers 0_3_25
+%define vers 0.3.25
 %define so_v 0
 %define pname openblas

@@ -167,12 +166,6 @@ ExclusiveArch:  do_not_build
 %ifarch ppc64le
 %if 0%{?c_f_ver} > 9
 %else
-%if 0%{?sle_version} == 150700
-%define cc_v 14
-%endif
-%if 0%{?sle_version} == 150600
-%define cc_v 13
-%endif
 %if 0%{?sle_version} == 150500
 %define cc_v 12
 %endif
@@ -198,7 +191,6 @@ ExclusiveArch:  do_not_build
 %define p_prefix %_prefix
 %define p_includedir %_includedir/%pname
 %define p_libdir %_libdir/openblas%{?flavor:-%{flavor}}
-%define p_testdir %_libexecdir/openblas%{?flavor:-%{flavor}}/tests
 %define p_cmakedir %{p_libdir}/cmake/%{pname}
 %define num_threads 64

@@ -215,19 +207,12 @@ ExclusiveArch:  do_not_build
 %define p_prefix %hpc_prefix
 %define p_includedir %hpc_includedir
 %define p_libdir %hpc_libdir
-%define p_testdir %hpc_prefix/tests
 %define p_cmakedir %{hpc_libdir}/cmake
 %define num_threads 256

 %{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}}
 %endif

-%if 0%{?sha1:1}
-%define v_string %{sha1}
-%else
-%define v_string v%{version}
-%endif
-
 Name:           %{package_name}
 Version:        %vers
 Release:        0
@@ -235,15 +220,14 @@ Summary:        An optimized BLAS library based on GotoBLAS2
 License:        BSD-3-Clause
 Group:          Productivity/Scientific/Math
 URL:            http://www.openblas.net
-Source0:        https://github.com/xianyi/OpenBLAS/archive/%{v_string}.tar.gz#/OpenBLAS-%{version}%{?sha1:_%{sha1}}.tar.gz
+Source0:        https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz
 Source1:        README.SUSE
 Source2:        README.HPC.SUSE
-Source3:        openblas_tests.sh.in
-Source4:        openblas.rpmlintrc
+Source3:        openblas.rpmlintrc
+Patch101:       Link-library-with-z-noexecstack.patch
 # PATCH port
 Patch102:       Handle-s390-correctly.patch
 Patch103:       openblas-ppc64be_up2_p8.patch
-Patch104:       Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch

 #BuildRequires:  cmake
 BuildRequires:  memory-constraints
@@ -254,7 +238,7 @@ BuildRequires:  gcc%{?cc_v}-fortran
 BuildRequires:  gcc-fortran
 BuildRequires:  update-alternatives
 Requires(post): update-alternatives
-Requires(preun): update-alternatives
+Requires(preun):update-alternatives
 %else
 BuildRequires:  %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
 BuildRequires:  lua-lmod
@@ -271,7 +255,7 @@ Group:          System/Libraries
 %if %{without hpc}
 Requires(post): update-alternatives
 Requires(post): coreutils
-Requires(preun): update-alternatives
+Requires(preun):update-alternatives
 %if "%flavor" == "serial"
 Obsoletes:      lib%{pname}%{so_v} < %{version}
 Provides:       lib%{pname}%{so_v} = %{version}
@@ -342,30 +326,15 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.

 This package contains headers for OpenBLAS.

-%package   tests
-Summary:        Unit Tests for openblas library
-Group:          Development/Libraries/C and C++
-
-%description tests
-OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
-
-This package contains test binaries.
-
 %prep

-%setup -q -n OpenBLAS-%{?sha1:%{sha1}}%{!?sha1:%{version}}
+%setup -q -n OpenBLAS-%{version}
 %autopatch -p1
 %ifarch s390
 sed -i -e "s@m32@m31@" Makefile.system
 %endif
 sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \
    test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile
-grep -q .note.GNU-stack cpuid.S || echo '.section        .note.GNU-stack,"",@progbits' >> cpuid.S
-# Disable sgemmt and dgemmt tests on ppc64le when using gcc13
-%if "%{?_arch}" == "ppc64le" && 0%{?gcc_version} == 13
-sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_sgemmt.o *@@' utest/Makefile
-sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_dgemmt.o *@@' utest/Makefile
-%endif

 %if %{without hpc}
 cp %{SOURCE1} .
@@ -401,11 +370,6 @@ EOF
 %define  _lto_cflags %{nil}
 %endif

-%ifarch riscv64
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110812
-%global _lto_cflags %{nil}
-%endif
-
 %if %{with hpc}
 %hpc_debug
 %hpc_setup_compiler
@@ -444,7 +408,7 @@ EOF
 %ifarch ppc64
 %global addopt -mvsx
 %endif
-%global addopt %{?addopt} -fno-strict-aliasing -Wa,--noexecstack -Wl,-z,noexecstack
+%global addopt %{?addopt} -fno-strict-aliasing

 # Make serial, threaded and OpenMP versions

@@ -457,7 +421,7 @@ EOF
 # Do not use LIBNAMESUFFIX for new builds as it will not allow
 # the different flavors to be plugin replacements of each other
 %if 0%{?suse_version} <= 1500 && %{without hpc}
-%define libnamesuffix LIBNAMESUFFIX=_%{flavor}
+%define libnamesuffix LIBNAMESUFFIX=%flavor
 %endif
 make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
     %{?openblas_opt} \
@@ -465,13 +429,12 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
     NUM_THREADS=%{num_threads} V=1 \
     OPENBLAS_LIBRARY_DIR=%{p_libdir} \
     OPENBLAS_INCLUDE_DIR=%{p_includedir} \
-     OPENBLAS_BINARY_DIR=%{p_testdir} \
     OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
     PREFIX=%{p_prefix} \
     %{?dynamic_list} \
     %{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
     %{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
-     %{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}}
+     %{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} CEXTRALIB=""}}

 %install
 %if %{with hpc}
@@ -481,28 +444,13 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
 # Install library and headers
 # Pass NUM_THREADS again, as it is not propagated from the build step
 # https://github.com/OpenMathLib/OpenBLAS/issues/4275
-mkdir -p %{buildroot}/%{p_testdir}
-%make_install install_tests %{?openblas_target} %{?build_flags} \
-    %{?openblas_opt} \
+%make_install  %{?build_flags} \
    NUM_THREADS=%{num_threads} \
    OPENBLAS_LIBRARY_DIR=%{p_libdir} \
    OPENBLAS_INCLUDE_DIR=%{p_includedir} \
-    OPENBLAS_BINARY_DIR=%{p_testdir} \
    OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
-    %{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
-    %{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
-    %{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}} \
+    %{?libnamesuffix} \
    PREFIX=%{p_prefix}
-sed -e 's#@FLAVOR@#%{flavor}#' \
-    -e 's#@COMPILER@#%{?compiler_family:%compiler_family%{?hpc_gnu_dep_version:/%hpc_gnu_dep_version}}#' \
-    < %{S:3} > %{buildroot}/%{p_testdir}/openblas_tests.sh
-chmod 0755 %{buildroot}/%{p_testdir}/openblas_tests.sh
-for i in %{buildroot}/%{p_testdir}/*; do
-    case $i in
-	*.dat|*in*) chmod 0644 $i;;
-	*)     chmod 0755 $i;;
-    esac
-done

 # Delete info about OBS host cpu
 %ifarch %ix86 x86_64
@@ -694,11 +642,6 @@ fi
 %{p_libdir}/pkgconfig
 %endif

-%files tests
-%dir %{p_testdir}
-%dir %{dirname:%{p_testdir}}
-%{p_testdir}/*
-
 %files devel-static
 %{p_libdir}/libopenblas*.a

--- a/openblas_tests.sh.in
+++ b/openblas_tests.sh.in
@@ -1,92 +0,0 @@
-#! /bin/bash
-
-FLAVOR=@FLAVOR@
-COMPILER=@COMPILER@
-# Series 'test'
-series_test() {
-    ${dir}/sblat1 || echo "sblat1 failed"
-    ${dir}/dblat1 || echo "dblat1 failed"
-    ${dir}/cblat1 || echo "cblat1 failed"
-    ${dir}/zblat1 || echo "zblat1 failed"
-    ${dir}/sblat2 < ${dir}/sblat2.dat || echo " failed"
-    grep -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || true
-    ${dir}/dblat2 < ${dir}/dblat2.dat || echo " failed"
-    grep -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || true
-    ${dir}/cblat2 < ${dir}/cblat2.dat || echo " failed"
-    grep -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || true
-    ${dir}/zblat2 < ${dir}/zblat2.dat || echo " failed"
-    grep -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || true
-    ${dir}/test_sbgemm > SBBLAT3.SUMM || echo "test_sbgemm failed"
-    grep -q FATAL SBBLAT3.SUMM && cat SBBLAT3.SUMM || true
-    ${dir}/dblat3 < ${dir}/dblat3.dat || echo "dblat3 failed"
-    grep -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || true
-    ${dir}/cblat3 < ${dir}/cblat3.dat || echo "cblat3 failed"
-    grep -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || true
-    [ -x ${dir}/cblat3_3m ] && \
-	{  ${dir}/cblat3_3m < ${dir}/cblat3_3m.dat;
-	   grep -q FATAL CBLAT3_3M.SUMM && cat CBLAT3_3M.SUMM || true; }
-    ${dir}/zblat3 < ${dir}/zblat3.dat || echo "zblat3 failed";
-    grep -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || true
-    [ -x ${dir}/zblat3_3m ] && \
-	{ ${dir}/zblat3_3m < ${dir}/zblat3_3m.dat || echo "zblat3 failed";
-	  grep -q FATAL ZBLAT3_3M.SUMM && cat ZBLAT3_3M.SUMM || true; }
-}
-
-# Series 'ctest'
-series_ctest() {
-    ${dir}/xscblat1 || echo "xscblat1 failed"
-    ${dir}/xdcblat1 || echo "sdcblat1 failed"
-    ${dir}/xccblat1 || echo "xccblat1 failed"
-    ${dir}/xzcblat1 || echo "xzcblat1 failed"
-    ${dir}/xscblat2 < ${dir}/sin2 || echo "xscblat2 failed"
-    ${dir}/xdcblat2 < ${dir}/din2 || echo "xdcblat2 failed"
-    ${dir}/xccblat2 < ${dir}/cin2 || echo "xccblat2 failed"
-    ${dir}/xzcblat2 < ${dir}/zin2 || echo "xzcblat2 failed"
-    ${dir}/xscblat3 < ${dir}/sin3 || echo "xscblat3 failed"
-    ${dir}/xdcblat3 < ${dir}/din3 || echo "xdcblat3 failed"
-    ${dir}/xccblat3 < ${dir}/cin3 || echo "xccblat3 failed"
-    ${dir}/xzcblat3 < ${dir}/zin3 || echo "xzcblat3 failed"
-    [ -x ${dir}/xccblat3_3m ] && { ${dir}/xccblat3_3m < ${dir}/cin3_3m || echo "cin3_3m failed"; };
-    [ -x ${dir}/xzcblat3_3m ] && { ${dir}/xzcblat3_3m < ${dir}/zin3_3m || echo "zin3_3m failed"; };
-}
-
-# Series 'utest'
-series_utest() {
-    ${dir}/openblas_utest || echo "openblas_utest failed"
-    ${dir}/openblas_utest_ext || echo "openblas_utest_ext failed"
-}
-
-
-dir=/usr/lib/openblas-${FLAVOR}/tests
-
-case $FLAVOR in
-    serial) export THREADS=false; export OMP=false ;;
-    pthreads) export THREADS=true; export OMP=false ;;
-    openmp) export THREADS=false; export OMP=true ;;
-    gnu-hpc) module pure;
-	     module load gnu/$COMPILER openblas;
-	     dir=$OPENBLAS_DIR/bin
-	     export THREADS=false; export OMP=true
-	     ;;
-    gnu-hpc-ptreads) module pure;
-	     module load gnu/$COMPILER_VERSION openblas;
-	     export THREADS=true; export OMP=false ;;
-esac
-
-OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 series_test
-if  $OMP || $TREADS; then
-    rm -f ?BLAT2.SUMM ?BLAT3.SUMM ?BLAT3_3M.SUMM
-    if $OMP; then
-	OMP_NUM_THREADS=2 series_test
-    else
-	OPENBLAS_NUM_THREADS=2 series_test
-    fi
-fi
-
-if  $OMP; then
-    OMP_NUM_THREADS=2 series_ctest
-else
-    OPENBLAS_NUM_THREADS=2 series_ctest
-fi
-
-series_utest