Compare commits

1 Commits
main ... 1.1

8 changed files with 65 additions and 812 deletions

View File

@@ -0,0 +1,24 @@
From: Egbert Eich <eich@suse.com>
Date: Wed Nov 30 20:16:21 2022 +0100
Subject: Link library with -z,noexecstack
Patch-mainline: Not yet
Git-commit: adddc0eadc81bcd29c48594793cb33eac0edb572
References:
Signed-off-by: Egbert Eich <eich@suse.com>
Signed-off-by: Egbert Eich <eich@suse.de>
---
exports/Makefile | 1 +
1 file changed, 1 insertion(+)
Index: OpenBLAS-0.3.25/exports/Makefile
===================================================================
--- OpenBLAS-0.3.25.orig/exports/Makefile
+++ OpenBLAS-0.3.25/exports/Makefile
@@ -193,6 +193,7 @@ else ifeq ($(F_COMPILER), FLANG)
else
ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
+ -Wl,-z,noexecstack \
-Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.

BIN
OpenBLAS-0.3.25.tar.gz (Stored with Git LFS) Normal file

Binary file not shown.

BIN
OpenBLAS-0.3.29.tar.gz (Stored with Git LFS)

Binary file not shown.

View File

@@ -1,139 +0,0 @@
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed Feb 12 09:04:22 2025 +0100
Subject: Restore the non-vectorized code from before PR4880 for POWER8
Patch-mainline: Not yet
Git-repo: https://github.com/xianyi/OpenBLAS
Git-commit: 98b5ef929cfc98f2f3c236966830276c255118d2
References: bsc#1239134
Signed-off-by: Egbert Eich <eich@suse.de>
---
kernel/power/sgemv_t.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c
index e133c815c..ed0a24230 100644
--- a/kernel/power/sgemv_t.c
+++ b/kernel/power/sgemv_t.c
@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp7 += v_x[i] * va7[i];
}
-
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
v_y[0] += a * temp0;
v_y[1] += a * temp4;
-
+#endif
}
@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp2 += v_x[i] * va2[i];
temp3 += v_x[i] * va3[i];
}
-
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp0 += temp1 + temp2 + temp3;
v_y[0] += a * temp0;
-
+#endif
}
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed Feb 12 09:07:20 2025 +0100
Subject: Restore the non-vectorized code from before PR4880 for POWER8
Patch-mainline: Not yet
Git-repo: https://github.com/xianyi/OpenBLAS
Git-commit: 81eed868b68c72ea1868663902f0904dc1b22326
References: bsc#1239134
Signed-off-by: Egbert Eich <eich@suse.de>
---
kernel/power/sgemv_t_8.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/kernel/power/sgemv_t_8.c b/kernel/power/sgemv_t_8.c
index f21f6eb7d..b30bb1137 100644
--- a/kernel/power/sgemv_t_8.c
+++ b/kernel/power/sgemv_t_8.c
@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp7 += vx1* va7_1 + vx2 * va7_2;
}
-
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
v_y[0] += a * temp0;
v_y[1] += a * temp4;
-
+#endif
}
@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1];
}
-
+
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp0 += temp1 + temp2 + temp3;
v_y[0] += a * temp0;
-
+#endif
}

View File

@@ -2,4 +2,6 @@
<package>serial</package>
<package>pthreads</package>
<package>openmp</package>
<package>gnu-hpc</package>
<package>gnu-hpc-pthreads</package>
</multibuild>

View File

@@ -1,350 +1,9 @@
-------------------------------------------------------------------
Mon Mar 17 08:51:26 UTC 2025 - Egbert Eich <eich@suse.com>
- Disable sgemmt and dgemmt tests in the test suite on power
when gcc-13 is used. It is known (bsc#1239134) that some
of these tests fail on this architecture when OpenBLAS
is being built with the said compiler version ever since
these tests were introduced.
This will essentially restore the situation of the
version prior to the addition of these tests (0.3.26) where
one was unaware of the problem.
This is only a temporary measure and will be removed once
the issue with gcc-13 has been resolved.
- Remove: Link-library-with-z-noexecstack.patch
since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
now.
-------------------------------------------------------------------
Fri Mar 14 09:24:18 UTC 2025 - Egbert Eich <eich@suse.com>
- Use upstream patch for bsc#1239134 which is more friendly to the
non-affected power9 and power10 sub-architectures:
Replace:
Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
by:
Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
-------------------------------------------------------------------
Sat Mar 8 13:23:53 UTC 2025 - Egbert Eich <eich@suse.com>
- Revert commit ba47c7f4f301aad100ed166de338b86e01da8465 to
prevent failures on Power8 (bsc#1239134)
* Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
- Add a script to run tests.
- Add bisect support.
-------------------------------------------------------------------
Wed Mar 5 15:47:13 UTC 2025 - Egbert Eich <eich@suse.com>
- Update to version 0.3.29 (jsc#PED-9676):
General:
* Fixed a potential NULL pointer dereference in multithreaded builds.
* Added function aliases for `GEMMT` using its new name `GEMMTR`
adopted by Reference-BLAS.
* Fixed the behavior of the recently added `CBLAS_?GEMMT` functions
with row-major data.
* Improved thread scaling of multithreaded `SBGEMV`.
* Improved thread scaling of multithreaded `TRTRI`.
* Fixed compilation of the CBLAS testsuite with gcc14 (and no
Fortran compiler).
* Fixed placement of the `-fopenmp` flag and libsuffix in the
generated pkgconfig file.
* Improved the `CMakeConfig` file generated by the Makefile build.
* Fixed const-correctness of `cblas_?geadd` in `cblas.h`.
* Fixed a potential inaccuracy in multithreaded BLAS3 calls.
* Fixed empty implementations of `get`/`set_affinity` that print a
warning in OpenMP builds.
* Fixed function signatures for TRTRS in the converted C version of
LAPACK.
* Fixed omission of several single-precision LAPACK symbols in the
shared library.
* Improved build instructions for the provided "pybench" benchmarks.
* Improved documentation, including descriptions of environment
variables that affect build and runtime behavior.
* Added a separate "make install_tests" target for use with
cross-compilations.
* Integrated improvements and corrections from Reference-LAPACK:
- removed a comparison in LAPACKE `?tpmqrt` that is always false.
- fixed the leading dimension for B in tests for GGEV.
- replaced the `?LARFT` functions with a recursive implementation.
arm64:
* Fixed a long-standing bug in the (generic) `c`/`zgemm_beta` kernel
that could lead to reads and writes outside the array bounds in some
circumstances.
* Rewrote cpu autodetection to scan all cores and return the highest
performing type.
* Improved the DGEMM performance for SVE targets and small matrix sizes.
* improved dimension criteria for forwarding from `GEMM` to `GEMV`
kernels.
* Added SVE kernels for `ROT` and `SWAP`.
* Improved SVE kernels for `SGEMV` and `DGEMV` on `A64FX` and
`NEOVERSEV1`.
* Fixed NRM2 implementations for generic SVE targets and the Neoverse N2.
x86_64:
* Fixed a wrong storage size in the SBGEMV kernel for Cooper Lake.
* Added cpu autodetection for Intel Granite Rapids.
* Added cpu autodetection for AMD Ryzen 5 series.
* Added optimized `SOMATCOPY_CT` for AVX-capable targets.
* fixed the fallback implementation of `GEMM3M` in GENERIC builds.
Power:
* Fixed multithreaded `SBGEMM`.
* Fixed a CMake build problem on POWER10.
* Improved the performance of SGEMV.
* Added vectorized implementations of `SBGEMV` and support for
forwarding 1xN `SBGEMM` to them.
* Fixed illegal instructions and potential memory overflow in SGEMM
on PPCG4.
* Fixed handling of NaN and Inf arguments in `SSCAL` and `DSCAL` on
PPC440,G4 and 970.
* Added improved `CGEMM` and `ZGEMM` kernels for POWER10.
Riscv64:
* Removed thread yielding overhead caused by `sched_yield`.
* Replaced some non-standard intrinsics with their official names.
* Fixed and sped up the implementations of `CGEMM`/`ZGEMM` `TCOPY`
for vector lengths 128 and 256.
* Improved the performance of `SNRM2`/`DNRM2` for RVV1.0 targets.
* Added optimized `?OMATCOPY_CN` kernels for RVV1.0 targets.
- Add test package.
- Add flags: `-Wa,--noexecstack -Wl,-z,noexecstack` to make sure
stack is not executable. This works around problems in assembler
code for z.
- Make stack of empty cpuid.S non-executable as well.
-------------------------------------------------------------------
Wed Mar 5 14:17:26 UTC 2025 - Egbert Eich <eich@suse.com>
- Set gcc versions for ppc64le (bsc#1239702)
* on SLE-15-SP6: v13
* on SLE-15-SP7: v14
-------------------------------------------------------------------
Mon Feb 3 14:43:29 UTC 2025 - Andreas Schwab <schwab@suse.de>
- Disable LTO on riscv64 due to GCC#110812
-------------------------------------------------------------------
Thu Jan 2 15:15:51 UTC 2025 - Egbert Eich <eich@suse.com>
- Update to version 0.3.28 (jsc#PED-9676):
* General:
+ Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
for allocating huge memory pages as buffers on suitable systems.
+ Changed the unfinished implementation of `GEMM3M` for the generic
target on all architectures to at least forward to regular GEMM.
+ Improved multithreaded `GEMM` performance for large non-skinny
matrices.
+ Improved BLAS3 performance on larger multicore systems through
improved parallelism.
+ Improved performance of the initial memory allocation by reducing
locking overhead.
+ Improved performance of `GBMV` at small problem sizes by introducing
a size barrier for the switch to multithreading.
+ Added an implementation of the `CBLAS_GEMM_BATCH` extension.
+ Fixed corner cases involving the handling of NAN and INFINITY
arguments in `?SCAL` on all architectures.
+ Fixed NAN handling and potential accuracy issues in compilations
with Intel ICX by supplying a suitable fp-model option by default.
+ It is now possible to register a callback function that replaces
the built-in support for multithreading with an external backend
like TBB (`openblas_set_threads_callback_function`).
+ Fixed potential duplication of suffixes in shared library naming.
+ Improved C compiler detection by the build system to tolerate
more naming variants for gcc builds.
+ Fixed an unnecessary dependency of the utest on CBLAS.
+ Fixed spurious error reports from the BLAS extensions `utest`.
+ Fixed unwanted invocation of the `GEMM3M` tests in cross-
compilation.
+ Fixed a flaw in the makefile build that could lead to the
pkgconfig file containing an entry of `UNKNOWN` for the target
cpu after installing.
+ Integrated fixes from the Reference-LAPACK project:
- Fixed uninitialized variables in the LAPACK tests for `?QP3RK`.
- Fixed potential bounds error in `?UNHR_COL`/`?ORHR_COL`.
- Fixed potential infinite loop in the LAPACK testsuite.
- Make the variable type used for hidden length arguments
configurable.
+ Fixed `SYTRD` workspace computation and various typos.
+ Prevent compiler use of FMA that could increase numerical
error in `?GEEVX`.
* x86-64:
+ Fixed a potential thread buffer overrun in `SBSTOBF16` on small
systems.
+ Fixed an accuracy issue in `ZSCAL` introduced in 0.3.26.
+ Added support for Intel Emerald Rapids and Meteor Lake CPUs.
+ Added autodetection support for the Zhaoxin KX-7000 CPU.
+ Fixed autodetection of Intel Prescott (probably broken
since 0.3.19).
+ Fixed compilation of the converter-generated C versions
of the LAPACK sources with gcc-14.
+ Added support for supplying the L2 cache size via an
environment variable (`OPENBLAS_L2_SIZE`) in case it is not
correctly reported (as in some VM configurations).
+ Improved the error message shown when thread creation fails
on startup.
* arm64:
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
1xN or Mx1 matrix to the corresponding `GEMV` kernel.
+ Added optimized `SGEMV` and `DGEMV` kernels for A64FX.
+ Added optimized SVE kernels for small-matrix `GEMM`.
+ Added A64FX to the CPU list for DYNAMIC_ARCH.
+ Fixed building with support for CPU affinity.
+ Worked around accuracy problems with `C/ZNRM2` on NeoverseN1
targets.
+ Improved GEMM performance on Neoverse V1.
+ Fixed compilation for `NEOVERSEN2` with older compilers.
+ Fixed potential miscompilation of the SVE `SDOT` and `DDOT`
kernels.
+ Fixed potential miscompilation of the non-SVE `CDOT` and
`ZDOT` kernels.
+ Fixed a potential overflow when using very large user-defined
`BUFFERSIZE`.
* Power:
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a 1xN
or Mx1 matrix to the corresponding `GEMV` kernel.
+ Significantly improved performance of `SBGEMM` on POWER10.
+ Fixed compilation with OpenMP and the XLF compiler.
+ Fixed building of parts of the LAPACK testsuite with XLF.
+ Fixed CSWAP/ZSWAP on big-endian POWER10 targets.
+ Fixed a performance regression in SAXPY on POWER10 with OpenXL.
+ Fixed a potential overflow when using very large user-defined
`BUFFERSIZE`.
+ Fixed an accuracy issue in the POWER6 kernels for `GEMM` and
`GEMV`.
* RISCV64:
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
1xN or Mx1 matrix to the corresponding GEMV kernel.
+ Added `DYNAMIC_ARCH` support (comprising `GENERIC_RISCV64` and
the two RVV 1.0 targets with vector length of 128 and 256).
+ Worked around the `ZVL128B` kernels for `AXPBY` mishandling the
special case of zero Y increment.
- Obsoleted: no-static.patch.
-------------------------------------------------------------------
Mon Jul 29 09:21:41 UTC 2024 - Egbert Eich <eich@suse.com>
- Duplicate all options passed to `make` also to `make install`:
The openblas build output suggests this: 'Note that any flags
passed to make during build should also be passed to make install
to circumvent any install errors'.
This also makes sure that the minimum CPU requirement set in
the pkgconfig file is the same one as used for building.
This helps to maintain a reproducible build (boo#1228177).
-------------------------------------------------------------------
Thu Jun 13 07:32:23 UTC 2024 - Andreas Schwab <schwab@suse.de>
- no-static.patch: do not link statically
-------------------------------------------------------------------
Sun Jun 9 07:07:51 UTC 2024 - Egbert Eich <eich@suse.com>
- Update to version 0.3.27 (boo#1225869):
General:
* Added initial (generic) support for the `CSKY` architecture.
* Capped the maximum number of threads used in `GEMM`, `GETRF`
and `POTRF` to avoid creating underutilized or idle threads.
* Sped up multithreaded `POTRF` on all platforms.
* Added extension `openblas_set_num_threads_local()` that returns
the previous thread count.
* Re-evaluated the `SGEMV` and `DGEMV` load thresholds to avoid
activating multithreading for too small workloads.
* Improved the fallback code used when the precompiled number of
threads is exceeded, and made it callable multiple times
during the lifetime of an instance.
* Added CBLAS interfaces for the BLAS extensions `?AMIN`,`?AMAX`,
`CAXPYC` and `ZAXPYC`.
* Fixed a potential buffer overflow in the interface to the
`GEMMT` kernels.
* Fixed use of incompatible pointer types in `GEMMT` and
`C`/`ZAXPBY` as flagged by GCC-14.
* Fixed unwanted case sensitivity of the character parameters in
`?TRTRS`.
* Sped up the OpenMP thread management code.
* Fixed sizing of logical variables in `INTERFACE64` builds of
the C version of LAPACK.
* Fixed inclusion of new LAPACK and LAPACKE functions from
LAPACK 3.11 in the shared library.
* Modified the error thresholds for `SGS`/`DGS` functions in
the LAPACK testsuite to suppress spurious errors.
* Added support for calling ?NRM2 with a negative increment value
on all architectures.
* Fixed handling of the `OPENBLAS_LOOPS` variable in several
benchmarks.
* Integrated fixes from the Reference-LAPACK project:
Increased accuracy in `C`/`ZLARFGP` (Reference-LAPACK PR 981).
x86:
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
x86-64:
* Removed all instances of `sched_yield()` on Linux and BSD.
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
* Added compiler checks for `AVX512BF16` compatibility.
* Fixed cpu handling fallbacks for Sapphire Rapids with disabled
AVX2 in `DYNAMIC_ARCH` mode.
* Fixed extensions `SCSUM` and `DZSUM`.
* Improved `GEMM` performance for ZEN targets.
arm64:
* Added initial support for the Cortex-A76 cpu.
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
* Fixed default compiler options for gcc (-march and -mtune).
* Added support for the NeoverseV2 cpu in `DYNAMIC_ARCH` builds.
* Corrected `SCSUM` kernels (erroneously duplicating `SCASUM`
behaviour).
* Added SVE-enabled kernels for `CSUM`/`ZSUM`.
* Worked around an inaccuracy in the `NRM2` kernels for NeoverseN1.
power:
* Improved performance of `SGEMM` on POWER8/9/10.
* Improved performance of `DGEMM` on POWER10.
zarch:
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
* Fixed calculation of `?SUM` on Z13.
- LIBNAMESUFFIX semantics have changed: no separator will be added.
Adjusted in spec file.
-------------------------------------------------------------------
Thu Feb 15 08:27:33 UTC 2024 - Egbert Eich <eich@suse.com>
- Remove DYNAMIC_LIST for aarch64 for older gcc versions: This has
been fixed upstream.
-------------------------------------------------------------------
Wed Jan 17 08:47:55 UTC 2024 - Egbert Eich <eich@suse.com>
- Update to version 0.3.26:
* General:
- Added type declarations for complex variables to the
MSVC-specific parts of the LAPACK header.
- Significantly sped up `?GESV` for small problem sizes by
introducing a lower bound for multithreading.
- Imported additions and corrections from the Reference-LAPACK
project:
+ Added new LAPACK functions for truncated `QR` with pivoting
(Reference-LAPACK PRs 891&941).
+ Handle miscalculation of minimum work array size in corner
cases (Reference-LAPACK PR 942).
+ Fixed use of uninitialized variables in `?GEDMD` and
improved inline documentation.
+ Fixed use of uninitialized variables (and consequential
failures) in `?BBCSD`.
+ Added tests for the recently introduced Dynamic Mode
Decomposition functions.
+ Fixed several memory leaks in the LAPACK testsuite.
* x86-64:
- Fixed computation of `CASUM` on SkylakeX and newer targets in
the special case that AVX512 is not supported by the compiler
or operating environment.
- Fixed potential undefined behaviour in the `CASUM`/`ZASUM`
kernels for AVX512 targets.
- worked around a problem in the pre-AVX kernels for `GEMV`
* arm64:
- Sped up `SGEMM` and `DGEMM` on Neoverse V1 and N1.
- Sped up `?DOT` on SVE-capable targets.
- Reduced the number of targets in `DYNAMIC_ARCH` builds by
eliminating functionally equivalent ones.
* POWER:
- Improved the SGEMM kernel for POWER10.
- Fixed compilation with (very) old versions of gcc.
- Added autodetection of the POWERPC 7400 subtype.
-------------------------------------------------------------------
Wed Dec 20 12:02:55 UTC 2023 - Giacomo Comes <gcomes.obs@gmail.com>
@@ -360,188 +19,44 @@ Wed Nov 29 05:43:18 UTC 2023 - Atri Bhattacharya <badshah400@gmail.com>
thread count
- improved the code to add supplementary thread buffers in
case of overflow
- fixed a potential division by zero in `?ROTG`
- improved the `?MATCOPY` functions to accept zero-sized rows or
- fixed a potential division by zero in ?ROTG
- improved the ?MATCOPY functions to accept zero-sized rows or
columns
- corrected empty prototypes in function declarations
- cleaned up unused declarations in the f2c-converted versions
of the LAPACK sources
- fixed compilation with the Cray CCE Compiler suite
- improved link line rewriting to avoid mixed libgomp/libomp
builds with clang&gfortran
- worked around OPENMP builds with LLVM14's libomp hanging on
FreeBSD
- improved the Makefiles to require less option duplication on
"make install"
- imported the following changes from the upcoming release
3.12 of Reference-LAPACK: LAPACK PR 900, LAPACK PR 904,
LAPACK PR 907, LAPACK PR 909, LAPACK PR 926, LAPACK PR 927,
LAPACK PR 928 & 930
* x86-64:
- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4
cpus
- fixed capability-based fallback selection for unknown cpus
in `DYNAMIC_ARCH`
- added AVX512 optimizations for `?ASUM` on Intel Sapphire Rapids and
in DYNAMIC_ARCH
- added AVX512 optimizations for ?ASUM on Sapphire Rapids and
Cooper Lake
* ARM64:
- fixed building on Apple with homebrew gcc
- fixed building with XCODE 15
- fixed building on A64FX and Cortex A710/X1/X2
- increased the default buffer size for recent arm server cpus
- increased the default buffer size for recent ARM server cpus
* POWER:
- added support for `DYNAMIC_ARCH` builds with clang
- fixed union declaration in the `BFLOAT16` test case
- Changes in version 0.3.24
* General:
- Declared the arguments of `cblas_xerbla` as `const`
(in accordance with the reference implementation
and others, the previous discrepancy appears to have dated
back to GotoBLAS)
- fixed the implementation of `?GEMMT` that was added in 0.3.23
- made cpu-specific `SWITCH_RATIO` parameters for GEMM
available to `DYNAMIC_ARCH` builds
- fixed missing `SSYCONVF` function in the shared library
- fixed parallel build logic used with gmake
- fixed several issues with the handling of runtime limits on
the number of OPENMP threads
- corrected the error code returned by `SGEADD`/`DGEADD` when
LDA is too small
- corrected the error code returned by `IMATCOPY` when LDB
is too small
- updated `?NRM2` to support negative increment values (as
introduced in release 3.10.0 of the Reference BLAS)
- updated `?ROTG` to use the safe scaling algorithm introduced
in release 3.10.0 of the Reference BLAS
- fixed OpenMP builds with CLANG for the case where libomp is
not in a standard location
- fixed a potential overwrite of unrelated memory during
thread initialisation on startup
- fixed a potential integer overflow in the multithreading
threshold for `?SYMM`/`?SYRK`
- fixed build of the LAPACKE interfaces for the LAPACK 3.11.0
`?TRSYL` functions added in 0.3.22
- applied additions and corrections from the development
branch of Reference-LAPACK:
- fixed actual arguments passed to a number of LAPACK
functions (from Reference-LAPACK PR 885)
- fixed workspace query results in LAPACK `?SYTRF`/`?TRECV3`
(from Reference-LAPACK PR 883)
- fixed derivation of the UPLO parameter in `LAPACKE_?larfb`
(from Reference-LAPACK PR 878)
- fixed a crash in LAPACK `?GELSDD` on `NRHS=0` (from
Reference-LAPACK PR 876)
- added new LAPACK utility functions `CRSCL` and `ZRSCL`
(from Reference-LAPACK PR 839)
- corrected the order of eigenvalues for 2x2 matrices in
`?STEMR` (Reference-LAPACK PR 867)
- removed spurious reference to OpenMP variables outside
OpenMP contexts (Reference-LAPACK PR 860)
- updated file comments on use of `LAMBDA` variable in
LAPACK (Reference-LAPACK PR 852)
- fixed documentation of LAPACK `SLASD0`/`DLASD0`
(Reference-LAPACK PR 855)
- fixed confusing use of "minor" in LAPACK documentation
(Reference-LAPACK PR 849)
- added new LAPACK functions ?GEDMD for dynamic mode
decomposition (Reference-LAPACK PR 736)
- fixed potential stack overflows in the `EIG` part of the
LAPACK testsuite (Reference-LAPACK PR 854)
- applied small improvements to the variants of
Cholesky and QR functions (Reference-LAPACK PR 847)
- removed unused variables from LAPACK `?BDSQR`
(Reference-LAPACK PR 832)
- fixed a potential crash on allocation failure in LAPACKE
`SGEESX`/`DGEESX` (Reference-LAPACK PR 836)
- added a quick return from `SLARUV`/`DLARUV` for N < 1
(Reference-LAPACK PR 837)
- updated function descriptions in LAPACK `?GEGS`/`?GEGV`
(Reference-LAPACK PR 831)
- improved algorithm description in `?GELSY`
(Reference-LAPACK PR 833)
- fixed scaling in LAPACK `STGSNA`/`DTGSNA`
(Reference-LAPACK PR 830)
- fixed crash in `LAPACKE_?geqrt` with row-major data
(Reference-LAPACK PR 768)
- added LAPACKE interfaces for `C/ZUNHR_COL` and
`S/DORHR_COL` (Reference-LAPACK PR 827)
- added error exit tests for `SYSV`/`SYTD2`/`GEHD2` to
the testsuite (Reference-LAPACK PR 795)
- fixed typos in LAPACK source and comments
(Reference-LAPACK PRs 809,811,812,814,820)
- adopt refactored `?GEBAL` implementation
(Reference-LAPACK PR 808)
* x86_64:
- added cpu model autodetection for Intel Alder Lake N
- added activation of the AMX tile to the Sapphire Rapids
`SBGEMM` kernel
- worked around miscompilations of GEMV/SYMV kernels by
gcc's tree-vectorizer
- fixed runtime detection of Cooperlake and Sapphire Rapids
in `DYNAMIC_ARCH`
- fixed feature-based cputype fallback in `DYNAMIC_ARCH`
- corrected `ZAXPY` result on old pre-AVX hardware for the
`INCX=0` case
- fixed a potential use of uninitialized variables in ZTRSM
* ARMV8:
- implemented SWITCH_RATIO parameter for improved GEMM
performance on Neoverse
- activated SVE SGEMM and DGEMM kernels for Neoverse V1
- improved performance of the SVE CGEMM and ZGEMM kernels
on Neoverse V1
- improved kernel selection for the ARMV8SVE target and added
it to `DYNAMIC_ARCH`
- fixed runtime check for SVE availability in `DYNAMIC_ARCH`
builds to take OS or container restrictions into account
- fixed a potential use of uninitialized variables in ZTRSM
* POWER:
- fixed compiler warnings in the POWER10 SBGEMM kernel
- Changes in version 0.3.23
* General:
- fixed a serious regression in `GETRF`/`GETF2` and
`ZGETRF`/`ZGETF2` where subnormal but nonzero data elements
triggered the singularity flag
- fixed a long-standing bug in `CSPR`/`ZSPR` in single-threaded
operation for cases where elements of the X vector are real
numbers (or complex with only the real part zero)
* x86_64:
- added further CPUID values for Intel Raptor Lake
- Changes in version 0.3.22
* General:
- Updated the included LAPACK to Reference-LAPACK release 3.11.0
plus post-release corrections and improvements
- Added a threshold for multithreading in `SYMM`, `SYMV` and
`SYR2K`
- Increased the threshold for multithreading in `SYRK`
- OpenBLAS no longer decreases the global `OMP_NUM_THREADS`
when it exceeds the maximum thread count the library was
compiled for.
- fixed `?GETF2` potentially returning `NaN` with tiny matrix
elements
- fixed `openblas_set_num_threads` to work in `USE_OPENMP`
builds.
- fixed cpu core counting in `USE_OPENMP` builds returning the
number of OMP "places" rather than cores
- fixed stride calculation in the optimized small-matrix path of
complex `SYR`
- fixed building of Reference-LAPACK with recent gfortran
- added new environment variable `OPENBLAS_DEFAULT_NUM_THREADS`
- added a GEMV-based implementation of `GEMMT`
* x86_64:
- added autodetection of Intel Raptor Lake cpu models
- added SSCAL microkernels for Haswell and newer targets
- improved the performance of the Haswell DSCAL microkernel
- added CSCAL and ZSCAL microkernels for SkylakeX targets
- fixed detection of gfortran and Cray CCE compilers
- fixed runtime selection of COOPERLAKE in `DYNAMIC_ARCH` builds
- worked around gcc/llvm using risky FMA operations in
CSCAL/ZSCAL
* ARMV8:
- fixed cross-compilation to CortexA53 with CMAKE
- fixed compilation with CMAKE and "Arm Compiler for Linux 22.1"
- added cpu autodetection for Cortex X3 and A715
- fixed conditional compilation of SVE-capable targets in
`DYNAMIC_ARCH`
- sped up SVE kernels by removing unnecessary prefetches
- improved the GEMM performance of Neoverse V1
- added SVE kernels for SDOT and DDOT
- added an SBGEMM kernel for Neoverse N2
- improved cpu-specific compiler option selection for
Neoverse cpus
- added support for setting `CONSISTENT_FPCSR`
- fixed building with the IBM xlf 16.1.1 compiler
- fixed building with IBM XL C
- added support for DYNAMIC_ARCH builds with clang
- fixed union declaration in the BFLOAT16 test case
- enable optimizations for the AIX assembler on POWER10
* LOONGARCH64:
- added an optimized SGEMV kernel
- added an optimized DTRSM kernel
- Minor rebase of openblas-ppc64be_up2_p8.patch to apply cleanly.
- Drop upstreamed patches:
* Use-blasint-for-INTERFACE64-compatibility.patch

View File

@@ -1,7 +1,7 @@
#
# spec file for package openblas
# spec file
#
# Copyright (c) 2025 SUSE LLC
# Copyright (c) 2023 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -18,9 +18,8 @@
%global flavor @BUILD_FLAVOR@%{nil}
%undefine sha1
%define _vers 0_3_29
%define vers 0.3.29
%define _vers 0_3_25
%define vers 0.3.25
%define so_v 0
%define pname openblas
@@ -167,12 +166,6 @@ ExclusiveArch: do_not_build
%ifarch ppc64le
%if 0%{?c_f_ver} > 9
%else
%if 0%{?sle_version} == 150700
%define cc_v 14
%endif
%if 0%{?sle_version} == 150600
%define cc_v 13
%endif
%if 0%{?sle_version} == 150500
%define cc_v 12
%endif
@@ -198,7 +191,6 @@ ExclusiveArch: do_not_build
%define p_prefix %_prefix
%define p_includedir %_includedir/%pname
%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}}
%define p_testdir %_libexecdir/openblas%{?flavor:-%{flavor}}/tests
%define p_cmakedir %{p_libdir}/cmake/%{pname}
%define num_threads 64
@@ -215,19 +207,12 @@ ExclusiveArch: do_not_build
%define p_prefix %hpc_prefix
%define p_includedir %hpc_includedir
%define p_libdir %hpc_libdir
%define p_testdir %hpc_prefix/tests
%define p_cmakedir %{hpc_libdir}/cmake
%define num_threads 256
%{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}}
%endif
%if 0%{?sha1:1}
%define v_string %{sha1}
%else
%define v_string v%{version}
%endif
Name: %{package_name}
Version: %vers
Release: 0
@@ -235,15 +220,14 @@ Summary: An optimized BLAS library based on GotoBLAS2
License: BSD-3-Clause
Group: Productivity/Scientific/Math
URL: http://www.openblas.net
Source0: https://github.com/xianyi/OpenBLAS/archive/%{v_string}.tar.gz#/OpenBLAS-%{version}%{?sha1:_%{sha1}}.tar.gz
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz
Source1: README.SUSE
Source2: README.HPC.SUSE
Source3: openblas_tests.sh.in
Source4: openblas.rpmlintrc
Source3: openblas.rpmlintrc
Patch101: Link-library-with-z-noexecstack.patch
# PATCH port
Patch102: Handle-s390-correctly.patch
Patch103: openblas-ppc64be_up2_p8.patch
Patch104: Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
#BuildRequires: cmake
BuildRequires: memory-constraints
@@ -254,7 +238,7 @@ BuildRequires: gcc%{?cc_v}-fortran
BuildRequires: gcc-fortran
BuildRequires: update-alternatives
Requires(post): update-alternatives
Requires(preun): update-alternatives
Requires(preun):update-alternatives
%else
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
BuildRequires: lua-lmod
@@ -271,7 +255,7 @@ Group: System/Libraries
%if %{without hpc}
Requires(post): update-alternatives
Requires(post): coreutils
Requires(preun): update-alternatives
Requires(preun):update-alternatives
%if "%flavor" == "serial"
Obsoletes: lib%{pname}%{so_v} < %{version}
Provides: lib%{pname}%{so_v} = %{version}
@@ -342,30 +326,15 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
This package contains headers for OpenBLAS.
%package tests
Summary: Unit Tests for openblas library
Group: Development/Libraries/C and C++
%description tests
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
This package contains test binaries.
%prep
%setup -q -n OpenBLAS-%{?sha1:%{sha1}}%{!?sha1:%{version}}
%setup -q -n OpenBLAS-%{version}
%autopatch -p1
%ifarch s390
sed -i -e "s@m32@m31@" Makefile.system
%endif
sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \
test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile
grep -q .note.GNU-stack cpuid.S || echo '.section .note.GNU-stack,"",@progbits' >> cpuid.S
# Disable sgemmt and dgemmt tests on ppc64le when using gcc13
%if "%{?_arch}" == "ppc64le" && 0%{?gcc_version} == 13
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_sgemmt.o *@@' utest/Makefile
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_dgemmt.o *@@' utest/Makefile
%endif
%if %{without hpc}
cp %{SOURCE1} .
@@ -401,11 +370,6 @@ EOF
%define _lto_cflags %{nil}
%endif
%ifarch riscv64
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110812
%global _lto_cflags %{nil}
%endif
%if %{with hpc}
%hpc_debug
%hpc_setup_compiler
@@ -444,7 +408,7 @@ EOF
%ifarch ppc64
%global addopt -mvsx
%endif
%global addopt %{?addopt} -fno-strict-aliasing -Wa,--noexecstack -Wl,-z,noexecstack
%global addopt %{?addopt} -fno-strict-aliasing
# Make serial, threaded and OpenMP versions
@@ -457,7 +421,7 @@ EOF
# Do not use LIBNAMESUFFIX for new builds as it will not allow
# the different flavors to be plugin replacements of each other
%if 0%{?suse_version} <= 1500 && %{without hpc}
%define libnamesuffix LIBNAMESUFFIX=_%{flavor}
%define libnamesuffix LIBNAMESUFFIX=%flavor
%endif
make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
%{?openblas_opt} \
@@ -465,13 +429,12 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
NUM_THREADS=%{num_threads} V=1 \
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
OPENBLAS_BINARY_DIR=%{p_testdir} \
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
PREFIX=%{p_prefix} \
%{?dynamic_list} \
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}}
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} CEXTRALIB=""}}
%install
%if %{with hpc}
@@ -481,28 +444,13 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
# Install library and headers
# Pass NUM_THREADS again, as it is not propagated from the build step
# https://github.com/OpenMathLib/OpenBLAS/issues/4275
mkdir -p %{buildroot}/%{p_testdir}
%make_install install_tests %{?openblas_target} %{?build_flags} \
%{?openblas_opt} \
%make_install %{?build_flags} \
NUM_THREADS=%{num_threads} \
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
OPENBLAS_BINARY_DIR=%{p_testdir} \
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}} \
%{?libnamesuffix} \
PREFIX=%{p_prefix}
sed -e 's#@FLAVOR@#%{flavor}#' \
-e 's#@COMPILER@#%{?compiler_family:%compiler_family%{?hpc_gnu_dep_version:/%hpc_gnu_dep_version}}#' \
< %{S:3} > %{buildroot}/%{p_testdir}/openblas_tests.sh
chmod 0755 %{buildroot}/%{p_testdir}/openblas_tests.sh
for i in %{buildroot}/%{p_testdir}/*; do
case $i in
*.dat|*in*) chmod 0644 $i;;
*) chmod 0755 $i;;
esac
done
# Delete info about OBS host cpu
%ifarch %ix86 x86_64
@@ -694,11 +642,6 @@ fi
%{p_libdir}/pkgconfig
%endif
%files tests
%dir %{p_testdir}
%dir %{dirname:%{p_testdir}}
%{p_testdir}/*
%files devel-static
%{p_libdir}/libopenblas*.a

View File

@@ -1,92 +0,0 @@
#! /bin/bash
# Build flavor of the library under test. @FLAVOR@ is a placeholder that is
# expected to be replaced with the real flavor name when this template is
# installed; the value selects the test directory and threading mode below.
FLAVOR=@FLAVOR@
# Compiler identifier used to select the compiler module for the HPC
# flavors. @COMPILER@ is likewise a placeholder replaced at install time.
COMPILER=@COMPILER@
# Series 'test'
# Run the test binaries of the 'test' series found in ${dir}.
#
# Globals read: dir - directory containing the test binaries and .dat inputs.
# Output:       one "<name> failed" line on stdout per failing binary; the
#               matching *.SUMM file (looked up in the current working
#               directory) is printed when it contains the word FATAL.
# Returns:      always 0; failures are reported, never propagated.
series_test() {
    # *blat1 binaries: run without an input file, no summary file is checked.
    "${dir}/sblat1" || echo "sblat1 failed"
    "${dir}/dblat1" || echo "dblat1 failed"
    "${dir}/cblat1" || echo "cblat1 failed"
    "${dir}/zblat1" || echo "zblat1 failed"

    # *blat2 binaries: fed their .dat file on stdin, then the summary file is
    # shown only if it records a FATAL error.
    "${dir}/sblat2" < "${dir}/sblat2.dat" || echo "sblat2 failed"
    grep -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || true
    "${dir}/dblat2" < "${dir}/dblat2.dat" || echo "dblat2 failed"
    grep -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || true
    "${dir}/cblat2" < "${dir}/cblat2.dat" || echo "cblat2 failed"
    grep -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || true
    "${dir}/zblat2" < "${dir}/zblat2.dat" || echo "zblat2 failed"
    grep -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || true

    # test_sbgemm takes no input; its stdout is captured as the summary file.
    "${dir}/test_sbgemm" > SBBLAT3.SUMM || echo "test_sbgemm failed"
    grep -q FATAL SBBLAT3.SUMM && cat SBBLAT3.SUMM || true

    # *blat3 binaries, same scheme as *blat2.
    "${dir}/dblat3" < "${dir}/dblat3.dat" || echo "dblat3 failed"
    grep -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || true
    "${dir}/cblat3" < "${dir}/cblat3.dat" || echo "cblat3 failed"
    grep -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || true
    # The *_3m variants are optional: run them only when they are installed.
    if [ -x "${dir}/cblat3_3m" ]; then
        "${dir}/cblat3_3m" < "${dir}/cblat3_3m.dat" || echo "cblat3_3m failed"
        grep -q FATAL CBLAT3_3M.SUMM && cat CBLAT3_3M.SUMM || true
    fi
    "${dir}/zblat3" < "${dir}/zblat3.dat" || echo "zblat3 failed"
    grep -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || true
    if [ -x "${dir}/zblat3_3m" ]; then
        "${dir}/zblat3_3m" < "${dir}/zblat3_3m.dat" || echo "zblat3_3m failed"
        grep -q FATAL ZBLAT3_3M.SUMM && cat ZBLAT3_3M.SUMM || true
    fi
}
# Series 'ctest'
# Run the test binaries of the 'ctest' series found in ${dir}.
#
# Globals read: dir - directory containing the x*cblat* binaries and their
#               input files (sin2, din2, ... zin3_3m).
# Output:       one "<binary> failed" line on stdout per failing binary.
# Returns:      always 0; failures are reported, never propagated.
series_ctest() {
    # x*cblat1 binaries run without an input file.
    "${dir}/xscblat1" || echo "xscblat1 failed"
    "${dir}/xdcblat1" || echo "xdcblat1 failed"
    "${dir}/xccblat1" || echo "xccblat1 failed"
    "${dir}/xzcblat1" || echo "xzcblat1 failed"

    # x*cblat2 binaries read their parameters from the matching ?in2 file.
    "${dir}/xscblat2" < "${dir}/sin2" || echo "xscblat2 failed"
    "${dir}/xdcblat2" < "${dir}/din2" || echo "xdcblat2 failed"
    "${dir}/xccblat2" < "${dir}/cin2" || echo "xccblat2 failed"
    "${dir}/xzcblat2" < "${dir}/zin2" || echo "xzcblat2 failed"

    # x*cblat3 binaries read their parameters from the matching ?in3 file.
    "${dir}/xscblat3" < "${dir}/sin3" || echo "xscblat3 failed"
    "${dir}/xdcblat3" < "${dir}/din3" || echo "xdcblat3 failed"
    "${dir}/xccblat3" < "${dir}/cin3" || echo "xccblat3 failed"
    "${dir}/xzcblat3" < "${dir}/zin3" || echo "xzcblat3 failed"

    # The *_3m variants are optional: run them only when they are installed.
    if [ -x "${dir}/xccblat3_3m" ]; then
        "${dir}/xccblat3_3m" < "${dir}/cin3_3m" || echo "xccblat3_3m failed"
    fi
    if [ -x "${dir}/xzcblat3_3m" ]; then
        "${dir}/xzcblat3_3m" < "${dir}/zin3_3m" || echo "xzcblat3_3m failed"
    fi
}
# Series 'utest'
# Run the 'utest' series: each unit-test binary in ${dir} is executed in
# turn and a "<name> failed" line is printed for every one that exits
# with a non-zero status.
series_utest() {
    local unit_test
    for unit_test in openblas_utest openblas_utest_ext; do
        ${dir}/${unit_test} || echo "${unit_test} failed"
    done
}
# ---------------------------------------------------------------------------
# Driver: pick the test directory and threading mode for $FLAVOR, then run
# the three test series.
# ---------------------------------------------------------------------------

# Default location of the test binaries; the HPC flavors override it below.
dir=/usr/lib/openblas-${FLAVOR}/tests

# THREADS and OMP are executed as commands (`if $OMP; ...`), so they must
# always hold `true` or `false`. Default both to `false` so that a flavor not
# listed below does not expand to an empty command, which would count as
# success and wrongly enable the multi-threaded runs.
export THREADS=false
export OMP=false

case $FLAVOR in
    serial)
        export THREADS=false; export OMP=false
        ;;
    pthreads)
        export THREADS=true; export OMP=false
        ;;
    openmp)
        export THREADS=false; export OMP=true
        ;;
    gnu-hpc)
        module purge
        module load gnu/$COMPILER openblas
        dir=$OPENBLAS_DIR/bin
        export THREADS=false; export OMP=true
        ;;
    gnu-hpc-pthreads)
        module purge
        module load gnu/$COMPILER openblas
        # NOTE(review): mirrors the gnu-hpc branch; confirm that the pthreads
        # HPC build is provided by the same module name and that its test
        # binaries are found under $OPENBLAS_DIR/bin.
        dir=$OPENBLAS_DIR/bin
        export THREADS=true; export OMP=false
        ;;
esac

# First pass of the 'test' series: single-threaded.
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 series_test

# Threaded flavors get a second pass of the 'test' series with two threads.
if $OMP || $THREADS; then
    # Drop the summary files of the first pass so stale FATAL markers are
    # not reported again.
    rm -f ?BLAT2.SUMM ?BLAT3.SUMM ?BLAT3_3M.SUMM
    if $OMP; then
        OMP_NUM_THREADS=2 series_test
    else
        OPENBLAS_NUM_THREADS=2 series_test
    fi
fi

# The 'ctest' series always runs with a thread count of two, using the
# variable that matches the threading model.
if $OMP; then
    OMP_NUM_THREADS=2 series_ctest
else
    OPENBLAS_NUM_THREADS=2 series_ctest
fi

series_utest