Compare commits
1 Commits
Author | SHA256 | Date | |
---|---|---|---|
db5501e9ac |
24
Link-library-with-z-noexecstack.patch
Normal file
24
Link-library-with-z-noexecstack.patch
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
From: Egbert Eich <eich@suse.com>
|
||||||
|
Date: Wed Nov 30 20:16:21 2022 +0100
|
||||||
|
Subject: Link library with -z,noexecstack
|
||||||
|
Patch-mainline: Not yet
|
||||||
|
Git-commit: adddc0eadc81bcd29c48594793cb33eac0edb572
|
||||||
|
References:
|
||||||
|
|
||||||
|
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||||
|
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||||
|
---
|
||||||
|
exports/Makefile | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
Index: OpenBLAS-0.3.25/exports/Makefile
|
||||||
|
===================================================================
|
||||||
|
--- OpenBLAS-0.3.25.orig/exports/Makefile
|
||||||
|
+++ OpenBLAS-0.3.25/exports/Makefile
|
||||||
|
@@ -193,6 +193,7 @@ else ifeq ($(F_COMPILER), FLANG)
|
||||||
|
else
|
||||||
|
ifneq ($(C_COMPILER), LSB)
|
||||||
|
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||||
|
+ -Wl,-z,noexecstack \
|
||||||
|
-Wl,--whole-archive $< -Wl,--no-whole-archive \
|
||||||
|
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
|
||||||
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
BIN
OpenBLAS-0.3.25.tar.gz
(Stored with Git LFS)
Normal file
BIN
OpenBLAS-0.3.25.tar.gz
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
OpenBLAS-0.3.29.tar.gz
(Stored with Git LFS)
BIN
OpenBLAS-0.3.29.tar.gz
(Stored with Git LFS)
Binary file not shown.
@@ -1,139 +0,0 @@
|
|||||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
||||||
Date: Wed Feb 12 09:04:22 2025 +0100
|
|
||||||
Subject: Restore the non-vectorized code from before PR4880 for POWER8
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-repo: https://github.com/xianyi/OpenBLAS
|
|
||||||
Git-commit: 98b5ef929cfc98f2f3c236966830276c255118d2
|
|
||||||
References: bsc#1239134
|
|
||||||
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
|
||||||
---
|
|
||||||
kernel/power/sgemv_t.c | 23 +++++++++++++++++++----
|
|
||||||
1 file changed, 19 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c
|
|
||||||
index e133c815c..ed0a24230 100644
|
|
||||||
--- a/kernel/power/sgemv_t.c
|
|
||||||
+++ b/kernel/power/sgemv_t.c
|
|
||||||
@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
temp7 += v_x[i] * va7[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
-
|
|
||||||
+ #if defined(POWER8)
|
|
||||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
||||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
||||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
||||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
||||||
+
|
|
||||||
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
|
||||||
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
|
||||||
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
|
||||||
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
|
||||||
+ #else
|
|
||||||
register __vector float t0, t1, t2, t3;
|
|
||||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
||||||
__vector float *v_y = (__vector float*) y;
|
|
||||||
@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
|
|
||||||
v_y[0] += a * temp0;
|
|
||||||
v_y[1] += a * temp4;
|
|
||||||
-
|
|
||||||
+#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
temp2 += v_x[i] * va2[i];
|
|
||||||
temp3 += v_x[i] * va3[i];
|
|
||||||
}
|
|
||||||
-
|
|
||||||
+ #if defined(POWER8)
|
|
||||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
||||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
||||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
||||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
||||||
+ #else
|
|
||||||
register __vector float t0, t1, t2, t3;
|
|
||||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
||||||
__vector float *v_y = (__vector float*) y;
|
|
||||||
@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
temp0 += temp1 + temp2 + temp3;
|
|
||||||
|
|
||||||
v_y[0] += a * temp0;
|
|
||||||
-
|
|
||||||
+#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
||||||
Date: Wed Feb 12 09:07:20 2025 +0100
|
|
||||||
Subject: Restore the non-vectorized code from before PR4880 for POWER8
|
|
||||||
Patch-mainline: Not yet
|
|
||||||
Git-repo: https://github.com/xianyi/OpenBLAS
|
|
||||||
Git-commit: 81eed868b68c72ea1868663902f0904dc1b22326
|
|
||||||
References: bsc#1239134
|
|
||||||
|
|
||||||
|
|
||||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
|
||||||
---
|
|
||||||
kernel/power/sgemv_t_8.c | 24 ++++++++++++++++++++----
|
|
||||||
1 file changed, 20 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/kernel/power/sgemv_t_8.c b/kernel/power/sgemv_t_8.c
|
|
||||||
index f21f6eb7d..b30bb1137 100644
|
|
||||||
--- a/kernel/power/sgemv_t_8.c
|
|
||||||
+++ b/kernel/power/sgemv_t_8.c
|
|
||||||
@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
temp7 += vx1* va7_1 + vx2 * va7_2;
|
|
||||||
}
|
|
||||||
|
|
||||||
-
|
|
||||||
+ #if defined(POWER8)
|
|
||||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
||||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
||||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
||||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
||||||
+
|
|
||||||
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
|
||||||
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
|
||||||
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
|
||||||
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
|
||||||
+ #else
|
|
||||||
register __vector float t0, t1, t2, t3;
|
|
||||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
||||||
__vector float *v_y = (__vector float*) y;
|
|
||||||
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
|
|
||||||
v_y[0] += a * temp0;
|
|
||||||
v_y[1] += a * temp4;
|
|
||||||
-
|
|
||||||
+#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
|
|
||||||
temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1];
|
|
||||||
}
|
|
||||||
-
|
|
||||||
+
|
|
||||||
+ #if defined(POWER8)
|
|
||||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
||||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
||||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
||||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
||||||
+ #else
|
|
||||||
register __vector float t0, t1, t2, t3;
|
|
||||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
||||||
__vector float *v_y = (__vector float*) y;
|
|
||||||
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
||||||
temp0 += temp1 + temp2 + temp3;
|
|
||||||
|
|
||||||
v_y[0] += a * temp0;
|
|
||||||
-
|
|
||||||
+#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@@ -2,4 +2,6 @@
|
|||||||
<package>serial</package>
|
<package>serial</package>
|
||||||
<package>pthreads</package>
|
<package>pthreads</package>
|
||||||
<package>openmp</package>
|
<package>openmp</package>
|
||||||
|
<package>gnu-hpc</package>
|
||||||
|
<package>gnu-hpc-pthreads</package>
|
||||||
</multibuild>
|
</multibuild>
|
||||||
|
527
openblas.changes
527
openblas.changes
@@ -1,350 +1,9 @@
|
|||||||
-------------------------------------------------------------------
|
|
||||||
Mon Mar 17 08:51:26 UTC 2025 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Disable sgemmt and dgemmt tests in the test suite on power
|
|
||||||
when gcc-13 is used. It is known (bsc#1239134) that some
|
|
||||||
of these tests fail on this architecture when OpenBLAS
|
|
||||||
is being built with the said compiler version ever since
|
|
||||||
these tests were introduced.
|
|
||||||
This will essentially restore the situation of the
|
|
||||||
version prior to the addition of these tests (0.3.26) where
|
|
||||||
one was unaware of the problem.
|
|
||||||
This is only a temporary measure and will be removed once
|
|
||||||
the issue with gcc-13 has been resolved.
|
|
||||||
- Remove: Link-library-with-z-noexecstack.patch
|
|
||||||
since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
|
|
||||||
now.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Fri Mar 14 09:24:18 UTC 2025 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Use upstream patch for bsc#1239134 which is more friendly to the
|
|
||||||
non-affected power9 and power10 sub-architectures:
|
|
||||||
Replace:
|
|
||||||
Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
|
|
||||||
by:
|
|
||||||
Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Sat Mar 8 13:23:53 UTC 2025 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Revert commit ba47c7f4f301aad100ed166de338b86e01da8465 to
|
|
||||||
prevent failures on Power8 (bsc#1239134)
|
|
||||||
* Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
|
|
||||||
- Add a script to run tests.
|
|
||||||
- Add bisect support.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Wed Mar 5 15:47:13 UTC 2025 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Update to version 0.3.29 (jsc#PED-9676):
|
|
||||||
General:
|
|
||||||
* Fixed a potential NULL pointer dereference in multithreaded builds.
|
|
||||||
* Added function aliases for `GEMMT` using its new name `GEMMTR`
|
|
||||||
adopted by Reference-BLAS.
|
|
||||||
* Fixed the behavior of the recently added `CBLAS_?GEMMT` functions
|
|
||||||
with row-major data.
|
|
||||||
* Improved thread scaling of multithreaded `SBGEMV`.
|
|
||||||
* Improved thread scaling of multithreaded `TRTRI`.
|
|
||||||
* Fixed compilation of the CBLAS testsuite with gcc14 (and no
|
|
||||||
Fortran compiler).
|
|
||||||
* Fixed placement of the `-fopenmp` flag and libsuffix in the
|
|
||||||
generated pkgconfig file.
|
|
||||||
* Improved the `CMakeConfig` file generated by the Makefile build.
|
|
||||||
* Fixed const-correctness of `cblas_?geadd` in `cblas.h`.
|
|
||||||
* Fixed a potential inaccuracy in multithreaded BLAS3 calls.
|
|
||||||
* Fixed empty implementations of `get`/`set_affinity` that print a
|
|
||||||
warning in OpenMP builds.
|
|
||||||
* Fixed function signatures for TRTRS in the converted C version of
|
|
||||||
LAPACK.
|
|
||||||
* Fixed omission of several single-precision LAPACK symbols in the
|
|
||||||
shared library.
|
|
||||||
* Improved build instructions for the provided "pybench" benchmarks.
|
|
||||||
* Improved documentation, including descriptions of environment
|
|
||||||
variables that affect build and runtime behavior.
|
|
||||||
* Added a separate "make install_tests" target for use with
|
|
||||||
cross-compilations.
|
|
||||||
* Integrated improvements and corrections from Reference-LAPACK:
|
|
||||||
- removed a comparison in LAPACKE `?tpmqrt` that is always false.
|
|
||||||
- fixed the leading dimension for B in tests for GGEV.
|
|
||||||
- replaced the `?LARFT` functions with a recursive implementation.
|
|
||||||
arm64:
|
|
||||||
* Fixed a long-standing bug in the (generic) `c`/`zgemm_beta` kernel
|
|
||||||
that could lead to reads and writes outside the array bounds in some
|
|
||||||
circumstances.
|
|
||||||
* Rewrote cpu autodetection to scan all cores and return the highest
|
|
||||||
performing type.
|
|
||||||
* Improved the DGEMM performance for SVE targets and small matrix sizes.
|
|
||||||
* improved dimension criteria for forwarding from `GEMM` to `GEMV`
|
|
||||||
kernels.
|
|
||||||
* Added SVE kernels for `ROT` and `SWAP`.
|
|
||||||
* Improved SVE kernels for `SGEMV` and `DGEMV` on `A64FX` and
|
|
||||||
`NEOVERSEV1`.
|
|
||||||
* Fixed NRM2 implementations for generic SVE targets and the Neoverse N2.
|
|
||||||
x86_64:
|
|
||||||
* Fixed a wrong storage size in the SBGEMV kernel for Cooper Lake.
|
|
||||||
* Added cpu autodetection for Intel Granite Rapids.
|
|
||||||
* Added cpu autodetection for AMD Ryzen 5 series.
|
|
||||||
* Added optimized `SOMATCOPY_CT` for AVX-capable targets.
|
|
||||||
* fixed the fallback implementation of `GEMM3M` in GENERIC builds.
|
|
||||||
Power:
|
|
||||||
* Fixed multithreaded `SBGEMM`.
|
|
||||||
* Fixed a CMake build problem on POWER10.
|
|
||||||
* Improved the performance of SGEMV.
|
|
||||||
* Added vectorized implementations of `SBGEMV` and support for
|
|
||||||
forwarding 1xN `SBGEMM` to them.
|
|
||||||
* Fixed illegal instructions and potential memory overflow in SGEMM
|
|
||||||
on PPCG4.
|
|
||||||
* Fixed handling of NaN and Inf arguments in `SSCAL` and `DSCAL` on
|
|
||||||
PPC440,G4 and 970.
|
|
||||||
* Added improved `CGEMM` and `ZGEMM` kernels for POWER10.
|
|
||||||
Riscv64:
|
|
||||||
* Removed thread yielding overhead caused by `sched_yield`.
|
|
||||||
* Replaced some non-standard intrinsics with their official names.
|
|
||||||
* Fixed and sped up the implementations of `CGEMM`/`ZGEMM` `TCOPY`
|
|
||||||
for vector lengths 128 and 256.
|
|
||||||
* Improved the performance of `SNRM2`/`DNRM2` for RVV1.0 targets.
|
|
||||||
* Added optimized `?OMATCOPY_CN` kernels for RVV1.0 targets.
|
|
||||||
- Add test package.
|
|
||||||
- Add flags: `-Wa,--noexecstack -Wl,-z,noexecstack` to make sure
|
|
||||||
stack is not executable. This works around problems in assembler
|
|
||||||
code for z.
|
|
||||||
- Make stack of empty cpuid.S non-executable as well.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Wed Mar 5 14:17:26 UTC 2025 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Set gcc versions for ppc64le (bsc#1239702)
|
|
||||||
* on SLE-15-SP6: v13
|
|
||||||
* on SLE-15-SP7: v14
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Mon Feb 3 14:43:29 UTC 2025 - Andreas Schwab <schwab@suse.de>
|
|
||||||
|
|
||||||
- Disable LTO on riscv64 due to GCC#110812
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Thu Jan 2 15:15:51 UTC 2025 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Update to version 0.3.28 (jsc#PED-9676):
|
|
||||||
* General:
|
|
||||||
+ Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
|
|
||||||
for allocating huge memory pages as buffers on suitable systems.
|
|
||||||
+ Changed the unfinished implementation of `GEMM3M` for the generic
|
|
||||||
target on all architectures to at least forward to regular GEMM.
|
|
||||||
+ Improved multithreaded `GEMM` performance for large non-skinny
|
|
||||||
matrices.
|
|
||||||
+ Improved BLAS3 performance on larger multicore systems through
|
|
||||||
improved parallelism.
|
|
||||||
+ Improved performance of the initial memory allocation by reducing
|
|
||||||
locking overhead.
|
|
||||||
+ Improved performance of `GBMV` at small problem sizes by introducing
|
|
||||||
a size barrier for the switch to multithreading.
|
|
||||||
+ Added an implementation of the `CBLAS_GEMM_BATCH` extension.
|
|
||||||
+ Fixed corner cases involving the handling of NAN and INFINITY
|
|
||||||
arguments in `?SCAL` on all architectures.
|
|
||||||
+ Fixed NAN handling and potential accuracy issues in compilations
|
|
||||||
with Intel ICX by supplying a suitable fp-model option by default.
|
|
||||||
+ It is now possible to register a callback function that replaces
|
|
||||||
the built-in support for multithreading with an external backend
|
|
||||||
like TBB (`openblas_set_threads_callback_function`).
|
|
||||||
+ Fixed potential duplication of suffixes in shared library naming.
|
|
||||||
+ Improved C compiler detection by the build system to tolerate
|
|
||||||
more naming variants for gcc builds.
|
|
||||||
+ Fixed an unnecessary dependency of the utest on CBLAS.
|
|
||||||
+ Fixed spurious error reports from the BLAS extensions `utest`.
|
|
||||||
+ Fixed unwanted invocation of the `GEMM3M` tests in cross-
|
|
||||||
compilation.
|
|
||||||
+ Fixed a flaw in the makefile build that could lead to the
|
|
||||||
pkgconfig file containing an entry of `UNKNOWN` for the target
|
|
||||||
cpu after installing.
|
|
||||||
+ Integrated fixes from the Reference-LAPACK project:
|
|
||||||
- Fixed uninitialized variables in the LAPACK tests for `?QP3RK`.
|
|
||||||
- Fixed potential bounds error in `?UNHR_COL`/`?ORHR_COL`.
|
|
||||||
- Fixed potential infinite loop in the LAPACK testsuite.
|
|
||||||
- Make the variable type used for hidden length arguments
|
|
||||||
configurable.
|
|
||||||
+ Fixed `SYTRD` workspace computation and various typos.
|
|
||||||
+ Prevent compiler use of FMA that could increase numerical
|
|
||||||
error in `?GEEVX`.
|
|
||||||
* x86-64:
|
|
||||||
+ Fixed a potential thread buffer overrun in `SBSTOBF16` on small
|
|
||||||
systems.
|
|
||||||
+ Fixed an accuracy issue in `ZSCAL` introduced in 0.3.26.
|
|
||||||
+ Added support for Intel Emerald Rapids and Meteor Lake CPUs.
|
|
||||||
+ Added autodetection support for the Zhaoxin KX-7000 CPU.
|
|
||||||
+ Fixed autodetection of Intel Prescott (probably broken
|
|
||||||
since 0.3.19).
|
|
||||||
+ Fixed compilation of the converter-generated C versions
|
|
||||||
of the LAPACK sources with gcc-14.
|
|
||||||
+ Added support for supplying the L2 cache size via an
|
|
||||||
environment variable (`OPENBLAS_L2_SIZE`) in case it is not
|
|
||||||
correctly reported (as in some VM configurations).
|
|
||||||
+ Improved the error message shown when thread creation fails
|
|
||||||
on startup.
|
|
||||||
* arm64:
|
|
||||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
|
|
||||||
1xN or Mx1 matrix to the corresponding `GEMV` kernel.
|
|
||||||
+ Added optimized `SGEMV` and `DGEMV` kernels for A64FX.
|
|
||||||
+ Added optimized SVE kernels for small-matrix `GEMM`.
|
|
||||||
+ Added A64FX to the CPU list for DYNAMIC_ARCH.
|
|
||||||
+ Fixed building with support for CPU affinity.
|
|
||||||
+ Worked around accuracy problems with `C/ZNRM2` on NeoverseN1
|
|
||||||
targets.
|
|
||||||
+ Improved GEMM performance on Neoverse V1.
|
|
||||||
+ Fixed compilation for `NEOVERSEN2` with older compilers.
|
|
||||||
+ Fixed potential miscompilation of the SVE `SDOT` and `DDOT`
|
|
||||||
kernels.
|
|
||||||
+ Fixed potential miscompilation of the non-SVE `CDOT` and
|
|
||||||
`ZDOT` kernels.
|
|
||||||
+ Fixed a potential overflow when using very large user-defined
|
|
||||||
`BUFFERSIZE`.
|
|
||||||
* Power:
|
|
||||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a 1xN
|
|
||||||
or Mx1 matrix to the corresponding `GEMV` kernel.
|
|
||||||
+ Significantly improved performance of `SBGEMM` on POWER10.
|
|
||||||
+ Fixed compilation with OpenMP and the XLF compiler.
|
|
||||||
+ Fixed building of parts of the LAPACK testsuite with XLF.
|
|
||||||
+ Fixed CSWAP/ZSWAP on big-endian POWER10 targets.
|
|
||||||
+ Fixed a performance regression in SAXPY on POWER10 with OpenXL.
|
|
||||||
+ Fixed a potential overflow when using very large user-defined
|
|
||||||
`BUFFERSIZE`.
|
|
||||||
+ Fixed an accuracy issue in the POWER6 kernels for `GEMM` and
|
|
||||||
`GEMV`.
|
|
||||||
* RISCV64:
|
|
||||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
|
|
||||||
1xN or Mx1 matrix to the corresponding GEMV kernel.
|
|
||||||
+ Added `DYNAMIC_ARCH` support (comprising `GENERIC_RISCV64` and
|
|
||||||
the two RVV 1.0 targets with vector length of 128 and 256).
|
|
||||||
+ Worked around the `ZVL128B` kernels for `AXPBY` mishandling the
|
|
||||||
special case of zero Y increment.
|
|
||||||
- Obsoleted: no-static.patch.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Mon Jul 29 09:21:41 UTC 2024 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Duplicate all options passed to `make` also to `make install`:
|
|
||||||
The openblas build output suggests this: 'Note that any flags
|
|
||||||
passed to make during build should also be passed to make install
|
|
||||||
to circumvent any install errors'.
|
|
||||||
This also makes sure that the minimum CPU requirement set in
|
|
||||||
the pkgconfig file is the same one as used for building.
|
|
||||||
This helps to maintain a reproducible build (boo#1228177).
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Thu Jun 13 07:32:23 UTC 2024 - Andreas Schwab <schwab@suse.de>
|
|
||||||
|
|
||||||
- no-static.patch: do not link statically
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Sun Jun 9 07:07:51 UTC 2024 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Update to version 0.3.27 (boo#1225869):
|
|
||||||
General:
|
|
||||||
* Added initial (generic) support for the `CSKY` architecture.
|
|
||||||
* Capped the maximum number of threads used in `GEMM`, `GETRF`
|
|
||||||
and `POTRF` to avoid creating underutilized or idle threads.
|
|
||||||
* Sped up multithreaded `POTRF` on all platforms.
|
|
||||||
* Added extension `openblas_set_num_threads_local()` that returns
|
|
||||||
the previous thread count.
|
|
||||||
* Re-evaluated the `SGEMV` and `DGEMV` load thresholds to avoid
|
|
||||||
activating multithreading for too small workloads.
|
|
||||||
* Improved the fallback code used when the precompiled number of
|
|
||||||
threads is exceeded, and made it callable multiple times
|
|
||||||
during the lifetime of an instance.
|
|
||||||
* Added CBLAS interfaces for the BLAS extensions `?AMIN`,`?AMAX`,
|
|
||||||
`CAXPYC` and `ZAXPYC`.
|
|
||||||
* Fixed a potential buffer overflow in the interface to the
|
|
||||||
`GEMMT` kernels.
|
|
||||||
* Fixed use of incompatible pointer types in `GEMMT` and
|
|
||||||
`C`/`ZAXPBY` as flagged by GCC-14.
|
|
||||||
* Fixed unwanted case sensitivity of the character parameters in
|
|
||||||
`?TRTRS`.
* Sped up the OpenMP thread management code.
|
|
||||||
* Fixed sizing of logical variables in `INTERFACE64` builds of
|
|
||||||
the C version of LAPACK.
|
|
||||||
* Fixed inclusion of new LAPACK and LAPACKE functions from
|
|
||||||
LAPACK 3.11 in the shared library.
|
|
||||||
* Modified the error thresholds for `SGS`/`DGS` functions in
|
|
||||||
the LAPACK testsuite to suppress spurious errors.
|
|
||||||
* Added support for calling ?NRM2 with a negative increment value
|
|
||||||
on all architectures.
|
|
||||||
* Fixed handling of the `OPENBLAS_LOOPS` variable in several
|
|
||||||
benchmarks.
|
|
||||||
* Integrated fixes from the Reference-LAPACK project:
|
|
||||||
Increased accuracy in `C`/`ZLARFGP` (Reference-LAPACK PR 981).
|
|
||||||
x86:
|
|
||||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
|
||||||
x86-64:
|
|
||||||
* Removed all instances of `sched_yield()` on Linux and BSD.
|
|
||||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
|
||||||
* Added compiler checks for `AVX512BF16` compatibility.
|
|
||||||
* Fixed cpu handling fallbacks for Sapphire Rapids with disabled
|
|
||||||
AVX2 in `DYNAMIC_ARCH` mode.
|
|
||||||
* Fixed extensions `SCSUM` and `DZSUM`.
|
|
||||||
* Improved `GEMM` performance for ZEN targets.
|
|
||||||
arm64:
|
|
||||||
* Added initial support for the Cortex-A76 cpu.
|
|
||||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
|
||||||
* Fixed default compiler options for gcc (-march and -mtune).
|
|
||||||
* Added support for the NeoverseV2 cpu in `DYNAMIC_ARCH` builds.
|
|
||||||
* Corrected `SCSUM` kernels (erroneously duplicating `SCASUM`
|
|
||||||
behaviour).
|
|
||||||
* Added SVE-enabled kernels for `CSUM`/`ZSUM`.
|
|
||||||
* Worked around an inaccuracy in the `NRM2` kernels for NeoverseN1.
|
|
||||||
power:
|
|
||||||
* Improved performance of `SGEMM` on POWER8/9/10.
|
|
||||||
* Improved performance of `DGEMM` on POWER10.
|
|
||||||
zarch:
|
|
||||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
|
||||||
* Fixed calculation of `?SUM` on Z13.
|
|
||||||
- LIBNAMESUFFIX semantics have changed: no separator will be added.
|
|
||||||
Adjusted in spec file.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Thu Feb 15 08:27:33 UTC 2024 - Egbert Eich <eich@suse.com>
|
Thu Feb 15 08:27:33 UTC 2024 - Egbert Eich <eich@suse.com>
|
||||||
|
|
||||||
- Remove DYNAMIC_LIST for aarch64 for older gcc versions: This has
|
- Remove DYNAMIC_LIST for aarch64 for older gcc versions: This has
|
||||||
been fixed upstream.
|
been fixed upstream.
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
|
||||||
Wed Jan 17 08:47:55 UTC 2024 - Egbert Eich <eich@suse.com>
|
|
||||||
|
|
||||||
- Update to version 0.3.26:
|
|
||||||
* General:
|
|
||||||
- Added type declarations for complex variables to the
|
|
||||||
MSVC-specific parts of the LAPACK header.
|
|
||||||
- Significantly sped up `?GESV` for small problem sizes by
|
|
||||||
introducing a lower bound for multithreading.
|
|
||||||
- Imported additions and corrections from the Reference-LAPACK
|
|
||||||
project:
|
|
||||||
+ Added new LAPACK functions for truncated `QR` with pivoting
|
|
||||||
(Reference-LAPACK PRs 891&941).
|
|
||||||
+ Handle miscalculation of minimum work array size in corner
|
|
||||||
cases (Reference-LAPACK PR 942).
|
|
||||||
+ Fixed use of uninitialized variables in `?GEDMD` and
|
|
||||||
improved inline documentation.
|
|
||||||
+ Fixed use of uninitialized variables (and consequential
|
|
||||||
failures) in `?BBCSD`.
|
|
||||||
+ Added tests for the recently introduced Dynamic Mode
|
|
||||||
Decomposition functions.
|
|
||||||
+ Fixed several memory leaks in the LAPACK testsuite.
|
|
||||||
* x86-64:
|
|
||||||
- Fixed computation of `CASUM` on SkylakeX and newer targets in
|
|
||||||
the special case that AVX512 is not supported by the compiler
|
|
||||||
or operating environment.
|
|
||||||
- Fixed potential undefined behaviour in the `CASUM`/`ZASUM`
|
|
||||||
kernels for AVX512 targets.
|
|
||||||
- worked around a problem in the pre-AVX kernels for `GEMV`
|
|
||||||
* arm64:
|
|
||||||
- Sped up `SGEMM` and `DGEMM` on Neoverse V1 and N1.
|
|
||||||
- Sped up `?DOT` on SVE-capable targets.
|
|
||||||
- Reduced the number of targets in `DYNAMIC_ARCH` builds by
|
|
||||||
eliminating functionally equivalent ones.
|
|
||||||
* POWER:
|
|
||||||
- Improved the SGEMM kernel for POWER10.
|
|
||||||
- Fixed compilation with (very) old versions of gcc.
|
|
||||||
- Added autodetection of the POWERPC 7400 subtype.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Wed Dec 20 12:02:55 UTC 2023 - Giacomo Comes <gcomes.obs@gmail.com>
|
Wed Dec 20 12:02:55 UTC 2023 - Giacomo Comes <gcomes.obs@gmail.com>
|
||||||
|
|
||||||
@@ -360,188 +19,44 @@ Wed Nov 29 05:43:18 UTC 2023 - Atri Bhattacharya <badshah400@gmail.com>
|
|||||||
thread count
|
thread count
|
||||||
- improved the code to add supplementary thread buffers in
|
- improved the code to add supplementary thread buffers in
|
||||||
case of overflow
|
case of overflow
|
||||||
- fixed a potential division by zero in `?ROTG`
|
- fixed a potential division by zero in ?ROTG
|
||||||
- improved the `?MATCOPY` functions to accept zero-sized rows or
|
- improved the ?MATCOPY functions to accept zero-sized rows or
|
||||||
columns
|
columns
|
||||||
- corrected empty prototypes in function declarations
|
- corrected empty prototypes in function declarations
|
||||||
- cleaned up unused declarations in the f2c-converted versions
|
- cleaned up unused declarations in the f2c-converted versions
|
||||||
of the LAPACK sources
|
of the LAPACK sources
|
||||||
|
- fixed compilation with the Cray CCE Compiler suite
|
||||||
- improved link line rewriting to avoid mixed libgomp/libomp
|
- improved link line rewriting to avoid mixed libgomp/libomp
|
||||||
builds with clang&gfortran
|
builds with clang&gfortran
|
||||||
|
- worked around OPENMP builds with LLVM14's libomp hanging on
|
||||||
|
FreeBSD
|
||||||
|
- improved the Makefiles to require less option duplication on
|
||||||
|
"make install"
|
||||||
- imported the following changes from the upcoming release
|
- imported the following changes from the upcoming release
|
||||||
3.12 of Reference-LAPACK: LAPACK PR 900, LAPACK PR 904,
|
3.12 of Reference-LAPACK: LAPACK PR 900, LAPACK PR 904,
|
||||||
LAPACK PR 907, LAPACK PR 909, LAPACK PR 926, LAPACK PR 927,
|
LAPACK PR 907, LAPACK PR 909, LAPACK PR 926, LAPACK PR 927,
|
||||||
LAPACK PR 928 & 930
|
LAPACK PR 928 & 930
|
||||||
* x86-64:
|
* x86-64:
|
||||||
|
- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4
|
||||||
|
cpus
|
||||||
- fixed capability-based fallback selection for unknown cpus
|
- fixed capability-based fallback selection for unknown cpus
|
||||||
in `DYNAMIC_ARCH`
|
in DYNAMIC_ARCH
|
||||||
- added AVX512 optimizations for `?ASUM` on Intel Sapphire Rapids and
|
- added AVX512 optimizations for ?ASUM on Sapphire Rapids and
|
||||||
Cooper Lake
|
Cooper Lake
|
||||||
* ARM64:
|
* ARM64:
|
||||||
|
- fixed building on Apple with homebrew gcc
|
||||||
- fixed building with XCODE 15
|
- fixed building with XCODE 15
|
||||||
- fixed building on A64FX and Cortex A710/X1/X2
|
- fixed building on A64FX and Cortex A710/X1/X2
|
||||||
- increased the default buffer size for recent arm server cpus
|
- increased the default buffer size for recent ARM server cpus
|
||||||
* POWER:
|
* POWER:
|
||||||
- added support for `DYNAMIC_ARCH` builds with clang
|
- fixed building with the IBM xlf 16.1.1 compiler
|
||||||
- fixed union declaration in the `BFLOAT16` test case
|
- fixed building with IBM XL C
|
||||||
- Changes in version 0.3.24
|
- added support for DYNAMIC_ARCH builds with clang
|
||||||
* General:
|
- fixed union declaration in the BFLOAT16 test case
|
||||||
- Declared the arguments of `cblas_xerbla` as `const`
|
- enable optimizations for the AIX assembler on POWER10
|
||||||
(in accordance with the reference implementation
|
* LOONGARCH64:
|
||||||
and others, the previous discrepancy appears to have dated
|
- added an optimized SGEMV kernel
|
||||||
back to GotoBLAS)
|
- added an optimized DTRSM kernel
|
||||||
- fixed the implementation of `?GEMMT` that was added in 0.3.23
|
|
||||||
- made cpu-specific `SWITCH_RATIO` parameters for GEMM
|
|
||||||
available to `DYNAMIC_ARCH` builds
|
|
||||||
- fixed missing `SSYCONVF` function in the shared library
|
|
||||||
- fixed parallel build logic used with gmake
|
|
||||||
- fixed several issues with the handling of runtime limits on
|
|
||||||
the number of OPENMP threads
|
|
||||||
- corrected the error code returned by `SGEADD`/`DGEADD` when
|
|
||||||
LDA is too small
|
|
||||||
- corrected the error code returned by `IMATCOPY` when LDB
|
|
||||||
is too small
|
|
||||||
- updated `?NRM2` to support negative increment values (as
|
|
||||||
introduced in release 3.10.0 of the Reference BLAS)
|
|
||||||
- updated `?ROTG` to use the safe scaling algorithm introduced
|
|
||||||
in release 3.10.0 of the Reference BLAS
|
|
||||||
- fixed OpenMP builds with CLANG for the case where libomp is
|
|
||||||
not in a standard location
|
|
||||||
- fixed a potential overwrite of unrelated memory during
|
|
||||||
thread initialisation on startup
|
|
||||||
- fixed a potential integer overflow in the multithreading
|
|
||||||
threshold for `?SYMM`/`?SYRK`
|
|
||||||
- fixed build of the LAPACKE interfaces for the LAPACK 3.11.0
|
|
||||||
`?TRSYL` functions added in 0.3.22
|
|
||||||
- applied additions and corrections from the development
|
|
||||||
branch of Reference-LAPACK:
|
|
||||||
- fixed actual arguments passed to a number of LAPACK
|
|
||||||
functions (from Reference-LAPACK PR 885)
|
|
||||||
- fixed workspace query results in LAPACK `?SYTRF`/`?TRECV3`
|
|
||||||
(from Reference-LAPACK PR 883)
|
|
||||||
- fixed derivation of the UPLO parameter in `LAPACKE_?larfb`
|
|
||||||
(from Reference-LAPACK PR 878)
|
|
||||||
- fixed a crash in LAPACK `?GELSDD` on `NRHS=0` (from
|
|
||||||
Reference-LAPACK PR 876)
|
|
||||||
- added new LAPACK utility functions `CRSCL` and `ZRSCL`
|
|
||||||
(from Reference-LAPACK PR 839)
|
|
||||||
- corrected the order of eigenvalues for 2x2 matrices in
|
|
||||||
`?STEMR` (Reference-LAPACK PR 867)
|
|
||||||
- removed spurious reference to OpenMP variables outside
|
|
||||||
OpenMP contexts (Reference-LAPACK PR 860)
|
|
||||||
- updated file comments on use of `LAMBDA` variable in
|
|
||||||
LAPACK (Reference-LAPACK PR 852)
|
|
||||||
- fixed documentation of LAPACK `SLASD0`/`DLASD0`
|
|
||||||
(Reference-LAPACK PR 855)
|
|
||||||
- fixed confusing use of "minor" in LAPACK documentation
|
|
||||||
(Reference-LAPACK PR 849)
|
|
||||||
- added new LAPACK functions ?GEDMD for dynamic mode
|
|
||||||
decomposition (Reference-LAPACK PR 736)
|
|
||||||
- fixed potential stack overflows in the `EIG` part of the
|
|
||||||
LAPACK testsuite (Reference-LAPACK PR 854)
|
|
||||||
- applied small improvements to the variants of
|
|
||||||
Cholesky and QR functions (Reference-LAPACK PR 847)
|
|
||||||
- removed unused variables from LAPACK `?BDSQR`
|
|
||||||
(Reference-LAPACK PR 832)
|
|
||||||
- fixed a potential crash on allocation failure in LAPACKE
|
|
||||||
`SGEESX`/`DGEESX` (Reference-LAPACK PR 836)
|
|
||||||
- added a quick return from `SLARUV`/`DLARUV` for N < 1
|
|
||||||
(Reference-LAPACK PR 837)
|
|
||||||
- updated function descriptions in LAPACK `?GEGS`/`?GEGV`
|
|
||||||
(Reference-LAPACK PR 831)
|
|
||||||
- improved algorithm description in `?GELSY`
|
|
||||||
(Reference-LAPACK PR 833)
|
|
||||||
- fixed scaling in LAPACK `STGSNA`/`DTGSNA`
|
|
||||||
(Reference-LAPACK PR 830)
|
|
||||||
- fixed crash in `LAPACKE_?geqrt` with row-major data
|
|
||||||
(Reference-LAPACK PR 768)
|
|
||||||
- added LAPACKE interfaces for `C/ZUNHR_COL` and
|
|
||||||
`S/DORHR_COL` (Reference-LAPACK PR 827)
|
|
||||||
- added error exit tests for `SYSV`/`SYTD2`/`GEHD2` to
|
|
||||||
the testsuite (Reference-LAPACK PR 795)
|
|
||||||
- fixed typos in LAPACK source and comments
|
|
||||||
(Reference-LAPACK PRs 809,811,812,814,820)
|
|
||||||
- adopt refactored `?GEBAL` implementation
|
|
||||||
(Reference-LAPACK PR 808)
|
|
||||||
* x86_64:
|
|
||||||
- added cpu model autodetection for Intel Alder Lake N
|
|
||||||
- added activation of the AMX tile to the Sapphire Rapids
|
|
||||||
`SBGEMM` kernel
|
|
||||||
- worked around miscompilations of GEMV/SYMV kernels by
|
|
||||||
gcc's tree-vectorizer
|
|
||||||
- fixed runtime detection of Cooperlake and Sapphire Rapids
|
|
||||||
in `DYNAMIC_ARCH`
|
|
||||||
- fixed feature-based cputype fallback in `DYNAMIC_ARCH`
|
|
||||||
- corrected `ZAXPY` result on old pre-AVX hardware for the
|
|
||||||
`INCX=0` case
|
|
||||||
- fixed a potential use of uninitialized variables in ZTRSM
|
|
||||||
* ARMV8:
|
|
||||||
- implemented SWITCH_RATIO parameter for improved GEMM
|
|
||||||
performance on Neoverse
|
|
||||||
- activated SVE SGEMM and DGEMM kernels for Neoverse V1
|
|
||||||
- improved performance of the SVE CGEMM and ZGEMM kernels
|
|
||||||
on Neoverse V1
|
|
||||||
- improved kernel selection for the ARMV8SVE target and added
|
|
||||||
it to `DYNAMIC_ARCH`
|
|
||||||
- fixed runtime check for SVE availability in `DYNAMIC_ARCH`
|
|
||||||
builds to take OS or container restrictions into account
|
|
||||||
- fixed a potential use of uninitialized variables in ZTRSM
|
|
||||||
* POWER:
|
|
||||||
- fixed compiler warnings in the POWER10 SBGEMM kernel
|
|
||||||
- Changes in version 0.3.23
|
|
||||||
* General:
|
|
||||||
- fixed a serious regression in `GETRF`/`GETF2` and
|
|
||||||
`ZGETRF`/`ZGETF2` where subnormal but nonzero data elements
|
|
||||||
triggered the singularity flag
|
|
||||||
- fixed a long-standing bug in `CSPR`/`ZSPR` in single-threaded
|
|
||||||
operation
|
|
||||||
for cases where elements of the X vector are real numbers (or
|
|
||||||
complex with only the real part zero)
|
|
||||||
* x86_64:
|
|
||||||
- added further CPUID values for Intel Raptor Lake
|
|
||||||
- Changes in version 0.3.22
|
|
||||||
* General:
|
|
||||||
- Updated the included LAPACK to Reference-LAPACK release 3.11.0
|
|
||||||
plus post-release corrections and improvements
|
|
||||||
- Added a threshold for multithreading in `SYMM`, `SYMV` and
|
|
||||||
`SYR2K`
|
|
||||||
- Increased the threshold for multithreading in `SYRK`
|
|
||||||
- OpenBLAS no longer decreases the global `OMP_NUM_THREADS`
|
|
||||||
when it exceeds the maximum thread count the library was
|
|
||||||
compiled for.
|
|
||||||
- fixed `?GETF2` potentially returning `NaN` with tiny matrix
|
|
||||||
elements
|
|
||||||
- fixed `openblas_set_num_threads` to work in `USE_OPENMP`
|
|
||||||
builds.
|
|
||||||
- fixed cpu core counting in `USE_OPENMP` builds returning the
|
|
||||||
number of OMP "places" rather than cores
|
|
||||||
- fixed stride calculation in the optimized small-matrix path of
|
|
||||||
complex `SYR`
|
|
||||||
- fixed building of Reference-LAPACK with recent gfortran
|
|
||||||
- added new environment variable `OPENBLAS_DEFAULT_NUM_THREADS`
|
|
||||||
- added a GEMV-based implementation of `GEMMT`
|
|
||||||
* x86_64:
|
|
||||||
- added autodetection of Intel Raptor Lake cpu models
|
|
||||||
- added SSCAL microkernels for Haswell and newer targets
|
|
||||||
- improved the performance of the Haswell DSCAL microkernel
|
|
||||||
- added CSCAL and ZSCAL microkernels for SkylakeX targets
|
|
||||||
- fixed detection of gfortran and Cray CCE compilers
|
|
||||||
- fixed runtime selection of COOPERLAKE in `DYNAMIC_ARCH` builds
|
|
||||||
- worked around gcc/llvm using risky FMA operations in
|
|
||||||
CSCAL/ZSCAL
|
|
||||||
* ARMV8:
|
|
||||||
- fixed cross-compilation to CortexA53 with CMAKE
|
|
||||||
- fixed compilation with CMAKE and "Arm Compiler for Linux 22.1"
|
|
||||||
- added cpu autodetection for Cortex X3 and A715
|
|
||||||
- fixed conditional compilation of SVE-capable targets in
|
|
||||||
`DYNAMIC_ARCH`
|
|
||||||
- sped up SVE kernels by removing unnecessary prefetches
|
|
||||||
- improved the GEMM performance of Neoverse V1
|
|
||||||
- added SVE kernels for SDOT and DDOT
|
|
||||||
- added an SBGEMM kernel for Neoverse N2
|
|
||||||
- improved cpu-specific compiler option selection for
|
|
||||||
Neoverse cpus
|
|
||||||
- added support for setting `CONSISTENT_FPCSR`
|
|
||||||
- Minor rebase of openblas-ppc64be_up2_p8.patch to apply cleanly.
|
- Minor rebase of openblas-ppc64be_up2_p8.patch to apply cleanly.
|
||||||
- Drop upstreamed patches:
|
- Drop upstreamed patches:
|
||||||
* Use-blasint-for-INTERFACE64-compatibility.patch
|
* Use-blasint-for-INTERFACE64-compatibility.patch
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
#
|
#
|
||||||
# spec file for package openblas
|
# spec file
|
||||||
#
|
#
|
||||||
# Copyright (c) 2025 SUSE LLC
|
# Copyright (c) 2023 SUSE LLC
|
||||||
#
|
#
|
||||||
# All modifications and additions to the file contributed by third parties
|
# All modifications and additions to the file contributed by third parties
|
||||||
# remain the property of their copyright owners, unless otherwise agreed
|
# remain the property of their copyright owners, unless otherwise agreed
|
||||||
@@ -18,9 +18,8 @@
|
|||||||
|
|
||||||
%global flavor @BUILD_FLAVOR@%{nil}
|
%global flavor @BUILD_FLAVOR@%{nil}
|
||||||
|
|
||||||
%undefine sha1
|
%define _vers 0_3_25
|
||||||
%define _vers 0_3_29
|
%define vers 0.3.25
|
||||||
%define vers 0.3.29
|
|
||||||
%define so_v 0
|
%define so_v 0
|
||||||
%define pname openblas
|
%define pname openblas
|
||||||
|
|
||||||
@@ -167,12 +166,6 @@ ExclusiveArch: do_not_build
|
|||||||
%ifarch ppc64le
|
%ifarch ppc64le
|
||||||
%if 0%{?c_f_ver} > 9
|
%if 0%{?c_f_ver} > 9
|
||||||
%else
|
%else
|
||||||
%if 0%{?sle_version} == 150700
|
|
||||||
%define cc_v 14
|
|
||||||
%endif
|
|
||||||
%if 0%{?sle_version} == 150600
|
|
||||||
%define cc_v 13
|
|
||||||
%endif
|
|
||||||
%if 0%{?sle_version} == 150500
|
%if 0%{?sle_version} == 150500
|
||||||
%define cc_v 12
|
%define cc_v 12
|
||||||
%endif
|
%endif
|
||||||
@@ -198,7 +191,6 @@ ExclusiveArch: do_not_build
|
|||||||
%define p_prefix %_prefix
|
%define p_prefix %_prefix
|
||||||
%define p_includedir %_includedir/%pname
|
%define p_includedir %_includedir/%pname
|
||||||
%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}}
|
%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}}
|
||||||
%define p_testdir %_libexecdir/openblas%{?flavor:-%{flavor}}/tests
|
|
||||||
%define p_cmakedir %{p_libdir}/cmake/%{pname}
|
%define p_cmakedir %{p_libdir}/cmake/%{pname}
|
||||||
%define num_threads 64
|
%define num_threads 64
|
||||||
|
|
||||||
@@ -215,19 +207,12 @@ ExclusiveArch: do_not_build
|
|||||||
%define p_prefix %hpc_prefix
|
%define p_prefix %hpc_prefix
|
||||||
%define p_includedir %hpc_includedir
|
%define p_includedir %hpc_includedir
|
||||||
%define p_libdir %hpc_libdir
|
%define p_libdir %hpc_libdir
|
||||||
%define p_testdir %hpc_prefix/tests
|
|
||||||
%define p_cmakedir %{hpc_libdir}/cmake
|
%define p_cmakedir %{hpc_libdir}/cmake
|
||||||
%define num_threads 256
|
%define num_threads 256
|
||||||
|
|
||||||
%{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}}
|
%{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}}
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%if 0%{?sha1:1}
|
|
||||||
%define v_string %{sha1}
|
|
||||||
%else
|
|
||||||
%define v_string v%{version}
|
|
||||||
%endif
|
|
||||||
|
|
||||||
Name: %{package_name}
|
Name: %{package_name}
|
||||||
Version: %vers
|
Version: %vers
|
||||||
Release: 0
|
Release: 0
|
||||||
@@ -235,15 +220,14 @@ Summary: An optimized BLAS library based on GotoBLAS2
|
|||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
Group: Productivity/Scientific/Math
|
Group: Productivity/Scientific/Math
|
||||||
URL: http://www.openblas.net
|
URL: http://www.openblas.net
|
||||||
Source0: https://github.com/xianyi/OpenBLAS/archive/%{v_string}.tar.gz#/OpenBLAS-%{version}%{?sha1:_%{sha1}}.tar.gz
|
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz
|
||||||
Source1: README.SUSE
|
Source1: README.SUSE
|
||||||
Source2: README.HPC.SUSE
|
Source2: README.HPC.SUSE
|
||||||
Source3: openblas_tests.sh.in
|
Source3: openblas.rpmlintrc
|
||||||
Source4: openblas.rpmlintrc
|
Patch101: Link-library-with-z-noexecstack.patch
|
||||||
# PATCH port
|
# PATCH port
|
||||||
Patch102: Handle-s390-correctly.patch
|
Patch102: Handle-s390-correctly.patch
|
||||||
Patch103: openblas-ppc64be_up2_p8.patch
|
Patch103: openblas-ppc64be_up2_p8.patch
|
||||||
Patch104: Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
|
|
||||||
|
|
||||||
#BuildRequires: cmake
|
#BuildRequires: cmake
|
||||||
BuildRequires: memory-constraints
|
BuildRequires: memory-constraints
|
||||||
@@ -254,7 +238,7 @@ BuildRequires: gcc%{?cc_v}-fortran
|
|||||||
BuildRequires: gcc-fortran
|
BuildRequires: gcc-fortran
|
||||||
BuildRequires: update-alternatives
|
BuildRequires: update-alternatives
|
||||||
Requires(post): update-alternatives
|
Requires(post): update-alternatives
|
||||||
Requires(preun): update-alternatives
|
Requires(preun):update-alternatives
|
||||||
%else
|
%else
|
||||||
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
|
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
|
||||||
BuildRequires: lua-lmod
|
BuildRequires: lua-lmod
|
||||||
@@ -271,7 +255,7 @@ Group: System/Libraries
|
|||||||
%if %{without hpc}
|
%if %{without hpc}
|
||||||
Requires(post): update-alternatives
|
Requires(post): update-alternatives
|
||||||
Requires(post): coreutils
|
Requires(post): coreutils
|
||||||
Requires(preun): update-alternatives
|
Requires(preun):update-alternatives
|
||||||
%if "%flavor" == "serial"
|
%if "%flavor" == "serial"
|
||||||
Obsoletes: lib%{pname}%{so_v} < %{version}
|
Obsoletes: lib%{pname}%{so_v} < %{version}
|
||||||
Provides: lib%{pname}%{so_v} = %{version}
|
Provides: lib%{pname}%{so_v} = %{version}
|
||||||
@@ -342,30 +326,15 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
|||||||
|
|
||||||
This package contains headers for OpenBLAS.
|
This package contains headers for OpenBLAS.
|
||||||
|
|
||||||
%package tests
|
|
||||||
Summary: Unit Tests for openblas library
|
|
||||||
Group: Development/Libraries/C and C++
|
|
||||||
|
|
||||||
%description tests
|
|
||||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
|
||||||
|
|
||||||
This package contains test binaries.
|
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
|
|
||||||
%setup -q -n OpenBLAS-%{?sha1:%{sha1}}%{!?sha1:%{version}}
|
%setup -q -n OpenBLAS-%{version}
|
||||||
%autopatch -p1
|
%autopatch -p1
|
||||||
%ifarch s390
|
%ifarch s390
|
||||||
sed -i -e "s@m32@m31@" Makefile.system
|
sed -i -e "s@m32@m31@" Makefile.system
|
||||||
%endif
|
%endif
|
||||||
sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \
|
sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \
|
||||||
test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile
|
test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile
|
||||||
grep -q .note.GNU-stack cpuid.S || echo '.section .note.GNU-stack,"",@progbits' >> cpuid.S
|
|
||||||
# Disable sgemmt and dgemmt tests on ppc64le when using gcc13
|
|
||||||
%if "%{?_arch}" == "ppc64le" && 0%{?gcc_version} == 13
|
|
||||||
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_sgemmt.o *@@' utest/Makefile
|
|
||||||
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_dgemmt.o *@@' utest/Makefile
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%if %{without hpc}
|
%if %{without hpc}
|
||||||
cp %{SOURCE1} .
|
cp %{SOURCE1} .
|
||||||
@@ -401,11 +370,6 @@ EOF
|
|||||||
%define _lto_cflags %{nil}
|
%define _lto_cflags %{nil}
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifarch riscv64
|
|
||||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110812
|
|
||||||
%global _lto_cflags %{nil}
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%if %{with hpc}
|
%if %{with hpc}
|
||||||
%hpc_debug
|
%hpc_debug
|
||||||
%hpc_setup_compiler
|
%hpc_setup_compiler
|
||||||
@@ -444,7 +408,7 @@ EOF
|
|||||||
%ifarch ppc64
|
%ifarch ppc64
|
||||||
%global addopt -mvsx
|
%global addopt -mvsx
|
||||||
%endif
|
%endif
|
||||||
%global addopt %{?addopt} -fno-strict-aliasing -Wa,--noexecstack -Wl,-z,noexecstack
|
%global addopt %{?addopt} -fno-strict-aliasing
|
||||||
|
|
||||||
# Make serial, threaded and OpenMP versions
|
# Make serial, threaded and OpenMP versions
|
||||||
|
|
||||||
@@ -457,7 +421,7 @@ EOF
|
|||||||
# Do not use LIBNAMESUFFIX for new builds as it will not allow
|
# Do not use LIBNAMESUFFIX for new builds as it will not allow
|
||||||
# the different flavors to be plugin replacements of each other
|
# the different flavors to be plugin replacements of each other
|
||||||
%if 0%{?suse_version} <= 1500 && %{without hpc}
|
%if 0%{?suse_version} <= 1500 && %{without hpc}
|
||||||
%define libnamesuffix LIBNAMESUFFIX=_%{flavor}
|
%define libnamesuffix LIBNAMESUFFIX=%flavor
|
||||||
%endif
|
%endif
|
||||||
make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
||||||
%{?openblas_opt} \
|
%{?openblas_opt} \
|
||||||
@@ -465,13 +429,12 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
|||||||
NUM_THREADS=%{num_threads} V=1 \
|
NUM_THREADS=%{num_threads} V=1 \
|
||||||
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
||||||
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
||||||
OPENBLAS_BINARY_DIR=%{p_testdir} \
|
|
||||||
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
||||||
PREFIX=%{p_prefix} \
|
PREFIX=%{p_prefix} \
|
||||||
%{?dynamic_list} \
|
%{?dynamic_list} \
|
||||||
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
|
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
|
||||||
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
|
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
|
||||||
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}}
|
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} CEXTRALIB=""}}
|
||||||
|
|
||||||
%install
|
%install
|
||||||
%if %{with hpc}
|
%if %{with hpc}
|
||||||
@@ -481,28 +444,13 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
|||||||
# Install library and headers
|
# Install library and headers
|
||||||
# Pass NUM_THREADS again, as it is not propagated from the build step
|
# Pass NUM_THREADS again, as it is not propagated from the build step
|
||||||
# https://github.com/OpenMathLib/OpenBLAS/issues/4275
|
# https://github.com/OpenMathLib/OpenBLAS/issues/4275
|
||||||
mkdir -p %{buildroot}/%{p_testdir}
|
%make_install %{?build_flags} \
|
||||||
%make_install install_tests %{?openblas_target} %{?build_flags} \
|
|
||||||
%{?openblas_opt} \
|
|
||||||
NUM_THREADS=%{num_threads} \
|
NUM_THREADS=%{num_threads} \
|
||||||
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
||||||
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
||||||
OPENBLAS_BINARY_DIR=%{p_testdir} \
|
|
||||||
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
||||||
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
|
%{?libnamesuffix} \
|
||||||
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
|
|
||||||
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}} \
|
|
||||||
PREFIX=%{p_prefix}
|
PREFIX=%{p_prefix}
|
||||||
sed -e 's#@FLAVOR@#%{flavor}#' \
|
|
||||||
-e 's#@COMPILER@#%{?compiler_family:%compiler_family%{?hpc_gnu_dep_version:/%hpc_gnu_dep_version}}#' \
|
|
||||||
< %{S:3} > %{buildroot}/%{p_testdir}/openblas_tests.sh
|
|
||||||
chmod 0755 %{buildroot}/%{p_testdir}/openblas_tests.sh
|
|
||||||
for i in %{buildroot}/%{p_testdir}/*; do
|
|
||||||
case $i in
|
|
||||||
*.dat|*in*) chmod 0644 $i;;
|
|
||||||
*) chmod 0755 $i;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# Delete info about OBS host cpu
|
# Delete info about OBS host cpu
|
||||||
%ifarch %ix86 x86_64
|
%ifarch %ix86 x86_64
|
||||||
@@ -694,11 +642,6 @@ fi
|
|||||||
%{p_libdir}/pkgconfig
|
%{p_libdir}/pkgconfig
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%files tests
|
|
||||||
%dir %{p_testdir}
|
|
||||||
%dir %{dirname:%{p_testdir}}
|
|
||||||
%{p_testdir}/*
|
|
||||||
|
|
||||||
%files devel-static
|
%files devel-static
|
||||||
%{p_libdir}/libopenblas*.a
|
%{p_libdir}/libopenblas*.a
|
||||||
|
|
||||||
|
@@ -1,92 +0,0 @@
|
|||||||
#! /bin/bash
|
|
||||||
|
|
||||||
FLAVOR=@FLAVOR@
|
|
||||||
COMPILER=@COMPILER@
|
|
||||||
# Series 'test'
|
|
||||||
series_test() {
|
|
||||||
${dir}/sblat1 || echo "sblat1 failed"
|
|
||||||
${dir}/dblat1 || echo "dblat1 failed"
|
|
||||||
${dir}/cblat1 || echo "cblat1 failed"
|
|
||||||
${dir}/zblat1 || echo "zblat1 failed"
|
|
||||||
${dir}/sblat2 < ${dir}/sblat2.dat || echo "sblat2 failed"
|
|
||||||
grep -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || true
|
|
||||||
${dir}/dblat2 < ${dir}/dblat2.dat || echo "dblat2 failed"
|
|
||||||
grep -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || true
|
|
||||||
${dir}/cblat2 < ${dir}/cblat2.dat || echo "cblat2 failed"
|
|
||||||
grep -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || true
|
|
||||||
${dir}/zblat2 < ${dir}/zblat2.dat || echo "zblat2 failed"
|
|
||||||
grep -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || true
|
|
||||||
${dir}/test_sbgemm > SBBLAT3.SUMM || echo "test_sbgemm failed"
|
|
||||||
grep -q FATAL SBBLAT3.SUMM && cat SBBLAT3.SUMM || true
|
|
||||||
${dir}/dblat3 < ${dir}/dblat3.dat || echo "dblat3 failed"
|
|
||||||
grep -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || true
|
|
||||||
${dir}/cblat3 < ${dir}/cblat3.dat || echo "cblat3 failed"
|
|
||||||
grep -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || true
|
|
||||||
[ -x ${dir}/cblat3_3m ] && \
|
|
||||||
{ ${dir}/cblat3_3m < ${dir}/cblat3_3m.dat;
|
|
||||||
grep -q FATAL CBLAT3_3M.SUMM && cat CBLAT3_3M.SUMM || true; }
|
|
||||||
${dir}/zblat3 < ${dir}/zblat3.dat || echo "zblat3 failed";
|
|
||||||
grep -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || true
|
|
||||||
[ -x ${dir}/zblat3_3m ] && \
|
|
||||||
{ ${dir}/zblat3_3m < ${dir}/zblat3_3m.dat || echo "zblat3_3m failed";
|
|
||||||
grep -q FATAL ZBLAT3_3M.SUMM && cat ZBLAT3_3M.SUMM || true; }
|
|
||||||
}
|
|
||||||
|
|
||||||
# Series 'ctest'
|
|
||||||
series_ctest() {
|
|
||||||
${dir}/xscblat1 || echo "xscblat1 failed"
|
|
||||||
${dir}/xdcblat1 || echo "xdcblat1 failed"
|
|
||||||
${dir}/xccblat1 || echo "xccblat1 failed"
|
|
||||||
${dir}/xzcblat1 || echo "xzcblat1 failed"
|
|
||||||
${dir}/xscblat2 < ${dir}/sin2 || echo "xscblat2 failed"
|
|
||||||
${dir}/xdcblat2 < ${dir}/din2 || echo "xdcblat2 failed"
|
|
||||||
${dir}/xccblat2 < ${dir}/cin2 || echo "xccblat2 failed"
|
|
||||||
${dir}/xzcblat2 < ${dir}/zin2 || echo "xzcblat2 failed"
|
|
||||||
${dir}/xscblat3 < ${dir}/sin3 || echo "xscblat3 failed"
|
|
||||||
${dir}/xdcblat3 < ${dir}/din3 || echo "xdcblat3 failed"
|
|
||||||
${dir}/xccblat3 < ${dir}/cin3 || echo "xccblat3 failed"
|
|
||||||
${dir}/xzcblat3 < ${dir}/zin3 || echo "xzcblat3 failed"
|
|
||||||
[ -x ${dir}/xccblat3_3m ] && { ${dir}/xccblat3_3m < ${dir}/cin3_3m || echo "cin3_3m failed"; };
|
|
||||||
[ -x ${dir}/xzcblat3_3m ] && { ${dir}/xzcblat3_3m < ${dir}/zin3_3m || echo "zin3_3m failed"; };
|
|
||||||
}
|
|
||||||
|
|
||||||
# Series 'utest'
|
|
||||||
series_utest() {
|
|
||||||
${dir}/openblas_utest || echo "openblas_utest failed"
|
|
||||||
${dir}/openblas_utest_ext || echo "openblas_utest_ext failed"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
dir=/usr/lib/openblas-${FLAVOR}/tests
|
|
||||||
|
|
||||||
case $FLAVOR in
|
|
||||||
serial) export THREADS=false; export OMP=false ;;
|
|
||||||
pthreads) export THREADS=true; export OMP=false ;;
|
|
||||||
openmp) export THREADS=false; export OMP=true ;;
|
|
||||||
gnu-hpc) module purge;
|
|
||||||
module load gnu/$COMPILER openblas;
|
|
||||||
dir=$OPENBLAS_DIR/bin
|
|
||||||
export THREADS=false; export OMP=true
|
|
||||||
;;
|
|
||||||
gnu-hpc-pthreads) module purge;
|
|
||||||
module load gnu/$COMPILER openblas;
|
|
||||||
export THREADS=true; export OMP=false ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 series_test
|
|
||||||
if $OMP || $THREADS; then
|
|
||||||
rm -f ?BLAT2.SUMM ?BLAT3.SUMM ?BLAT3_3M.SUMM
|
|
||||||
if $OMP; then
|
|
||||||
OMP_NUM_THREADS=2 series_test
|
|
||||||
else
|
|
||||||
OPENBLAS_NUM_THREADS=2 series_test
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if $OMP; then
|
|
||||||
OMP_NUM_THREADS=2 series_ctest
|
|
||||||
else
|
|
||||||
OPENBLAS_NUM_THREADS=2 series_ctest
|
|
||||||
fi
|
|
||||||
|
|
||||||
series_utest
|
|
Reference in New Issue
Block a user