Compare commits
28 Commits
| Author | SHA256 | Date | |
|---|---|---|---|
| eacdebdbba | |||
| 4acfca4a3d | |||
| 91e75280cc | |||
|
|
91e44b5cce | ||
| a2e8b41e3f | |||
| af4e0eea8c | |||
| f9026b901c | |||
| 53c5fa41e3 | |||
| 67bfc95cea | |||
| 7bfeaf67c4 | |||
| bca0df3804 | |||
| 10476385d7 | |||
| 14e568f52d | |||
| 8b6456cf75 | |||
| a0be2a2e26 | |||
| cf70d49130 | |||
| da1902eb1b | |||
| bcc3a19335 | |||
| 616f244801 | |||
| b5d088b8b5 | |||
| caec615da5 | |||
| d2b04c7c42 | |||
| c3464afaf7 | |||
| 42c7e85871 | |||
| daf462652e | |||
| a974d174ed | |||
| 1100f93cf3 | |||
| f51c02a47c |
@@ -1,24 +0,0 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Wed Nov 30 20:16:21 2022 +0100
|
||||
Subject: Link library with -z,noexecstack
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: adddc0eadc81bcd29c48594793cb33eac0edb572
|
||||
References:
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||
---
|
||||
exports/Makefile | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
Index: OpenBLAS-0.3.25/exports/Makefile
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.25.orig/exports/Makefile
|
||||
+++ OpenBLAS-0.3.25/exports/Makefile
|
||||
@@ -193,6 +193,7 @@ else ifeq ($(F_COMPILER), FLANG)
|
||||
else
|
||||
ifneq ($(C_COMPILER), LSB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
+ -Wl,-z,noexecstack \
|
||||
-Wl,--whole-archive $< -Wl,--no-whole-archive \
|
||||
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aa2d68b1564fe2b13bc292672608e9cdeeeb6dc34995512e65c3b10f4599e897
|
||||
size 24493704
|
||||
BIN
OpenBLAS-0.3.29.tar.gz
LFS
Normal file
BIN
OpenBLAS-0.3.29.tar.gz
LFS
Normal file
Binary file not shown.
@@ -1,23 +0,0 @@
|
||||
openSUSE specific packaging
|
||||
===========================
|
||||
|
||||
OpenBLAS provides optimized implementations of BLAS and LAPACK.
|
||||
openSUSE provides two variants:
|
||||
* With OpenMP support
|
||||
* With threading support
|
||||
The serial variant has been dropped. To run a program
|
||||
which requires the serial version (ie. because it is
|
||||
multi-threaded itself), either specify the environment
|
||||
variable OMP_NUM_THREADS=1 or place a call to
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
in your program to limit the number of threads this library
|
||||
uses to 1.
|
||||
|
||||
On x86 systems OpenBLAS uses dynamic architectures support,
|
||||
so it contains all CPU-related optimizations.
|
||||
|
||||
How to switch between the various BLAS/LAPACK implementations
|
||||
=============================================================
|
||||
|
||||
The openmp and threaded variants may be installed in parallel.
|
||||
To select which one to use please use the 'modules' command.
|
||||
@@ -0,0 +1,139 @@
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed Feb 12 09:04:22 2025 +0100
|
||||
Subject: Restore the non-vectorized code from before PR4880 for POWER8
|
||||
Patch-mainline: Not yet
|
||||
Git-repo: https://github.com/xianyi/OpenBLAS
|
||||
Git-commit: 98b5ef929cfc98f2f3c236966830276c255118d2
|
||||
References: bsc#1239134
|
||||
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||
---
|
||||
kernel/power/sgemv_t.c | 23 +++++++++++++++++++----
|
||||
1 file changed, 19 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c
|
||||
index e133c815c..ed0a24230 100644
|
||||
--- a/kernel/power/sgemv_t.c
|
||||
+++ b/kernel/power/sgemv_t.c
|
||||
@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp7 += v_x[i] * va7[i];
|
||||
}
|
||||
|
||||
-
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+
|
||||
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
||||
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
||||
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
||||
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
v_y[1] += a * temp4;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp2 += v_x[i] * va2[i];
|
||||
temp3 += v_x[i] * va3[i];
|
||||
}
|
||||
-
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp0 += temp1 + temp2 + temp3;
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed Feb 12 09:07:20 2025 +0100
|
||||
Subject: Restore the non-vectorized code from before PR4880 for POWER8
|
||||
Patch-mainline: Not yet
|
||||
Git-repo: https://github.com/xianyi/OpenBLAS
|
||||
Git-commit: 81eed868b68c72ea1868663902f0904dc1b22326
|
||||
References: bsc#1239134
|
||||
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||
---
|
||||
kernel/power/sgemv_t_8.c | 24 ++++++++++++++++++++----
|
||||
1 file changed, 20 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/kernel/power/sgemv_t_8.c b/kernel/power/sgemv_t_8.c
|
||||
index f21f6eb7d..b30bb1137 100644
|
||||
--- a/kernel/power/sgemv_t_8.c
|
||||
+++ b/kernel/power/sgemv_t_8.c
|
||||
@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp7 += vx1* va7_1 + vx2 * va7_2;
|
||||
}
|
||||
|
||||
-
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+
|
||||
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
||||
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
||||
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
||||
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
v_y[1] += a * temp4;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
|
||||
temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1];
|
||||
}
|
||||
-
|
||||
+
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp0 += temp1 + temp2 + temp3;
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,4 @@
|
||||
<package>serial</package>
|
||||
<package>pthreads</package>
|
||||
<package>openmp</package>
|
||||
<package>gnu-hpc</package>
|
||||
<package>gnu-hpc-pthreads</package>
|
||||
</multibuild>
|
||||
|
||||
249
openblas.changes
249
openblas.changes
@@ -1,3 +1,252 @@
|
||||
-------------------------------------------------------------------
|
||||
Fri May 30 08:46:09 UTC 2025 - Richard Biener <rguenther@suse.com>
|
||||
|
||||
- For SLES16 target POWER9 instead of POWER8 which fixes the
|
||||
issue with the reported sgemm testsuite fails. [bsc#1239545]
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Mar 24 13:16:09 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
|
||||
|
||||
- Disable and remove support for gnu-hpc build flavours (bsc#1239982)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Mar 17 08:51:26 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Disable sgemmt and dgemmt tests in the test suite on power
|
||||
when gcc-13 is used. It is known (bsc#1239134) that some
|
||||
of these tests fail on this architecture when OpenBLAS
|
||||
is being built with the said compiler version ever since
|
||||
these tests were introduced.
|
||||
This will essentially restore the situation of the
|
||||
version prior to the addition of these tests (0.3.26) where
|
||||
one was unaware of the problem.
|
||||
This is only a temporary measure and will be removed once
|
||||
the issue with gcc-13 has been resolved.
|
||||
- Remove: Link-library-with-z-noexecstack.patch
|
||||
since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
|
||||
now.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Fri Mar 14 09:24:18 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Use upstream patch for bsc#1239134 which is more friendly to the
|
||||
non-affected power9 and power10 sub-architectures:
|
||||
Replace:
|
||||
Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
|
||||
by:
|
||||
Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Sat Mar 8 13:23:53 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Revert commit ba47c7f4f301aad100ed166de338b86e01da8465 to
|
||||
prevent failures on Power8 (bsc#1239134)
|
||||
* Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
|
||||
- Add a script to run tests.
|
||||
- Add bisect support.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Mar 5 15:47:13 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 0.3.29 (jsc#PED-9676):
|
||||
General:
|
||||
* Fixed a potential NULL pointer dereference in multithreaded builds.
|
||||
* Added function aliases for `GEMMT` using its new name `GEMMTR`
|
||||
adopted by Reference-BLAS.
|
||||
* Fixed the behavior of the recently added `CBLAS_?GEMMT` functions
|
||||
with row-major data.
|
||||
* Improved thread scaling of multithreaded `SBGEMV`.
|
||||
* Improved thread scaling of multithreaded `TRTRI`.
|
||||
* Fixed compilation of the CBLAS testsuite with gcc14 (and no
|
||||
Fortran compiler).
|
||||
* Fixed placement of the `-fopenmp` flag and libsuffix in the
|
||||
generated pkgconfig file.
|
||||
* Improved the `CMakeConfig` file generated by the Makefile build.
|
||||
* Fixed const-correctness of `cblas_?geadd` in `cblas.h`.
|
||||
* Fixed a potential inaccuracy in multithreaded BLAS3 calls.
|
||||
* Fixed empty implementations of `get`/`set_affinity` that print a
|
||||
warning in OpenMP builds.
|
||||
* Fixed function signatures for TRTRS in the converted C version of
|
||||
LAPACK.
|
||||
* Fixed omission of several single-precision LAPACK symbols in the
|
||||
shared library.
|
||||
* Improved build instructions for the provided "pybench" benchmarks.
|
||||
* Improved documentation, including descriptions of environment
|
||||
variables that affect build and runtime behavior.
|
||||
* Added a separate "make install_tests" target for use with
|
||||
cross-compilations.
|
||||
* Integrated improvements and corrections from Reference-LAPACK:
|
||||
- removed a comparison in LAPACKE `?tpmqrt` that is always false.
|
||||
- fixed the leading dimension for B in tests for GGEV.
|
||||
- replaced the `?LARFT` functions with a recursive implementation.
|
||||
arm64:
|
||||
* Fixed a long-standing bug in the (generic) `c`/`zgemm_beta` kernel
|
||||
that could lead to reads and writes outside the array bounds in some
|
||||
circumstances.
|
||||
* Rewrote cpu autodetection to scan all cores and return the highest
|
||||
performing type.
|
||||
* Improved the DGEMM performance for SVE targets and small matrix sizes.
|
||||
* improved dimension criteria for forwarding from `GEMM` to `GEMV`
|
||||
kernels.
|
||||
* Added SVE kernels for `ROT` and `SWAP`.
|
||||
* Improved SVE kernels for `SGEMV` and `DGEMV` on `A64FX` and
|
||||
`NEOVERSEV1`.
|
||||
* Fixed NRM2 implementations for generic SVE targets and the Neoverse N2.
|
||||
x86_64:
|
||||
* Fixed a wrong storage size in the SBGEMV kernel for Cooper Lake.
|
||||
* Added cpu autodetection for Intel Granite Rapids.
|
||||
* Added cpu autodetection for AMD Ryzen 5 series.
|
||||
* Added optimized `SOMATCOPY_CT` for AVX-capable targets.
|
||||
* fixed the fallback implementation of `GEMM3M` in GENERIC builds.
|
||||
Power:
|
||||
* Fixed multithreaded `SBGEMM`.
|
||||
* Fixed a CMake build problem on POWER10.
|
||||
* Improved the performance of SGEMV.
|
||||
* Added vectorized implementations of `SBGEMV` and support for
|
||||
forwarding 1xN `SBGEMM` to them.
|
||||
* Fixed illegal instructions and potential memory overflow in SGEMM
|
||||
on PPCG4.
|
||||
* Fixed handling of NaN and Inf arguments in `SSCAL` and `DSCAL` on
|
||||
PPC440,G4 and 970.
|
||||
* Added improved `CGEMM` and `ZGEMM` kernels for POWER10.
|
||||
Riscv64:
|
||||
* Removed thread yielding overhead caused by `sched_yield`.
|
||||
* Replaced some non-standard intrinsics with their official names.
|
||||
* Fixed and sped up the implementations of `CGEMM`/`ZGEMM` `TCOPY`
|
||||
for vector lengths 128 and 256.
|
||||
* Improved the performance of `SNRM2`/`DNRM2` for RVV1.0 targets.
|
||||
* Added optimized `?OMATCOPY_CN` kernels for RVV1.0 targets.
|
||||
- Add test package.
|
||||
- Add flags: `-Wa,--noexecstack -Wl,-z,noexecstack` to make sure
|
||||
stack is not executable. This works around problems in assembler
|
||||
code for z.
|
||||
- Make stack of empty cpuid.S non-executable as well.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Mar 5 14:17:26 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Set gcc versions for ppc64le (bsc#1239702)
|
||||
* on SLE-15-SP6: v13
|
||||
* on SLE-15-SP7: v14
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Feb 3 14:43:29 UTC 2025 - Andreas Schwab <schwab@suse.de>
|
||||
|
||||
- Disable LTO on riscv64 due to GCC#110812
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Jan 2 15:15:51 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 0.3.28 (jsc#PED-9676):
|
||||
* General:
|
||||
+ Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
|
||||
for allocating huge memory pages as buffers on suitable systems.
|
||||
+ Changed the unfinished implementation of `GEMM3M` for the generic
|
||||
target on all architectures to at least forward to regular GEMM.
|
||||
+ Improved multithreaded `GEMM` performance for large non-skinny
|
||||
matrices.
|
||||
+ Improved BLAS3 performance on larger multicore systems through
|
||||
improved parallelism.
|
||||
+ Improved performance of the initial memory allocation by reducing
|
||||
locking overhead.
|
||||
+ Improved performance of `GBMV` at small problem sizes by introducing
|
||||
a size barrier for the switch to multithreading.
|
||||
+ Added an implementation of the `CBLAS_GEMM_BATCH` extension.
|
||||
+ Fixed corner cases involving the handling of NAN and INFINITY
|
||||
arguments in `?SCAL` on all architectures.
|
||||
+ Fixed NAN handling and potential accuracy issues in compilations
|
||||
with Intel ICX by supplying a suitable fp-model option by default.
|
||||
+ It is now possible to register a callback function that replaces
|
||||
the built-in support for multithreading with an external backend
|
||||
like TBB (`openblas_set_threads_callback_function`).
|
||||
+ Fixed potential duplication of suffixes in shared library naming.
|
||||
+ Improved C compiler detection by the build system to tolerate
|
||||
more naming variants for gcc builds.
|
||||
+ Fixed an unnecessary dependency of the utest on CBLAS.
|
||||
+ Fixed spurious error reports from the BLAS extensions `utest`.
|
||||
+ Fixed unwanted invocation of the `GEMM3M` tests in cross-
|
||||
compilation.
|
||||
+ Fixed a flaw in the makefile build that could lead to the
|
||||
pkgconfig file containing an entry of `UNKNOWN` for the target
|
||||
cpu after installing.
|
||||
+ Integrated fixes from the Reference-LAPACK project:
|
||||
- Fixed uninitialized variables in the LAPACK tests for `?QP3RK`.
|
||||
- Fixed potential bounds error in `?UNHR_COL`/`?ORHR_COL`.
|
||||
- Fixed potential infinite loop in the LAPACK testsuite.
|
||||
- Make the variable type used for hidden length arguments
|
||||
configurable.
|
||||
+ Fixed `SYTRD` workspace computation and various typos.
|
||||
+ Prevent compiler use of FMA that could increase numerical
|
||||
error in `?GEEVX`.
|
||||
* x86-64:
|
||||
+ Fixed a potential thread buffer overrun in `SBSTOBF16` on small
|
||||
systems.
|
||||
+ Fixed an accuracy issue in `ZSCAL` introduced in 0.3.26.
|
||||
+ Added support for Intel Emerald Rapids and Meteor Lake CPUs.
|
||||
+ Added autodetection support for the Zhaoxin KX-7000 CPU.
|
||||
+ Fixed autodetection of Intel Prescott (probably broken
|
||||
since 0.3.19).
|
||||
+ Fixed compilation of the converter-generated C versions
|
||||
of the LAPACK sources with gcc-14.
|
||||
+ Added support for supplying the L2 cache size via an
|
||||
environment variable (`OPENBLAS_L2_SIZE`) in case it is not
|
||||
correctly reported (as in some VM configurations).
|
||||
+ Improved the error message shown when thread creation fails
|
||||
on startup.
|
||||
* arm64:
|
||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
|
||||
1xN or Mx1 matrix to the corresponding `GEMV` kernel.
|
||||
+ Added optimized `SGEMV` and `DGEMV` kernels for A64FX.
|
||||
+ Added optimized SVE kernels for small-matrix `GEMM`.
|
||||
+ Added A64FX to the CPU list for DYNAMIC_ARCH.
|
||||
+ Fixed building with support for CPU affinity.
|
||||
+ Worked around accuracy problems with `C/ZNRM2` on NeoverseN1
|
||||
targets.
|
||||
+ Improved GEMM performance on Neoverse V1.
|
||||
+ Fixed compilation for `NEOVERSEN2` with older compilers.
|
||||
+ Fixed potential miscompilation of the SVE `SDOT` and `DDOT`
|
||||
kernels.
|
||||
+ Fixed potential miscompilation of the non-SVE `CDOT` and
|
||||
`ZDOT` kernels.
|
||||
+ Fixed a potential overflow when using very large user-defined
|
||||
`BUFFERSIZE`.
|
||||
* Power:
|
||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a 1xN
|
||||
or Mx1 matrix to the corresponding `GEMV` kernel.
|
||||
+ Significantly improved performance of `SBGEMM` on POWER10.
|
||||
+ Fixed compilation with OpenMP and the XLF compiler.
|
||||
+ Fixed building of parts of the LAPACK testsuite with XLF.
|
||||
+ Fixed CSWAP/ZSWAP on big-endian POWER10 targets.
|
||||
+ Fixed a performance regression in SAXPY on POWER10 with OpenXL.
|
||||
+ Fixed a potential overflow when using very large user-defined
|
||||
`BUFFERSIZE`.
|
||||
+ Fixed an accuracy issue in the POWER6 kernels for `GEMM` and
|
||||
`GEMV`.
|
||||
* RISCV64:
|
||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
|
||||
1xN or Mx1 matrix to the corresponding GEMV kernel.
|
||||
+ Added `DYNAMIC_ARCH` support (comprising `GENERIC_RISCV64` and
|
||||
the two RVV 1.0 targets with vector length of 128 and 256).
|
||||
+ Worked around the `ZVL128B` kernels for `AXPBY` mishandling the
|
||||
special case of zero Y increment.
|
||||
- Obsoleted: no-static.patch.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Jul 29 09:21:41 UTC 2024 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Duplicate all options passed to `make` also to `make install`:
|
||||
The openblas build output suggests this: 'Note that any flags
|
||||
passed to make during build should also be passed to make install
|
||||
to circumvent any install errors'.
|
||||
This also makes sure that the minimum CPU requirement set in
|
||||
the pkgconfig file is the same one as used for building.
|
||||
This helps to maintain a reproducible build (boo#1228177).
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Jun 13 07:32:23 UTC 2024 - Andreas Schwab <schwab@suse.de>
|
||||
|
||||
- no-static.patch: do not link statically
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Sun Jun 9 07:07:51 UTC 2024 - Egbert Eich <eich@suse.com>
|
||||
|
||||
|
||||
311
openblas.spec
311
openblas.spec
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# spec file
|
||||
# spec file for package openblas
|
||||
#
|
||||
# Copyright (c) 2024 SUSE LLC
|
||||
# Copyright (c) 2025 SUSE LLC
|
||||
#
|
||||
# All modifications and additions to the file contributed by third parties
|
||||
# remain the property of their copyright owners, unless otherwise agreed
|
||||
@@ -18,8 +18,7 @@
|
||||
|
||||
%global flavor @BUILD_FLAVOR@%{nil}
|
||||
|
||||
%define _vers 0_3_27
|
||||
%define vers 0.3.27
|
||||
%undefine sha1
|
||||
%define so_v 0
|
||||
%define pname openblas
|
||||
|
||||
@@ -41,7 +40,6 @@ ExclusiveArch: do_not_build
|
||||
%define openblas_so_prio 20
|
||||
# we build devel packages only from one flavor
|
||||
%define build_devel 1
|
||||
%{bcond_with hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "pthreads"
|
||||
@@ -52,7 +50,6 @@ ExclusiveArch: do_not_build
|
||||
%else
|
||||
%define openblas_so_prio 20
|
||||
%endif
|
||||
%{bcond_with hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "openmp"
|
||||
@@ -62,110 +59,17 @@ ExclusiveArch: do_not_build
|
||||
%define arch_flavor 1
|
||||
%define openblas_so_prio 50
|
||||
%endif
|
||||
%{bcond_with hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu-hpc"
|
||||
%define compiler_family gnu
|
||||
%undefine c_f_ver
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu-hpc-pthreads"
|
||||
%define compiler_family gnu
|
||||
%undefine c_f_ver
|
||||
%define ext pthreads
|
||||
%define build_flags USE_THREAD=1 USE_OPENMP=0
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu7-hpc"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 7
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu7-hpc-pthreads"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 7
|
||||
%define ext pthreads
|
||||
%define build_flags USE_THREAD=1 USE_OPENMP=0
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu8-hpc"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 8
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu8-hpc-pthreads"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 8
|
||||
%define ext pthreads
|
||||
%define build_flags USE_THREAD=1 USE_OPENMP=0
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu9-hpc"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 9
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu9-hpc-pthreads"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 9
|
||||
%define ext pthreads
|
||||
%define build_flags USE_THREAD=1 USE_OPENMP=0
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu10-hpc"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 10
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu10-hpc-pthreads"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 10
|
||||
%define ext pthreads
|
||||
%define build_flags USE_THREAD=1 USE_OPENMP=0
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu11-hpc"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 11
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu11-hpc-pthreads"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 11
|
||||
%define ext pthreads
|
||||
%define build_flags USE_THREAD=1 USE_OPENMP=0
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu12-hpc"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 12
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%if "%flavor" == "gnu12-hpc-pthreads"
|
||||
%define compiler_family gnu
|
||||
%define c_f_ver 12
|
||||
%define ext pthreads
|
||||
%define build_flags USE_THREAD=1 USE_OPENMP=0
|
||||
%{bcond_without hpc}
|
||||
%endif
|
||||
|
||||
%ifarch ppc64le
|
||||
%if 0%{?c_f_ver} > 9
|
||||
%else
|
||||
%if 0%{?sle_version} == 150700
|
||||
%define cc_v 14
|
||||
%endif
|
||||
%if 0%{?sle_version} == 150600
|
||||
%define cc_v 13
|
||||
%endif
|
||||
%if 0%{?sle_version} == 150500
|
||||
%define cc_v 12
|
||||
%endif
|
||||
@@ -183,7 +87,6 @@ ExclusiveArch: do_not_build
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if %{without hpc}
|
||||
%define so_a %{so_v}
|
||||
%if 0%{!?package_name:1}
|
||||
%define package_name %{pname}_%{flavor}
|
||||
@@ -191,60 +94,41 @@ ExclusiveArch: do_not_build
|
||||
%define p_prefix %_prefix
|
||||
%define p_includedir %_includedir/%pname
|
||||
%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}}
|
||||
%define p_testdir %_libexecdir/openblas%{?flavor:-%{flavor}}/tests
|
||||
%define p_cmakedir %{p_libdir}/cmake/%{pname}
|
||||
%define num_threads 64
|
||||
|
||||
%if 0%{?sha1:1}
|
||||
%define v_string %{sha1}
|
||||
%else
|
||||
%define so_a %{nil}
|
||||
# Magic for OBS Staging. Only build the flavors required by
|
||||
# other packages in the ring.
|
||||
%if %{with ringdisabled}
|
||||
ExclusiveArch: do_not_build
|
||||
%endif
|
||||
|
||||
%define package_name %{hpc_package_name %_vers}
|
||||
|
||||
%define p_prefix %hpc_prefix
|
||||
%define p_includedir %hpc_includedir
|
||||
%define p_libdir %hpc_libdir
|
||||
%define p_cmakedir %{hpc_libdir}/cmake
|
||||
%define num_threads 256
|
||||
|
||||
%{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}}
|
||||
%define v_string v%{version}
|
||||
%endif
|
||||
|
||||
Name: %{package_name}
|
||||
Version: %vers
|
||||
Version: 0.3.29
|
||||
Release: 0
|
||||
Summary: An optimized BLAS library based on GotoBLAS2
|
||||
License: BSD-3-Clause
|
||||
Group: Productivity/Scientific/Math
|
||||
URL: http://www.openblas.net
|
||||
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz
|
||||
Source0: https://github.com/xianyi/OpenBLAS/archive/%{v_string}.tar.gz#/OpenBLAS-%{version}%{?sha1:_%{sha1}}.tar.gz
|
||||
Source1: README.SUSE
|
||||
Source2: README.HPC.SUSE
|
||||
Source3: openblas.rpmlintrc
|
||||
Patch101: Link-library-with-z-noexecstack.patch
|
||||
Source3: openblas_tests.sh.in
|
||||
Source4: openblas.rpmlintrc
|
||||
# PATCH port
|
||||
Patch102: Handle-s390-correctly.patch
|
||||
Patch103: openblas-ppc64be_up2_p8.patch
|
||||
Patch104: Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
|
||||
|
||||
#BuildRequires: cmake
|
||||
BuildRequires: memory-constraints
|
||||
%if 0%{?cc_v:1}
|
||||
BuildRequires: gcc%{?cc_v}-fortran
|
||||
%endif
|
||||
%if %{without hpc}
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: update-alternatives
|
||||
Requires(post): update-alternatives
|
||||
Requires(preun):update-alternatives
|
||||
%else
|
||||
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
|
||||
BuildRequires: lua-lmod
|
||||
BuildRequires: suse-hpc
|
||||
%global dep_summary %{summary}
|
||||
%endif
|
||||
Requires(preun): update-alternatives
|
||||
|
||||
%description
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
@@ -252,10 +136,9 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
%package -n lib%{name}%{so_a}
|
||||
Summary: An optimized BLAS library based on GotoBLAS2, %{flavor} version
|
||||
Group: System/Libraries
|
||||
%if %{without hpc}
|
||||
Requires(post): update-alternatives
|
||||
Requires(post): coreutils
|
||||
Requires(preun):update-alternatives
|
||||
Requires(preun): update-alternatives
|
||||
%if "%flavor" == "serial"
|
||||
Obsoletes: lib%{pname}%{so_v} < %{version}
|
||||
Provides: lib%{pname}%{so_v} = %{version}
|
||||
@@ -268,38 +151,27 @@ Obsoletes: lib%{pname}p0
|
||||
%if "%flavor" == "openmp"
|
||||
Obsoletes: lib%{pname}o0
|
||||
%endif
|
||||
%else # with hpc
|
||||
%hpc_requires
|
||||
%endif
|
||||
|
||||
%description -n lib%{name}%{so_a}
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
%{?with_hpc:%{hpc_master_package -l -L}}
|
||||
|
||||
%package -n lib%{name}-devel
|
||||
Summary: Development libraries for OpenBLAS, %{flavor} version
|
||||
Group: Development/Libraries/C and C++
|
||||
Requires: lib%{name}%{so_a} = %{version}
|
||||
%if %{without hpc}
|
||||
Requires: %{pname}-common-devel = %{version}
|
||||
Requires: lib%{name}%{so_a} = %{version}
|
||||
%if 0%{?arch_flavor}
|
||||
Provides: %{pname}-devel = %version
|
||||
Provides: %{pname}-devel(default) = %version
|
||||
%else
|
||||
Provides: %{pname}-devel(other) = %version
|
||||
%endif
|
||||
%else
|
||||
%hpc_requires_devel
|
||||
%endif
|
||||
|
||||
%description -n lib%{name}-devel
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
This package contains the development libraries for serial OpenBLAS version.
|
||||
|
||||
%{?with_hpc:%{hpc_master_package -l -L devel}}
|
||||
|
||||
%package devel-static
|
||||
Summary: Static version of OpenBLAS
|
||||
Group: Development/Libraries/C and C++
|
||||
@@ -326,23 +198,37 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
This package contains headers for OpenBLAS.
|
||||
|
||||
%package tests
|
||||
Summary: Unit Tests for openblas library
|
||||
Group: Development/Libraries/C and C++
|
||||
|
||||
%description tests
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
This package contains test binaries.
|
||||
|
||||
%prep
|
||||
|
||||
%setup -q -n OpenBLAS-%{version}
|
||||
%setup -q -n OpenBLAS-%{?sha1:%{sha1}}%{!?sha1:%{version}}
|
||||
%autopatch -p1
|
||||
%ifarch s390
|
||||
sed -i -e "s@m32@m31@" Makefile.system
|
||||
%endif
|
||||
sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \
|
||||
test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile
|
||||
|
||||
%if %{without hpc}
|
||||
cp %{SOURCE1} .
|
||||
grep -q .note.GNU-stack cpuid.S || echo '.section .note.GNU-stack,"",@progbits' >> cpuid.S
|
||||
# Disable sgemmt and dgemmt tests on ppc64le when using gcc13
|
||||
%if "%{?_arch}" == "ppc64le" && 0%{?gcc_version} == 13
|
||||
%if %{suse_version} >= 1600 && !0%{?is_opensuse}
|
||||
# with openblas_target POWER9 the tests work fine
|
||||
%else
|
||||
cp %{SOURCE2} .
|
||||
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_sgemmt.o *@@' utest/Makefile
|
||||
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_dgemmt.o *@@' utest/Makefile
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if %{without hpc}
|
||||
cp %{SOURCE1} .
|
||||
|
||||
# create baselibs.conf based on flavor
|
||||
cat > %{_sourcedir}/baselibs.conf <<EOF
|
||||
lib%{name}%{so_a}
|
||||
@@ -352,7 +238,6 @@ lib%{name}-devel
|
||||
requires -%{name}-<targettype>
|
||||
requires "lib%{name}%{?so_a}-<targettype> = <version>"
|
||||
EOF
|
||||
%endif
|
||||
|
||||
%build
|
||||
|
||||
@@ -370,9 +255,9 @@ EOF
|
||||
%define _lto_cflags %{nil}
|
||||
%endif
|
||||
|
||||
%if %{with hpc}
|
||||
%hpc_debug
|
||||
%hpc_setup_compiler
|
||||
%ifarch riscv64
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110812
|
||||
%global _lto_cflags %{nil}
|
||||
%endif
|
||||
|
||||
# Use DYNAMIC_ARCH everywhere - not sure about PPC?
|
||||
@@ -393,7 +278,11 @@ EOF
|
||||
%global openblas_target %openblas_target TARGET=ZARCH_GENERIC
|
||||
%endif
|
||||
%ifarch ppc64le
|
||||
%if %{suse_version} >= 1600 && !0%{?is_opensuse}
|
||||
%global openblas_target %openblas_target TARGET=POWER9
|
||||
%else
|
||||
%global openblas_target %openblas_target TARGET=POWER8
|
||||
%endif
|
||||
%define openblas_opt BUILD_BFLOAT16=1
|
||||
%endif
|
||||
%ifarch ppc64
|
||||
@@ -408,7 +297,7 @@ EOF
|
||||
%ifarch ppc64
|
||||
%global addopt -mvsx
|
||||
%endif
|
||||
%global addopt %{?addopt} -fno-strict-aliasing
|
||||
%global addopt %{?addopt} -fno-strict-aliasing -Wa,--noexecstack -Wl,-z,noexecstack
|
||||
|
||||
# Make serial, threaded and OpenMP versions
|
||||
|
||||
@@ -420,7 +309,7 @@ EOF
|
||||
# set MAKE_NB_JOBS instead and let the build do the work!
|
||||
# Do not use LIBNAMESUFFIX for new builds as it will not allow
|
||||
# the different flavors to be plugin replacements of each other
|
||||
%if 0%{?suse_version} <= 1500 && %{without hpc}
|
||||
%if 0%{?suse_version} <= 1500
|
||||
%define libnamesuffix LIBNAMESUFFIX=_%{flavor}
|
||||
%endif
|
||||
make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
||||
@@ -429,28 +318,39 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
||||
NUM_THREADS=%{num_threads} V=1 \
|
||||
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
||||
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
||||
OPENBLAS_BINARY_DIR=%{p_testdir} \
|
||||
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
||||
PREFIX=%{p_prefix} \
|
||||
%{?dynamic_list} \
|
||||
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
|
||||
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
|
||||
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}}
|
||||
%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""} \
|
||||
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}}
|
||||
|
||||
%install
|
||||
%if %{with hpc}
|
||||
%hpc_setup_compiler
|
||||
%endif
|
||||
|
||||
# Install library and headers
|
||||
# Pass NUM_THREADS again, as it is not propagated from the build step
|
||||
# https://github.com/OpenMathLib/OpenBLAS/issues/4275
|
||||
%make_install %{?build_flags} \
|
||||
mkdir -p %{buildroot}/%{p_testdir}
|
||||
%make_install install_tests %{?openblas_target} %{?build_flags} \
|
||||
%{?openblas_opt} \
|
||||
NUM_THREADS=%{num_threads} \
|
||||
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
||||
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
||||
OPENBLAS_BINARY_DIR=%{p_testdir} \
|
||||
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
||||
%{?libnamesuffix} \
|
||||
%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""} \
|
||||
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
|
||||
PREFIX=%{p_prefix}
|
||||
# Generate the test runner script: the @FLAVOR@ placeholder becomes the build
# flavor, @COMPILER@ the compiler family (empty when that macro is undefined).
sed -e 's#@FLAVOR@#%{flavor}#' \
    -e 's#@COMPILER@#%{?compiler_family:%{compiler_family}}#' \
    < %{S:3} > %{buildroot}/%{p_testdir}/openblas_tests.sh
chmod 0755 %{buildroot}/%{p_testdir}/openblas_tests.sh
# Input data files must not be executable; everything else in the test
# directory is treated as a program. Match on the file name only, so that an
# "in" somewhere in the build root path cannot turn every file into data.
for i in %{buildroot}/%{p_testdir}/*; do
  case ${i##*/} in
    *.dat|*in*) chmod 0644 $i;;
    *) chmod 0755 $i;;
  esac
done
|
||||
|
||||
# Delete info about OBS host cpu
|
||||
%ifarch %ix86 x86_64
|
||||
@@ -458,8 +358,6 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
||||
%{buildroot}%{p_includedir}/openblas_config.h
|
||||
%endif
|
||||
|
||||
%if %{without hpc}
|
||||
|
||||
%if 0%{!?build_devel:1}
|
||||
# We need the includes only once
|
||||
rm -rf %{buildroot}%{p_includedir}/
|
||||
@@ -515,52 +413,6 @@ ln -s %{_sysconfdir}/alternatives/openblas-default%{?a_x}/cmake/openblas %{build
|
||||
ln -s openblas-%{flavor}/lib%{pname}.so.%{so_v} %{buildroot}%{_libdir}/lib%{name}.so.%{so_v}
|
||||
ln -s openblas-%{flavor}/lib%{pname}.so %{buildroot}%{_libdir}/lib%{name}.so
|
||||
%endif
|
||||
%else # with hpc
|
||||
|
||||
# HPC module file
|
||||
%hpc_write_modules_files
|
||||
#%%Module1.0#####################################################################
|
||||
|
||||
proc ModulesHelp { } {
|
||||
|
||||
puts stderr " "
|
||||
puts stderr "This module loads the %{pname} library built with the %{compiler_family} compiler toolchain."
|
||||
puts stderr "\nVersion %{version}\n"
|
||||
|
||||
}
|
||||
module-whatis "Name: %{hpc_upcase %pname} built with %{compiler_family} toolchain"
|
||||
module-whatis "Version: %{version}"
|
||||
module-whatis "Category: runtime library"
|
||||
module-whatis "Description: %{dep_summary}"
|
||||
module-whatis "%{url}"
|
||||
|
||||
set version %{version}
|
||||
|
||||
prepend-path LD_LIBRARY_PATH %{p_libdir}
|
||||
|
||||
setenv %{hpc_upcase %pname}_DIR %{hpc_prefix}
|
||||
|
||||
if {[file isdirectory %{hpc_includedir}]} {
|
||||
prepend-path LIBRARY_PATH %{p_libdir}
|
||||
prepend-path CPATH %{p_includedir}
|
||||
prepend-path C_INCLUDE_PATH %{p_includedir}
|
||||
prepend-path CPLUS_INCLUDE_PATH %{p_includedir}
|
||||
prepend-path INCLUDE %{p_includedir}
|
||||
%hpc_modulefile_add_pkgconfig_path
|
||||
|
||||
setenv %{hpc_upcase %pname}_DIR %{hpc_prefix}
|
||||
setenv %{hpc_upcase %pname}_LIB %{p_libdir}
|
||||
setenv %{hpc_upcase %pname}_INC %{p_includedir}
|
||||
|
||||
}
|
||||
|
||||
family "openblas"
|
||||
EOF
|
||||
%{hpc_write_pkgconfig -l %{pname}}
|
||||
|
||||
%endif # with hpc
|
||||
|
||||
%if %{without hpc}
|
||||
|
||||
# Ensure directories used in older versions are properly replaced by symlinks
|
||||
%pre -n %{pname}-common-devel
|
||||
@@ -596,17 +448,9 @@ if [ ! -d %{p_libdir} ]; then
|
||||
fi
|
||||
/sbin/ldconfig
|
||||
|
||||
%else
|
||||
|
||||
%postun -n lib%{name}
|
||||
%hpc_module_delete_if_default
|
||||
|
||||
%endif
|
||||
|
||||
%files -n lib%{name}%{so_a}
|
||||
%defattr(-,root,root,-)
|
||||
%{p_libdir}/lib%{pname}.so.%{so_v}
|
||||
%if %{without hpc}
|
||||
%dir %{p_libdir}
|
||||
%{?libnamesuffix:%{_libdir}/lib%{name}.so.%{so_v}}
|
||||
# Created by %%post
|
||||
@@ -621,26 +465,19 @@ fi
|
||||
%ghost %{_sysconfdir}/alternatives/libcblas.so.3%{?a_x}
|
||||
%ghost %{_sysconfdir}/alternatives/liblapack.so.3%{?a_x}
|
||||
%ghost %{_sysconfdir}/alternatives/liblapacke.so.3%{?a_x}
|
||||
%else
|
||||
%hpc_dirs
|
||||
%{p_libdir}/libopenblas*r*.so
|
||||
%hpc_modules_files
|
||||
%endif
|
||||
|
||||
%files -n lib%{name}-devel
|
||||
%{p_libdir}/lib%{pname}.so
|
||||
%{?libnamesuffix:%{_libdir}/lib%{name}.so}
|
||||
%{p_cmakedir}/
|
||||
%if %{with hpc}
|
||||
%license LICENSE
|
||||
%doc Changelog.txt GotoBLAS* README.md README.HPC.SUSE
|
||||
%hpc_pkgconfig_file
|
||||
%{p_includedir}/
|
||||
%else
|
||||
%dir %{p_libdir}/cmake
|
||||
%dir %{p_libdir}/pkgconfig
|
||||
%{p_libdir}/pkgconfig
|
||||
%endif
|
||||
|
||||
%files tests
|
||||
%dir %{p_testdir}
|
||||
%dir %{dirname:%{p_testdir}}
|
||||
%{p_testdir}/*
|
||||
|
||||
%files devel-static
|
||||
%{p_libdir}/libopenblas*.a
|
||||
|
||||
92
openblas_tests.sh.in
Normal file
92
openblas_tests.sh.in
Normal file
@@ -0,0 +1,92 @@
|
||||
#! /bin/bash
|
||||
|
||||
# Library flavor under test. @FLAVOR@ is a placeholder that is substituted by
# sed at package build time; the value selects the test directory and the
# threading mode further down in this script.
FLAVOR=@FLAVOR@
|
||||
# Compiler identifier, substituted for the @COMPILER@ placeholder by sed at
# package build time. Only used to build the "gnu/<compiler>" module name for
# the HPC flavors.
COMPILER=@COMPILER@
|
||||
# Helper for series_test: run one tester that reads its input from a .dat file.
#   $1  tester name; ${dir}/$1 is executed with ${dir}/$1.dat on stdin
#   $2  summary file to inspect afterwards (looked up in the current working
#       directory; presumably written there by the tester - TODO confirm)
# Prints "<tester> failed" when the tester exits non-zero and dumps the summary
# file when it contains the word FATAL. Always returns success so that the
# remaining testers still run.
series_test_run() {
    "${dir}/$1" < "${dir}/$1.dat" || echo "$1 failed"
    grep -q FATAL "$2" && cat "$2" || true
}

# Series 'test': run the BLAS level 1, 2 and 3 testers found in ${dir}.
#
# Uses the global 'dir' (directory with the test binaries and their *.dat
# input files), which is set at top level before this function is called.
# Every failing tester is reported by name; a failure never aborts the series.
series_test() {
    local t

    # Level 1 testers need no input file.
    for t in sblat1 dblat1 cblat1 zblat1; do
        "${dir}/${t}" || echo "${t} failed"
    done

    # Level 2
    series_test_run sblat2 SBLAT2.SUMM
    series_test_run dblat2 DBLAT2.SUMM
    series_test_run cblat2 CBLAT2.SUMM
    series_test_run zblat2 ZBLAT2.SUMM

    # Level 3; test_sbgemm takes no input file, its stdout is the summary.
    "${dir}/test_sbgemm" > SBBLAT3.SUMM || echo "test_sbgemm failed"
    grep -q FATAL SBBLAT3.SUMM && cat SBBLAT3.SUMM || true
    series_test_run dblat3 DBLAT3.SUMM
    series_test_run cblat3 CBLAT3.SUMM
    # The *_3m testers are optional: run them only when they are installed.
    if [ -x "${dir}/cblat3_3m" ]; then
        series_test_run cblat3_3m CBLAT3_3M.SUMM
    fi
    series_test_run zblat3 ZBLAT3.SUMM
    if [ -x "${dir}/zblat3_3m" ]; then
        series_test_run zblat3_3m ZBLAT3_3M.SUMM
    fi
}
|
||||
|
||||
# Series 'ctest': run the CBLAS interface testers found in ${dir}.
#
# Uses the global 'dir' (directory with the x?cblat* binaries and their
# ?in2 / ?in3 / ?in3_3m input files), set at top level before the call.
# Every failing tester is reported as "<tester> failed"; a failure never
# aborts the series.
series_ctest() {
    local p lvl

    # Level 1 testers need no input file.
    for p in s d c z; do
        "${dir}/x${p}cblat1" || echo "x${p}cblat1 failed"
    done

    # Level 2 and 3 testers read "<precision>in<level>" on stdin.
    for lvl in 2 3; do
        for p in s d c z; do
            "${dir}/x${p}cblat${lvl}" < "${dir}/${p}in${lvl}" \
                || echo "x${p}cblat${lvl} failed"
        done
    done

    # The complex *_3m testers are optional: run them only when installed.
    for p in c z; do
        if [ -x "${dir}/x${p}cblat3_3m" ]; then
            "${dir}/x${p}cblat3_3m" < "${dir}/${p}in3_3m" \
                || echo "x${p}cblat3_3m failed"
        fi
    done
}
|
||||
|
||||
# Series 'utest': run both unit test programs from ${dir} and name every one
# that exits with a non-zero status. A failure does not stop the series.
series_utest() {
    for prog in openblas_utest openblas_utest_ext; do
        if ! ${dir}/${prog}; then
            echo "${prog} failed"
        fi
    done
}
|
||||
|
||||
|
||||
# Default location of the installed test programs; the HPC flavors override
# it below once their environment module has been loaded.
dir=/usr/lib/openblas-${FLAVOR}/tests

# Derive the threading model from the flavor. THREADS and OMP hold the names
# of the shell builtins 'true' / 'false' and are executed as commands in the
# 'if' tests below, so both must always be set.
case $FLAVOR in
    serial)   export THREADS=false; export OMP=false ;;
    pthreads) export THREADS=true;  export OMP=false ;;
    openmp)   export THREADS=false; export OMP=true ;;
    gnu-hpc)
        module purge
        module load gnu/$COMPILER openblas
        dir=$OPENBLAS_DIR/bin
        export THREADS=false; export OMP=true
        ;;
    gnu-hpc-pthreads)
        module purge
        module load gnu/$COMPILER openblas
        # NOTE(review): mirrors the gnu-hpc branch; confirm that the pthreads
        # HPC build installs its tests below $OPENBLAS_DIR/bin as well.
        dir=$OPENBLAS_DIR/bin
        export THREADS=true; export OMP=false
        ;;
    *)
        # An unset OMP/THREADS expands to an empty command, which succeeds
        # and would silently select the OpenMP code paths below.
        echo "unknown flavor '$FLAVOR', treating it as serial" >&2
        export THREADS=false; export OMP=false
        ;;
esac

# Single-threaded pass first.
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 series_test

# Threaded builds get a second pass with two threads.
if $OMP || $THREADS; then
    # Drop the summaries of the first pass so stale results are not re-read.
    rm -f ?BLAT2.SUMM ?BLAT3.SUMM ?BLAT3_3M.SUMM
    if $OMP; then
        OMP_NUM_THREADS=2 series_test
    else
        OPENBLAS_NUM_THREADS=2 series_test
    fi
fi

if $OMP; then
    OMP_NUM_THREADS=2 series_ctest
else
    OPENBLAS_NUM_THREADS=2 series_ctest
fi

series_utest
|
||||
Reference in New Issue
Block a user