Compare commits
1 Commits
Author | SHA256 | Date | |
---|---|---|---|
db5501e9ac |
24
Link-library-with-z-noexecstack.patch
Normal file
24
Link-library-with-z-noexecstack.patch
Normal file
@@ -0,0 +1,24 @@
|
||||
From: Egbert Eich <eich@suse.com>
|
||||
Date: Wed Nov 30 20:16:21 2022 +0100
|
||||
Subject: Link library with -z,noexecstack
|
||||
Patch-mainline: Not yet
|
||||
Git-commit: adddc0eadc81bcd29c48594793cb33eac0edb572
|
||||
References:
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.com>
|
||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||
---
|
||||
exports/Makefile | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
Index: OpenBLAS-0.3.25/exports/Makefile
|
||||
===================================================================
|
||||
--- OpenBLAS-0.3.25.orig/exports/Makefile
|
||||
+++ OpenBLAS-0.3.25/exports/Makefile
|
||||
@@ -193,6 +193,7 @@ else ifeq ($(F_COMPILER), FLANG)
|
||||
else
|
||||
ifneq ($(C_COMPILER), LSB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
+ -Wl,-z,noexecstack \
|
||||
-Wl,--whole-archive $< -Wl,--no-whole-archive \
|
||||
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
BIN
OpenBLAS-0.3.25.tar.gz
(Stored with Git LFS)
Normal file
BIN
OpenBLAS-0.3.25.tar.gz
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
OpenBLAS-0.3.29.tar.gz
(Stored with Git LFS)
BIN
OpenBLAS-0.3.29.tar.gz
(Stored with Git LFS)
Binary file not shown.
@@ -1,139 +0,0 @@
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed Feb 12 09:04:22 2025 +0100
|
||||
Subject: Restore the non-vectorized code from before PR4880 for POWER8
|
||||
Patch-mainline: Not yet
|
||||
Git-repo: https://github.com/xianyi/OpenBLAS
|
||||
Git-commit: 98b5ef929cfc98f2f3c236966830276c255118d2
|
||||
References: bsc#1239134
|
||||
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||
---
|
||||
kernel/power/sgemv_t.c | 23 +++++++++++++++++++----
|
||||
1 file changed, 19 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c
|
||||
index e133c815c..ed0a24230 100644
|
||||
--- a/kernel/power/sgemv_t.c
|
||||
+++ b/kernel/power/sgemv_t.c
|
||||
@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp7 += v_x[i] * va7[i];
|
||||
}
|
||||
|
||||
-
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+
|
||||
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
||||
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
||||
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
||||
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
v_y[1] += a * temp4;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp2 += v_x[i] * va2[i];
|
||||
temp3 += v_x[i] * va3[i];
|
||||
}
|
||||
-
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp0 += temp1 + temp2 + temp3;
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed Feb 12 09:07:20 2025 +0100
|
||||
Subject: Restore the non-vectorized code from before PR4880 for POWER8
|
||||
Patch-mainline: Not yet
|
||||
Git-repo: https://github.com/xianyi/OpenBLAS
|
||||
Git-commit: 81eed868b68c72ea1868663902f0904dc1b22326
|
||||
References: bsc#1239134
|
||||
|
||||
|
||||
Signed-off-by: Egbert Eich <eich@suse.de>
|
||||
---
|
||||
kernel/power/sgemv_t_8.c | 24 ++++++++++++++++++++----
|
||||
1 file changed, 20 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/kernel/power/sgemv_t_8.c b/kernel/power/sgemv_t_8.c
|
||||
index f21f6eb7d..b30bb1137 100644
|
||||
--- a/kernel/power/sgemv_t_8.c
|
||||
+++ b/kernel/power/sgemv_t_8.c
|
||||
@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp7 += vx1* va7_1 + vx2 * va7_2;
|
||||
}
|
||||
|
||||
-
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+
|
||||
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
||||
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
||||
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
||||
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
v_y[1] += a * temp4;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
|
||||
temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1];
|
||||
}
|
||||
-
|
||||
+
|
||||
+ #if defined(POWER8)
|
||||
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
||||
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
||||
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
||||
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
||||
+ #else
|
||||
register __vector float t0, t1, t2, t3;
|
||||
register __vector float a = { alpha, alpha, alpha, alpha };
|
||||
__vector float *v_y = (__vector float*) y;
|
||||
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
||||
temp0 += temp1 + temp2 + temp3;
|
||||
|
||||
v_y[0] += a * temp0;
|
||||
-
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
@@ -2,4 +2,6 @@
|
||||
<package>serial</package>
|
||||
<package>pthreads</package>
|
||||
<package>openmp</package>
|
||||
<package>gnu-hpc</package>
|
||||
<package>gnu-hpc-pthreads</package>
|
||||
</multibuild>
|
||||
|
527
openblas.changes
527
openblas.changes
@@ -1,350 +1,9 @@
|
||||
-------------------------------------------------------------------
|
||||
Mon Mar 17 08:51:26 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Disable sgemmt and dgemmt tests in the test suite on power
|
||||
when gcc-13 is used. It is known (bsc#1239134) that some
|
||||
of these tests fail on this architecture when OpenBLAS
|
||||
is being built with the said compiler version ever since
|
||||
these tests were introduced.
|
||||
This will essentially restore the situation of the
|
||||
version prior to the addition of these tests (0.3.26) where
|
||||
one was unaware of the problem.
|
||||
This is only a temporary measure and will be removed once
|
||||
the issue with gcc-13 has been resolved.
|
||||
- Remove: Link-library-with-z-noexecstack.patch
|
||||
since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
|
||||
now.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Fri Mar 14 09:24:18 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Use upstream patch for bsc#1239134 which is more friendly to the
|
||||
non-affected power9 and power10 sub-architectures:
|
||||
Replace:
|
||||
Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
|
||||
by:
|
||||
Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Sat Mar 8 13:23:53 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Revert commit ba47c7f4f301aad100ed166de338b86e01da8465 to
|
||||
prevent failures on Power8 (bsc#1239134)
|
||||
* Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
|
||||
- Add a script to run tests.
|
||||
- Add bisect support.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Mar 5 15:47:13 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 0.3.29 (jsc#PED-9676):
|
||||
General:
|
||||
* Fixed a potential NULL pointer dereference in multithreaded builds.
|
||||
* Added function aliases for `GEMMT` using its new name `GEMMTR`
|
||||
adopted by Reference-BLAS.
|
||||
* Fixed the behavior of the recently added `CBLAS_?GEMMT` functions
|
||||
with row-major data.
|
||||
* Improved thread scaling of multithreaded `SBGEMV`.
|
||||
* Improved thread scaling of multithreaded `TRTRI`.
|
||||
* Fixed compilation of the CBLAS testsuite with gcc14 (and no
|
||||
Fortran compiler).
|
||||
* Fixed placement of the `-fopenmp` flag and libsuffix in the
|
||||
generated pkgconfig file.
|
||||
* Improved the `CMakeConfig` file generated by the Makefile build.
|
||||
* Fixed const-correctness of `cblas_?geadd` in `cblas.h`.
|
||||
* Fixed a potential inaccuracy in multithreaded BLAS3 calls.
|
||||
* Fixed empty implementations of `get`/`set_affinity` that print a
|
||||
warning in OpenMP builds.
|
||||
* Fixed function signatures for TRTRS in the converted C version of
|
||||
LAPACK.
|
||||
* Fixed omission of several single-precision LAPACK symbols in the
|
||||
shared library.
|
||||
* Improved build instructions for the provided "pybench" benchmarks.
|
||||
* Improved documentation, including descriptions of environment
|
||||
variables that affect build and runtime behavior.
|
||||
* Added a separate "make install_tests" target for use with
|
||||
cross-compilations.
|
||||
* Integrated improvements and corrections from Reference-LAPACK:
|
||||
- removed a comparison in LAPACKE `?tpmqrt` that is always false.
|
||||
- fixed the leading dimension for B in tests for GGEV.
|
||||
- replaced the `?LARFT` functions with a recursive implementation.
|
||||
arm64:
|
||||
* Fixed a long-standing bug in the (generic) `c`/`zgemm_beta` kernel
|
||||
that could lead to reads and writes outside the array bounds in some
|
||||
circumstances.
|
||||
* Rewrote cpu autodetection to scan all cores and return the highest
|
||||
performing type.
|
||||
* Improved the DGEMM performance for SVE targets and small matrix sizes.
|
||||
* improved dimension criteria for forwarding from `GEMM` to `GEMV`
|
||||
kernels.
|
||||
* Added SVE kernels for `ROT` and `SWAP`.
|
||||
* Improved SVE kernels for `SGEMV` and `DGEMV` on `A64FX` and
|
||||
`NEOVERSEV1`.
|
||||
* Fixed NRM2 implementations for generic SVE targets and the Neoverse N2.
|
||||
x86_64:
|
||||
* Fixed a wrong storage size in the SBGEMV kernel for Cooper Lake.
|
||||
* Added cpu autodetection for Intel Granite Rapids.
|
||||
* Added cpu autodetection for AMD Ryzen 5 series.
|
||||
* Added optimized `SOMATCOPY_CT` for AVX-capable targets.
|
||||
* fixed the fallback implementation of `GEMM3M` in GENERIC builds.
|
||||
Power:
|
||||
* Fixed multithreaded `SBGEMM`.
|
||||
* Fixed a CMake build problem on POWER10.
|
||||
* Improved the performance of SGEMV.
|
||||
* Added vectorized implementations of `SBGEMV` and support for
|
||||
forwarding 1xN `SBGEMM` to them.
|
||||
* Fixed illegal instructions and potential memory overflow in SGEMM
|
||||
on PPCG4.
|
||||
* Fixed handling of NaN and Inf arguments in `SSCAL` and `DSCAL` on
|
||||
PPC440,G4 and 970.
|
||||
* Added improved `CGEMM` and `ZGEMM` kernels for POWER10.
|
||||
Riscv64:
|
||||
* Removed thread yielding overhead caused by `sched_yield`.
|
||||
* Replaced some non-standard intrinsics with their official names.
|
||||
* Fixed and sped up the implementations of `CGEMM`/`ZGEMM` `TCOPY`
|
||||
for vector lengths 128 and 256.
|
||||
* Improved the performance of `SNRM2`/`DNRM2` for RVV1.0 targets.
|
||||
* Added optimized `?OMATCOPY_CN` kernels for RVV1.0 targets.
|
||||
- Add test package.
|
||||
- Add flags: `-Wa,--noexecstack -Wl,-z,noexecstack` to make sure
|
||||
stack is not executable. This works around problems in assembler
|
||||
code for z.
|
||||
- Make stack of empty cpuid.S non-executable as well.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Mar 5 14:17:26 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Set gcc versions for ppc64le (bsc#1239702)
|
||||
* on SLE-15-SP6: v13
|
||||
* on SLE-15-SP7: v14
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Feb 3 14:43:29 UTC 2025 - Andreas Schwab <schwab@suse.de>
|
||||
|
||||
- Disable LTO on riscv64 due to GCC#110812
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Jan 2 15:15:51 UTC 2025 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 0.3.28 (jsc#PED-9676):
|
||||
* General:
|
||||
+ Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
|
||||
for allocating huge memory pages as buffers on suitable systems.
|
||||
+ Changed the unfinished implementation of `GEMM3M` for the generic
|
||||
target on all architectures to at least forward to regular GEMM.
|
||||
+ Improved multithreaded `GEMM` performance for large non-skinny
|
||||
matrices.
|
||||
+ Improved BLAS3 performance on larger multicore systems through
|
||||
improved parallelism.
|
||||
+ Improved performance of the initial memory allocation by reducing
|
||||
locking overhead.
|
||||
+ Improved performance of `GBMV` at small problem sizes by introducing
|
||||
a size barrier for the switch to multithreading.
|
||||
+ Added an implementation of the `CBLAS_GEMM_BATCH` extension.
|
||||
+ Fixed corner cases involving the handling of NAN and INFINITY
|
||||
arguments in `?SCAL` on all architectures.
|
||||
+ Fixed NAN handling and potential accuracy issues in compilations
|
||||
with Intel ICX by supplying a suitable fp-model option by default.
|
||||
+ It is now possible to register a callback function that replaces
|
||||
the built-in support for multithreading with an external backend
|
||||
like TBB (`openblas_set_threads_callback_function`).
|
||||
+ Fixed potential duplication of suffixes in shared library naming.
|
||||
+ Improved C compiler detection by the build system to tolerate
|
||||
more naming variants for gcc builds.
|
||||
+ Fixed an unnecessary dependency of the utest on CBLAS.
|
||||
+ Fixed spurious error reports from the BLAS extensions `utest`.
|
||||
+ Fixed unwanted invocation of the `GEMM3M` tests in cross-
|
||||
compilation.
|
||||
+ Fixed a flaw in the makefile build that could lead to the
|
||||
pkgconfig file containing an entry of `UNKNOWN` for the target
|
||||
cpu after installing.
|
||||
+ Integrated fixes from the Reference-LAPACK project:
|
||||
- Fixed uninitialized variables in the LAPACK tests for `?QP3RK`.
|
||||
- Fixed potential bounds error in `?UNHR_COL`/`?ORHR_COL`.
|
||||
- Fixed potential infinite loop in the LAPACK testsuite.
|
||||
- Make the variable type used for hidden length arguments
|
||||
configurable.
|
||||
+ Fixed `SYTRD` workspace computation and various typos.
|
||||
+ Prevent compiler use of FMA that could increase numerical
|
||||
error in `?GEEVX`.
|
||||
* x86-64:
|
||||
+ Fixed a potential thread buffer overrun in `SBSTOBF16` on small
|
||||
systems.
|
||||
+ Fixed an accuracy issue in `ZSCAL` introduced in 0.3.26.
|
||||
+ Added support for Intel Emerald Rapids and Meteor Lake CPUs.
|
||||
+ Added autodetection support for the Zhaoxin KX-7000 CPU.
|
||||
+ Fixed autodetection of Intel Prescott (probably broken
|
||||
since 0.3.19).
|
||||
+ Fixed compilation of the converter-generated C versions
|
||||
of the LAPACK sources with gcc-14.
|
||||
+ Added support for supplying the L2 cache size via an
|
||||
environment variable (`OPENBLAS_L2_SIZE`) in case it is not
|
||||
correctly reported (as in some VM configurations).
|
||||
+ Improved the error message shown when thread creation fails
|
||||
on startup.
|
||||
* arm64:
|
||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
|
||||
1xN or Mx1 matrix to the corresponding `GEMV` kernel.
|
||||
+ Added optimized `SGEMV` and `DGEMV` kernels for A64FX.
|
||||
+ Added optimized SVE kernels for small-matrix `GEMM`.
|
||||
+ Added A64FX to the CPU list for DYNAMIC_ARCH.
|
||||
+ Fixed building with support for CPU affinity.
|
||||
+ Worked around accuracy problems with `C/ZNRM2` on NeoverseN1
|
||||
targets.
|
||||
+ Improved GEMM performance on Neoverse V1.
|
||||
+ Fixed compilation for `NEOVERSEN2` with older compilers.
|
||||
+ Fixed potential miscompilation of the SVE `SDOT` and `DDOT`
|
||||
kernels.
|
||||
+ Fixed potential miscompilation of the non-SVE `CDOT` and
|
||||
`ZDOT` kernels.
|
||||
+ Fixed a potential overflow when using very large user-defined
|
||||
`BUFFERSIZE`.
|
||||
* Power:
|
||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a 1xN
|
||||
or Mx1 matrix to the corresponding `GEMV` kernel.
|
||||
+ Significantly improved performance of `SBGEMM` on POWER10.
|
||||
+ Fixed compilation with OpenMP and the XLF compiler.
|
||||
+ Fixed building of parts of the LAPACK testsuite with XLF.
|
||||
+ Fixed CSWAP/ZSWAP on big-endian POWER10 targets.
|
||||
+ Fixed a performance regression in SAXPY on POWER10 with OpenXL.
|
||||
+ Fixed a potential overflow when using very large user-defined
|
||||
`BUFFERSIZE`.
|
||||
+ Fixed an accuracy issue in the POWER6 kernels for `GEMM` and
|
||||
`GEMV`.
|
||||
* RISCV64:
|
||||
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
|
||||
1xN or Mx1 matrix to the corresponding GEMV kernel.
|
||||
+ Added `DYNAMIC_ARCH` support (comprising `GENERIC_RISCV64` and
|
||||
the two RVV 1.0 targets with vector length of 128 and 256).
|
||||
+ Worked around the `ZVL128B` kernels for `AXPBY` mishandling the
|
||||
special case of zero Y increment.
|
||||
- Obsoleted: no-static.patch.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Jul 29 09:21:41 UTC 2024 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Duplicate all options passed to `make` also to `make install`:
|
||||
The openblas build output suggests this: 'Note that any flags
|
||||
passed to make during build should also be passed to make install
|
||||
to circumvent any install errors'.
|
||||
This also makes sure that the minimum CPU requirement set in
|
||||
the pkgconfig file is the same one as used for building.
|
||||
This helps to maintain a reproducible build (boo#1228177).
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Jun 13 07:32:23 UTC 2024 - Andreas Schwab <schwab@suse.de>
|
||||
|
||||
- no-static.patch: do not link statically
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Sun Jun 9 07:07:51 UTC 2024 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 0.3.27 (boo#1225869):
|
||||
General:
|
||||
* Added initial (generic) support for the `CSKY` architecture.
|
||||
* Capped the maximum number of threads used in `GEMM`, `GETRF`
|
||||
and `POTRF` to avoid creating underutilized or idle threads.
|
||||
* Sped up multithreaded `POTRF` on all platforms.
|
||||
* Added extension `openblas_set_num_threads_local()` that returns
|
||||
the previous thread count.
|
||||
* Re-evaluated the `SGEMV` and `DGEMV` load thresholds to avoid
|
||||
activating multithreading for too small workloads.
|
||||
* Improved the fallback code used when the precompiled number of
|
||||
threads is exceeded, and made it callable multiple times
|
||||
during the lifetime of an instance.
|
||||
* Added CBLAS interfaces for the BLAS extensions `?AMIN`,`?AMAX`,
|
||||
`CAXPYC` and `ZAXPYC`.
|
||||
* Fixed a potential buffer overflow in the interface to the
|
||||
`GEMMT` kernels.
|
||||
* Fixed use of incompatible pointer types in `GEMMT` and
|
||||
`C`/`ZAXPBY` as flagged by GCC-14.
|
||||
* Fixed unwanted case sensitivity of the character parameters in
|
||||
`?TRTRS` sped up the OpenMP thread management code.
|
||||
* Fixed sizing of logical variables in `INTERFACE64` builds of
|
||||
the C version of LAPACK.
|
||||
* Fixed inclusion of new LAPACK and LAPACKE functions from
|
||||
LAPACK 3.11 in the shared library.
|
||||
* Modified the error thresholds for `SGS`/`DGS` functions in
|
||||
the LAPACK testsuite to suppress spurious errors.
|
||||
* Added support for calling ?NRM2 with a negative increment value
|
||||
on all architectures.
|
||||
* Fixed handling of the `OPENBLAS_LOOPS` variable in several
|
||||
benchmarks.
|
||||
* Integrated fixes from the Reference-LAPACK project:
|
||||
Increased accuracy in `C`/`ZLARFGP` (Reference-LAPACK PR 981).
|
||||
x86:
|
||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
||||
x86-64:
|
||||
* Removed all instances of `sched_yield()` on Linux and BSD.
|
||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
||||
* Added compiler checks for `AVX512BF16` compatibility.
|
||||
* Fixed cpu handling fallbacks for Sapphire Rapids with disabled
|
||||
AVX2 in `DYNAMIC_ARCH` mode.
|
||||
* Fixed extensions `SCSUM` and `DZSUM`.
|
||||
* Improved `GEMM` performance for ZEN targets.
|
||||
arm64:
|
||||
* Added initial support for the Cortex-A76 cpu.
|
||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
||||
* Fixed default compiler options for gcc (-march and -mtune).
|
||||
* Added support for the NeoverseV2 cpu in `DYNAMIC_ARCH` builds.
|
||||
* Corrected `SCSUM` kernels (erroneously duplicating `SCASUM`
|
||||
behaviour).
|
||||
* Added SVE-enabled kernels for `CSUM`/`ZSUM`.
|
||||
* Worked around an inaccuracy in the `NRM2` kernels for NeoverseN1.
|
||||
power:
|
||||
* Improved performance of `SGEMM` on POWER8/9/10.
|
||||
* Improved performance of `DGEMM` on POWER10.
|
||||
zarch:
|
||||
* Fixed handling of `NaN` and `Inf` arguments in `ZSCAL`.
|
||||
* Fixed calculation of `?SUM` on Z13.
|
||||
- LIBNAMESUFFIX semantics have changed: no separator will be added.
|
||||
Adjusted in spec file.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Feb 15 08:27:33 UTC 2024 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Remove DYNAMIC_LIST for aarch64 for older gcc versions: This has
|
||||
been fixed upstream.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Jan 17 08:47:55 UTC 2024 - Egbert Eich <eich@suse.com>
|
||||
|
||||
- Update to version 0.3.26:
|
||||
* General:
|
||||
- Added type declarations for complex variables to the
|
||||
MSVC-specific parts of the LAPACK header.
|
||||
- Significantly sped up `?GESV` for small problem sizes by
|
||||
introducing a lower bound for multithreading.
|
||||
- Imported additions and corrections from the Reference-LAPACK
|
||||
project:
|
||||
+ Added new LAPACK functions for truncated `QR` with pivoting
|
||||
(Reference-LAPACK PRs 891&941).
|
||||
+ Handle miscalculation of minimum work array size in corner
|
||||
cases (Reference-LAPACK PR 942).
|
||||
+ Fixed use of uninitialized variables in `?GEDMD` and
|
||||
improved inline documentation.
|
||||
+ Fixed use of uninitialized variables (and consequential
|
||||
failures) in `?BBCSD`.
|
||||
+ Added tests for the recently introduced Dynamic Mode
|
||||
Decomposition functions.
|
||||
+ Fixed several memory leaks in the LAPACK testsuite.
|
||||
* x86-64:
|
||||
- Fixed computation of `CASUM` on SkylakeX and newer targets in
|
||||
the special case that AVX512 is not supported by the compiler
|
||||
or operating environment.
|
||||
- Fixed potential undefined behaviour in the `CASUM`/`ZASUM`
|
||||
kernels for AVX512 targets.
|
||||
- worked around a problem in the pre-AVX kernels for `GEMV`
|
||||
* arm64:
|
||||
- Sped up `SGEMM` and `DGEMM` on Neoverse V1 and N1.
|
||||
- Sped up `?DOT` on SVE-capable targets.
|
||||
- Reduced the number of targets in `DYNAMIC_ARCH` builds by
|
||||
eliminating functionally equivalent ones.
|
||||
* POWER:
|
||||
- Improved the SGEMM kernel for POWER10.
|
||||
- Fixed compilation with (very) old versions of gcc.
|
||||
- Added autodetection of the POWERPC 7400 subtype.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Wed Dec 20 12:02:55 UTC 2023 - Giacomo Comes <gcomes.obs@gmail.com>
|
||||
|
||||
@@ -360,188 +19,44 @@ Wed Nov 29 05:43:18 UTC 2023 - Atri Bhattacharya <badshah400@gmail.com>
|
||||
thread count
|
||||
- improved the code to add supplementary thread buffers in
|
||||
case of overflow
|
||||
- fixed a potential division by zero in `?ROTG`
|
||||
- improved the `?MATCOPY` functions to accept zero-sized rows or
|
||||
- fixed a potential division by zero in ?ROTG
|
||||
- improved the ?MATCOPY functions to accept zero-sized rows or
|
||||
columns
|
||||
- corrected empty prototypes in function declarations
|
||||
- cleaned up unused declarations in the f2c-converted versions
|
||||
of the LAPACK sources
|
||||
- fixed compilation with the Cray CCE Compiler suite
|
||||
- improved link line rewriting to avoid mixed libgomp/libomp
|
||||
builds with clang&gfortran
|
||||
- worked around OPENMP builds with LLVM14's libomp hanging on
|
||||
FreeBSD
|
||||
- improved the Makefiles to require less option duplication on
|
||||
"make install"
|
||||
- imported the following changes from the upcoming release
|
||||
3.12 of Reference-LAPACK: LAPACK PR 900, LAPACK PR 904,
|
||||
LAPACK PR 907, LAPACK PR 909, LAPACK PR 926, LAPACK PR 927,
|
||||
LAPACK PR 928 & 930
|
||||
* x86-64:
|
||||
- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4
|
||||
cpus
|
||||
- fixed capability-based fallback selection for unknown cpus
|
||||
in `DYNAMIC_ARCH`
|
||||
- added AVX512 optimizations for `?ASUM` on Intel Sapphire Rapids and
|
||||
in DYNAMIC_ARCH
|
||||
- added AVX512 optimizations for ?ASUM on Sapphire Rapids and
|
||||
Cooper Lake
|
||||
* ARM64:
|
||||
- fixed building on Apple with homebrew gcc
|
||||
- fixed building with XCODE 15
|
||||
- fixed building on A64FX and Cortex A710/X1/X2
|
||||
- increased the default buffer size for recent arm server cpus
|
||||
- increased the default buffer size for recent ARM server cpus
|
||||
* POWER:
|
||||
- added support for `DYNAMIC_ARCH` builds with clang
|
||||
- fixed union declaration in the `BFLOAT16` test case
|
||||
- Changes in version 0.3.24
|
||||
* General:
|
||||
- Declared the arguments of `cblas_xerbla` as `const`
|
||||
(in accordance with the reference implementation
|
||||
and others, the previous discrepancy appears to have dated
|
||||
back to GotoBLAS)
|
||||
- fixed the implementation of `?GEMMT` that was added in 0.3.23
|
||||
- made cpu-specific `SWITCH_RATIO` parameters for GEMM
|
||||
available to `DYNAMIC_ARCH` builds
|
||||
- fixed missing `SSYCONVF` function in the shared library
|
||||
- fixed parallel build logic used with gmake
|
||||
- fixed several issues with the handling of runtime limits on
|
||||
the number of OPENMP threads
|
||||
- corrected the error code returned by `SGEADD`/`DGEADD` when
|
||||
LDA is too small
|
||||
- corrected the error code returned by `IMATCOPY` when LDB
|
||||
is too small
|
||||
- updated `?NRM2` to support negative increment values (as
|
||||
introduced in release 3.10.0 of the Reference BLAS)
|
||||
- updated `?ROTG` to use the safe scaling algorithm introduced
|
||||
in release 3.10.0 of the Reference BLAS
|
||||
- fixed OpenMP builds with CLANG for the case where libomp is
|
||||
not in a standard location
|
||||
- fixed a potential overwrite of unrelated memory during
|
||||
thread initialisation on startup
|
||||
- fixed a potential integer overflow in the multithreading
|
||||
threshold for `?SYMM`/`?SYRK`
|
||||
- fixed build of the LAPACKE interfaces for the LAPACK 3.11.0
|
||||
`?TRSYL` functions added in 0.3.22
|
||||
- applied additions and corrections from the development
|
||||
branch of Reference-LAPACK:
|
||||
- fixed actual arguments passed to a number of LAPACK
|
||||
functions (from Reference-LAPACK PR 885)
|
||||
- fixed workspace query results in LAPACK `?SYTRF`/`?TRECV3`
|
||||
(from Reference-LAPACK PR 883)
|
||||
- fixed derivation of the UPLO parameter in `LAPACKE_?larfb`
|
||||
(from Reference-LAPACK PR 878)
|
||||
- fixed a crash in LAPACK `?GELSDD` on `NRHS=0` (from
|
||||
Reference-LAPACK PR 876)
|
||||
- added new LAPACK utility functions `CRSCL` and `ZRSCL`
|
||||
(from Reference-LAPACK PR 839)
|
||||
- corrected the order of eigenvalues for 2x2 matrices in
|
||||
`?STEMR` (Reference-LAPACK PR 867)
|
||||
- removed spurious reference to OpenMP variables outside
|
||||
OpenMP contexts (Reference-LAPACK PR 860)
|
||||
- updated file comments on use of `LAMBDA` variable in
|
||||
LAPACK (Reference-LAPACK PR 852)
|
||||
- fixed documentation of LAPACK `SLASD0`/`DLASD0`
|
||||
(Reference-LAPACK PR 855)
|
||||
- fixed confusing use of "minor" in LAPACK documentation
|
||||
(Reference-LAPACK PR 849)
|
||||
- added new LAPACK functions ?GEDMD for dynamic mode
|
||||
decomposition (Reference-LAPACK PR 736)
|
||||
- fixed potential stack overflows in the `EIG` part of the
|
||||
LAPACK testsuite (Reference-LAPACK PR 854)
|
||||
- applied small improvements to the variants of
|
||||
Cholesky and QR functions (Reference-LAPACK PR 847)
|
||||
- removed unused variables from LAPACK `?BDSQR`
|
||||
(Reference-LAPACK PR 832)
|
||||
- fixed a potential crash on allocation failure in LAPACKE
|
||||
`SGEESX`/`DGEESX` (Reference-LAPACK PR 836)
|
||||
- added a quick return from `SLARUV`/`DLARUV` for N < 1
|
||||
(Reference-LAPACK PR 837)
|
||||
- updated function descriptions in LAPACK `?GEGS`/`?GEGV`
|
||||
(Reference-LAPACK PR 831)
|
||||
- improved algorithm description in `?GELSY`
|
||||
(Reference-LAPACK PR 833)
|
||||
- fixed scaling in LAPACK `STGSNA`/`DTGSNA`
|
||||
(Reference-LAPACK PR 830)
|
||||
- fixed crash in `LAPACKE_?geqrt` with row-major data
|
||||
(Reference-LAPACK PR 768)
|
||||
- added LAPACKE interfaces for `C/ZUNHR_COL` and
|
||||
`S/DORHR_COL` (Reference-LAPACK PR 827)
|
||||
- added error exit tests for `SYSV`/`SYTD2`/`GEHD2` to
|
||||
the testsuite (Reference-LAPACK PR 795)
|
||||
- fixed typos in LAPACK source and comments
|
||||
(Reference-LAPACK PRs 809,811,812,814,820)
|
||||
- adopt refactored `?GEBAL` implementation
|
||||
(Reference-LAPACK PR 808)
|
||||
* x86_64:
|
||||
- added cpu model autodetection for Intel Alder Lake N
|
||||
- added activation of the AMX tile to the Sapphire Rapids
|
||||
`SBGEMM` kernel
|
||||
- worked around miscompilations of GEMV/SYMV kernels by
|
||||
gcc's tree-vectorizer
|
||||
- fixed runtime detection of Cooperlake and Sapphire Rapids
|
||||
in `DYNAMIC_ARCH`
|
||||
- fixed feature-based cputype fallback in `DYNAMIC_ARCH`
|
||||
- corrected `ZAXPY` result on old pre-AVX hardware for the
|
||||
`INCX=0` case
|
||||
- fixed a potential use of uninitialized variables in ZTRSM
|
||||
* ARMV8:
|
||||
- implemented SWITCH_RATIO parameter for improved GEMM
|
||||
performance on Neoverse
|
||||
- activated SVE SGEMM and DGEMM kernels for Neoverse V1
|
||||
- improved performance of the SVE CGEMM and ZGEMM kernels
|
||||
on Neoverse V1
|
||||
- improved kernel selection for the ARMV8SVE target and added
|
||||
it to `DYNAMIC_ARCH`
|
||||
- fixed runtime check for SVE availability in `DYNAMIC_ARCH`
|
||||
builds to take OS or container restrictions into account
|
||||
- fixed a potential use of uninitialized variables in ZTRSM
|
||||
* POWER:
|
||||
- fixed compiler warnings in the POWER10 SBGEMM kernel
|
||||
- Changes in version 0.3.23
|
||||
* General:
|
||||
- fixed a serious regression in `GETRF`/`GETF2` and
|
||||
`ZGETRF`/`ZGETF2` where subnormal but nonzero data elements
|
||||
triggered the singularity flag
|
||||
- fixed a long-standing bug in `CSPR`/`ZSPR` in single-threaded
|
||||
operation
|
||||
- for cases where elements of the X vector are real numbers (or
|
||||
complex with only the real part zero)
|
||||
* x86_64:
|
||||
- added further CPUID values for Intel Raptor Lake
|
||||
- Changes in version 0.3.22
|
||||
* General:
|
||||
- Updated the included LAPACK to Reference-LAPACK release 3.11.0
|
||||
plus post-release corrections and improvements
|
||||
- Added a threshold for multithreading in `SYMM`, `SYMV` and
|
||||
`SYR2K`
|
||||
- Increased the threshold for multithreading in `SYRK`
|
||||
- OpenBLAS no longer decreases the global `OMP_NUM_THREADS`
|
||||
when it exceeds the maximum thread count the library was
|
||||
compiled for.
|
||||
- fixed `?GETF2` potentially returning `NaN` with tiny matrix
|
||||
elements
|
||||
- fixed `openblas_set_num_threads` to work in `USE_OPENMP`
|
||||
builds.
|
||||
- fixed cpu core counting in `USE_OPENMP` builds returning the
|
||||
number of OMP "places" rather than cores
|
||||
- fixed stride calculation in the optimized small-matrix path of
|
||||
complex `SYR`
|
||||
- fixed building of Reference-LAPACK with recent gfortran
|
||||
- added new environment variable `OPENBLAS_DEFAULT_NUM_THREADS`
|
||||
- added a GEMV-based implementation of `GEMMT`
|
||||
* x86_64:
|
||||
- added autodetection of Intel Raptor Lake cpu models
|
||||
- added SSCAL microkernels for Haswell and newer targets
|
||||
- improved the performance of the Haswell DSCAL microkernel
|
||||
- added CSCAL and ZSCAL microkernels for SkylakeX targets
|
||||
- fixed detection of gfortran and Cray CCE compilers
|
||||
- fixed runtime selection of COOPERLAKE in `DYNAMIC_ARCH` builds
|
||||
- worked around gcc/llvm using risky FMA operations in
|
||||
CSCAL/ZSCAL
|
||||
* ARMV8:
|
||||
- fixed cross-compilation to CortexA53 with CMAKE
|
||||
- fixed compilation with CMAKE and "Arm Compiler for Linux 22.1"
|
||||
- added cpu autodetection for Cortex X3 and A715
|
||||
- fixed conditional compilation of SVE-capable targets in
|
||||
`DYNAMIC_ARCH`
|
||||
- sped up SVE kernels by removing unnecessary prefetches
|
||||
- improved the GEMM performance of Neoverse V1
|
||||
- added SVE kernels for SDOT and DDOT
|
||||
- added an SBGEMM kernel for Neoverse N2
|
||||
- improved cpu-specific compiler option selection for
|
||||
Neoverse cpus
|
||||
- added support for setting `CONSISTENT_FPCSR`
|
||||
- fixed building with the IBM xlf 16.1.1 compiler
|
||||
- fixed building with IBM XL C
|
||||
- added support for DYNAMIC_ARCH builds with clang
|
||||
- fixed union declaration in the BFLOAT16 test case
|
||||
- enable optimizations for the AIX assembler on POWER10
|
||||
* LOONGARCH64:
|
||||
- added an optimized SGEMV kernel
|
||||
- added an optimized DTRSM kernel
|
||||
- Minor rebase of openblas-ppc64be_up2_p8.patch to apply cleanly.
|
||||
- Drop upstreamed patches:
|
||||
* Use-blasint-for-INTERFACE64-compatibility.patch
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# spec file for package openblas
|
||||
# spec file
|
||||
#
|
||||
# Copyright (c) 2025 SUSE LLC
|
||||
# Copyright (c) 2023 SUSE LLC
|
||||
#
|
||||
# All modifications and additions to the file contributed by third parties
|
||||
# remain the property of their copyright owners, unless otherwise agreed
|
||||
@@ -18,9 +18,8 @@
|
||||
|
||||
%global flavor @BUILD_FLAVOR@%{nil}
|
||||
|
||||
%undefine sha1
|
||||
%define _vers 0_3_29
|
||||
%define vers 0.3.29
|
||||
%define _vers 0_3_25
|
||||
%define vers 0.3.25
|
||||
%define so_v 0
|
||||
%define pname openblas
|
||||
|
||||
@@ -167,12 +166,6 @@ ExclusiveArch: do_not_build
|
||||
%ifarch ppc64le
|
||||
%if 0%{?c_f_ver} > 9
|
||||
%else
|
||||
%if 0%{?sle_version} == 150700
|
||||
%define cc_v 14
|
||||
%endif
|
||||
%if 0%{?sle_version} == 150600
|
||||
%define cc_v 13
|
||||
%endif
|
||||
%if 0%{?sle_version} == 150500
|
||||
%define cc_v 12
|
||||
%endif
|
||||
@@ -198,7 +191,6 @@ ExclusiveArch: do_not_build
|
||||
%define p_prefix %_prefix
|
||||
%define p_includedir %_includedir/%pname
|
||||
%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}}
|
||||
%define p_testdir %_libexecdir/openblas%{?flavor:-%{flavor}}/tests
|
||||
%define p_cmakedir %{p_libdir}/cmake/%{pname}
|
||||
%define num_threads 64
|
||||
|
||||
@@ -215,19 +207,12 @@ ExclusiveArch: do_not_build
|
||||
%define p_prefix %hpc_prefix
|
||||
%define p_includedir %hpc_includedir
|
||||
%define p_libdir %hpc_libdir
|
||||
%define p_testdir %hpc_prefix/tests
|
||||
%define p_cmakedir %{hpc_libdir}/cmake
|
||||
%define num_threads 256
|
||||
|
||||
%{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}}
|
||||
%endif
|
||||
|
||||
%if 0%{?sha1:1}
|
||||
%define v_string %{sha1}
|
||||
%else
|
||||
%define v_string v%{version}
|
||||
%endif
|
||||
|
||||
Name: %{package_name}
|
||||
Version: %vers
|
||||
Release: 0
|
||||
@@ -235,15 +220,14 @@ Summary: An optimized BLAS library based on GotoBLAS2
|
||||
License: BSD-3-Clause
|
||||
Group: Productivity/Scientific/Math
|
||||
URL: http://www.openblas.net
|
||||
Source0: https://github.com/xianyi/OpenBLAS/archive/%{v_string}.tar.gz#/OpenBLAS-%{version}%{?sha1:_%{sha1}}.tar.gz
|
||||
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz
|
||||
Source1: README.SUSE
|
||||
Source2: README.HPC.SUSE
|
||||
Source3: openblas_tests.sh.in
|
||||
Source4: openblas.rpmlintrc
|
||||
Source3: openblas.rpmlintrc
|
||||
Patch101: Link-library-with-z-noexecstack.patch
|
||||
# PATCH port
|
||||
Patch102: Handle-s390-correctly.patch
|
||||
Patch103: openblas-ppc64be_up2_p8.patch
|
||||
Patch104: Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
|
||||
|
||||
#BuildRequires: cmake
|
||||
BuildRequires: memory-constraints
|
||||
@@ -254,7 +238,7 @@ BuildRequires: gcc%{?cc_v}-fortran
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: update-alternatives
|
||||
Requires(post): update-alternatives
|
||||
Requires(preun): update-alternatives
|
||||
Requires(preun):update-alternatives
|
||||
%else
|
||||
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
|
||||
BuildRequires: lua-lmod
|
||||
@@ -271,7 +255,7 @@ Group: System/Libraries
|
||||
%if %{without hpc}
|
||||
Requires(post): update-alternatives
|
||||
Requires(post): coreutils
|
||||
Requires(preun): update-alternatives
|
||||
Requires(preun):update-alternatives
|
||||
%if "%flavor" == "serial"
|
||||
Obsoletes: lib%{pname}%{so_v} < %{version}
|
||||
Provides: lib%{pname}%{so_v} = %{version}
|
||||
@@ -342,30 +326,15 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
This package contains headers for OpenBLAS.
|
||||
|
||||
%package tests
|
||||
Summary: Unit Tests for openblas library
|
||||
Group: Development/Libraries/C and C++
|
||||
|
||||
%description tests
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
This package contains test binaries.
|
||||
|
||||
%prep
|
||||
|
||||
%setup -q -n OpenBLAS-%{?sha1:%{sha1}}%{!?sha1:%{version}}
|
||||
%setup -q -n OpenBLAS-%{version}
|
||||
%autopatch -p1
|
||||
%ifarch s390
|
||||
sed -i -e "s@m32@m31@" Makefile.system
|
||||
%endif
|
||||
sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \
|
||||
test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile
|
||||
grep -q .note.GNU-stack cpuid.S || echo '.section .note.GNU-stack,"",@progbits' >> cpuid.S
|
||||
# Disable sgemmt and dgemmt tests on ppc64le when using gcc13
|
||||
%if "%{?_arch}" == "ppc64le" && 0%{?gcc_version} == 13
|
||||
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_sgemmt.o *@@' utest/Makefile
|
||||
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_dgemmt.o *@@' utest/Makefile
|
||||
%endif
|
||||
|
||||
%if %{without hpc}
|
||||
cp %{SOURCE1} .
|
||||
@@ -401,11 +370,6 @@ EOF
|
||||
%define _lto_cflags %{nil}
|
||||
%endif
|
||||
|
||||
%ifarch riscv64
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110812
|
||||
%global _lto_cflags %{nil}
|
||||
%endif
|
||||
|
||||
%if %{with hpc}
|
||||
%hpc_debug
|
||||
%hpc_setup_compiler
|
||||
@@ -444,7 +408,7 @@ EOF
|
||||
%ifarch ppc64
|
||||
%global addopt -mvsx
|
||||
%endif
|
||||
%global addopt %{?addopt} -fno-strict-aliasing -Wa,--noexecstack -Wl,-z,noexecstack
|
||||
%global addopt %{?addopt} -fno-strict-aliasing
|
||||
|
||||
# Make serial, threaded and OpenMP versions
|
||||
|
||||
@@ -457,7 +421,7 @@ EOF
|
||||
# Do not use LIBNAMESUFFIX for new builds as it will not allow
|
||||
# the different flavors to be plugin replacements of each other
|
||||
%if 0%{?suse_version} <= 1500 && %{without hpc}
|
||||
%define libnamesuffix LIBNAMESUFFIX=_%{flavor}
|
||||
%define libnamesuffix LIBNAMESUFFIX=%flavor
|
||||
%endif
|
||||
make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
||||
%{?openblas_opt} \
|
||||
@@ -465,13 +429,12 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
||||
NUM_THREADS=%{num_threads} V=1 \
|
||||
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
||||
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
||||
OPENBLAS_BINARY_DIR=%{p_testdir} \
|
||||
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
||||
PREFIX=%{p_prefix} \
|
||||
%{?dynamic_list} \
|
||||
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
|
||||
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
|
||||
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}}
|
||||
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} CEXTRALIB=""}}
|
||||
|
||||
%install
|
||||
%if %{with hpc}
|
||||
@@ -481,28 +444,13 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
|
||||
# Install library and headers
|
||||
# Pass NUM_THREADS again, as it is not propagated from the build step
|
||||
# https://github.com/OpenMathLib/OpenBLAS/issues/4275
|
||||
mkdir -p %{buildroot}/%{p_testdir}
|
||||
%make_install install_tests %{?openblas_target} %{?build_flags} \
|
||||
%{?openblas_opt} \
|
||||
%make_install %{?build_flags} \
|
||||
NUM_THREADS=%{num_threads} \
|
||||
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
|
||||
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
|
||||
OPENBLAS_BINARY_DIR=%{p_testdir} \
|
||||
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
|
||||
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
|
||||
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
|
||||
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}} \
|
||||
%{?libnamesuffix} \
|
||||
PREFIX=%{p_prefix}
|
||||
sed -e 's#@FLAVOR@#%{flavor}#' \
|
||||
-e 's#@COMPILER@#%{?compiler_family:%compiler_family%{?hpc_gnu_dep_version:/%hpc_gnu_dep_version}}#' \
|
||||
< %{S:3} > %{buildroot}/%{p_testdir}/openblas_tests.sh
|
||||
chmod 0755 %{buildroot}/%{p_testdir}/openblas_tests.sh
|
||||
for i in %{buildroot}/%{p_testdir}/*; do
|
||||
case $i in
|
||||
*.dat|*in*) chmod 0644 $i;;
|
||||
*) chmod 0755 $i;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Delete info about OBS host cpu
|
||||
%ifarch %ix86 x86_64
|
||||
@@ -694,11 +642,6 @@ fi
|
||||
%{p_libdir}/pkgconfig
|
||||
%endif
|
||||
|
||||
%files tests
|
||||
%dir %{p_testdir}
|
||||
%dir %{dirname:%{p_testdir}}
|
||||
%{p_testdir}/*
|
||||
|
||||
%files devel-static
|
||||
%{p_libdir}/libopenblas*.a
|
||||
|
||||
|
@@ -1,92 +0,0 @@
|
||||
#!/bin/bash
#
# Driver for the installed OpenBLAS test programs.
#
# @FLAVOR@ and @COMPILER@ are placeholders; the package spec replaces them
# with sed when it installs this template as openblas_tests.sh.
FLAVOR=@FLAVOR@
COMPILER=@COMPILER@
|
||||
# Series 'test': the Fortran BLAS testers (levels 1 to 3).
#
# Every tester is looked up in ${dir}, which the caller must set before
# invoking this function.  A tester that exits non-zero is reported on stdout
# as "<name> failed"; the series always continues with the next tester.

# Run one tester that reads its parameters from ${dir}/<name>.dat, then print
# the summary file if it records a FATAL error.
#   $1 - tester name, e.g. "dblat2".  The summary file is expected in the
#        current directory under the upper-cased name, e.g. DBLAT2.SUMM.
run_dat_test() {
    local name=$1
    local summary="${name^^}.SUMM"

    "${dir}/${name}" < "${dir}/${name}.dat" || echo "${name} failed"
    grep -q FATAL "${summary}" && cat "${summary}" || true
}

series_test() {
    local t

    # Level 1 testers take no input file.
    for t in sblat1 dblat1 cblat1 zblat1; do
        "${dir}/${t}" || echo "${t} failed"
    done

    # Level 2.
    for t in sblat2 dblat2 cblat2 zblat2; do
        run_dat_test "${t}"
    done

    # Level 3.  test_sbgemm takes no input file; its stdout is captured as
    # the summary that is then scanned for FATAL.
    "${dir}/test_sbgemm" > SBBLAT3.SUMM || echo "test_sbgemm failed"
    grep -q FATAL SBBLAT3.SUMM && cat SBBLAT3.SUMM || true

    run_dat_test dblat3
    run_dat_test cblat3
    # The *_3m testers are optional: run them only when they are installed.
    if [ -x "${dir}/cblat3_3m" ]; then
        run_dat_test cblat3_3m
    fi
    run_dat_test zblat3
    if [ -x "${dir}/zblat3_3m" ]; then
        run_dat_test zblat3_3m
    fi
}
|
||||
|
||||
# Series 'ctest': the CBLAS interface testers.
#
# Binaries are named x<p>cblat<level> with <p> in {s,d,c,z}.  Levels 2 and 3
# read their parameters from ${dir}/<p>in<level>.  ${dir} must be set by the
# caller.  A tester that exits non-zero is reported on stdout as
# "<binary> failed"; the series always continues with the next tester.
series_ctest() {
    local p level

    # Level 1 testers take no input file.
    for p in s d c z; do
        "${dir}/x${p}cblat1" || echo "x${p}cblat1 failed"
    done

    for level in 2 3; do
        for p in s d c z; do
            "${dir}/x${p}cblat${level}" < "${dir}/${p}in${level}" \
                || echo "x${p}cblat${level} failed"
        done
    done

    # The 3m variants exist for the complex types only and are optional:
    # run them only when they are installed.
    for p in c z; do
        if [ -x "${dir}/x${p}cblat3_3m" ]; then
            "${dir}/x${p}cblat3_3m" < "${dir}/${p}in3_3m" \
                || echo "x${p}cblat3_3m failed"
        fi
    done
}
|
||||
|
||||
# Series 'utest': the OpenBLAS unit test binaries.
# Each binary is run from ${dir}; a non-zero exit is reported on stdout and
# the next binary is still run.
series_utest() {
    local t
    for t in openblas_utest openblas_utest_ext; do
        ${dir}/${t} || echo "${t} failed"
    done
}
|
||||
|
||||
|
||||
# Location of the test binaries for the non-HPC flavors.  The HPC branches
# below replace it once the environment module has been loaded.
dir=/usr/lib/openblas-${FLAVOR}/tests

# Translate the build flavor into two booleans that are later executed as
# the commands `true` / `false`:
#   THREADS - the library was built with pthreads
#   OMP     - the library was built with OpenMP
case $FLAVOR in
    serial)   export THREADS=false; export OMP=false ;;
    pthreads) export THREADS=true;  export OMP=false ;;
    openmp)   export THREADS=false; export OMP=true ;;
    gnu-hpc)
        module purge
        # NOTE(review): the spec substitutes @COMPILER@ with
        # "<family>[/<version>]", so "gnu/$COMPILER" may name the compiler
        # family twice - confirm against the installed module tree.
        module load "gnu/$COMPILER" openblas
        dir="${OPENBLAS_DIR}/bin"
        export THREADS=false; export OMP=true
        ;;
    gnu-hpc-pthreads)
        module purge
        # NOTE(review): this loads the same module name as the gnu-hpc
        # branch; confirm the pthreads build does not use a different one.
        module load "gnu/$COMPILER" openblas
        dir="${OPENBLAS_DIR}/bin"
        export THREADS=true; export OMP=false
        ;;
    *)
        # Both variables must hold a command name: an empty expansion would
        # be an empty command, which succeeds and selects the threaded paths.
        echo "openblas_tests.sh: unknown flavor '${FLAVOR}', assuming serial" >&2
        export THREADS=false; export OMP=false
        ;;
esac

# Pass 1: Fortran BLAS testers restricted to a single thread.
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 series_test

# Pass 2: repeat the Fortran BLAS testers with two threads, but only for a
# threaded build.
if $OMP || $THREADS; then
    # Remove the summaries left by pass 1 - presumably so that the FATAL
    # check in pass 2 only sees output from the second run.
    rm -f ?BLAT2.SUMM ?BLAT3.SUMM ?BLAT3_3M.SUMM
    if $OMP; then
        OMP_NUM_THREADS=2 series_test
    else
        OPENBLAS_NUM_THREADS=2 series_test
    fi
fi

# The CBLAS testers run once, with two threads requested through the
# variable that matches the threading model.
if $OMP; then
    OMP_NUM_THREADS=2 series_ctest
else
    OPENBLAS_NUM_THREADS=2 series_ctest
fi

series_utest
|
Reference in New Issue
Block a user