28 Commits

Author SHA256 Message Date
eacdebdbba Accepting request 1282433 from science
OBS-URL: https://build.opensuse.org/request/show/1282433
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=73
2025-06-04 18:27:19 +00:00
4acfca4a3d - For SLES16 target POWER9 instead of POWER8 which fixes the
issue with the reported sgemm testsuite failures.  [bsc#1239545]

OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=203
2025-06-03 16:53:11 +00:00
91e75280cc Accepting request 1266047 from science
OBS-URL: https://build.opensuse.org/request/show/1266047
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=72
2025-04-03 14:45:54 +00:00
Ana Guerrero
91e44b5cce - Disable and remove support for gnu-hpc build flavours (bsc#1239982)
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=201
2025-04-01 08:42:12 +00:00
a2e8b41e3f Accepting request 1253922 from science
- Disable sgemmt and dgemmt tests in the test suite on power
  when gcc-13 is used. It is known (bsc#1239134) that some
  of these tests fail on this architecture when OpenBLAS
  is being built with the said compiler version ever since
  these tests were introduced.
  This will essentially restore the situation of the
  version prior to the addition of these tests (0.3.26) where
  one was unaware of the problem.
  This is only a temporary measure and will be removed once
  the issue with gcc-13 has been resolved.
- Remove: Link-library-with-z-noexecstack.patch
  since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
  now.

- Set gcc versions for ppc64le (bsc#1239702) (forwarded request 1253917 from eeich)

OBS-URL: https://build.opensuse.org/request/show/1253922
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=71
2025-03-18 16:37:29 +00:00
af4e0eea8c - Disable sgemmt and dgemmt tests in the test suite on power
when gcc-13 is used. It is known (bsc#1239134) that some
  of these tests fail on this architecture when OpenBLAS
  is being built with the said compiler version ever since
  these tests were introduced.
  This will essentially restore the situation of the
  version prior to the addition of these tests (0.3.26) where
  one was unaware of the problem.
  This is only a temporary measure and will be removed once
  the issue with gcc-13 has been resolved.
- Remove: Link-library-with-z-noexecstack.patch
  since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
  now.

- Set gcc versions for ppc64le (bsc#1239702)

OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=199
2025-03-17 19:36:04 +00:00
f9026b901c Accepting request 1253107 from science
- Use upstream patch for bsc#1239134 which is more friendly to the
  non-affected power9 and power10 sub-architectures:
  Replace:
  Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
  by:
  Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch (forwarded request 1253105 from eeich)

OBS-URL: https://build.opensuse.org/request/show/1253107
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=70
2025-03-15 15:14:59 +00:00
53c5fa41e3 - Use upstream patch for bsc#1239134 which is more friendly to the
non-affected power9 and power10 sub-architectures:
  Replace:
  Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
  by:
  Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch

OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=197
2025-03-14 14:36:31 +00:00
67bfc95cea Accepting request 1252226 from science
- Revert commit ba47c7f4f301aad100ed166de338b86e01da8465 to
  prevent failures on Power8 (bsc#1239134)
  * Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
- Add a script to run tests.
- Add bisect support. (forwarded request 1252221 from eeich)

OBS-URL: https://build.opensuse.org/request/show/1252226
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=69
2025-03-13 14:04:24 +00:00
7bfeaf67c4 - Revert commit ba47c7f4f301aad100ed166de338b86e01da8465 to
prevent failures on Power8 (bsc#1239134)
  * Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
- Add a script to run tests.
- Add bisect support.

OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=195
2025-03-12 07:20:47 +00:00
bca0df3804 Accepting request 1251078 from science
- Fix path to work with i586 (forwarded request 1251077 from eeich)

OBS-URL: https://build.opensuse.org/request/show/1251078
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=68
2025-03-07 15:39:24 +00:00
10476385d7 Accepting request 1251077 from home:eeich:branches:science
- Fix path to work with i586

OBS-URL: https://build.opensuse.org/request/show/1251077
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=193
2025-03-07 06:00:00 +00:00
14e568f52d Accepting request 1250765 from home:eeich:branches:science
- Fix permissions and path

OBS-URL: https://build.opensuse.org/request/show/1250765
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=192
2025-03-06 17:11:39 +00:00
8b6456cf75 Accepting request 1250625 from home:eeich:branches:science
- Add test package.
- Add flags: `-Wa,--noexecstack -Wl,-z,noexecstack` to make sure
  stack is not executable. This works around problems in assembler
  code for z.
- Make stack of empty cpuid.S non-executable as well.

OBS-URL: https://build.opensuse.org/request/show/1250625
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=191
2025-03-06 12:52:05 +00:00
a0be2a2e26 Accepting request 1250495 from home:eeich:branches:science
- Update to version 0.3.29 (jsc#PED-9676):
  General:
  * Fixed a potential NULL pointer dereference in multithreaded builds.
  * Added function aliases for `GEMMT` using its new name `GEMMTR`
    adopted by Reference-BLAS.
  * Fixed the behavior of the recently added `CBLAS_?GEMMT` functions
    with row-major data.
  * Improved thread scaling of multithreaded `SBGEMV`.
  * Improved thread scaling of multithreaded `TRTRI`.
  * Fixed compilation of the CBLAS testsuite with gcc14 (and no
    Fortran compiler).
  * Fixed placement of the `-fopenmp` flag and libsuffix in the
    generated pkgconfig file.
  * Improved the `CMakeConfig` file generated by the Makefile build.
  * Fixed const-correctness of `cblas_?geadd` in `cblas.h`.
  * Fixed a potential inaccuracy in multithreaded BLAS3 calls.
  * Fixed empty implementations of `get`/`set_affinity` that print a
    warning in OpenMP builds.
  * Fixed function signatures for TRTRS in the converted C version of
    LAPACK.
  * Fixed omission of several single-precision LAPACK symbols in the
    shared library.
  * Improved build instructions for the provided "pybench" benchmarks.
  * Improved documentation, including descriptions of environment
    variables that affect build and runtime behavior.
  * Added a separate "make install_tests" target for use with
    cross-compilations.
  * Integrated improvements and corrections from Reference-LAPACK:
    - removed a comparison in LAPACKE `?tpmqrt` that is always false.
    - fixed the leading dimension for B in tests for GGEV.

OBS-URL: https://build.opensuse.org/request/show/1250495
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=190
2025-03-05 20:05:34 +00:00
cf70d49130 - Update to version 0.3.28 (jsc#PED-9676):
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=189
2025-03-05 13:14:18 +00:00
da1902eb1b Accepting request 1242905 from science
Disable LTO on riscv64 due to GCC#110812

OBS-URL: https://build.opensuse.org/request/show/1242905
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=67
2025-02-04 17:10:38 +00:00
bcc3a19335 OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=187 2025-02-03 18:17:41 +00:00
616f244801 Accepting request 1242901 from openSUSE:Factory:RISCV
- Disable LTO on riscv64 due to gcc bug 110812

OBS-URL: https://build.opensuse.org/request/show/1242901
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=186
2025-02-03 18:16:49 +00:00
b5d088b8b5 Accepting request 1234592 from science
- Update to version 0.3.28 (jsc#PED-9676):
  * General:
    + Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
      for allocating huge memory pages as buffers on suitable systems.
    + Changed the unfinished implementation of `GEMM3M` for the generic
      target on all architectures to at least forward to regular GEMM.
    + Improved multithreaded `GEMM` performance for large non-skinny
      matrices.
    + Improved BLAS3 performance on larger multicore systems through
      improved parallelism.
    + Improved performance of the initial memory allocation by reducing
      locking overhead.
    + Improved performance of `GBMV` at small problem sizes by introducing
      a size barrier for the switch to multithreading.
    + Added an implementation of the `CBLAS_GEMM_BATCH` extension.
    + Fixed corner cases involving the handling of NAN and INFINITY
      arguments in `?SCAL` on all architectures.
    + Fixed NAN handling and potential accuracy issues in compilations
      with Intel ICX by supplying a suitable fp-model option by default.
    + It is now possible to register a callback function that replaces
      the built-in support for multithreading with an external backend
      like TBB (`openblas_set_threads_callback_function`).
    + Fixed potential duplication of suffixes in shared library naming.
    + Improved C compiler detection by the build system to tolerate
      more naming variants for gcc builds.
    + Fixed an unnecessary dependency of the utest on CBLAS.
    + Fixed spurious error reports from the BLAS extensions `utest`.
    + Fixed unwanted invocation of the `GEMM3M` tests in cross-
      compilation.
    + Fixed a flaw in the makefile build that could lead to the (forwarded request 1234589 from eeich)

OBS-URL: https://build.opensuse.org/request/show/1234592
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=66
2025-01-06 15:04:58 +00:00
caec615da5 Accepting request 1234589 from home:eeich:branches:science
- Update to version 0.3.28 (jsc#PED-9676):
  * General:
    + Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
      for allocating huge memory pages as buffers on suitable systems.
    + Changed the unfinished implementation of `GEMM3M` for the generic
      target on all architectures to at least forward to regular GEMM.
    + Improved multithreaded `GEMM` performance for large non-skinny
      matrices.
    + Improved BLAS3 performance on larger multicore systems through
      improved parallelism.
    + Improved performance of the initial memory allocation by reducing
      locking overhead.
    + Improved performance of `GBMV` at small problem sizes by introducing
      a size barrier for the switch to multithreading.
    + Added an implementation of the `CBLAS_GEMM_BATCH` extension.
    + Fixed corner cases involving the handling of NAN and INFINITY
      arguments in `?SCAL` on all architectures.
    + Fixed NAN handling and potential accuracy issues in compilations
      with Intel ICX by supplying a suitable fp-model option by default.
    + It is now possible to register a callback function that replaces
      the built-in support for multithreading with an external backend
      like TBB (`openblas_set_threads_callback_function`).
    + Fixed potential duplication of suffixes in shared library naming.
    + Improved C compiler detection by the build system to tolerate
      more naming variants for gcc builds.
    + Fixed an unnecessary dependency of the utest on CBLAS.
    + Fixed spurious error reports from the BLAS extensions `utest`.
    + Fixed unwanted invocation of the `GEMM3M` tests in cross-
      compilation.
    + Fixed a flaw in the makefile build that could lead to the

OBS-URL: https://build.opensuse.org/request/show/1234589
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=184
2025-01-02 16:50:32 +00:00
d2b04c7c42 Accepting request 1190851 from science
- Duplicate all options passed to `make` also to `make install`:
  The openblas build output suggests this: 'Note that any flags
  passed to make during build should also be passed to make install
  to circumvent any install errors'.
  This also makes sure that the minimum CPU requirement set in
  the pkgconfig file is the same one as used for building.
  This helps to maintain a reproducible build (boo#1228177). (forwarded request 1190850 from eeich)

OBS-URL: https://build.opensuse.org/request/show/1190851
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=65
2024-08-02 15:25:25 +00:00
c3464afaf7 Accepting request 1190850 from home:eeich:branches:science
- Duplicate all options passed to `make` also to `make install`:
  The openblas build output suggests this: 'Note that any flags
  passed to make during build should also be passed to make install
  to circumvent any install errors'.
  This also makes sure that the minimum CPU requirement set in
  the pkgconfig file is the same one as used for building.
  This helps to maintain a reproducible build (boo#1228177).

OBS-URL: https://build.opensuse.org/request/show/1190850
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=182
2024-08-01 05:06:29 +00:00
42c7e85871 Accepting request 1190320 from science
- Make sure the minimum CPU requirement set in the pkgconfig (forwarded request 1190319 from eeich)

OBS-URL: https://build.opensuse.org/request/show/1190320
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=64
2024-07-30 09:53:50 +00:00
daf462652e Accepting request 1190319 from home:eeich:branches:science
- Make sure the minimum CPU requirement set in the pkgconfig

OBS-URL: https://build.opensuse.org/request/show/1190319
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=180
2024-07-29 17:47:20 +00:00
a974d174ed Accepting request 1190198 from home:eeich:branches:science
- Make sure the minimum requirement mentioned in the pkgconfig
  file is the same one as used for building. This also helps
  to maintain a reproducible build (boo#1228177).

OBS-URL: https://build.opensuse.org/request/show/1190198
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=179
2024-07-29 10:00:24 +00:00
1100f93cf3 Accepting request 1181635 from science
OBS-URL: https://build.opensuse.org/request/show/1181635
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/openblas?expand=0&rev=63
2024-06-19 14:36:36 +00:00
f51c02a47c Accepting request 1180848 from home:Andreas_Schwab:Factory
- no-static.patch: do not link statically

OBS-URL: https://build.opensuse.org/request/show/1180848
OBS-URL: https://build.opensuse.org/package/show/science/openblas?expand=0&rev=177
2024-06-19 03:35:07 +00:00
9 changed files with 557 additions and 289 deletions

View File

@@ -1,24 +0,0 @@
From: Egbert Eich <eich@suse.com>
Date: Wed Nov 30 20:16:21 2022 +0100
Subject: Link library with -z,noexecstack
Patch-mainline: Not yet
Git-commit: adddc0eadc81bcd29c48594793cb33eac0edb572
References:
Signed-off-by: Egbert Eich <eich@suse.com>
Signed-off-by: Egbert Eich <eich@suse.de>
---
exports/Makefile | 1 +
1 file changed, 1 insertion(+)
Index: OpenBLAS-0.3.25/exports/Makefile
===================================================================
--- OpenBLAS-0.3.25.orig/exports/Makefile
+++ OpenBLAS-0.3.25/exports/Makefile
@@ -193,6 +193,7 @@ else ifeq ($(F_COMPILER), FLANG)
else
ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
+ -Wl,-z,noexecstack \
-Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa2d68b1564fe2b13bc292672608e9cdeeeb6dc34995512e65c3b10f4599e897
size 24493704

BIN
OpenBLAS-0.3.29.tar.gz LFS Normal file

Binary file not shown.

View File

@@ -1,23 +0,0 @@
openSUSE specific packaging
===========================
OpenBLAS provides optimized implementations of BLAS and LAPACK.
openSUSE provides two variants:
* With OpenMP support
* With threading support
The serial variant has been dropped. To run a program
which requires the serial version (ie. because it is
multi-threaded itself), either specify the environment
variable OMP_NUM_THREADS=1 or place a call to
void openblas_set_num_threads(int num_threads);
in your program to limit the number of threads this library
uses to 1.
On x86 systems OpenBLAS uses dynamic architectures support,
so it contains all CPU-related optimizations.
How to switch between the various BLAS/LAPACK implementations
=============================================================
The openmp and threaded variants may be installed in parallel.
To select which one to use please use the 'modules' command.

View File

@@ -0,0 +1,139 @@
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed Feb 12 09:04:22 2025 +0100
Subject: Restore the non-vectorized code from before PR4880 for POWER8
Patch-mainline: Not yet
Git-repo: https://github.com/xianyi/OpenBLAS
Git-commit: 98b5ef929cfc98f2f3c236966830276c255118d2
References: bsc#1239134
Signed-off-by: Egbert Eich <eich@suse.de>
---
kernel/power/sgemv_t.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c
index e133c815c..ed0a24230 100644
--- a/kernel/power/sgemv_t.c
+++ b/kernel/power/sgemv_t.c
@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp7 += v_x[i] * va7[i];
}
-
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
v_y[0] += a * temp0;
v_y[1] += a * temp4;
-
+#endif
}
@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp2 += v_x[i] * va2[i];
temp3 += v_x[i] * va3[i];
}
-
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp0 += temp1 + temp2 + temp3;
v_y[0] += a * temp0;
-
+#endif
}
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed Feb 12 09:07:20 2025 +0100
Subject: Restore the non-vectorized code from before PR4880 for POWER8
Patch-mainline: Not yet
Git-repo: https://github.com/xianyi/OpenBLAS
Git-commit: 81eed868b68c72ea1868663902f0904dc1b22326
References: bsc#1239134
Signed-off-by: Egbert Eich <eich@suse.de>
---
kernel/power/sgemv_t_8.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/kernel/power/sgemv_t_8.c b/kernel/power/sgemv_t_8.c
index f21f6eb7d..b30bb1137 100644
--- a/kernel/power/sgemv_t_8.c
+++ b/kernel/power/sgemv_t_8.c
@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp7 += vx1* va7_1 + vx2 * va7_2;
}
-
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
v_y[0] += a * temp0;
v_y[1] += a * temp4;
-
+#endif
}
@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1];
}
-
+
+ #if defined(POWER8)
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
+ #else
register __vector float t0, t1, t2, t3;
register __vector float a = { alpha, alpha, alpha, alpha };
__vector float *v_y = (__vector float*) y;
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
temp0 += temp1 + temp2 + temp3;
v_y[0] += a * temp0;
-
+#endif
}

View File

@@ -2,6 +2,4 @@
<package>serial</package>
<package>pthreads</package>
<package>openmp</package>
<package>gnu-hpc</package>
<package>gnu-hpc-pthreads</package>
</multibuild>

View File

@@ -1,3 +1,252 @@
-------------------------------------------------------------------
Fri May 30 08:46:09 UTC 2025 - Richard Biener <rguenther@suse.com>
- For SLES16 target POWER9 instead of POWER8 which fixes the
issue with the reported sgemm testsuite failures. [bsc#1239545]
-------------------------------------------------------------------
Mon Mar 24 13:16:09 UTC 2025 - Nicolas Morey <nicolas.morey@suse.com>
- Disable and remove support for gnu-hpc build flavours (bsc#1239982)
-------------------------------------------------------------------
Mon Mar 17 08:51:26 UTC 2025 - Egbert Eich <eich@suse.com>
- Disable sgemmt and dgemmt tests in the test suite on power
when gcc-13 is used. It is known (bsc#1239134) that some
of these tests fail on this architecture when OpenBLAS
is being built with the said compiler version ever since
these tests were introduced.
This will essentially restore the situation of the
version prior to the addition of these tests (0.3.26) where
one was unaware of the problem.
This is only a temporary measure and will be removed once
the issue with gcc-13 has been resolved.
- Remove: Link-library-with-z-noexecstack.patch
since `-Wa,--noexecstack -Wl,-z,noexecstack` are global options,
now.
-------------------------------------------------------------------
Fri Mar 14 09:24:18 UTC 2025 - Egbert Eich <eich@suse.com>
- Use upstream patch for bsc#1239134 which is more friendly to the
non-affected power9 and power10 sub-architectures:
Replace:
Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
by:
Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
-------------------------------------------------------------------
Sat Mar 8 13:23:53 UTC 2025 - Egbert Eich <eich@suse.com>
- Revert commit ba47c7f4f301aad100ed166de338b86e01da8465 to
prevent failures on Power8 (bsc#1239134)
* Add: Revert-ba47c7f4f301aad100ed166de338b86e01da8465.patch
- Add a script to run tests.
- Add bisect support.
-------------------------------------------------------------------
Wed Mar 5 15:47:13 UTC 2025 - Egbert Eich <eich@suse.com>
- Update to version 0.3.29 (jsc#PED-9676):
General:
* Fixed a potential NULL pointer dereference in multithreaded builds.
* Added function aliases for `GEMMT` using its new name `GEMMTR`
adopted by Reference-BLAS.
* Fixed the behavior of the recently added `CBLAS_?GEMMT` functions
with row-major data.
* Improved thread scaling of multithreaded `SBGEMV`.
* Improved thread scaling of multithreaded `TRTRI`.
* Fixed compilation of the CBLAS testsuite with gcc14 (and no
Fortran compiler).
* Fixed placement of the `-fopenmp` flag and libsuffix in the
generated pkgconfig file.
* Improved the `CMakeConfig` file generated by the Makefile build.
* Fixed const-correctness of `cblas_?geadd` in `cblas.h`.
* Fixed a potential inaccuracy in multithreaded BLAS3 calls.
* Fixed empty implementations of `get`/`set_affinity` that print a
warning in OpenMP builds.
* Fixed function signatures for TRTRS in the converted C version of
LAPACK.
* Fixed omission of several single-precision LAPACK symbols in the
shared library.
* Improved build instructions for the provided "pybench" benchmarks.
* Improved documentation, including descriptions of environment
variables that affect build and runtime behavior.
* Added a separate "make install_tests" target for use with
cross-compilations.
* Integrated improvements and corrections from Reference-LAPACK:
- removed a comparison in LAPACKE `?tpmqrt` that is always false.
- fixed the leading dimension for B in tests for GGEV.
- replaced the `?LARFT` functions with a recursive implementation.
arm64:
* Fixed a long-standing bug in the (generic) `c`/`zgemm_beta` kernel
that could lead to reads and writes outside the array bounds in some
circumstances.
* Rewrote cpu autodetection to scan all cores and return the highest
performing type.
* Improved the DGEMM performance for SVE targets and small matrix sizes.
* Improved dimension criteria for forwarding from `GEMM` to `GEMV`
kernels.
* Added SVE kernels for `ROT` and `SWAP`.
* Improved SVE kernels for `SGEMV` and `DGEMV` on `A64FX` and
`NEOVERSEV1`.
* Fixed NRM2 implementations for generic SVE targets and the Neoverse N2.
x86_64:
* Fixed a wrong storage size in the SBGEMV kernel for Cooper Lake.
* Added cpu autodetection for Intel Granite Rapids.
* Added cpu autodetection for AMD Ryzen 5 series.
* Added optimized `SOMATCOPY_CT` for AVX-capable targets.
* Fixed the fallback implementation of `GEMM3M` in GENERIC builds.
Power:
* Fixed multithreaded `SBGEMM`.
* Fixed a CMake build problem on POWER10.
* Improved the performance of SGEMV.
* Added vectorized implementations of `SBGEMV` and support for
forwarding 1xN `SBGEMM` to them.
* Fixed illegal instructions and potential memory overflow in SGEMM
on PPCG4.
* Fixed handling of NaN and Inf arguments in `SSCAL` and `DSCAL` on
PPC440,G4 and 970.
* Added improved `CGEMM` and `ZGEMM` kernels for POWER10.
Riscv64:
* Removed thread yielding overhead caused by `sched_yield`.
* Replaced some non-standard intrinsics with their official names.
* Fixed and sped up the implementations of `CGEMM`/`ZGEMM` `TCOPY`
for vector lengths 128 and 256.
* Improved the performance of `SNRM2`/`DNRM2` for RVV1.0 targets.
* Added optimized `?OMATCOPY_CN` kernels for RVV1.0 targets.
- Add test package.
- Add flags: `-Wa,--noexecstack -Wl,-z,noexecstack` to make sure
stack is not executable. This works around problems in assembler
code for z.
- Make stack of empty cpuid.S non-executable as well.
-------------------------------------------------------------------
Wed Mar 5 14:17:26 UTC 2025 - Egbert Eich <eich@suse.com>
- Set gcc versions for ppc64le (bsc#1239702)
* on SLE-15-SP6: v13
* on SLE-15-SP7: v14
-------------------------------------------------------------------
Mon Feb 3 14:43:29 UTC 2025 - Andreas Schwab <schwab@suse.de>
- Disable LTO on riscv64 due to GCC#110812
-------------------------------------------------------------------
Thu Jan 2 15:15:51 UTC 2025 - Egbert Eich <eich@suse.com>
- Update to version 0.3.28 (jsc#PED-9676):
* General:
+ Reworked the unfinished implementation of `HUGETLB` from GotoBLAS
for allocating huge memory pages as buffers on suitable systems.
+ Changed the unfinished implementation of `GEMM3M` for the generic
target on all architectures to at least forward to regular GEMM.
+ Improved multithreaded `GEMM` performance for large non-skinny
matrices.
+ Improved BLAS3 performance on larger multicore systems through
improved parallelism.
+ Improved performance of the initial memory allocation by reducing
locking overhead.
+ Improved performance of `GBMV` at small problem sizes by introducing
a size barrier for the switch to multithreading.
+ Added an implementation of the `CBLAS_GEMM_BATCH` extension.
+ Fixed corner cases involving the handling of NAN and INFINITY
arguments in `?SCAL` on all architectures.
+ Fixed NAN handling and potential accuracy issues in compilations
with Intel ICX by supplying a suitable fp-model option by default.
+ It is now possible to register a callback function that replaces
the built-in support for multithreading with an external backend
like TBB (`openblas_set_threads_callback_function`).
+ Fixed potential duplication of suffixes in shared library naming.
+ Improved C compiler detection by the build system to tolerate
more naming variants for gcc builds.
+ Fixed an unnecessary dependency of the utest on CBLAS.
+ Fixed spurious error reports from the BLAS extensions `utest`.
+ Fixed unwanted invocation of the `GEMM3M` tests in cross-
compilation.
+ Fixed a flaw in the makefile build that could lead to the
pkgconfig file containing an entry of `UNKNOWN` for the target
cpu after installing.
+ Integrated fixes from the Reference-LAPACK project:
- Fixed uninitialized variables in the LAPACK tests for `?QP3RK`.
- Fixed potential bounds error in `?UNHR_COL`/`?ORHR_COL`.
- Fixed potential infinite loop in the LAPACK testsuite.
- Make the variable type used for hidden length arguments
configurable.
+ Fixed `SYTRD` workspace computation and various typos.
+ Prevent compiler use of FMA that could increase numerical
error in `?GEEVX`.
* x86-64:
+ Fixed a potential thread buffer overrun in `SBSTOBF16` on small
systems.
+ Fixed an accuracy issue in `ZSCAL` introduced in 0.3.26.
+ Added support for Intel Emerald Rapids and Meteor Lake CPUs.
+ Added autodetection support for the Zhaoxin KX-7000 CPU.
+ Fixed autodetection of Intel Prescott (probably broken
since 0.3.19).
+ Fixed compilation of the converter-generated C versions
of the LAPACK sources with gcc-14.
+ Added support for supplying the L2 cache size via an
environment variable (`OPENBLAS_L2_SIZE`) in case it is not
correctly reported (as in some VM configurations).
+ Improved the error message shown when thread creation fails
on startup.
* arm64:
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
1xN or Mx1 matrix to the corresponding `GEMV` kernel.
+ Added optimized `SGEMV` and `DGEMV` kernels for A64FX.
+ Added optimized SVE kernels for small-matrix `GEMM`.
+ Added A64FX to the CPU list for DYNAMIC_ARCH.
+ Fixed building with support for CPU affinity.
+ Worked around accuracy problems with `C/ZNRM2` on NeoverseN1
targets.
+ Improved GEMM performance on Neoverse V1.
+ Fixed compilation for `NEOVERSEN2` with older compilers.
+ Fixed potential miscompilation of the SVE `SDOT` and `DDOT`
kernels.
+ Fixed potential miscompilation of the non-SVE `CDOT` and
`ZDOT` kernels.
+ Fixed a potential overflow when using very large user-defined
`BUFFERSIZE`.
* Power:
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a 1xN
or Mx1 matrix to the corresponding `GEMV` kernel.
+ Significantly improved performance of `SBGEMM` on POWER10.
+ Fixed compilation with OpenMP and the XLF compiler.
+ Fixed building of parts of the LAPACK testsuite with XLF.
+ Fixed CSWAP/ZSWAP on big-endian POWER10 targets.
+ Fixed a performance regression in SAXPY on POWER10 with OpenXL.
+ Fixed a potential overflow when using very large user-defined
`BUFFERSIZE`.
+ Fixed an accuracy issue in the POWER6 kernels for `GEMM` and
`GEMV`.
* RISCV64:
+ Added a fast path forwarding `SGEMM` and `DGEMM` calls with a
1xN or Mx1 matrix to the corresponding GEMV kernel.
+ Added `DYNAMIC_ARCH` support (comprising `GENERIC_RISCV64` and
the two RVV 1.0 targets with vector length of 128 and 256).
+ Worked around the `ZVL128B` kernels for `AXPBY` mishandling the
special case of zero Y increment.
- Obsoleted: no-static.patch.
-------------------------------------------------------------------
Mon Jul 29 09:21:41 UTC 2024 - Egbert Eich <eich@suse.com>
- Duplicate all options passed to `make` also to `make install`:
The openblas build output suggests this: 'Note that any flags
passed to make during build should also be passed to make install
to circumvent any install errors'.
This also makes sure that the minimum CPU requirement set in
the pkgconfig file is the same one as used for building.
This helps to maintain a reproducible build (boo#1228177).
-------------------------------------------------------------------
Thu Jun 13 07:32:23 UTC 2024 - Andreas Schwab <schwab@suse.de>
- no-static.patch: do not link statically
-------------------------------------------------------------------
Sun Jun 9 07:07:51 UTC 2024 - Egbert Eich <eich@suse.com>

View File

@@ -1,7 +1,7 @@
#
# spec file
# spec file for package openblas
#
# Copyright (c) 2024 SUSE LLC
# Copyright (c) 2025 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -18,8 +18,7 @@
%global flavor @BUILD_FLAVOR@%{nil}
%define _vers 0_3_27
%define vers 0.3.27
%undefine sha1
%define so_v 0
%define pname openblas
@@ -41,7 +40,6 @@ ExclusiveArch: do_not_build
%define openblas_so_prio 20
# we build devel packages only from one flavor
%define build_devel 1
%{bcond_with hpc}
%endif
%if "%flavor" == "pthreads"
@@ -52,7 +50,6 @@ ExclusiveArch: do_not_build
%else
%define openblas_so_prio 20
%endif
%{bcond_with hpc}
%endif
%if "%flavor" == "openmp"
@@ -62,110 +59,17 @@ ExclusiveArch: do_not_build
%define arch_flavor 1
%define openblas_so_prio 50
%endif
%{bcond_with hpc}
%endif
%if "%flavor" == "gnu-hpc"
%define compiler_family gnu
%undefine c_f_ver
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu-hpc-pthreads"
%define compiler_family gnu
%undefine c_f_ver
%define ext pthreads
%define build_flags USE_THREAD=1 USE_OPENMP=0
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu7-hpc"
%define compiler_family gnu
%define c_f_ver 7
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu7-hpc-pthreads"
%define compiler_family gnu
%define c_f_ver 7
%define ext pthreads
%define build_flags USE_THREAD=1 USE_OPENMP=0
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu8-hpc"
%define compiler_family gnu
%define c_f_ver 8
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu8-hpc-pthreads"
%define compiler_family gnu
%define c_f_ver 8
%define ext pthreads
%define build_flags USE_THREAD=1 USE_OPENMP=0
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu9-hpc"
%define compiler_family gnu
%define c_f_ver 9
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu9-hpc-pthreads"
%define compiler_family gnu
%define c_f_ver 9
%define ext pthreads
%define build_flags USE_THREAD=1 USE_OPENMP=0
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu10-hpc"
%define compiler_family gnu
%define c_f_ver 10
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu10-hpc-pthreads"
%define compiler_family gnu
%define c_f_ver 10
%define ext pthreads
%define build_flags USE_THREAD=1 USE_OPENMP=0
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu11-hpc"
%define compiler_family gnu
%define c_f_ver 11
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu11-hpc-pthreads"
%define compiler_family gnu
%define c_f_ver 11
%define ext pthreads
%define build_flags USE_THREAD=1 USE_OPENMP=0
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu12-hpc"
%define compiler_family gnu
%define c_f_ver 12
%{bcond_without hpc}
%endif
%if "%flavor" == "gnu12-hpc-pthreads"
%define compiler_family gnu
%define c_f_ver 12
%define ext pthreads
%define build_flags USE_THREAD=1 USE_OPENMP=0
%{bcond_without hpc}
%endif
%ifarch ppc64le
%if 0%{?c_f_ver} > 9
%else
%if 0%{?sle_version} == 150700
%define cc_v 14
%endif
%if 0%{?sle_version} == 150600
%define cc_v 13
%endif
%if 0%{?sle_version} == 150500
%define cc_v 12
%endif
@@ -183,7 +87,6 @@ ExclusiveArch: do_not_build
%endif
%endif
%if %{without hpc}
%define so_a %{so_v}
%if 0%{!?package_name:1}
%define package_name %{pname}_%{flavor}
@@ -191,60 +94,41 @@ ExclusiveArch: do_not_build
%define p_prefix %_prefix
%define p_includedir %_includedir/%pname
%define p_libdir %_libdir/openblas%{?flavor:-%{flavor}}
%define p_testdir %_libexecdir/openblas%{?flavor:-%{flavor}}/tests
%define p_cmakedir %{p_libdir}/cmake/%{pname}
%define num_threads 64
%if 0%{?sha1:1}
%define v_string %{sha1}
%else
%define so_a %{nil}
# Magic for OBS Staging. Only build the flavors required by
# other packages in the ring.
%if %{with ringdisabled}
ExclusiveArch: do_not_build
%endif
%define package_name %{hpc_package_name %_vers}
%define p_prefix %hpc_prefix
%define p_includedir %hpc_includedir
%define p_libdir %hpc_libdir
%define p_cmakedir %{hpc_libdir}/cmake
%define num_threads 256
%{hpc_init -c %{compiler_family} %{?c_f_ver:-v %{c_f_ver}} %{?ext:-e %{ext}}}
%define v_string v%{version}
%endif
Name: %{package_name}
Version: %vers
Version: 0.3.29
Release: 0
Summary: An optimized BLAS library based on GotoBLAS2
License: BSD-3-Clause
Group: Productivity/Scientific/Math
URL: http://www.openblas.net
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}.tar.gz#/OpenBLAS-%{version}.tar.gz
Source0: https://github.com/xianyi/OpenBLAS/archive/%{v_string}.tar.gz#/OpenBLAS-%{version}%{?sha1:_%{sha1}}.tar.gz
Source1: README.SUSE
Source2: README.HPC.SUSE
Source3: openblas.rpmlintrc
Patch101: Link-library-with-z-noexecstack.patch
Source3: openblas_tests.sh.in
Source4: openblas.rpmlintrc
# PATCH port
Patch102: Handle-s390-correctly.patch
Patch103: openblas-ppc64be_up2_p8.patch
Patch104: Restore-the-non-vectorized-code-from-before-PR4880-for-POWER8.patch
#BuildRequires: cmake
BuildRequires: memory-constraints
%if 0%{?cc_v:1}
BuildRequires: gcc%{?cc_v}-fortran
%endif
%if %{without hpc}
BuildRequires: gcc-fortran
BuildRequires: update-alternatives
Requires(post): update-alternatives
Requires(preun):update-alternatives
%else
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
BuildRequires: lua-lmod
BuildRequires: suse-hpc
%global dep_summary %{summary}
%endif
Requires(preun): update-alternatives
%description
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
@@ -252,10 +136,9 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
%package -n lib%{name}%{so_a}
Summary: An optimized BLAS library based on GotoBLAS2, %{flavor} version
Group: System/Libraries
%if %{without hpc}
Requires(post): update-alternatives
Requires(post): coreutils
Requires(preun):update-alternatives
Requires(preun): update-alternatives
%if "%flavor" == "serial"
Obsoletes: lib%{pname}%{so_v} < %{version}
Provides: lib%{pname}%{so_v} = %{version}
@@ -268,38 +151,27 @@ Obsoletes: lib%{pname}p0
%if "%flavor" == "openmp"
Obsoletes: lib%{pname}o0
%endif
%else # with hpc
%hpc_requires
%endif
%description -n lib%{name}%{so_a}
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
%{?with_hpc:%{hpc_master_package -l -L}}
%package -n lib%{name}-devel
Summary: Development libraries for OpenBLAS, %{flavor} version
Group: Development/Libraries/C and C++
Requires: lib%{name}%{so_a} = %{version}
%if %{without hpc}
Requires: %{pname}-common-devel = %{version}
Requires: lib%{name}%{so_a} = %{version}
%if 0%{?arch_flavor}
Provides: %{pname}-devel = %version
Provides: %{pname}-devel(default) = %version
%else
Provides: %{pname}-devel(other) = %version
%endif
%else
%hpc_requires_devel
%endif
%description -n lib%{name}-devel
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
This package contains the development libraries for serial OpenBLAS version.
%{?with_hpc:%{hpc_master_package -l -L devel}}
%package devel-static
Summary: Static version of OpenBLAS
Group: Development/Libraries/C and C++
@@ -326,23 +198,37 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
This package contains headers for OpenBLAS.
%package tests
Summary: Unit Tests for openblas library
Group: Development/Libraries/C and C++
%description tests
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
This package contains test binaries.
%prep
%setup -q -n OpenBLAS-%{version}
%setup -q -n OpenBLAS-%{?sha1:%{sha1}}%{!?sha1:%{version}}
%autopatch -p1
%ifarch s390
sed -i -e "s@m32@m31@" Makefile.system
%endif
sed -i -e '/FLDFLAGS = \|$(CC)\|$(CXX)/s@$@ $(LDFLAGS_TESTS)@' \
test/Makefile ctest/Makefile utest/Makefile cpp_thread_test/Makefile
%if %{without hpc}
cp %{SOURCE1} .
grep -q .note.GNU-stack cpuid.S || echo '.section .note.GNU-stack,"",@progbits' >> cpuid.S
# Disable sgemmt and dgemmt tests on ppc64le when using gcc13
%if "%{?_arch}" == "ppc64le" && 0%{?gcc_version} == 13
%if %{suse_version} >= 1600 && !0%{?is_opensuse}
# with openblas_target POWER9 the tests work fine
%else
cp %{SOURCE2} .
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_sgemmt.o *@@' utest/Makefile
sed -i -e '/^OBJS_EXT+=/s@[^= ]*/test_dgemmt.o *@@' utest/Makefile
%endif
%endif
%if %{without hpc}
cp %{SOURCE1} .
# create baselibs.conf based on flavor
cat > %{_sourcedir}/baselibs.conf <<EOF
lib%{name}%{so_a}
@@ -352,7 +238,6 @@ lib%{name}-devel
requires -%{name}-<targettype>
requires "lib%{name}%{?so_a}-<targettype> = <version>"
EOF
%endif
%build
@@ -370,9 +255,9 @@ EOF
%define _lto_cflags %{nil}
%endif
%if %{with hpc}
%hpc_debug
%hpc_setup_compiler
%ifarch riscv64
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110812
%global _lto_cflags %{nil}
%endif
# Use DYNAMIC_ARCH everywhere - not sure about PPC?
@@ -393,7 +278,11 @@ EOF
%global openblas_target %openblas_target TARGET=ZARCH_GENERIC
%endif
%ifarch ppc64le
%if %{suse_version} >= 1600 && !0%{?is_opensuse}
%global openblas_target %openblas_target TARGET=POWER9
%else
%global openblas_target %openblas_target TARGET=POWER8
%endif
%define openblas_opt BUILD_BFLOAT16=1
%endif
%ifarch ppc64
@@ -408,7 +297,7 @@ EOF
%ifarch ppc64
%global addopt -mvsx
%endif
%global addopt %{?addopt} -fno-strict-aliasing
%global addopt %{?addopt} -fno-strict-aliasing -Wa,--noexecstack -Wl,-z,noexecstack
# Make serial, threaded and OpenMP versions
@@ -420,7 +309,7 @@ EOF
# set MAKE_NB_JOBS instead and let the build do the work!
# Do not use LIBNAMESUFFIX for new builds as it will not allow
# the different flavors to be plugin replacements of each other
%if 0%{?suse_version} <= 1500 && %{without hpc}
%if 0%{?suse_version} <= 1500
%define libnamesuffix LIBNAMESUFFIX=_%{flavor}
%endif
make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
@@ -429,28 +318,39 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
NUM_THREADS=%{num_threads} V=1 \
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
OPENBLAS_BINARY_DIR=%{p_testdir} \
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
PREFIX=%{p_prefix} \
%{?dynamic_list} \
%{!?with_hpc:%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""}} \
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
%{?with_hpc:%{?cc_v:CC=gcc-%{cc_v} FC=gfortran-%{cc_v} CEXTRALIB=""}}
%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""} \
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}}
%install
%if %{with hpc}
%hpc_setup_compiler
%endif
# Install library and headers
# Pass NUM_THREADS again, as it is not propagated from the build step
# https://github.com/OpenMathLib/OpenBLAS/issues/4275
%make_install %{?build_flags} \
mkdir -p %{buildroot}/%{p_testdir}
%make_install install_tests %{?openblas_target} %{?build_flags} \
%{?openblas_opt} \
NUM_THREADS=%{num_threads} \
OPENBLAS_LIBRARY_DIR=%{p_libdir} \
OPENBLAS_INCLUDE_DIR=%{p_includedir} \
OPENBLAS_BINARY_DIR=%{p_testdir} \
OPENBLAS_CMAKE_DIR=%{p_cmakedir} \
%{?libnamesuffix} \
%{?libnamesuffix} FC=gfortran CC=gcc%{?cc_v:-%{cc_v}} %{?cc_v:CEXTRALIB=""} \
%{?ldflags_tests:LDFLAGS_TESTS=%{ldflags_tests}} \
PREFIX=%{p_prefix}
# Generate the test driver from its template: fill in the build flavor and,
# when one is defined, the compiler family (empty otherwise).
sed -e 's#@FLAVOR@#%{flavor}#' \
-e 's#@COMPILER@#%{?compiler_family:%{compiler_family}}#' \
< %{S:3} > %{buildroot}/%{p_testdir}/openblas_tests.sh
chmod 0755 %{buildroot}/%{p_testdir}/openblas_tests.sh
# Data and input files are plain files, everything else is executable.
# Match on the file name only: matching the full path would treat every
# file as input data as soon as the buildroot path itself contains "in".
for i in %{buildroot}/%{p_testdir}/*; do
case "${i##*/}" in
*.dat|*in*) chmod 0644 "$i";;
*) chmod 0755 "$i";;
esac
done
# Delete info about OBS host cpu
%ifarch %ix86 x86_64
@@ -458,8 +358,6 @@ make MAKE_NB_JOBS=$jobs %{?openblas_target} %{?build_flags} \
%{buildroot}%{p_includedir}/openblas_config.h
%endif
%if %{without hpc}
%if 0%{!?build_devel:1}
# We need the includes only once
rm -rf %{buildroot}%{p_includedir}/
@@ -515,52 +413,6 @@ ln -s %{_sysconfdir}/alternatives/openblas-default%{?a_x}/cmake/openblas %{build
ln -s openblas-%{flavor}/lib%{pname}.so.%{so_v} %{buildroot}%{_libdir}/lib%{name}.so.%{so_v}
ln -s openblas-%{flavor}/lib%{pname}.so %{buildroot}%{_libdir}/lib%{name}.so
%endif
%else # with hpc
# HPC module file
%hpc_write_modules_files
#%%Module1.0#####################################################################
proc ModulesHelp { } {
puts stderr " "
puts stderr "This module loads the %{pname} library built with the %{compiler_family} compiler toolchain."
puts stderr "\nVersion %{version}\n"
}
module-whatis "Name: %{hpc_upcase %pname} built with %{compiler_family} toolchain"
module-whatis "Version: %{version}"
module-whatis "Category: runtime library"
module-whatis "Description: %{dep_summary}"
module-whatis "%{url}"
set version %{version}
prepend-path LD_LIBRARY_PATH %{p_libdir}
setenv %{hpc_upcase %pname}_DIR %{hpc_prefix}
if {[file isdirectory %{hpc_includedir}]} {
prepend-path LIBRARY_PATH %{p_libdir}
prepend-path CPATH %{p_includedir}
prepend-path C_INCLUDE_PATH %{p_includedir}
prepend-path CPLUS_INCLUDE_PATH %{p_includedir}
prepend-path INCLUDE %{p_includedir}
%hpc_modulefile_add_pkgconfig_path
setenv %{hpc_upcase %pname}_DIR %{hpc_prefix}
setenv %{hpc_upcase %pname}_LIB %{p_libdir}
setenv %{hpc_upcase %pname}_INC %{p_includedir}
}
family "openblas"
EOF
%{hpc_write_pkgconfig -l %{pname}}
%endif # with hpc
%if %{without hpc}
# Ensure directory used in older versions are replaced by symlink properly
%pre -n %{pname}-common-devel
@@ -596,17 +448,9 @@ if [ ! -d %{p_libdir} ]; then
fi
/sbin/ldconfig
%else
%postun -n lib%{name}
%hpc_module_delete_if_default
%endif
%files -n lib%{name}%{so_a}
%defattr(-,root,root,-)
%{p_libdir}/lib%{pname}.so.%{so_v}
%if %{without hpc}
%dir %{p_libdir}
%{?libnamesuffix:%{_libdir}/lib%{name}.so.%{so_v}}
# Created by %%post
@@ -621,26 +465,19 @@ fi
%ghost %{_sysconfdir}/alternatives/libcblas.so.3%{?a_x}
%ghost %{_sysconfdir}/alternatives/liblapack.so.3%{?a_x}
%ghost %{_sysconfdir}/alternatives/liblapacke.so.3%{?a_x}
%else
%hpc_dirs
%{p_libdir}/libopenblas*r*.so
%hpc_modules_files
%endif
%files -n lib%{name}-devel
%{p_libdir}/lib%{pname}.so
%{?libnamesuffix:%{_libdir}/lib%{name}.so}
%{p_cmakedir}/
%if %{with hpc}
%license LICENSE
%doc Changelog.txt GotoBLAS* README.md README.HPC.SUSE
%hpc_pkgconfig_file
%{p_includedir}/
%else
%dir %{p_libdir}/cmake
%dir %{p_libdir}/pkgconfig
%{p_libdir}/pkgconfig
%endif
%files tests
%dir %{p_testdir}
%dir %{dirname:%{p_testdir}}
%{p_testdir}/*
%files devel-static
%{p_libdir}/libopenblas*.a

92
openblas_tests.sh.in Normal file
View File

@@ -0,0 +1,92 @@
#! /bin/bash
# Driver for the installed OpenBLAS test programs.
# This file is a template: @FLAVOR@ and @COMPILER@ are placeholders that are
# substituted when the final script is generated at package build time.
# FLAVOR selects the test directory and threading model further below.
FLAVOR=@FLAVOR@
# COMPILER is only referenced by the HPC flavors when loading modules.
COMPILER=@COMPILER@
# Series 'test'

# Print a tester's summary file if it recorded a FATAL error.
# Arguments: $1 - summary file, relative to the current directory.
_blat_show_fatal() {
  if grep -q FATAL "$1"; then
    cat "$1"
  fi
}

# Run one tester that reads "<name>.dat" from ${dir} and writes its summary
# to "<NAME>.SUMM" in the current directory.
# Arguments: $1 - tester name (executable in ${dir}).
# Outputs:   "<name> failed" on stdout if the tester exits non-zero, followed
#            by the summary file when it contains FATAL.
_blat_run_dat() {
  local name=$1
  "${dir}/${name}" < "${dir}/${name}.dat" || echo "${name} failed"
  _blat_show_fatal "${name^^}.SUMM"
}

# Run the Fortran BLAS level 1, 2 and 3 testers.
# Globals:   dir (read) - directory holding the test binaries and .dat files.
# Outputs:   one "<tester> failed" line per failing tester; summary files are
#            created in the current working directory.
# Every failure is reported under the name of the tester that failed.
series_test() {
  local t

  # Level 1 testers take no input file.
  for t in sblat1 dblat1 cblat1 zblat1; do
    "${dir}/${t}" || echo "${t} failed"
  done

  for t in sblat2 dblat2 cblat2 zblat2; do
    _blat_run_dat "$t"
  done

  # test_sbgemm prints its report to stdout instead of writing a summary file.
  # NOTE(review): presumably only built with BUILD_BFLOAT16=1 - confirm whether
  # it should be optional like the testers below.
  "${dir}/test_sbgemm" > SBBLAT3.SUMM || echo "test_sbgemm failed"
  _blat_show_fatal SBBLAT3.SUMM

  for t in sblat3 dblat3 cblat3 cblat3_3m zblat3 zblat3_3m; do
    case "$t" in
      # Optional testers: silently skipped when they are not installed.
      sblat3 | *_3m) [ -x "${dir}/${t}" ] || continue ;;
    esac
    _blat_run_dat "$t"
  done
}
# Series 'ctest'
#
# Run the CBLAS interface testers for level 1, 2 and 3.
# Globals:   dir (read) - directory holding the test binaries and input files.
# Outputs:   one "<tester> failed" line on stdout per failing tester, always
#            naming the tester binary that failed.
series_ctest() {
  local prec level tester

  # Level 1 testers take no input file.
  for prec in s d c z; do
    tester=x${prec}cblat1
    "${dir}/${tester}" || echo "${tester} failed"
  done

  # Level 2 and 3 testers read "<precision>in<level>".
  for level in 2 3; do
    for prec in s d c z; do
      tester=x${prec}cblat${level}
      "${dir}/${tester}" < "${dir}/${prec}in${level}" || echo "${tester} failed"
    done
  done

  # The 3M variants are optional; skip them when they are not installed.
  for prec in c z; do
    tester=x${prec}cblat3_3m
    if [ -x "${dir}/${tester}" ]; then
      "${dir}/${tester}" < "${dir}/${prec}in3_3m" || echo "${tester} failed"
    fi
  done
}
# Series 'utest'
#
# Run the OpenBLAS unit test binaries.
# Globals:   dir (read) - directory holding the test binaries.
# Outputs:   "<binary> failed" on stdout for every binary exiting non-zero.
series_utest() {
  local t
  for t in openblas_utest openblas_utest_ext; do
    # Quoted so that a test directory containing whitespace still works.
    "${dir}/${t}" || echo "${t} failed"
  done
}
# Locate the test binaries. The historic default is used unless this script
# sits next to the binaries, which also covers installations below a
# different prefix (e.g. /usr/libexec).
dir=/usr/lib/openblas-${FLAVOR}/tests
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)
if [ -n "$script_dir" ] && [ -x "${script_dir}/openblas_utest" ]; then
  dir=$script_dir
fi

# Select the threading model of the library under test. THREADS and OMP hold
# the command names 'true' or 'false' and are executed as conditions below.
case "$FLAVOR" in
  serial)
    export THREADS=false
    export OMP=false
    ;;
  pthreads)
    export THREADS=true
    export OMP=false
    ;;
  openmp)
    export THREADS=false
    export OMP=true
    ;;
  gnu-hpc)
    module purge
    module load "gnu/${COMPILER}" openblas
    dir=$OPENBLAS_DIR/bin
    export THREADS=false
    export OMP=true
    ;;
  gnu-hpc-pthreads)
    module purge
    # NOTE(review): uses COMPILER like the gnu-hpc branch, since nothing in
    # this script defines COMPILER_VERSION - confirm the module name.
    module load "gnu/${COMPILER}" openblas
    dir=$OPENBLAS_DIR/bin
    export THREADS=true
    export OMP=false
    ;;
  *)
    echo "openblas_tests.sh: unsupported flavor '${FLAVOR}'" >&2
    exit 1
    ;;
esac

# Always run the Fortran testers single-threaded first.
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 series_test

# Threaded flavors repeat the Fortran testers with two threads; the serial
# flavor skips this rerun.
if "$OMP" || "$THREADS"; then
  # Remove the summaries of the first pass so stale FATAL entries are not
  # reported again.
  rm -f ?BLAT2.SUMM ?BLAT3.SUMM ?BLAT3_3M.SUMM
  if "$OMP"; then
    OMP_NUM_THREADS=2 series_test
  else
    OPENBLAS_NUM_THREADS=2 series_test
  fi
fi

if "$OMP"; then
  OMP_NUM_THREADS=2 series_ctest
else
  OPENBLAS_NUM_THREADS=2 series_ctest
fi

series_utest