Compare commits

...

1 Commits

Author SHA256 Message Date
d4a8ea30fe Initial import
Signed-off-by: Egbert Eich <eich@suse.com>
2025-07-03 21:02:37 +02:00
11 changed files with 600751 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
From e2a44fe6ad3bcf3c6df84b80d413ed09e1428e72 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 13 Jan 2024 07:48:37 -0500
Subject: [PATCH] Hardcode cblas as the blas library
Signed-off-by: Tom Rix <trix@redhat.com>
---
clients/gtest/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clients/gtest/CMakeLists.txt b/clients/gtest/CMakeLists.txt
index 9f17fb28..3df9eab4 100644
--- a/clients/gtest/CMakeLists.txt
+++ b/clients/gtest/CMakeLists.txt
@@ -153,7 +153,7 @@ target_include_directories( rocblas-test
if( BUILD_FORTRAN_CLIENTS )
target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client )
endif( )
-target_link_libraries( rocblas-test PRIVATE ${BLAS_LIBRARY} ${GTEST_BOTH_LIBRARIES} roc::rocblas )
+target_link_libraries( rocblas-test PRIVATE cblas ${GTEST_BOTH_LIBRARIES} roc::rocblas )
if( CUDA_FOUND )
target_include_directories( rocblas-test
--
2.43.0

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,25 @@
From 3d82251d51a9804c28cb84d598084fc12ca0418f Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 13 Jan 2024 14:36:01 -0500
Subject: [PATCH] fixup install of tensile output
---
library/src/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
index 6acedfb2..2877c6e1 100755
--- a/library/src/CMakeLists.txt
+++ b/library/src/CMakeLists.txt
@@ -591,7 +591,7 @@ if( BUILD_WITH_TENSILE )
if (WIN32)
set( ROCBLAS_TENSILE_LIBRARY_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/bin/rocblas" CACHE PATH "path to tensile library" )
else()
- set( ROCBLAS_TENSILE_LIBRARY_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}${CMAKE_INSTALL_LIBDIR}/rocblas" CACHE PATH "path to tensile library" )
+ set( ROCBLAS_TENSILE_LIBRARY_DIR "${CMAKE_INSTALL_LIBDIR}/rocblas" CACHE PATH "path to tensile library" )
endif()
# For ASAN package, Tensile library files(which are not shared libraries) are not required
if( NOT ENABLE_ASAN_PACKAGING )
--
2.43.0

View File

@@ -0,0 +1,44 @@
From 2966285dc09ca9c7e6b95c5212a2d5bd46ab8376 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 27 Sep 2024 05:40:14 -0700
Subject: [PATCH] offload compress option
Try out --offload-compress
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
cmake/build-options.cmake | 2 ++
library/CMakeLists.txt | 4 ++++
2 files changed, 6 insertions(+)
diff --git a/cmake/build-options.cmake b/cmake/build-options.cmake
index cb35e72fb157..26d04c0aefdb 100755
--- a/cmake/build-options.cmake
+++ b/cmake/build-options.cmake
@@ -36,6 +36,8 @@ option( BUILD_SHARED_LIBS "Build rocBLAS as a shared library" ON )
# library without tensile to allow for rapid iteration without GEMM functionality
option( BUILD_WITH_TENSILE "Build full functionality which requires tensile?" ON )
+option( BUILD_OFFLOAD_COMPRESS "Build with offload compress?" OFF )
+
include(clients/cmake/client-build-options.cmake)
if (WIN32)
diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt
index 90a75dd394d2..0386a3058d45 100755
--- a/library/CMakeLists.txt
+++ b/library/CMakeLists.txt
@@ -66,6 +66,10 @@ function( rocblas_library_settings lib_target_ )
# Do not allow Variable Length Arrays (use unique_ptr instead)
target_compile_options( ${lib_target_} PRIVATE -Werror=vla )
+ if ( BUILD_OFFLOAD_COMPRESS )
+ target_compile_options( ${lib_target_} PRIVATE --offload-compress )
+ endif ()
+
target_compile_definitions( ${lib_target_} PRIVATE ROCM_USE_FLOAT16 ROCBLAS_INTERNAL_API ROCBLAS_BETA_FEATURES_API )
# both libraries will use rocblas_EXPORTS
--
2.46.0

View File

@@ -0,0 +1,82 @@
diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt
index 762580d7..f02d62e6 100644
--- a/library/CMakeLists.txt
+++ b/library/CMakeLists.txt
@@ -80,7 +80,7 @@ function( rocblas_library_settings lib_target_ )
target_link_libraries( ${lib_target_} PRIVATE hip::device )
else()
target_link_libraries( ${lib_target_} PRIVATE hip::device -lstdc++fs --rtlib=compiler-rt --unwindlib=libgcc)
- if (BUILD_SHARED_LIBS)
+ if (BUILD_SHARED_LIBS AND NOT DISABLE_ROCTRACER)
target_link_libraries(${lib_target_} PRIVATE -lroctx64)
endif()
endif()
diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
index 5deab888..f96c7e3e 100644
--- a/library/src/CMakeLists.txt
+++ b/library/src/CMakeLists.txt
@@ -28,6 +28,10 @@
# package_targets is used as a list of install target
set( package_targets rocblas )
+# we want to decrease the number of build deps for EPEL packages so patching in an option
+# to disable its use
+
+option( DISABLE_ROCTRACER "Disable use of rocTRACER for logging" ON )
# Set up Tensile Dependency
if( BUILD_WITH_TENSILE )
@@ -688,7 +692,14 @@ if(BUILD_WITH_HIPBLASLT)
find_package( hipblaslt ${HIPBLASLT_VERSION} REQUIRED CONFIG PATHS ${hipblaslt_path} ${ROCM_PATH})
endif()
+if ( DISABLE_ROCTRACER )
+ message( "roctracer is disabled for this build" )
+ target_compile_definitions( rocblas PRIVATE DISABLE_ROCTRACER )
+endif()
+
+# ROCBLAS_STATIC_LIB only describes the link type. The sources test DISABLE_ROCTRACER
+# separately, so a static build keeps this define whether or not roctracer is used.
if( NOT BUILD_SHARED_LIBS )
target_compile_definitions( rocblas PRIVATE ROCBLAS_STATIC_LIB )
endif()
diff --git a/library/src/include/logging.hpp b/library/src/include/logging.hpp
index 352e4e39..d222879a 100644
--- a/library/src/include/logging.hpp
+++ b/library/src/include/logging.hpp
@@ -40,7 +40,7 @@
#include <unordered_map>
#include <utility>
-#if !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
+#if !defined(DISABLE_ROCTRACER) && !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
#include <roctracer/roctx.h>
#endif
@@ -453,7 +453,7 @@ class Logger
public:
Logger() = default;
-#if !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
+#if !defined(DISABLE_ROCTRACER) && !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
void log_range(const std::string& name)
{
if(!m_active)
@@ -472,7 +472,7 @@ public:
// ((os << sep << std::forward<Ts>(xs)), ...);
(void)(int[]){(os << sep << std::forward<Ts>(xs), 0)...};
-#if !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
+#if !defined(DISABLE_ROCTRACER) && !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
log_range(os.str());
#endif
os << std::endl;
@@ -527,7 +527,7 @@ public:
~Logger()
{
-#if !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
+#if !defined(DISABLE_ROCTRACER) && !defined(ROCBLAS_STATIC_LIB) && !defined(WIN32)
if(m_active)
{
roctxRangePop();

View File

@@ -0,0 +1,26 @@
From aba3a118fd32f415e63b4e24555b8df98e89292d Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Tue, 3 Oct 2023 10:37:12 -0700
Subject: [PATCH] prepare rocblas cmake for fedora
Signed-off-by: Tom Rix <trix@redhat.com>
---
clients/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clients/CMakeLists.txt b/clients/CMakeLists.txt
index da44cef0..6d39c1f3 100755
--- a/clients/CMakeLists.txt
+++ b/clients/CMakeLists.txt
@@ -131,7 +131,7 @@ if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS)
message(STATUS "Linking BLIS LIB: ${BLAS_LIBRARY}")
endif()
else()
- set( BLAS_LIBRARY "blas" )
+ set( BLAS_LIBRARY "cblas" )
endif()
else() # WIN32
set( BLAS_INCLUDE_DIR ${OPENBLAS_DIR}/include CACHE PATH "OpenBLAS library include path" )
--
2.41.0

View File

@@ -0,0 +1,80 @@
From: Egbert Eich <eich@suse.com>
Date: Wed Apr 30 16:18:49 2025 +0200
Subject: Modify CMakeLists.txt files to allow to build modules independently
Patch-mainline: Not yet
Git-commit: f4724507a2770b2ed5ecc633aa406ad70a675e6f
References:
Move the installation of the Tensile library directory into its own
subdirectory, library/src/TensileInstall, so that the generated
cmake_install.cmake of that directory can be run on its own.
If the environment variable TENSILE_SKIP_LIBRARY holds a true value when
cmake is configured, TensileHost does not depend on TENSILE_LIBRARY_TARGET
and the TensileInstall subdirectory is not added.
The variable is expanded inside quotes, so an unset or empty value is
evaluated as false instead of leaving NOT without an operand.
Signed-off-by: Egbert Eich <eich@suse.com>
Signed-off-by: Egbert Eich <eich@suse.de>
---
library/src/CMakeLists.txt | 24 +++++-------------------
library/src/TensileInstall/CMakeLists.txt | 19 +++++++++++++++++++
2 files changed, 24 insertions(+), 19 deletions(-)
diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
index 35342e1..efa732c 100644
--- a/library/src/CMakeLists.txt
+++ b/library/src/CMakeLists.txt
@@ -97,7 +97,9 @@ if( BUILD_WITH_TENSILE )
set_target_properties( TensileHost PROPERTIES OUTPUT_NAME rocblas-tensile CXX_EXTENSIONS NO )
# Tensile host depends on libs build target
- add_dependencies( TensileHost TENSILE_LIBRARY_TARGET )
+ if(NOT DEFINED ENV{TENSILE_SKIP_LIBRARY} OR NOT "$ENV{TENSILE_SKIP_LIBRARY}")
+ add_dependencies( TensileHost TENSILE_LIBRARY_TARGET )
+ endif()
if( ROCBLAS_SHARED_LIBS )
set( BUILD_SHARED_LIBS ON )
@@ -823,24 +825,8 @@ rocm_install_targets(
${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}
)
-if( BUILD_WITH_TENSILE )
- if (WIN32)
- set( ROCBLAS_TENSILE_LIBRARY_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/bin/rocblas" CACHE PATH "path to tensile library" )
- else()
- set( ROCBLAS_TENSILE_LIBRARY_DIR "${CMAKE_INSTALL_LIBDIR}/rocblas" CACHE PATH "path to tensile library" )
- endif()
- # For ASAN package, Tensile library files(which are not shared libraries) are not required
- if( NOT ENABLE_ASAN_PACKAGING )
- if( BUILD_SHARED_LIBS )
- set( TENSILE_DATA_COMPONENT_NAME ${CMAKE_INSTALL_DEFAULT_COMPONENT_NAME} )
- else()
- set( TENSILE_DATA_COMPONENT_NAME devel )
- endif()
- rocm_install(
- DIRECTORY ${CMAKE_BINARY_DIR}/Tensile/library
- DESTINATION ${ROCBLAS_TENSILE_LIBRARY_DIR}
- COMPONENT ${TENSILE_DATA_COMPONENT_NAME}) # Use this cmake variable to be compatible with rocm-cmake 0.6 and 0.7
- endif()
+if(NOT DEFINED ENV{TENSILE_SKIP_LIBRARY} OR NOT "$ENV{TENSILE_SKIP_LIBRARY}")
+ add_subdirectory( TensileInstall )
endif()
if(NOT WIN32)
diff --git a/library/src/TensileInstall/CMakeLists.txt b/library/src/TensileInstall/CMakeLists.txt
new file mode 100644
index 0000000..fa39e9f
--- /dev/null
+++ b/library/src/TensileInstall/CMakeLists.txt
@@ -0,0 +1,19 @@
+if( BUILD_WITH_TENSILE )
+ if (WIN32)
+ set( ROCBLAS_TENSILE_LIBRARY_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/bin/rocblas" CACHE PATH "path to tensile library" )
+ else()
+ set( ROCBLAS_TENSILE_LIBRARY_DIR "${CMAKE_INSTALL_LIBDIR}/rocblas" CACHE PATH "path to tensile library" )
+ endif()
+ # For ASAN package, Tensile library files(which are not shared libraries) are not required
+ if( NOT ENABLE_ASAN_PACKAGING )
+ if( BUILD_SHARED_LIBS )
+ set( TENSILE_DATA_COMPONENT_NAME ${CMAKE_INSTALL_DEFAULT_COMPONENT_NAME} )
+ else()
+ set( TENSILE_DATA_COMPONENT_NAME devel )
+ endif()
+ rocm_install(
+ DIRECTORY ${CMAKE_BINARY_DIR}/Tensile/library
+ DESTINATION ${ROCBLAS_TENSILE_LIBRARY_DIR}
+ COMPONENT ${TENSILE_DATA_COMPONENT_NAME}) # Use this cmake variable to be compatible with rocm-cmake 0.6 and 0.7
+ endif()
+endif()

14
_constraints Normal file
View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build worker requirements for this package: disk, physical memory, processor and job counts. -->
<constraints>
<hardware>
<disk>
<size unit="G">60</size>
</disk>
<physicalmemory>
<size unit="G">16</size>
</physicalmemory>
<processors>4</processors>
<jobs>4</jobs>
</hardware>
<!-- exclude="true": workers carrying the SLOW_CPU label are not to be used -->
<hostlabel exclude="true">SLOW_CPU</hostlabel>
</constraints>

BIN
rocBLAS-6.4.0.tar.gz (Stored with Git LFS) Normal file

Binary file not shown.

3
rocblas.rpmlintrc Normal file
View File

@@ -0,0 +1,3 @@
# The files shipped in rocblas-tensile (and in any rocblas-arch-* package)
# are not shared libraries but modules loaded onto the GPU.
addFilter("rocblas-(arch|tensile).*: W: shared-lib-without-dependency-information")
addFilter("rocblas-(arch|tensile).*: E: shlib-fixed-dependency.")

551
rocblas.spec Normal file
View File

@@ -0,0 +1,551 @@
%if 0%{?suse_version}
%global rocblas_name librocblas4
%else
%global rocblas_name rocblas
%endif
%global upstreamname rocBLAS
%global rocm_release 6.4
%global rocm_patch 0
%global rocm_version %{rocm_release}.%{rocm_patch}
%if 0%{?suse_version}
# On SUSE build tensile modules - the .so module is built in a separate package
%bcond_without tensile_package
%define build_tensile_separately 1
%else
%define build_tensile_separately 0
%endif
%global toolchain rocm
# hipcc does not support some clang flags
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/')
%bcond_with debug
%if %{with debug}
%global build_type DEBUG
%else
%global build_type RELEASE
%endif
%bcond_without compress
%if %{with compress}
%global build_compress ON
%else
%global build_compress OFF
%endif
%bcond_with test
%if %{with test} && %{without tensile_package}
%global build_test ON
%global __brp_check_rpaths %{nil}
%else
%global build_test OFF
%endif
# Option to run the test suite on real hardware.
# You may have to select the GPU under test with
# export HIP_VISIBLE_DEVICES=<num> (0, 1, etc.)
%bcond_with check
# Tensile in 6.4 does not support generics
# https://github.com/ROCm/Tensile/issues/2124
%bcond_without tensile
%if %{with tensile}
%global build_tensile ON
%else
%global build_tensile OFF
%endif
%if 0%{?rhel} || 0%{?sle_version} > 160000
%bcond_with msgpack
%else
%bcond_without msgpack
%endif
# Use ninja if it is available
# Ninja is available on SUSE, but OBS times out with a ninja build; a make build does not
%if 0%{?fedora}
%bcond_without ninja
%else
%bcond_with ninja
%endif
%if 0%{?rhel} && 0%{?rhel} < 10
# On CS9: /usr/bin/debugedit: Cannot handle 8-byte build ID
%global debug_package %{nil}
%endif
# Compression type and level for source/binary package payloads.
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
%global _source_payload w7T0.xzdio
%global _binary_payload w7T0.xzdio
# SUSE/OBS times out because -O is added to the make args.
# With -O, make accumulates all the output from the long-running
# Tensile jobs.
%global _make_output_sync %{nil}
# Oracle Linux 9 has a problem with its strip not recognizing *.co files
%global __strip %rocmllvm_bindir/llvm-strip
%if %{with ninja}
%global cmake_generator -G Ninja
%else
%global cmake_generator %{nil}
%endif
%global cmake_config \\\
-DCMAKE_CXX_COMPILER=hipcc \\\
-DCMAKE_C_COMPILER=hipcc \\\
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \\\
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \\\
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \\\
-DCMAKE_BUILD_TYPE=%{build_type} \\\
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \\\
-DCMAKE_SKIP_RPATH=ON \\\
-DCMAKE_VERBOSE_MAKEFILE=ON \\\
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \\\
-DROCM_SYMLINK_LIBS=OFF \\\
-DHIP_PLATFORM=amd \\\
-DBUILD_CLIENTS_BENCHMARKS=%{build_test} \\\
-DBUILD_CLIENTS_TESTS=%{build_test} \\\
-DBUILD_CLIENTS_TESTS_OPENMP=OFF \\\
-DBUILD_FORTRAN_CLIENTS=OFF \\\
-DBLAS_LIBRARY=cblas \\\
-DBUILD_OFFLOAD_COMPRESS=%{build_compress} \\\
-DBUILD_WITH_HIPBLASLT=OFF \\\
-DTensile_COMPILER=hipcc \\\
-DTensile_CPU_THREADS=${CORES} \\\
-DTensile_LIBRARY_FORMAT=%{tensile_library_format} \\\
-DTensile_VERBOSE=%{tensile_verbose} \\\
-DTensile_DIR=${TP}/cmake \\\
-DDISABLE_ROCTRACER=ON \\\
-DBUILD_WITH_PIP=OFF
%bcond_with generic
%global rocm_gpu_list_generic "gfx9-generic;gfx9-4-generic;gfx10-1-generic;gfx10-3-generic;gfx11-generic;gfx12-generic"
%if %{with generic}
%global gpu_list %{rocm_gpu_list_generic}
%else
%global gpu_list %{rocm_gpu_list_default}
%endif
# gfx950 is an experimental target
# Enabling will short circuit the normal build.
# There is no check support.
# To use do
# $ module load rocm/gfx950
# <do stuff>
# $ module purge
%bcond_with gfx950
%if %{with gfx950} && %{with tensile_package}
ExclusiveArch: do_not_build
%endif
Name: %{rocblas_name}
Version: %{rocm_version}
Release: 9%{?dist}
Summary: BLAS implementation for ROCm
Url: https://github.com/ROCmSoftwarePlatform/%{upstreamname}
License: MIT AND BSD-3-Clause
Source0: %{url}/archive/refs/tags/rocm-%{rocm_version}.tar.gz#/%{upstreamname}-%{rocm_version}.tar.gz
Source1: rocblas.rpmlintrc
Patch2: 0001-fixup-install-of-tensile-output.patch
Patch3: Modify-CMakeLists.txt-files-to-allow-to-build-modules-independently.patch
Patch4: 0001-offload-compress-option.patch
Patch6: 0001-option-to-disable-roctracer-logging.patch
%if 0%{build_tensile_separately}
Requires: rocblas-tensile = %version
%if %{with check}
# If %check is enabled, we need to serialize the builds which will introduce
# a circular dependency. The flag below causes OBS to ignore this.
#
#!BuildIgnore: %name
BuildRequires: rocblas-tensile = %version
%endif
%endif
BuildRequires: cmake
BuildRequires: gcc-c++
BuildRequires: rocm-cmake
BuildRequires: rocm-comgr-devel
BuildRequires: rocm-compilersupport-macros
BuildRequires: rocm-hip-devel
BuildRequires: rocm-runtime-devel
BuildRequires: rocm-rpm-macros
BuildRequires: rocm-rpm-macros-modules
# Build dependencies and cmake_config inputs that depend on the tensile
# and msgpack build conditions.
%if %{with tensile}
%if 0%{?suse_version}
BuildRequires: %{python_module tensile-devel}
%if %{suse_version} < 1699
BuildRequires: %{python_module joblib}
%endif
# OBS vm times out without console output
%global tensile_verbose 2
%{?with_msgpack:BuildRequires: msgpack-cxx-devel}
%else
# Not SUSE
BuildRequires: python3dist(tensile)
# Required whenever msgpack is enabled, on RHEL as well
%{?with_msgpack:BuildRequires: msgpack-devel}
%if 0%{?rhel}
%global tensile_verbose 2
%else
%global tensile_verbose 1
%endif
%endif
# The library format is decided by the msgpack build condition alone
%if %{with msgpack}
%global tensile_library_format msgpack
%else
%global tensile_library_format yaml
%endif
%else
# Without tensile both values expand to nothing where cmake_config uses them
%global tensile_verbose %{nil}
%global tensile_library_format %{nil}
%endif
%if %{with compress}
BuildRequires: pkgconfig(libzstd)
%endif
%if %{with test}
%if 0%{?suse_version}
BuildRequires: %{python_module PyYAML}
%else
BuildRequires: python3dist(pyyaml)
%endif
BuildRequires: blas-devel
BuildRequires: libomp-devel
BuildRequires: rocminfo
BuildRequires: rocm-smi-devel
BuildRequires: roctracer-devel
%if 0%{?suse_version}
BuildRequires: cblas-devel
BuildRequires: gcc-fortran
BuildRequires: gtest
%else
BuildRequires: gtest-devel
%endif
%endif
%if %{with ninja}
%if 0%{?fedora} || 0%{?rhel}
BuildRequires: ninja-build
%endif
%if 0%{?suse_version}
BuildRequires: ninja
%define __builder ninja
%endif
%endif
Provides: rocblas = %{version}-%{release}
# Only x86_64 works right now:
ExclusiveArch: x86_64
%description
rocBLAS is the AMD library for Basic Linear Algebra Subprograms
(BLAS) on the ROCm platform. It is implemented in the HIP
programming language and optimized for AMD GPUs.
%post -p /sbin/ldconfig
%postun -p /sbin/ldconfig
%package devel
Summary: Libraries and headers for %{name}
Requires: %{name}%{?_isa} = %{version}-%{release}
Requires: cmake(hip)
Provides: rocblas-devel = %{version}-%{release}
%description devel
%{summary}
%package -n rocblas-tensile
Summary: ROCBlas Tensile Modules
Requires: %{name} = %version
%description -n rocblas-tensile
BLAS architecture modules for all AMDGPU architectures
%if %{with test}
%package test
Summary: Tests for %{name}
Requires: diffutils
Requires: %{name}%{?_isa} = %{version}-%{release}
%description test
%{summary}
%endif
%if %{with gfx950}
%package gfx950
Summary: The gfx950 rocBLAS package
Provides: rocblas-gfx950 = %{version}-%{release}
Conflicts: %{name}
%description gfx950
%{summary}
%package gfx950-devel
Summary: The gfx950 rocBLAS development package
Requires: %{name}-gfx950%{?_isa} = %{version}-%{release}
Provides: rocblas-gfx950-devel = %{version}-%{release}
Conflicts: %{name}-devel
%description gfx950-devel
%{summary}
%if %{with test} && %{without tensile_package}
%package gfx950-test
Summary: The gfx950 rocBLAS test package
Requires: %{name}-gfx950%{?_isa} = %{version}-%{release}
Conflicts: %{name}-test
%description gfx950-test
%{summary}
%endif # gfx950-test
%endif # gfx950
%prep
%autosetup -p1 -n %{upstreamname}-rocm-%{version}
sed -i -e 's@set( BLAS_LIBRARY "blas" )@set( BLAS_LIBRARY "cblas" )@' clients/CMakeLists.txt
sed -i -e 's@target_link_libraries( rocblas-test PRIVATE ${BLAS_LIBRARY} ${GTEST_BOTH_LIBRARIES} roc::rocblas )@target_link_libraries( rocblas-test PRIVATE cblas ${GTEST_BOTH_LIBRARIES} roc::rocblas )@' clients/gtest/CMakeLists.txt
# no git in this build
sed -i -e 's@find_package(Git REQUIRED)@find_package(Git)@' library/CMakeLists.txt
# On Tumbleweed Q2,2025
# /usr/include/gtest/internal/gtest-port.h:279:2: error: C++ versions less than C++14 are not supported.
# 279 | #error C++ versions less than C++14 are not supported.
# Convert the c++11's to c++14
sed -i -e 's@CXX_STANDARD 11@CXX_STANDARD 14@' clients/samples/CMakeLists.txt
%if 0%{?suse_version}
# Suse's libgfortran.so for gcc 14 is here
# /usr/lib64/gcc/x86_64-suse-linux/14/libgfortran.so
# Without adding this path with -L, it isn't found, but thankfully it isn't really needed
sed -i -e 's@list( APPEND COMMON_LINK_LIBS "-lgfortran")@#list( APPEND COMMON_LINK_LIBS "-lgfortran")@' clients/{benchmarks,gtest}/CMakeLists.txt
%endif
%build
# With compat llvm the system clang is wrong
CLANG_PATH=`hipconfig --hipclangpath`
export TENSILE_ROCM_ASSEMBLER_PATH=${CLANG_PATH}/clang++
export TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${CLANG_PATH}/clang-offload-bundler
# Work around problem with koji's ld
export HIPCC_LINK_FLAGS_APPEND=-fuse-ld=lld
%if %{with tensile}
TP=`/usr/bin/TensileGetPath`
%endif
# CORES is handed to Tensile as its CPU thread count through cmake_config.
# Start from the "Core(s) per socket" value reported by lscpu; only the first
# matching line is used so that CORES is always a single word.
CORES=$(LC_ALL=C lscpu | awk '/Core\(s\)/ { print $4; exit }')
if [ -z "${CORES}" ]; then
CORES=1
fi
# If that gave 1 (or nothing was found), try the total "CPU(s):" count instead
if [ "${CORES}" = 1 ]; then
CORES=$(LC_ALL=C lscpu | awk '/^CPU\(s\)/ { print $2; exit }')
if [ -z "${CORES}" ]; then
CORES=4
fi
fi
%if %{with gfx950}
# Experimental gfx950 build. ROCM_GPUS, ROCM_BIN, ROCM_INCLUDE and ROCM_LIB are
# presumably exported by the rocm/gfx950 environment module loaded here.
module load rocm/gfx950
%cmake %{cmake_generator} %{cmake_config} \
-DGPU_TARGETS=${ROCM_GPUS} \
-DBUILD_WITH_TENSILE=OFF \
-DCMAKE_INSTALL_BINDIR=${ROCM_BIN} \
-DCMAKE_INSTALL_INCLUDEDIR=${ROCM_INCLUDE} \
-DCMAKE_INSTALL_LIBDIR=${ROCM_LIB}
%else
%if %{build_tensile_separately} && %{without tensile_package}
# Core library of a split build: TENSILE_SKIP_LIBRARY is read by the
# CMakeLists.txt changes from Patch3 when cmake is configured.
export TENSILE_SKIP_LIBRARY=true
%endif
# The last argument must not end in a backslash: a continuation here would
# join the following endif directive onto the cmake command line.
%cmake %{cmake_generator} %{cmake_config} \
-DGPU_TARGETS=%{gpu_list} \
-DBUILD_WITH_TENSILE=%{build_tensile} \
-DCMAKE_INSTALL_LIBDIR=%_libdir
%endif
%cmake_build %{?with_tensile_package:TENSILE_LIBRARY_TARGET}
%if %{with gfx950}
module purge
%endif
%install
%if %{without tensile_package}
# Regular install. LICENSE.md is packaged through the license entry of the
# files list, so the copy installed under share/doc is removed when present.
%cmake_install
[ ! -f %{buildroot}%{_prefix}/share/doc/rocblas/LICENSE.md ] || \
rm %{buildroot}%{_prefix}/share/doc/rocblas/LICENSE.md
%else
# Tensile module package: run only the install script of the TensileInstall directory
DESTDIR=%{buildroot} /usr/bin/cmake -P build/library/src/TensileInstall/cmake_install.cmake
%endif
%check
# Tests run only for the core package and only when both the test and the
# check build conditions are enabled.
%if %{without tensile_package} && %{with test} && %{with check}
# Put library/src of the build tree on the library search path for rocblas-test
%if 0%{?suse_version}
export LD_LIBRARY_PATH=%{__builddir}/library/src:$LD_LIBRARY_PATH
%{__builddir}/clients/staging/rocblas-test --gtest_brief=1
%else
export LD_LIBRARY_PATH=%{_vpath_builddir}/library/src:$LD_LIBRARY_PATH
%{_vpath_builddir}/clients/staging/rocblas-test --gtest_brief=1
%endif
%endif
%if %{with gfx950}
%files gfx950
%license LICENSE.md
%{_libdir}/rocm/gfx950/lib/librocblas.so.4{,.*}
%files gfx950-devel
%dir %{_libdir}/rocm/gfx950/include/rocblas
%dir %{_libdir}/rocm/gfx950/lib/cmake/rocblas
%{_libdir}/rocm/gfx950/include/rocblas/rocblas_module.f90
%{_libdir}/rocm/gfx950/lib/librocblas.so
%{_libdir}/rocm/gfx950/lib/cmake/rocblas/*.cmake
%if %{with test}
%files gfx950-test
%{_libdir}/rocm/gfx950/bin/rocblas*
%endif
%else
%if %{without tensile_package}
%files
%license LICENSE.md
%{_libdir}/librocblas.so.4{,.*}
%if %{with tensile}
%if ! %{build_tensile_separately}
%dir %{_libdir}/rocblas
%dir %{_libdir}/rocblas/library
%{_libdir}/rocblas/library/Kernels*
%{_libdir}/rocblas/library/Tensile*
%endif
%endif # with tensile
%files devel
%doc README.md
%dir %{_libdir}/cmake/rocblas
%dir %{_includedir}/rocblas
%{_includedir}/rocblas/*
%{_libdir}/cmake/rocblas/*.cmake
%{_libdir}/librocblas.so
%if %{with test}
%files test
%{_bindir}/rocblas*
%endif
%else # ?tensile_package
%if %{with tensile}
%files -n rocblas-tensile
%dir %{_libdir}/rocblas
%dir %{_libdir}/rocblas/library
%{_libdir}/rocblas/library/Kernels*
%{_libdir}/rocblas/library/Tensile*
%endif
%endif # ?tensile_package
%endif # gfx950
%changelog
* Thu Jun 12 2025 Egbert Eich <eich@suse.com> - 6.4.0-9
- Build and package core library and arch dependent
tensile modules separately to parallelize the build.
- Fix build and runtime dependencies of test package.
- Restructure spec file (move bcond_with* settings to
the top).
- Add rpmlintrc for SUSE.
* Wed Jun 11 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-8
- Remove suse check for using ldconfig
* Sun May 11 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-7
- Add experimental gfx950
* Tue May 6 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-6
- disable roctracer for everyone
* Tue Apr 29 2025 Tim Flink <tflink@fedoraproject.org> - 6.4.0-5
- add patch for option to disable roctracer logging
- disable roctracer logging for rhel builds
- allow for builds on rhel with ninja
* Tue Apr 29 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-4
- Improve testing for suse
* Sat Apr 26 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-3
- Add generic gpus
* Wed Apr 23 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-2
- Use joblib on sle 15.6 and 16.0
* Fri Apr 18 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-1
- Update to 6.4.0
* Thu Apr 10 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-12
- Reenable ninja
* Fri Apr 4 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-11
- Use rocm-llvm strip
* Thu Feb 27 2025 Cristian Le <git@lecris.dev> - 6.3.0-10
- Add hip requirement to devel package
* Thu Feb 27 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-9
- Enable tensile for RHEL
* Wed Feb 26 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-8
- Enable tensile for SUSE
* Sun Feb 23 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-7
- Use tensile verbosity to avoid OSB timeout
* Wed Feb 19 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-6
- Use tensile cmake from the python location
* Tue Feb 11 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-5
- Remove multibuild
- Fix SLE 15.6
* Sat Jan 18 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-4
- multithread rpm compress
* Tue Jan 14 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-3
- build requires gcc-c++
* Fri Dec 20 2024 Tom Rix <Tom.Rix@amd.com> - 6.3.0-2
- Build type should be release
* Fri Dec 6 2024 Tom Rix <Tom.Rix@amd.com> - 6.3.0-1
- Update to 6.3
* Sun Nov 10 2024 Tom Rix <Tom.Rix@amd.com> - 6.2.1-1
- Stub for tumbleweed