Compare commits

...

10 Commits

10 changed files with 1009 additions and 13 deletions

97
0001-Add-gfx1035.patch Normal file
View File

@@ -0,0 +1,97 @@
From cf989a0a4d0306f6ec4f3e9256064e9f1ea83812 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 6 Dec 2024 14:17:30 -0800
Subject: [PATCH] Add gfx1035
---
Tensile/AsmCaps.py | 44 ++++++++++++++++++++++
Tensile/Common.py | 2 +-
Tensile/Source/lib/source/ocl/OclUtils.cpp | 4 ++
3 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
index b838bad22a30..8faea6d285a1 100644
--- a/Tensile/AsmCaps.py
+++ b/Tensile/AsmCaps.py
@@ -639,6 +639,50 @@ CACHED_ASM_CAPS = \
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
+ (10, 3, 5): {'HasAddLshl': True,
+ 'HasAtomicAdd': False,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': True,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': False,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': True,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': True,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
(11, 0, 0): {'HasAddLshl': True,
'HasAtomicAdd': True,
'HasDirectToLdsDest': False,
diff --git a/Tensile/Common.py b/Tensile/Common.py
index 4d9c5a9155ee..b02e27c39e76 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -252,7 +252,7 @@ globalParameters["MaxFileName"] = 64 # If a file name would be long
globalParameters["SupportedISA"] = [(8,0,3),
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
(9,4,0), (9,4,1), (9,4,2),
- (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
+ (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5),
(11,0,0), (11,0,1), (11,0,2), (11,0,3),
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
index eb5a14eccfb1..5242dc77abdf 100644
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
@@ -176,6 +176,10 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1030;
}
+ else if(deviceString.find("gfx1035") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1035;
+ }
else if(deviceString.find("gfx1100") != std::string::npos)
{
return AMDGPU::Processor::gfx1100;
--
2.47.1

186
0001-Add-gfx1103.patch Normal file
View File

@@ -0,0 +1,186 @@
From c67af6b301cb6a0cfc98708682c3bb0b66ad601e Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 6 Dec 2024 14:16:08 -0800
Subject: [PATCH] Add gfx1103
---
Tensile/AsmCaps.py | 44 +++++++++++++++++++
Tensile/Common.py | 5 ++-
Tensile/Source/CMakeLists.txt | 4 +-
Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 7 +++
.../include/Tensile/PlaceholderLibrary.hpp | 3 ++
Tensile/Source/lib/source/ocl/OclUtils.cpp | 4 ++
6 files changed, 63 insertions(+), 4 deletions(-)
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
index e61580fafde2..b838bad22a30 100644
--- a/Tensile/AsmCaps.py
+++ b/Tensile/AsmCaps.py
@@ -771,6 +771,50 @@ CACHED_ASM_CAPS = \
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
+ (11, 0, 3): {'HasAddLshl': True,
+ 'HasAtomicAdd': True,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': False,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': True,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': False,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': False,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
(12, 0, 0): {'HasAddLshl': True,
'HasAtomicAdd': False,
'HasDirectToLdsDest': False,
diff --git a/Tensile/Common.py b/Tensile/Common.py
index ce7c8218d850..4d9c5a9155ee 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -253,7 +253,7 @@ globalParameters["SupportedISA"] = [(8,0,3),
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
(9,4,0), (9,4,1), (9,4,2),
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
- (11,0,0), (11,0,1), (11,0,2),
+ (11,0,0), (11,0,1), (11,0,2), (11,0,3),
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed
@@ -334,6 +334,7 @@ architectureMap = {
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
+ 'gfx1103':'gfx1103', 'gfx1151':'gfx1151',
'gfx1200':'gfx1200',
'gfx1201':'gfx1201'
}
@@ -2459,7 +2460,7 @@ def assignGlobalParameters( config ):
if os.name == "nt":
globalParameters["CurrentISA"] = (9,0,6)
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
- isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,5,1), (12,0,0), (12,0,1))
+ isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,1), (12,0,0), (12,0,1))
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
printWarning(f"HardwareMonitor currently disabled for {isaString}")
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
index f350b26caf7f..78379e2d21d8 100644
--- a/Tensile/Source/CMakeLists.txt
+++ b/Tensile/Source/CMakeLists.txt
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
endif()
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 CACHE STRING "GPU architectures")
else()
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 CACHE STRING "GPU architectures")
endif()
include(CMakeDependentOption)
diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
index d83ee830d1da..dc0336c3d62d 100644
--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
+++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
@@ -74,6 +74,7 @@ namespace Tensile
gfx1100 = 1100,
gfx1101 = 1101,
gfx1102 = 1102,
+ gfx1103 = 1103,
gfx1151 = 1151
};
@@ -119,6 +120,8 @@ namespace Tensile
return "gfx1101";
case AMDGPU::Processor::gfx1102:
return "gfx1102";
+ case AMDGPU::Processor::gfx1103:
+ return "gfx1103";
case AMDGPU::Processor::gfx1151:
return "gfx1151";
}
@@ -187,6 +190,10 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1102;
}
+ else if(deviceString.find("gfx1103") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1103;
+ }
else if(deviceString.find("gfx1151") != std::string::npos)
{
return AMDGPU::Processor::gfx1151;
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
index f83713c04430..4f81795a9065 100644
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
@@ -58,6 +58,7 @@ namespace Tensile
gfx1100,
gfx1101,
gfx1102,
+ gfx1103,
gfx1151,
All
};
@@ -107,6 +108,8 @@ namespace Tensile
return "TensileLibrary_*_gfx1101";
case LazyLoadingInit::gfx1102:
return "TensileLibrary_*_gfx1102";
+ case LazyLoadingInit::gfx1103:
+ return "TensileLibrary_*_gfx1103";
case LazyLoadingInit::gfx1151:
return "TensileLibrary_*_gfx1151";
case LazyLoadingInit::None:
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
index ff04c56a1025..eb5a14eccfb1 100644
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
@@ -188,6 +188,10 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1102;
}
+ else if(deviceString.find("gfx1103") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1103;
+ }
else if(deviceString.find("gfx1151") != std::string::npos)
{
return AMDGPU::Processor::gfx1151;
--
2.47.1

View File

@@ -0,0 +1,85 @@
From cd17e816bbac907f0fd704828230bb04db6921dd Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Mon, 28 Oct 2024 05:42:27 -0700
Subject: [PATCH] Add gfx1151 support
Cherry-picked from upstream 642974ee2f3f9d8a8280bf87c462645fb07a2897
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
Tensile/AsmCaps.py | 47 +++++++++++++++++++++++++++++++++++++++++++++-
Tensile/Common.py | 3 ++-
2 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
index 22c67e977aa0..b4899da6284c 100644
--- a/Tensile/AsmCaps.py
+++ b/Tensile/AsmCaps.py
@@ -770,4 +770,49 @@ CACHED_ASM_CAPS = \
'v_mad_mix_f32': False,
'v_mov_b64': False,
'v_pk_fma_f16': True,
- 'v_pk_fmac_f16': False}}
+ 'v_pk_fmac_f16': False},
+ (11, 5, 1): {'HasAddLshl': True,
+ 'HasAtomicAdd': True,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': False,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': True,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': False,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': False,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
+}
diff --git a/Tensile/Common.py b/Tensile/Common.py
index 07abbf59397f..8c27486338fc 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -306,7 +306,8 @@ architectureMap = {
'gfx942':'aquavanjaram942', 'gfx942:xnack+':'aquavanjaram942', 'gfx942:xnack-':'aquavanjaram942',
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33'
+ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
+ 'gfx1151':'gfx1151'
}
def getArchitectureName(gfxName):
--
2.47.0

View File

@@ -0,0 +1,46 @@
From b75119c7b7e9f8b8f7c5ec1da3b6bd9bc7859eec Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Wed, 26 Feb 2025 04:33:46 -0800
Subject: [PATCH] Handle a missing joblib
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
Tensile/Parallel.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/Tensile/Parallel.py b/Tensile/Parallel.py
index 9a7e7b57dc2c..18112f76f80d 100644
--- a/Tensile/Parallel.py
+++ b/Tensile/Parallel.py
@@ -26,11 +26,17 @@ import itertools
import os
from typing import Any, Callable
-from joblib import Parallel, delayed
+try:
+    import joblib
+except Exception:  # joblib is optional; any import failure means "run serially", but do not swallow SystemExit/KeyboardInterrupt
+    joblib = None
+
+if joblib is not None:  # Parallel/delayed are only bound when joblib imported cleanly
+    from joblib import Parallel, delayed
def CPUThreadCount(enable=True):
from .Common import globalParameters
- if not enable:
+ if not enable or joblib == None:
return 1
else:
if os.name == "nt":
@@ -80,7 +86,7 @@ def ParallelMap(function: Callable, objects: Any, message: str="", enable: bool=
from . import Utils
threadCount = CPUThreadCount(enable)
- if threadCount <= 1:
+ if threadCount <= 1 or joblib == None:
return list(map(lambda objs: function(*objs), Utils.tqdm(objects, desc=message)))
inputs = list(zip(objects, itertools.repeat(globalParameters)))
--
2.47.1

115
0001-More-gfx1151.patch Normal file
View File

@@ -0,0 +1,115 @@
From bb1f4a2224fb43d0eeca27cbb5ac93950dc06dd3 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 6 Dec 2024 14:08:27 -0800
Subject: [PATCH] More gfx1151
---
Tensile/Common.py | 2 +-
Tensile/Source/CMakeLists.txt | 4 ++--
Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 9 ++++++++-
.../Source/lib/include/Tensile/PlaceholderLibrary.hpp | 3 +++
Tensile/Source/lib/source/ocl/OclUtils.cpp | 4 ++++
5 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/Tensile/Common.py b/Tensile/Common.py
index 79ae322ad926..ce7c8218d850 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -2459,7 +2459,7 @@ def assignGlobalParameters( config ):
if os.name == "nt":
globalParameters["CurrentISA"] = (9,0,6)
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
- isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (12,0,0), (12,0,1))
+ isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,5,1), (12,0,0), (12,0,1))
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
printWarning(f"HardwareMonitor currently disabled for {isaString}")
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
index e02b209a262a..f350b26caf7f 100644
--- a/Tensile/Source/CMakeLists.txt
+++ b/Tensile/Source/CMakeLists.txt
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
endif()
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
else()
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
endif()
include(CMakeDependentOption)
diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
index 0ab8ced5cf5d..d83ee830d1da 100644
--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
+++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
@@ -73,7 +73,8 @@ namespace Tensile
gfx1035 = 1035,
gfx1100 = 1100,
gfx1101 = 1101,
- gfx1102 = 1102
+ gfx1102 = 1102,
+ gfx1151 = 1151
};
static std::string toString(Processor p)
@@ -118,6 +119,8 @@ namespace Tensile
return "gfx1101";
case AMDGPU::Processor::gfx1102:
return "gfx1102";
+ case AMDGPU::Processor::gfx1151:
+ return "gfx1151";
}
return "";
}
@@ -184,6 +187,10 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1102;
}
+ else if(deviceString.find("gfx1151") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1151;
+ }
else
{
return static_cast<AMDGPU::Processor>(0);
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
index 10898ec2d1d6..f83713c04430 100644
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
@@ -58,6 +58,7 @@ namespace Tensile
gfx1100,
gfx1101,
gfx1102,
+ gfx1151,
All
};
@@ -106,6 +107,8 @@ namespace Tensile
return "TensileLibrary_*_gfx1101";
case LazyLoadingInit::gfx1102:
return "TensileLibrary_*_gfx1102";
+ case LazyLoadingInit::gfx1151:
+ return "TensileLibrary_*_gfx1151";
case LazyLoadingInit::None:
return "";
}
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
index 8ee6d21769f0..ff04c56a1025 100644
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
@@ -188,6 +188,10 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1102;
}
+ else if(deviceString.find("gfx1151") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1151;
+ }
else
{
return static_cast<AMDGPU::Processor>(0);
--
2.47.1

View File

@@ -0,0 +1,46 @@
From ffc29981521b1dd38d262fcfc9ee4ab6377f9957 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Wed, 26 Feb 2025 06:22:30 -0800
Subject: [PATCH] serialize reading logic files
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
Tensile/TensileCreateLibrary.py | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index a16446063615..3c0472788303 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -69,6 +69,7 @@ from .TensileCreateLib.ParseArguments import parseArguments
from .Utilities.Profile import profile
from .Utilities.String import splitDelimitedString
from .Utilities.toFile import toFile
+from .Parallel import CPUThreadCount
TENSILE_MANIFEST_FILENAME = "TensileManifest.txt"
TENSILE_LIBRARY_DIR = "library"
@@ -1308,9 +1309,17 @@ def parseLibraryLogicFiles(logicFiles: List[str]) -> List[LibraryIO.LibraryLogic
Returns:
List of library logic tuples.
"""
- return Common.ParallelMap(
- LibraryIO.parseLibraryLogicFile, logicFiles, "Reading logic files", multiArg=False
- )
+ if CPUThreadCount() < 2:
+ tPrint(1, "Reading logic files")
+ rv = []
+ for lf in logicFiles:
+ tPrint(3, "Reading logic file: %s" % lf)
+ rv.append(LibraryIO.parseLibraryLogicFile(lf))
+ return rv
+ else:
+ return Common.ParallelMap(
+ LibraryIO.parseLibraryLogicFile, logicFiles, "Reading logic files", multiArg=False
+ )
def generateLogicData(
--
2.47.1

View File

@@ -0,0 +1,365 @@
From 4b03428cb375182ea6bd05f40ea7f38a6c1f873c Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 18 Apr 2025 07:30:37 -0700
Subject: [PATCH] tensile fedora gpus
---
Tensile/AsmCaps.py | 176 ++++++++++++++++++
Tensile/Common.py | 10 +-
Tensile/Source/CMakeLists.txt | 4 +-
Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 36 +++-
.../include/Tensile/PlaceholderLibrary.hpp | 17 +-
5 files changed, 229 insertions(+), 14 deletions(-)
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
index 548b31f28321..78ffa73bd81a 100644
--- a/Tensile/AsmCaps.py
+++ b/Tensile/AsmCaps.py
@@ -639,6 +639,50 @@ CACHED_ASM_CAPS = \
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
+ (10, 3, 5): {'HasAddLshl': True,
+ 'HasAtomicAdd': False,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': True,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': False,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': True,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': True,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
(11, 0, 0): {'HasAddLshl': True,
'HasAtomicAdd': True,
'HasDirectToLdsDest': False,
@@ -771,6 +815,94 @@ CACHED_ASM_CAPS = \
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
+ (11, 0, 3): {'HasAddLshl': True,
+ 'HasAtomicAdd': True,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': False,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': True,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': False,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': False,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
+ (11, 5, 0): {'HasAddLshl': True,
+ 'HasAtomicAdd': True,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': False,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': True,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': False,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': False,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
(11, 5, 1): {'HasAddLshl': True,
'HasAtomicAdd': True,
'HasDirectToLdsDest': False,
@@ -815,6 +947,50 @@ CACHED_ASM_CAPS = \
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
+ (11, 5, 2): {'HasAddLshl': True,
+ 'HasAtomicAdd': True,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': False,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': True,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': False,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': False,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
(12, 0, 0): {'HasAddLshl': True,
'HasAtomicAdd': False,
'HasDirectToLdsDest': False,
diff --git a/Tensile/Common.py b/Tensile/Common.py
index 410c83656fd7..4d212d977c3d 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -245,9 +245,9 @@ globalParameters["MaxFileName"] = 64 # If a file name would be long
globalParameters["SupportedISA"] = [(8,0,3),
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
(9,4,0), (9,4,1), (9,4,2),
- (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
- (11,0,0), (11,0,1), (11,0,2),
- (11,5,1),
+ (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5),
+ (11,0,0), (11,0,1), (11,0,2), (11,0,3),
+ (11,5,0), (11,5,1), (11,5,2),
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
globalParameters["KeepBuildTmp"] = True # Do not remove build artifacts during the build process or build_tmp after build completes
@@ -324,7 +324,7 @@ architectureMap = {
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
- 'gfx1151':'gfx1151',
+ 'gfx1103':'gfx1103', 'gfx1150':'gfx1150', 'gfx1151':'gfx1151', 'gfx1152':'gfx1152',
'gfx1200':'gfx1200',
'gfx1201':'gfx1201'
}
@@ -2466,7 +2466,7 @@ def assignGlobalParameters( config, capabilitiesCache: Optional[dict] = None ):
if os.name == "nt":
globalParameters["CurrentISA"] = (9,0,6)
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
- isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (12,0,0), (12,0,1))
+ isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,0), (11,5,1), (11,5,2), (12,0,0), (12,0,1))
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
printWarning(f"HardwareMonitor currently disabled for {isaString}")
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
index e02b209a262a..7f10ee319518 100644
--- a/Tensile/Source/CMakeLists.txt
+++ b/Tensile/Source/CMakeLists.txt
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
endif()
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 CACHE STRING "GPU architectures")
else()
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 CACHE STRING "GPU architectures")
endif()
include(CMakeDependentOption)
diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
index 0ab8ced5cf5d..2317ce79f8f2 100644
--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
+++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
@@ -73,7 +73,11 @@ namespace Tensile
gfx1035 = 1035,
gfx1100 = 1100,
gfx1101 = 1101,
- gfx1102 = 1102
+ gfx1102 = 1102,
+ gfx1103 = 1103,
+ gfx1150 = 1150,
+ gfx1151 = 1151,
+ gfx1152 = 1152,
};
static std::string toString(Processor p)
@@ -118,9 +122,17 @@ namespace Tensile
return "gfx1101";
case AMDGPU::Processor::gfx1102:
return "gfx1102";
- }
- return "";
- }
+ case AMDGPU::Processor::gfx1103:
+ return "gfx1103";
+ case AMDGPU::Processor::gfx1150:
+ return "gfx1150";
+ case AMDGPU::Processor::gfx1151:
+ return "gfx1151";
+ case AMDGPU::Processor::gfx1152:
+ return "gfx1152";
+ }
+ return "";
+ }
AMDGPU::Processor toProcessorId(std::string const& deviceString)
{
@@ -184,6 +196,22 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1102;
}
+ else if(deviceString.find("gfx1103") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1103;
+ }
+ else if(deviceString.find("gfx1150") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1150;
+ }
+ else if(deviceString.find("gfx1151") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1151;
+ }
+ else if(deviceString.find("gfx1152") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1152;
+ }
else
{
return static_cast<AMDGPU::Processor>(0);
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
index 10898ec2d1d6..f838f15d3ac4 100644
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
@@ -58,6 +58,10 @@ namespace Tensile
gfx1100,
gfx1101,
gfx1102,
+ gfx1103,
+ gfx1151,
+ gfx1150,
+ gfx1152,
All
};
@@ -106,10 +110,18 @@ namespace Tensile
return "TensileLibrary_*_gfx1101";
case LazyLoadingInit::gfx1102:
return "TensileLibrary_*_gfx1102";
- case LazyLoadingInit::None:
- return "";
+ case LazyLoadingInit::gfx1103:
+ return "TensileLibrary_*_gfx1103";
+ case LazyLoadingInit::gfx1150:
+ return "TensileLibrary_*_gfx1150";
+ case LazyLoadingInit::gfx1151:
+ return "TensileLibrary_*_gfx1151";
+ case LazyLoadingInit::gfx1152:
+ return "TensileLibrary_*_gfx1152";
+ case LazyLoadingInit::None:
+ return "";
}
return "";
}
template <typename MyProblem, typename MySolution = typename MyProblem::Solution>
--
2.48.1

View File

@@ -0,0 +1,27 @@
From f0b325a630277120e67bc6432e99901f96621f88 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sun, 12 May 2024 11:21:39 -0600
Subject: [PATCH] tensile workaround cache problem
---
Tensile/Common.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Tensile/Common.py b/Tensile/Common.py
index 6ececf1c47b3..672cef5aa842 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -2037,8 +2037,8 @@ def GetAsmCaps(isaVersion):
ignoreCacheCheck = True
# check if derived caps matches asm cap cache
- if not ignoreCacheCheck and derivedAsmCaps != CACHED_ASM_CAPS[isaVersion]:
- printExit("Cached asm caps differ from derived asm caps for {}".format(isaVersion))
+ # if not ignoreCacheCheck and derivedAsmCaps != CACHED_ASM_CAPS[isaVersion]:
+ # printExit("Cached asm caps differ from derived asm caps for {}".format(isaVersion))
return derivedAsmCaps
else:
printWarning("Assembler not present, asm caps loaded from cache are unverified")
--
2.45.0

BIN
Tensile-6.4.0.tar.gz (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -9,7 +9,7 @@
%global upstreamname Tensile
%global rocm_release 6.3
%global rocm_release 6.4
%global rocm_patch 0
%global rocm_version %{rocm_release}.%{rocm_patch}
@@ -19,34 +19,35 @@ Name: python-tensile-devel
Name: python-tensile
%endif
Version: %{rocm_version}
Release: 8%{?dist}
Release: 1%{?dist}
Summary: Tool for creating benchmark-driven backend libraries for GEMMs
URL: https://github.com/ROCmSoftwarePlatform/Tensile
License: MIT
Source0: %{url}/archive/rocm-%{version}.tar.gz#/%{upstreamname}-%{version}.tar.gz
Patch1: 0001-Adding-gfx1151-to-6.2-2026.patch
Patch2: 0002-More-gfx1151.patch
Patch3: 0003-Add-gfx1103.patch
Patch4: 0004-Add-gfx1035.patch
Patch5: 0005-Add-gfx1152.patch
Patch6: 0006-Add-gfx1150.patch
Patch1: 0001-tensile-fedora-gpus.patch
#Patch7: 0001-Handle-a-missing-joblib.patch
#Patch8: 0001-serialize-reading-logic-files.patch
%if 0%{?fedora} || 0%{?suse_version}
BuildRequires: fdupes
%endif
%if 0%{?suse_version}
BuildRequires: python-rpm-macros
BuildRequires: %{python_module setuptools}
Requires: hipcc
Requires: rocminfo
# Not available on SLE, tensile may be able to cope
Recommends: %{python_module joblib}
%if %{suse_version} >= 1699
Requires: %{python_module joblib}
%endif
Requires: %{python_module msgpack}
Requires: %{python_module PyYAML}
Requires: %{python_module setuptools}
Requires(post): update-alternatives
Requires(postun): update-alternatives
# Not available on SLE, tensile does not cope
Recommends: python-rich
%else
BuildRequires: python3-devel
BuildRequires: python3dist(setuptools)
@@ -64,7 +65,7 @@ contractions on a GPU. The Tensile library is mainly used as backend library to
rocBLAS. Tensile acts as the performance backbone for a wide variety of
'compute' applications running on AMD GPUs.
%if 0%{?fedora}
%if 0%{?fedora} || 0%{?rhel}
# There are headers and code as part of the code generation.
# This makes rpm checkers unhappy
%package -n python3-tensile-devel
@@ -73,9 +74,12 @@ Summary: Tool for creating benchmark-driven backend libraries for GEMMs
Requires: cmake-filesystem
Requires: hipcc
Requires: rocminfo
%if 0%{?fedora}
Requires: python3dist(joblib)
Requires: python3dist(msgpack)
%endif
Requires: python3dist(pyyaml)
Provides: python3-tensile
%description -n python3-tensile-devel
Tensile is a tool for creating benchmark-driven backend libraries for GEMMs,
@@ -110,6 +114,11 @@ sed -i -e 's@globalParameters["IgnoreAsmCapCache"] = False@globalParameters["Ign
sed -i -e 's@arguments["IgnoreAsmCapCache"] = args.IgnoreAsmCapCache@arguments["IgnoreAsmCapCache"] = True@' Tensile/TensileCreateLibrary.py
sed -i -e 's@if not ignoreCacheCheck and derivedAsmCaps@if False and derivedAsmCaps@' Tensile/Common.py
# Reduce requirements
sed -i -e '/joblib/d' requirements.*
sed -i -e '/rich/d' requirements.*
sed -i -e '/msgpack/d' requirements.*
%build
%py3_build
%{?python_build: %python_build}
@@ -129,7 +138,9 @@ rm %{buildroot}%{_bindir}/tensile*
rm -rf %{buildroot}%{python3_sitelib}/%{upstreamname}/Tests
#Clean up dupes:
%if 0%{?fedora} || 0%{?suse_version}
%fdupes %{buildroot}%{_prefix}
%endif
# rm hard links and replace
rm %{buildroot}%{python3_sitelib}/%{upstreamname}/cmake/*.cmake
@@ -172,6 +183,21 @@ mv %{buildroot}%{_datadir}/cmake/Tensile/*.cmake %{buildroot}%{python3_sitelib}/
%{python_sitelib}/%{upstreamname}*.egg-info/*
%changelog
* Fri Apr 18 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-1
- Update to 6.4.0
* Sun Mar 2 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-12
- Restore provides: for fedora/rhel
* Sat Mar 1 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-11
- Add requires setuptools for SUSE
* Thu Feb 27 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-10
- Fix RHEL
* Wed Feb 26 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-9
- Handle missing joblib
* Thu Feb 20 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-8
- Remove python-rich suse requires