Compare commits
10 Commits
56930e79fa
...
2e4976a600
Author | SHA256 | Date | |
---|---|---|---|
2e4976a600 | |||
39819a61ea | |||
ab8b2b9984 | |||
736614005d | |||
123d0665d3 | |||
aa7cdc79c9 | |||
3bf83c110b | |||
fdb55434d9 | |||
a9e8116efe | |||
846d4ca404 |
97
0001-Add-gfx1035.patch
Normal file
97
0001-Add-gfx1035.patch
Normal file
@@ -0,0 +1,97 @@
|
||||
From cf989a0a4d0306f6ec4f3e9256064e9f1ea83812 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 6 Dec 2024 14:17:30 -0800
|
||||
Subject: [PATCH] Add gfx1035
|
||||
|
||||
---
|
||||
Tensile/AsmCaps.py | 44 ++++++++++++++++++++++
|
||||
Tensile/Common.py | 2 +-
|
||||
Tensile/Source/lib/source/ocl/OclUtils.cpp | 4 ++
|
||||
3 files changed, 49 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
|
||||
index b838bad22a30..8faea6d285a1 100644
|
||||
--- a/Tensile/AsmCaps.py
|
||||
+++ b/Tensile/AsmCaps.py
|
||||
@@ -639,6 +639,50 @@ CACHED_ASM_CAPS = \
|
||||
'v_mov_b64': False,
|
||||
'v_pk_fma_f16': True,
|
||||
'v_pk_fmac_f16': False},
|
||||
+ (10, 3, 5): {'HasAddLshl': True,
|
||||
+ 'HasAtomicAdd': False,
|
||||
+ 'HasDirectToLdsDest': False,
|
||||
+ 'HasDirectToLdsNoDest': True,
|
||||
+ 'HasExplicitCO': True,
|
||||
+ 'HasExplicitNC': True,
|
||||
+ 'HasGLCModifier': True,
|
||||
+ 'HasNTModifier': False,
|
||||
+ 'HasLshlOr': True,
|
||||
+ 'HasMFMA': False,
|
||||
+ 'HasMFMA_b8': False,
|
||||
+ 'HasMFMA_bf16_1k': False,
|
||||
+ 'HasMFMA_bf16_original': False,
|
||||
+ 'HasMFMA_constSrc': False,
|
||||
+ 'HasMFMA_f64': False,
|
||||
+ 'HasMFMA_f8': False,
|
||||
+ 'HasMFMA_i8_908': False,
|
||||
+ 'HasMFMA_i8_940': False,
|
||||
+ 'HasMFMA_vgpr': False,
|
||||
+ 'HasMFMA_xf32': False,
|
||||
+ 'HasSMulHi': True,
|
||||
+ 'HasWMMA': False,
|
||||
+ 'KernargPreloading': False,
|
||||
+ 'MaxLgkmcnt': 15,
|
||||
+ 'MaxVmcnt': 63,
|
||||
+ 'SupportedISA': True,
|
||||
+ 'SupportedSource': True,
|
||||
+ 'VOP3v_dot4_i32_i8': True,
|
||||
+ 'v_dot2_f32_f16': True,
|
||||
+ 'v_dot2c_f32_f16': True,
|
||||
+ 'v_dot4_i32_i8': False,
|
||||
+ 'v_dot4c_i32_i8': True,
|
||||
+ 'v_fma_f16': True,
|
||||
+ 'v_fma_f32': True,
|
||||
+ 'v_fma_f64': True,
|
||||
+ 'v_fma_mix_f32': True,
|
||||
+ 'v_fmac_f16': False,
|
||||
+ 'v_fmac_f32': True,
|
||||
+ 'v_mac_f16': False,
|
||||
+ 'v_mac_f32': False,
|
||||
+ 'v_mad_mix_f32': False,
|
||||
+ 'v_mov_b64': False,
|
||||
+ 'v_pk_fma_f16': True,
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
(11, 0, 0): {'HasAddLshl': True,
|
||||
'HasAtomicAdd': True,
|
||||
'HasDirectToLdsDest': False,
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 4d9c5a9155ee..b02e27c39e76 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -252,7 +252,7 @@ globalParameters["MaxFileName"] = 64 # If a file name would be long
|
||||
globalParameters["SupportedISA"] = [(8,0,3),
|
||||
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
|
||||
(9,4,0), (9,4,1), (9,4,2),
|
||||
- (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
|
||||
+ (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5),
|
||||
(11,0,0), (11,0,1), (11,0,2), (11,0,3),
|
||||
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
|
||||
|
||||
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
index eb5a14eccfb1..5242dc77abdf 100644
|
||||
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
@@ -176,6 +176,10 @@ namespace Tensile
|
||||
{
|
||||
return AMDGPU::Processor::gfx1030;
|
||||
}
|
||||
+ else if(deviceString.find("gfx1035") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1035;
|
||||
+ }
|
||||
else if(deviceString.find("gfx1100") != std::string::npos)
|
||||
{
|
||||
return AMDGPU::Processor::gfx1100;
|
||||
--
|
||||
2.47.1
|
||||
|
186
0001-Add-gfx1103.patch
Normal file
186
0001-Add-gfx1103.patch
Normal file
@@ -0,0 +1,186 @@
|
||||
From c67af6b301cb6a0cfc98708682c3bb0b66ad601e Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 6 Dec 2024 14:16:08 -0800
|
||||
Subject: [PATCH] Add gfx1103
|
||||
|
||||
---
|
||||
Tensile/AsmCaps.py | 44 +++++++++++++++++++
|
||||
Tensile/Common.py | 5 ++-
|
||||
Tensile/Source/CMakeLists.txt | 4 +-
|
||||
Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 7 +++
|
||||
.../include/Tensile/PlaceholderLibrary.hpp | 3 ++
|
||||
Tensile/Source/lib/source/ocl/OclUtils.cpp | 4 ++
|
||||
6 files changed, 63 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
|
||||
index e61580fafde2..b838bad22a30 100644
|
||||
--- a/Tensile/AsmCaps.py
|
||||
+++ b/Tensile/AsmCaps.py
|
||||
@@ -771,6 +771,50 @@ CACHED_ASM_CAPS = \
|
||||
'v_mov_b64': False,
|
||||
'v_pk_fma_f16': True,
|
||||
'v_pk_fmac_f16': False},
|
||||
+ (11, 0, 3): {'HasAddLshl': True,
|
||||
+ 'HasAtomicAdd': True,
|
||||
+ 'HasDirectToLdsDest': False,
|
||||
+ 'HasDirectToLdsNoDest': False,
|
||||
+ 'HasExplicitCO': True,
|
||||
+ 'HasExplicitNC': True,
|
||||
+ 'HasGLCModifier': True,
|
||||
+ 'HasNTModifier': False,
|
||||
+ 'HasLshlOr': True,
|
||||
+ 'HasMFMA': False,
|
||||
+ 'HasMFMA_b8': False,
|
||||
+ 'HasMFMA_bf16_1k': False,
|
||||
+ 'HasMFMA_bf16_original': False,
|
||||
+ 'HasMFMA_constSrc': False,
|
||||
+ 'HasMFMA_f64': False,
|
||||
+ 'HasMFMA_f8': False,
|
||||
+ 'HasMFMA_i8_908': False,
|
||||
+ 'HasMFMA_i8_940': False,
|
||||
+ 'HasMFMA_vgpr': False,
|
||||
+ 'HasMFMA_xf32': False,
|
||||
+ 'HasSMulHi': True,
|
||||
+ 'HasWMMA': True,
|
||||
+ 'KernargPreloading': False,
|
||||
+ 'MaxLgkmcnt': 15,
|
||||
+ 'MaxVmcnt': 63,
|
||||
+ 'SupportedISA': True,
|
||||
+ 'SupportedSource': True,
|
||||
+ 'VOP3v_dot4_i32_i8': False,
|
||||
+ 'v_dot2_f32_f16': True,
|
||||
+ 'v_dot2c_f32_f16': True,
|
||||
+ 'v_dot4_i32_i8': False,
|
||||
+ 'v_dot4c_i32_i8': False,
|
||||
+ 'v_fma_f16': True,
|
||||
+ 'v_fma_f32': True,
|
||||
+ 'v_fma_f64': True,
|
||||
+ 'v_fma_mix_f32': True,
|
||||
+ 'v_fmac_f16': False,
|
||||
+ 'v_fmac_f32': True,
|
||||
+ 'v_mac_f16': False,
|
||||
+ 'v_mac_f32': False,
|
||||
+ 'v_mad_mix_f32': False,
|
||||
+ 'v_mov_b64': False,
|
||||
+ 'v_pk_fma_f16': True,
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
(12, 0, 0): {'HasAddLshl': True,
|
||||
'HasAtomicAdd': False,
|
||||
'HasDirectToLdsDest': False,
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index ce7c8218d850..4d9c5a9155ee 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -253,7 +253,7 @@ globalParameters["SupportedISA"] = [(8,0,3),
|
||||
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
|
||||
(9,4,0), (9,4,1), (9,4,2),
|
||||
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
|
||||
- (11,0,0), (11,0,1), (11,0,2),
|
||||
+ (11,0,0), (11,0,1), (11,0,2), (11,0,3),
|
||||
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
|
||||
|
||||
globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed
|
||||
@@ -334,6 +334,7 @@ architectureMap = {
|
||||
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
|
||||
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
|
||||
'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
|
||||
+ 'gfx1103':'gfx1103', 'gfx1151':'gfx1151',
|
||||
'gfx1200':'gfx1200',
|
||||
'gfx1201':'gfx1201'
|
||||
}
|
||||
@@ -2459,7 +2460,7 @@ def assignGlobalParameters( config ):
|
||||
if os.name == "nt":
|
||||
globalParameters["CurrentISA"] = (9,0,6)
|
||||
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
|
||||
- isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,5,1), (12,0,0), (12,0,1))
|
||||
+ isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,1), (12,0,0), (12,0,1))
|
||||
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
|
||||
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
|
||||
printWarning(f"HardwareMonitor currently disabled for {isaString}")
|
||||
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
|
||||
index f350b26caf7f..78379e2d21d8 100644
|
||||
--- a/Tensile/Source/CMakeLists.txt
|
||||
+++ b/Tensile/Source/CMakeLists.txt
|
||||
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
|
||||
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
|
||||
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 CACHE STRING "GPU architectures")
|
||||
else()
|
||||
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
|
||||
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 CACHE STRING "GPU architectures")
|
||||
endif()
|
||||
|
||||
include(CMakeDependentOption)
|
||||
diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
index d83ee830d1da..dc0336c3d62d 100644
|
||||
--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
+++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
@@ -74,6 +74,7 @@ namespace Tensile
|
||||
gfx1100 = 1100,
|
||||
gfx1101 = 1101,
|
||||
gfx1102 = 1102,
|
||||
+ gfx1103 = 1103,
|
||||
gfx1151 = 1151
|
||||
};
|
||||
|
||||
@@ -119,6 +120,8 @@ namespace Tensile
|
||||
return "gfx1101";
|
||||
case AMDGPU::Processor::gfx1102:
|
||||
return "gfx1102";
|
||||
+ case AMDGPU::Processor::gfx1103:
|
||||
+ return "gfx1103";
|
||||
case AMDGPU::Processor::gfx1151:
|
||||
return "gfx1151";
|
||||
}
|
||||
@@ -187,6 +190,10 @@ namespace Tensile
|
||||
{
|
||||
return AMDGPU::Processor::gfx1102;
|
||||
}
|
||||
+ else if(deviceString.find("gfx1103") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1103;
|
||||
+ }
|
||||
else if(deviceString.find("gfx1151") != std::string::npos)
|
||||
{
|
||||
return AMDGPU::Processor::gfx1151;
|
||||
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
index f83713c04430..4f81795a9065 100644
|
||||
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
@@ -58,6 +58,7 @@ namespace Tensile
|
||||
gfx1100,
|
||||
gfx1101,
|
||||
gfx1102,
|
||||
+ gfx1103,
|
||||
gfx1151,
|
||||
All
|
||||
};
|
||||
@@ -107,6 +108,8 @@ namespace Tensile
|
||||
return "TensileLibrary_*_gfx1101";
|
||||
case LazyLoadingInit::gfx1102:
|
||||
return "TensileLibrary_*_gfx1102";
|
||||
+ case LazyLoadingInit::gfx1103:
|
||||
+ return "TensileLibrary_*_gfx1103";
|
||||
case LazyLoadingInit::gfx1151:
|
||||
return "TensileLibrary_*_gfx1151";
|
||||
case LazyLoadingInit::None:
|
||||
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
index ff04c56a1025..eb5a14eccfb1 100644
|
||||
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
@@ -188,6 +188,10 @@ namespace Tensile
|
||||
{
|
||||
return AMDGPU::Processor::gfx1102;
|
||||
}
|
||||
+ else if(deviceString.find("gfx1103") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1103;
|
||||
+ }
|
||||
else if(deviceString.find("gfx1151") != std::string::npos)
|
||||
{
|
||||
return AMDGPU::Processor::gfx1151;
|
||||
--
|
||||
2.47.1
|
||||
|
85
0001-Add-gfx1151-support.patch
Normal file
85
0001-Add-gfx1151-support.patch
Normal file
@@ -0,0 +1,85 @@
|
||||
From cd17e816bbac907f0fd704828230bb04db6921dd Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Mon, 28 Oct 2024 05:42:27 -0700
|
||||
Subject: [PATCH] Add gfx1151 support
|
||||
|
||||
Cherry-picked from upstream 642974ee2f3f9d8a8280bf87c462645fb07a2897
|
||||
|
||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
||||
---
|
||||
Tensile/AsmCaps.py | 47 +++++++++++++++++++++++++++++++++++++++++++++-
|
||||
Tensile/Common.py | 3 ++-
|
||||
2 files changed, 48 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
|
||||
index 22c67e977aa0..b4899da6284c 100644
|
||||
--- a/Tensile/AsmCaps.py
|
||||
+++ b/Tensile/AsmCaps.py
|
||||
@@ -770,4 +770,49 @@ CACHED_ASM_CAPS = \
|
||||
'v_mad_mix_f32': False,
|
||||
'v_mov_b64': False,
|
||||
'v_pk_fma_f16': True,
|
||||
- 'v_pk_fmac_f16': False}}
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
+ (11, 5, 1): {'HasAddLshl': True,
|
||||
+ 'HasAtomicAdd': True,
|
||||
+ 'HasDirectToLdsDest': False,
|
||||
+ 'HasDirectToLdsNoDest': False,
|
||||
+ 'HasExplicitCO': True,
|
||||
+ 'HasExplicitNC': True,
|
||||
+ 'HasGLCModifier': True,
|
||||
+ 'HasNTModifier': False,
|
||||
+ 'HasLshlOr': True,
|
||||
+ 'HasMFMA': False,
|
||||
+ 'HasMFMA_b8': False,
|
||||
+ 'HasMFMA_bf16_1k': False,
|
||||
+ 'HasMFMA_bf16_original': False,
|
||||
+ 'HasMFMA_constSrc': False,
|
||||
+ 'HasMFMA_f64': False,
|
||||
+ 'HasMFMA_f8': False,
|
||||
+ 'HasMFMA_i8_908': False,
|
||||
+ 'HasMFMA_i8_940': False,
|
||||
+ 'HasMFMA_vgpr': False,
|
||||
+ 'HasMFMA_xf32': False,
|
||||
+ 'HasSMulHi': True,
|
||||
+ 'HasWMMA': True,
|
||||
+ 'KernargPreloading': False,
|
||||
+ 'MaxLgkmcnt': 15,
|
||||
+ 'MaxVmcnt': 63,
|
||||
+ 'SupportedISA': True,
|
||||
+ 'SupportedSource': True,
|
||||
+ 'VOP3v_dot4_i32_i8': False,
|
||||
+ 'v_dot2_f32_f16': True,
|
||||
+ 'v_dot2c_f32_f16': True,
|
||||
+ 'v_dot4_i32_i8': False,
|
||||
+ 'v_dot4c_i32_i8': False,
|
||||
+ 'v_fma_f16': True,
|
||||
+ 'v_fma_f32': True,
|
||||
+ 'v_fma_f64': True,
|
||||
+ 'v_fma_mix_f32': True,
|
||||
+ 'v_fmac_f16': False,
|
||||
+ 'v_fmac_f32': True,
|
||||
+ 'v_mac_f16': False,
|
||||
+ 'v_mac_f32': False,
|
||||
+ 'v_mad_mix_f32': False,
|
||||
+ 'v_mov_b64': False,
|
||||
+ 'v_pk_fma_f16': True,
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
+}
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 07abbf59397f..8c27486338fc 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -306,7 +306,8 @@ architectureMap = {
|
||||
'gfx942':'aquavanjaram942', 'gfx942:xnack+':'aquavanjaram942', 'gfx942:xnack-':'aquavanjaram942',
|
||||
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
|
||||
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
|
||||
- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33'
|
||||
+ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
|
||||
+ 'gfx1151':'gfx1151'
|
||||
}
|
||||
|
||||
def getArchitectureName(gfxName):
|
||||
--
|
||||
2.47.0
|
||||
|
46
0001-Handle-a-missing-joblib.patch
Normal file
46
0001-Handle-a-missing-joblib.patch
Normal file
@@ -0,0 +1,46 @@
|
||||
From b75119c7b7e9f8b8f7c5ec1da3b6bd9bc7859eec Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Wed, 26 Feb 2025 04:33:46 -0800
|
||||
Subject: [PATCH] Handle a missing joblib
|
||||
|
||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
||||
---
|
||||
Tensile/Parallel.py | 12 +++++++++---
|
||||
1 file changed, 9 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/Tensile/Parallel.py b/Tensile/Parallel.py
|
||||
index 9a7e7b57dc2c..18112f76f80d 100644
|
||||
--- a/Tensile/Parallel.py
|
||||
+++ b/Tensile/Parallel.py
|
||||
@@ -26,11 +26,17 @@ import itertools
|
||||
import os
|
||||
from typing import Any, Callable
|
||||
|
||||
-from joblib import Parallel, delayed
|
||||
+try:
|
||||
+ import joblib
|
||||
+except:
|
||||
+ joblib = None
|
||||
+
|
||||
+if joblib != None:
|
||||
+ from joblib import Parallel, delayed
|
||||
|
||||
def CPUThreadCount(enable=True):
|
||||
from .Common import globalParameters
|
||||
- if not enable:
|
||||
+ if not enable or joblib == None:
|
||||
return 1
|
||||
else:
|
||||
if os.name == "nt":
|
||||
@@ -80,7 +86,7 @@ def ParallelMap(function: Callable, objects: Any, message: str="", enable: bool=
|
||||
from . import Utils
|
||||
threadCount = CPUThreadCount(enable)
|
||||
|
||||
- if threadCount <= 1:
|
||||
+ if threadCount <= 1 or joblib == None:
|
||||
return list(map(lambda objs: function(*objs), Utils.tqdm(objects, desc=message)))
|
||||
|
||||
inputs = list(zip(objects, itertools.repeat(globalParameters)))
|
||||
--
|
||||
2.47.1
|
||||
|
115
0001-More-gfx1151.patch
Normal file
115
0001-More-gfx1151.patch
Normal file
@@ -0,0 +1,115 @@
|
||||
From bb1f4a2224fb43d0eeca27cbb5ac93950dc06dd3 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 6 Dec 2024 14:08:27 -0800
|
||||
Subject: [PATCH] More gfx1151
|
||||
|
||||
---
|
||||
Tensile/Common.py | 2 +-
|
||||
Tensile/Source/CMakeLists.txt | 4 ++--
|
||||
Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 9 ++++++++-
|
||||
.../Source/lib/include/Tensile/PlaceholderLibrary.hpp | 3 +++
|
||||
Tensile/Source/lib/source/ocl/OclUtils.cpp | 4 ++++
|
||||
5 files changed, 18 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 79ae322ad926..ce7c8218d850 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -2459,7 +2459,7 @@ def assignGlobalParameters( config ):
|
||||
if os.name == "nt":
|
||||
globalParameters["CurrentISA"] = (9,0,6)
|
||||
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
|
||||
- isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (12,0,0), (12,0,1))
|
||||
+ isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,5,1), (12,0,0), (12,0,1))
|
||||
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
|
||||
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
|
||||
printWarning(f"HardwareMonitor currently disabled for {isaString}")
|
||||
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
|
||||
index e02b209a262a..f350b26caf7f 100644
|
||||
--- a/Tensile/Source/CMakeLists.txt
|
||||
+++ b/Tensile/Source/CMakeLists.txt
|
||||
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
|
||||
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
|
||||
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
|
||||
else()
|
||||
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
|
||||
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
|
||||
endif()
|
||||
|
||||
include(CMakeDependentOption)
|
||||
diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
index 0ab8ced5cf5d..d83ee830d1da 100644
|
||||
--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
+++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
@@ -73,7 +73,8 @@ namespace Tensile
|
||||
gfx1035 = 1035,
|
||||
gfx1100 = 1100,
|
||||
gfx1101 = 1101,
|
||||
- gfx1102 = 1102
|
||||
+ gfx1102 = 1102,
|
||||
+ gfx1151 = 1151
|
||||
};
|
||||
|
||||
static std::string toString(Processor p)
|
||||
@@ -118,6 +119,8 @@ namespace Tensile
|
||||
return "gfx1101";
|
||||
case AMDGPU::Processor::gfx1102:
|
||||
return "gfx1102";
|
||||
+ case AMDGPU::Processor::gfx1151:
|
||||
+ return "gfx1151";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
@@ -184,6 +187,10 @@ namespace Tensile
|
||||
{
|
||||
return AMDGPU::Processor::gfx1102;
|
||||
}
|
||||
+ else if(deviceString.find("gfx1151") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1151;
|
||||
+ }
|
||||
else
|
||||
{
|
||||
return static_cast<AMDGPU::Processor>(0);
|
||||
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
index 10898ec2d1d6..f83713c04430 100644
|
||||
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
@@ -58,6 +58,7 @@ namespace Tensile
|
||||
gfx1100,
|
||||
gfx1101,
|
||||
gfx1102,
|
||||
+ gfx1151,
|
||||
All
|
||||
};
|
||||
|
||||
@@ -106,6 +107,8 @@ namespace Tensile
|
||||
return "TensileLibrary_*_gfx1101";
|
||||
case LazyLoadingInit::gfx1102:
|
||||
return "TensileLibrary_*_gfx1102";
|
||||
+ case LazyLoadingInit::gfx1151:
|
||||
+ return "TensileLibrary_*_gfx1151";
|
||||
case LazyLoadingInit::None:
|
||||
return "";
|
||||
}
|
||||
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
index 8ee6d21769f0..ff04c56a1025 100644
|
||||
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
|
||||
@@ -188,6 +188,10 @@ namespace Tensile
|
||||
{
|
||||
return AMDGPU::Processor::gfx1102;
|
||||
}
|
||||
+ else if(deviceString.find("gfx1151") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1151;
|
||||
+ }
|
||||
else
|
||||
{
|
||||
return static_cast<AMDGPU::Processor>(0);
|
||||
--
|
||||
2.47.1
|
||||
|
46
0001-serialize-reading-logic-files.patch
Normal file
46
0001-serialize-reading-logic-files.patch
Normal file
@@ -0,0 +1,46 @@
|
||||
From ffc29981521b1dd38d262fcfc9ee4ab6377f9957 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Wed, 26 Feb 2025 06:22:30 -0800
|
||||
Subject: [PATCH] serialize reading logic files
|
||||
|
||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
||||
---
|
||||
Tensile/TensileCreateLibrary.py | 15 ++++++++++++---
|
||||
1 file changed, 12 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||
index a16446063615..3c0472788303 100644
|
||||
--- a/Tensile/TensileCreateLibrary.py
|
||||
+++ b/Tensile/TensileCreateLibrary.py
|
||||
@@ -69,6 +69,7 @@ from .TensileCreateLib.ParseArguments import parseArguments
|
||||
from .Utilities.Profile import profile
|
||||
from .Utilities.String import splitDelimitedString
|
||||
from .Utilities.toFile import toFile
|
||||
+from .Parallel import CPUThreadCount
|
||||
|
||||
TENSILE_MANIFEST_FILENAME = "TensileManifest.txt"
|
||||
TENSILE_LIBRARY_DIR = "library"
|
||||
@@ -1308,9 +1309,17 @@ def parseLibraryLogicFiles(logicFiles: List[str]) -> List[LibraryIO.LibraryLogic
|
||||
Returns:
|
||||
List of library logic tuples.
|
||||
"""
|
||||
- return Common.ParallelMap(
|
||||
- LibraryIO.parseLibraryLogicFile, logicFiles, "Reading logic files", multiArg=False
|
||||
- )
|
||||
+ if CPUThreadCount() < 2:
|
||||
+ tPrint(1, "Reading logic files")
|
||||
+ rv = []
|
||||
+ for lf in logicFiles:
|
||||
+ tPrint(3, "Reading logic file: %s" % lf)
|
||||
+ rv.append(LibraryIO.parseLibraryLogicFile(lf))
|
||||
+ return rv
|
||||
+ else:
|
||||
+ return Common.ParallelMap(
|
||||
+ LibraryIO.parseLibraryLogicFile, logicFiles, "Reading logic files", multiArg=False
|
||||
+ )
|
||||
|
||||
|
||||
def generateLogicData(
|
||||
--
|
||||
2.47.1
|
||||
|
365
0001-tensile-fedora-gpus.patch
Normal file
365
0001-tensile-fedora-gpus.patch
Normal file
@@ -0,0 +1,365 @@
|
||||
From 4b03428cb375182ea6bd05f40ea7f38a6c1f873c Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 18 Apr 2025 07:30:37 -0700
|
||||
Subject: [PATCH] tensile fedora gpus
|
||||
|
||||
---
|
||||
Tensile/AsmCaps.py | 176 ++++++++++++++++++
|
||||
Tensile/Common.py | 10 +-
|
||||
Tensile/Source/CMakeLists.txt | 4 +-
|
||||
Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 36 +++-
|
||||
.../include/Tensile/PlaceholderLibrary.hpp | 17 +-
|
||||
5 files changed, 229 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
|
||||
index 548b31f28321..78ffa73bd81a 100644
|
||||
--- a/Tensile/AsmCaps.py
|
||||
+++ b/Tensile/AsmCaps.py
|
||||
@@ -639,6 +639,50 @@ CACHED_ASM_CAPS = \
|
||||
'v_mov_b64': False,
|
||||
'v_pk_fma_f16': True,
|
||||
'v_pk_fmac_f16': False},
|
||||
+ (10, 3, 5): {'HasAddLshl': True,
|
||||
+ 'HasAtomicAdd': False,
|
||||
+ 'HasDirectToLdsDest': False,
|
||||
+ 'HasDirectToLdsNoDest': True,
|
||||
+ 'HasExplicitCO': True,
|
||||
+ 'HasExplicitNC': True,
|
||||
+ 'HasGLCModifier': True,
|
||||
+ 'HasNTModifier': False,
|
||||
+ 'HasLshlOr': True,
|
||||
+ 'HasMFMA': False,
|
||||
+ 'HasMFMA_b8': False,
|
||||
+ 'HasMFMA_bf16_1k': False,
|
||||
+ 'HasMFMA_bf16_original': False,
|
||||
+ 'HasMFMA_constSrc': False,
|
||||
+ 'HasMFMA_f64': False,
|
||||
+ 'HasMFMA_f8': False,
|
||||
+ 'HasMFMA_i8_908': False,
|
||||
+ 'HasMFMA_i8_940': False,
|
||||
+ 'HasMFMA_vgpr': False,
|
||||
+ 'HasMFMA_xf32': False,
|
||||
+ 'HasSMulHi': True,
|
||||
+ 'HasWMMA': False,
|
||||
+ 'KernargPreloading': False,
|
||||
+ 'MaxLgkmcnt': 15,
|
||||
+ 'MaxVmcnt': 63,
|
||||
+ 'SupportedISA': True,
|
||||
+ 'SupportedSource': True,
|
||||
+ 'VOP3v_dot4_i32_i8': True,
|
||||
+ 'v_dot2_f32_f16': True,
|
||||
+ 'v_dot2c_f32_f16': True,
|
||||
+ 'v_dot4_i32_i8': False,
|
||||
+ 'v_dot4c_i32_i8': True,
|
||||
+ 'v_fma_f16': True,
|
||||
+ 'v_fma_f32': True,
|
||||
+ 'v_fma_f64': True,
|
||||
+ 'v_fma_mix_f32': True,
|
||||
+ 'v_fmac_f16': False,
|
||||
+ 'v_fmac_f32': True,
|
||||
+ 'v_mac_f16': False,
|
||||
+ 'v_mac_f32': False,
|
||||
+ 'v_mad_mix_f32': False,
|
||||
+ 'v_mov_b64': False,
|
||||
+ 'v_pk_fma_f16': True,
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
(11, 0, 0): {'HasAddLshl': True,
|
||||
'HasAtomicAdd': True,
|
||||
'HasDirectToLdsDest': False,
|
||||
@@ -771,6 +815,94 @@ CACHED_ASM_CAPS = \
|
||||
'v_mov_b64': False,
|
||||
'v_pk_fma_f16': True,
|
||||
'v_pk_fmac_f16': False},
|
||||
+ (11, 0, 3): {'HasAddLshl': True,
|
||||
+ 'HasAtomicAdd': True,
|
||||
+ 'HasDirectToLdsDest': False,
|
||||
+ 'HasDirectToLdsNoDest': False,
|
||||
+ 'HasExplicitCO': True,
|
||||
+ 'HasExplicitNC': True,
|
||||
+ 'HasGLCModifier': True,
|
||||
+ 'HasNTModifier': False,
|
||||
+ 'HasLshlOr': True,
|
||||
+ 'HasMFMA': False,
|
||||
+ 'HasMFMA_b8': False,
|
||||
+ 'HasMFMA_bf16_1k': False,
|
||||
+ 'HasMFMA_bf16_original': False,
|
||||
+ 'HasMFMA_constSrc': False,
|
||||
+ 'HasMFMA_f64': False,
|
||||
+ 'HasMFMA_f8': False,
|
||||
+ 'HasMFMA_i8_908': False,
|
||||
+ 'HasMFMA_i8_940': False,
|
||||
+ 'HasMFMA_vgpr': False,
|
||||
+ 'HasMFMA_xf32': False,
|
||||
+ 'HasSMulHi': True,
|
||||
+ 'HasWMMA': True,
|
||||
+ 'KernargPreloading': False,
|
||||
+ 'MaxLgkmcnt': 15,
|
||||
+ 'MaxVmcnt': 63,
|
||||
+ 'SupportedISA': True,
|
||||
+ 'SupportedSource': True,
|
||||
+ 'VOP3v_dot4_i32_i8': False,
|
||||
+ 'v_dot2_f32_f16': True,
|
||||
+ 'v_dot2c_f32_f16': True,
|
||||
+ 'v_dot4_i32_i8': False,
|
||||
+ 'v_dot4c_i32_i8': False,
|
||||
+ 'v_fma_f16': True,
|
||||
+ 'v_fma_f32': True,
|
||||
+ 'v_fma_f64': True,
|
||||
+ 'v_fma_mix_f32': True,
|
||||
+ 'v_fmac_f16': False,
|
||||
+ 'v_fmac_f32': True,
|
||||
+ 'v_mac_f16': False,
|
||||
+ 'v_mac_f32': False,
|
||||
+ 'v_mad_mix_f32': False,
|
||||
+ 'v_mov_b64': False,
|
||||
+ 'v_pk_fma_f16': True,
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
+ (11, 5, 0): {'HasAddLshl': True,
|
||||
+ 'HasAtomicAdd': True,
|
||||
+ 'HasDirectToLdsDest': False,
|
||||
+ 'HasDirectToLdsNoDest': False,
|
||||
+ 'HasExplicitCO': True,
|
||||
+ 'HasExplicitNC': True,
|
||||
+ 'HasGLCModifier': True,
|
||||
+ 'HasNTModifier': False,
|
||||
+ 'HasLshlOr': True,
|
||||
+ 'HasMFMA': False,
|
||||
+ 'HasMFMA_b8': False,
|
||||
+ 'HasMFMA_bf16_1k': False,
|
||||
+ 'HasMFMA_bf16_original': False,
|
||||
+ 'HasMFMA_constSrc': False,
|
||||
+ 'HasMFMA_f64': False,
|
||||
+ 'HasMFMA_f8': False,
|
||||
+ 'HasMFMA_i8_908': False,
|
||||
+ 'HasMFMA_i8_940': False,
|
||||
+ 'HasMFMA_vgpr': False,
|
||||
+ 'HasMFMA_xf32': False,
|
||||
+ 'HasSMulHi': True,
|
||||
+ 'HasWMMA': True,
|
||||
+ 'KernargPreloading': False,
|
||||
+ 'MaxLgkmcnt': 15,
|
||||
+ 'MaxVmcnt': 63,
|
||||
+ 'SupportedISA': True,
|
||||
+ 'SupportedSource': True,
|
||||
+ 'VOP3v_dot4_i32_i8': False,
|
||||
+ 'v_dot2_f32_f16': True,
|
||||
+ 'v_dot2c_f32_f16': True,
|
||||
+ 'v_dot4_i32_i8': False,
|
||||
+ 'v_dot4c_i32_i8': False,
|
||||
+ 'v_fma_f16': True,
|
||||
+ 'v_fma_f32': True,
|
||||
+ 'v_fma_f64': True,
|
||||
+ 'v_fma_mix_f32': True,
|
||||
+ 'v_fmac_f16': False,
|
||||
+ 'v_fmac_f32': True,
|
||||
+ 'v_mac_f16': False,
|
||||
+ 'v_mac_f32': False,
|
||||
+ 'v_mad_mix_f32': False,
|
||||
+ 'v_mov_b64': False,
|
||||
+ 'v_pk_fma_f16': True,
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
(11, 5, 1): {'HasAddLshl': True,
|
||||
'HasAtomicAdd': True,
|
||||
'HasDirectToLdsDest': False,
|
||||
@@ -815,6 +947,50 @@ CACHED_ASM_CAPS = \
|
||||
'v_mov_b64': False,
|
||||
'v_pk_fma_f16': True,
|
||||
'v_pk_fmac_f16': False},
|
||||
+ (11, 5, 2): {'HasAddLshl': True,
|
||||
+ 'HasAtomicAdd': True,
|
||||
+ 'HasDirectToLdsDest': False,
|
||||
+ 'HasDirectToLdsNoDest': False,
|
||||
+ 'HasExplicitCO': True,
|
||||
+ 'HasExplicitNC': True,
|
||||
+ 'HasGLCModifier': True,
|
||||
+ 'HasNTModifier': False,
|
||||
+ 'HasLshlOr': True,
|
||||
+ 'HasMFMA': False,
|
||||
+ 'HasMFMA_b8': False,
|
||||
+ 'HasMFMA_bf16_1k': False,
|
||||
+ 'HasMFMA_bf16_original': False,
|
||||
+ 'HasMFMA_constSrc': False,
|
||||
+ 'HasMFMA_f64': False,
|
||||
+ 'HasMFMA_f8': False,
|
||||
+ 'HasMFMA_i8_908': False,
|
||||
+ 'HasMFMA_i8_940': False,
|
||||
+ 'HasMFMA_vgpr': False,
|
||||
+ 'HasMFMA_xf32': False,
|
||||
+ 'HasSMulHi': True,
|
||||
+ 'HasWMMA': True,
|
||||
+ 'KernargPreloading': False,
|
||||
+ 'MaxLgkmcnt': 15,
|
||||
+ 'MaxVmcnt': 63,
|
||||
+ 'SupportedISA': True,
|
||||
+ 'SupportedSource': True,
|
||||
+ 'VOP3v_dot4_i32_i8': False,
|
||||
+ 'v_dot2_f32_f16': True,
|
||||
+ 'v_dot2c_f32_f16': True,
|
||||
+ 'v_dot4_i32_i8': False,
|
||||
+ 'v_dot4c_i32_i8': False,
|
||||
+ 'v_fma_f16': True,
|
||||
+ 'v_fma_f32': True,
|
||||
+ 'v_fma_f64': True,
|
||||
+ 'v_fma_mix_f32': True,
|
||||
+ 'v_fmac_f16': False,
|
||||
+ 'v_fmac_f32': True,
|
||||
+ 'v_mac_f16': False,
|
||||
+ 'v_mac_f32': False,
|
||||
+ 'v_mad_mix_f32': False,
|
||||
+ 'v_mov_b64': False,
|
||||
+ 'v_pk_fma_f16': True,
|
||||
+ 'v_pk_fmac_f16': False},
|
||||
(12, 0, 0): {'HasAddLshl': True,
|
||||
'HasAtomicAdd': False,
|
||||
'HasDirectToLdsDest': False,
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 410c83656fd7..4d212d977c3d 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -245,9 +245,9 @@ globalParameters["MaxFileName"] = 64 # If a file name would be long
|
||||
globalParameters["SupportedISA"] = [(8,0,3),
|
||||
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
|
||||
(9,4,0), (9,4,1), (9,4,2),
|
||||
- (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
|
||||
- (11,0,0), (11,0,1), (11,0,2),
|
||||
- (11,5,1),
|
||||
+ (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5),
|
||||
+ (11,0,0), (11,0,1), (11,0,2), (11,0,3),
|
||||
+ (11,5,0), (11,5,1), (11,5,2),
|
||||
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
|
||||
|
||||
globalParameters["KeepBuildTmp"] = True # Do not remove build artifacts during the build process or build_tmp after build completes
|
||||
@@ -324,7 +324,7 @@ architectureMap = {
|
||||
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
|
||||
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
|
||||
'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
|
||||
- 'gfx1151':'gfx1151',
|
||||
+ 'gfx1103':'gfx1103', 'gfx1150':'gfx1150', 'gfx1151':'gfx1151', 'gfx1152':'gfx1152',
|
||||
'gfx1200':'gfx1200',
|
||||
'gfx1201':'gfx1201'
|
||||
}
|
||||
@@ -2466,7 +2466,7 @@ def assignGlobalParameters( config, capabilitiesCache: Optional[dict] = None ):
|
||||
if os.name == "nt":
|
||||
globalParameters["CurrentISA"] = (9,0,6)
|
||||
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
|
||||
- isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (12,0,0), (12,0,1))
|
||||
+ isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,0), (11,5,1), (11,5,2), (12,0,0), (12,0,1))
|
||||
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
|
||||
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
|
||||
printWarning(f"HardwareMonitor currently disabled for {isaString}")
|
||||
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
|
||||
index e02b209a262a..7f10ee319518 100644
|
||||
--- a/Tensile/Source/CMakeLists.txt
|
||||
+++ b/Tensile/Source/CMakeLists.txt
|
||||
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
|
||||
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
|
||||
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 CACHE STRING "GPU architectures")
|
||||
else()
|
||||
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
|
||||
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 CACHE STRING "GPU architectures")
|
||||
endif()
|
||||
|
||||
include(CMakeDependentOption)
|
||||
diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
index 0ab8ced5cf5d..2317ce79f8f2 100644
|
||||
--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
+++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
|
||||
@@ -73,7 +73,11 @@ namespace Tensile
|
||||
gfx1035 = 1035,
|
||||
gfx1100 = 1100,
|
||||
gfx1101 = 1101,
|
||||
- gfx1102 = 1102
|
||||
+ gfx1102 = 1102,
|
||||
+ gfx1103 = 1103,
|
||||
+ gfx1150 = 1150,
|
||||
+ gfx1151 = 1151,
|
||||
+ gfx1152 = 1152,
|
||||
};
|
||||
|
||||
static std::string toString(Processor p)
|
||||
@@ -118,9 +122,17 @@ namespace Tensile
|
||||
return "gfx1101";
|
||||
case AMDGPU::Processor::gfx1102:
|
||||
return "gfx1102";
|
||||
- }
|
||||
- return "";
|
||||
- }
|
||||
+ case AMDGPU::Processor::gfx1103:
|
||||
+ return "gfx1103";
|
||||
+ case AMDGPU::Processor::gfx1150:
|
||||
+ return "gfx1150";
|
||||
+ case AMDGPU::Processor::gfx1151:
|
||||
+ return "gfx1151";
|
||||
+ case AMDGPU::Processor::gfx1152:
|
||||
+ return "gfx1152";
|
||||
+ }
|
||||
+ return "";
|
||||
+ }
|
||||
|
||||
AMDGPU::Processor toProcessorId(std::string const& deviceString)
|
||||
{
|
||||
@@ -184,6 +196,22 @@ namespace Tensile
|
||||
{
|
||||
return AMDGPU::Processor::gfx1102;
|
||||
}
|
||||
+ else if(deviceString.find("gfx1103") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1103;
|
||||
+ }
|
||||
+ else if(deviceString.find("gfx1150") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1150;
|
||||
+ }
|
||||
+ else if(deviceString.find("gfx1151") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1151;
|
||||
+ }
|
||||
+ else if(deviceString.find("gfx1152") != std::string::npos)
|
||||
+ {
|
||||
+ return AMDGPU::Processor::gfx1152;
|
||||
+ }
|
||||
else
|
||||
{
|
||||
return static_cast<AMDGPU::Processor>(0);
|
||||
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
index 10898ec2d1d6..f838f15d3ac4 100644
|
||||
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
|
||||
@@ -58,6 +58,10 @@ namespace Tensile
|
||||
gfx1100,
|
||||
gfx1101,
|
||||
gfx1102,
|
||||
+ gfx1103,
|
||||
+ gfx1151,
|
||||
+ gfx1150,
|
||||
+ gfx1152,
|
||||
All
|
||||
};
|
||||
|
||||
@@ -106,10 +110,17 @@ namespace Tensile
|
||||
return "TensileLibrary_*_gfx1101";
|
||||
case LazyLoadingInit::gfx1102:
|
||||
return "TensileLibrary_*_gfx1102";
|
||||
- case LazyLoadingInit::None:
|
||||
- return "";
|
||||
+ case LazyLoadingInit::gfx1103:
|
||||
+ return "TensileLibrary_*_gfx1103";
|
||||
+ case LazyLoadingInit::gfx1150:
|
||||
+ return "TensileLibrary_*_gfx1150";
|
||||
+ case LazyLoadingInit::gfx1151:
|
||||
+ return "TensileLibrary_*_gfx1151";
|
||||
+ case LazyLoadingInit::gfx1152:
|
||||
+ return "TensileLibrary_*_gfx1152";
|
||||
+ case LazyLoadingInit::None:
|
||||
+ return "";
|
||||
}
|
||||
- return "";
|
||||
}
|
||||
|
||||
template <typename MyProblem, typename MySolution = typename MyProblem::Solution>
|
||||
--
|
||||
2.48.1
|
||||
|
27
0001-tensile-workaround-cache-problem.patch
Normal file
27
0001-tensile-workaround-cache-problem.patch
Normal file
@@ -0,0 +1,27 @@
|
||||
From f0b325a630277120e67bc6432e99901f96621f88 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sun, 12 May 2024 11:21:39 -0600
|
||||
Subject: [PATCH] tensile workaround cache problem
|
||||
|
||||
---
|
||||
Tensile/Common.py | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 6ececf1c47b3..672cef5aa842 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -2037,8 +2037,8 @@ def GetAsmCaps(isaVersion):
|
||||
ignoreCacheCheck = True
|
||||
|
||||
# check if derived caps matches asm cap cache
|
||||
- if not ignoreCacheCheck and derivedAsmCaps != CACHED_ASM_CAPS[isaVersion]:
|
||||
- printExit("Cached asm caps differ from derived asm caps for {}".format(isaVersion))
|
||||
+ # if not ignoreCacheCheck and derivedAsmCaps != CACHED_ASM_CAPS[isaVersion]:
|
||||
+ # printExit("Cached asm caps differ from derived asm caps for {}".format(isaVersion))
|
||||
return derivedAsmCaps
|
||||
else:
|
||||
printWarning("Assembler not present, asm caps loaded from cache are unverified")
|
||||
--
|
||||
2.45.0
|
||||
|
BIN
Tensile-6.4.0.tar.gz
(Stored with Git LFS)
Normal file
BIN
Tensile-6.4.0.tar.gz
(Stored with Git LFS)
Normal file
Binary file not shown.
@@ -9,7 +9,7 @@
|
||||
|
||||
%global upstreamname Tensile
|
||||
|
||||
%global rocm_release 6.3
|
||||
%global rocm_release 6.4
|
||||
%global rocm_patch 0
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
@@ -19,34 +19,35 @@ Name: python-tensile-devel
|
||||
Name: python-tensile
|
||||
%endif
|
||||
Version: %{rocm_version}
|
||||
Release: 8%{?dist}
|
||||
Release: 1%{?dist}
|
||||
Summary: Tool for creating benchmark-driven backend libraries for GEMMs
|
||||
|
||||
URL: https://github.com/ROCmSoftwarePlatform/Tensile
|
||||
License: MIT
|
||||
Source0: %{url}/archive/rocm-%{version}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
|
||||
Patch1: 0001-Adding-gfx1151-to-6.2-2026.patch
|
||||
Patch2: 0002-More-gfx1151.patch
|
||||
Patch3: 0003-Add-gfx1103.patch
|
||||
Patch4: 0004-Add-gfx1035.patch
|
||||
Patch5: 0005-Add-gfx1152.patch
|
||||
Patch6: 0006-Add-gfx1150.patch
|
||||
Patch1: 0001-tensile-fedora-gpus.patch
|
||||
|
||||
#Patch7: 0001-Handle-a-missing-joblib.patch
|
||||
#Patch8: 0001-serialize-reading-logic-files.patch
|
||||
|
||||
%if 0%{?fedora} || 0%{?suse_version}
|
||||
BuildRequires: fdupes
|
||||
%endif
|
||||
|
||||
%if 0%{?suse_version}
|
||||
BuildRequires: python-rpm-macros
|
||||
BuildRequires: %{python_module setuptools}
|
||||
Requires: hipcc
|
||||
Requires: rocminfo
|
||||
# Not available on SLE, tensile may be able to cope
|
||||
Recommends: %{python_module joblib}
|
||||
%if %{suse_version} >= 1699
|
||||
Requires: %{python_module joblib}
|
||||
%endif
|
||||
Requires: %{python_module msgpack}
|
||||
Requires: %{python_module PyYAML}
|
||||
Requires: %{python_module setuptools}
|
||||
Requires(post): update-alternatives
|
||||
Requires(postun): update-alternatives
|
||||
# Not available on SLE, tensile does not cope
|
||||
Recommends: python-rich
|
||||
%else
|
||||
BuildRequires: python3-devel
|
||||
BuildRequires: python3dist(setuptools)
|
||||
@@ -64,7 +65,7 @@ contractions on a GPU. The Tensile library is mainly used as backend library to
|
||||
rocBLAS. Tensile acts as the performance backbone for a wide variety of
|
||||
'compute' applications running on AMD GPUs.
|
||||
|
||||
%if 0%{?fedora}
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
# There are headers and code as part of the code generation.
|
||||
# This make rpm checkers unhappy
|
||||
%package -n python3-tensile-devel
|
||||
@@ -73,9 +74,12 @@ Summary: Tool for creating benchmark-driven backend libraries for GEMMs
|
||||
Requires: cmake-filesystem
|
||||
Requires: hipcc
|
||||
Requires: rocminfo
|
||||
%if 0%{?fedora}
|
||||
Requires: python3dist(joblib)
|
||||
Requires: python3dist(msgpack)
|
||||
%endif
|
||||
Requires: python3dist(pyyaml)
|
||||
Provides: python3-tensile
|
||||
|
||||
%description -n python3-tensile-devel
|
||||
Tensile is a tool for creating benchmark-driven backend libraries for GEMMs,
|
||||
@@ -110,6 +114,11 @@ sed -i -e 's@globalParameters["IgnoreAsmCapCache"] = False@globalParameters["Ign
|
||||
sed -i -e 's@arguments["IgnoreAsmCapCache"] = args.IgnoreAsmCapCache@arguments["IgnoreAsmCapCache"] = True@' Tensile/TensileCreateLibrary.py
|
||||
sed -i -e 's@if not ignoreCacheCheck and derivedAsmCaps@if False and derivedAsmCaps@' Tensile/Common.py
|
||||
|
||||
# Reduce requirements
|
||||
sed -i -e '/joblib/d' requirements.*
|
||||
sed -i -e '/rich/d' requirements.*
|
||||
sed -i -e '/msgpack/d' requirements.*
|
||||
|
||||
%build
|
||||
%py3_build
|
||||
%{?python_build: %python_build}
|
||||
@@ -129,7 +138,9 @@ rm %{buildroot}%{_bindir}/tensile*
|
||||
rm -rf %{buildroot}%{python3_sitelib}/%{upstreamname}/Tests
|
||||
|
||||
#Clean up dupes:
|
||||
%if 0%{?fedora} || 0%{?suse_version}
|
||||
%fdupes %{buildroot}%{_prefix}
|
||||
%endif
|
||||
|
||||
# rm hard links and replace
|
||||
rm %{buildroot}%{python3_sitelib}/%{upstreamname}/cmake/*.cmake
|
||||
@@ -172,6 +183,21 @@ mv %{buildroot}%{_datadir}/cmake/Tensile/*.cmake %{buildroot}%{python3_sitelib}/
|
||||
%{python_sitelib}/%{upstreamname}*.egg-info/*
|
||||
|
||||
%changelog
|
||||
* Fri Apr 18 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-1
|
||||
- Update to 6.4.0
|
||||
|
||||
* Sun Mar 2 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-12
|
||||
- Restore provides: for fedora/rhel
|
||||
|
||||
* Sat Mar 1 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-11
|
||||
- Add requires setuptools for SUSE
|
||||
|
||||
* Thu Feb 27 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-10
|
||||
- Fix RHEL
|
||||
|
||||
* Wed Feb 26 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-9
|
||||
- Handle missing joblib
|
||||
|
||||
* Thu Feb 20 2025 Tom Rix <Tom.Rix@amd.com> 6.3.0-8
|
||||
- Remove python-rich suse requires
|
||||
|
||||
|
Reference in New Issue
Block a user