Files
python-tensile/0005-Add-gfx1152.patch

155 lines
7.1 KiB
Diff

From 5c70822a0f158c33ac04f472955f40d773d6ff0b Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Wed, 15 Jan 2025 05:36:00 -0800
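Subject: [PATCH 5/5] Add gfx1152

Register the gfx1152 target alongside gfx1151: add cached assembler
capabilities for ISA (11, 5, 2), list it in SupportedISA,
architectureMap and isasWithDisabledHWMonitor, append it to the default
TENSILE_GPU_ARCHS, and add it to LazyLoadingInit and the OpenCL
device-string lookup.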
---
Tensile/AsmCaps.py | 44 +++++++++++++++++++
Tensile/Common.py | 6 +--
Tensile/Source/CMakeLists.txt | 4 +-
.../include/Tensile/PlaceholderLibrary.hpp | 3 ++
Tensile/Source/lib/source/ocl/OclUtils.cpp | 4 ++
5 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
index f08d623fce66..1819418f76f2 100644
--- a/Tensile/AsmCaps.py
+++ b/Tensile/AsmCaps.py
@@ -903,6 +903,50 @@ CACHED_ASM_CAPS = \
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
+ (11, 5, 2): {'HasAddLshl': True,
+ 'HasAtomicAdd': True,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': False,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': True,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': False,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': False,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
(12, 0, 0): {'HasAddLshl': True,
'HasAtomicAdd': False,
'HasDirectToLdsDest': False,
diff --git a/Tensile/Common.py b/Tensile/Common.py
index 5b4fe2954c55..e9ff575cd578 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -254,7 +254,7 @@ globalParameters["SupportedISA"] = [(8,0,3),
(9,4,0), (9,4,1), (9,4,2),
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5),
(11,0,0), (11,0,1), (11,0,2), (11,0,3),
- (11,5,1), (12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
+ (11,5,1), (11,5,2), (12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed
globalParameters["GenerateManifestAndExit"] = False # Output manifest file with list of expected library objects and exit
@@ -334,7 +334,7 @@ architectureMap = {
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
- 'gfx1103':'gfx1103', 'gfx1151':'gfx1151',
+ 'gfx1103':'gfx1103', 'gfx1151':'gfx1151', 'gfx1152':'gfx1152',
'gfx1200':'gfx1200',
'gfx1201':'gfx1201'
}
@@ -2460,7 +2460,7 @@ def assignGlobalParameters( config ):
if os.name == "nt":
globalParameters["CurrentISA"] = (9,0,6)
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
- isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,1), (12,0,0), (12,0,1))
+ isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,1), (11,5,2), (12,0,0), (12,0,1))
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
printWarning(f"HardwareMonitor currently disabled for {isaString}")
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
index 78379e2d21d8..67ba9f3be7ba 100644
--- a/Tensile/Source/CMakeLists.txt
+++ b/Tensile/Source/CMakeLists.txt
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
endif()
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 gfx1152 CACHE STRING "GPU architectures")
else()
- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 CACHE STRING "GPU architectures")
+ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103 gfx1151 gfx1152 CACHE STRING "GPU architectures")
endif()
include(CMakeDependentOption)
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
index 4f81795a9065..bed38b0d9fef 100644
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
@@ -60,6 +60,7 @@ namespace Tensile
gfx1102,
gfx1103,
gfx1151,
+ gfx1152,
All
};
@@ -112,6 +113,8 @@ namespace Tensile
return "TensileLibrary_*_gfx1103";
case LazyLoadingInit::gfx1151:
return "TensileLibrary_*_gfx1151";
+ case LazyLoadingInit::gfx1152:
+ return "TensileLibrary_*_gfx1152";
case LazyLoadingInit::None:
return "";
}
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
index eb5a14eccfb1..2b790342451f 100644
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
@@ -196,6 +196,10 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1151;
}
+ else if(deviceString.find("gfx1152") != std::string::npos)
+ {
+ return AMDGPU::Processor::gfx1152;
+ }
else
{
return static_cast<AMDGPU::Processor>(0);
--
2.47.1