SHA256
1
0
forked from pool/pocl
pocl/pocl-llvm16.patch

204 lines
8.2 KiB
Diff
Raw Normal View History

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c8741b6..85de3968 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -520,7 +520,7 @@ if(ENABLE_LLVM)
# of the code, since this macro adds another layer of complexity.
if(LLVM_VERSION VERSION_LESS_EQUAL 14.0)
set(ENABLE_LLVM_OPAQUE_POINTERS OFF CACHE INTERNAL "llvm opaque pointers" FORCE)
- elseif(LLVM_VERSION VERSION_EQUAL 15.0)
+ elseif(LLVM_VERSION VERSION_EQUAL 15.0 OR LLVM_VERSION VERSION_EQUAL 16.0)
option(ENABLE_LLVM_OPAQUE_POINTERS "Handle the change to llvm opaque pointers." ON)
else()
set(ENABLE_LLVM_OPAQUE_POINTERS ON CACHE INTERNAL "llvm opaque pointers" FORCE)
diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake
index 18138f36..00ad2186 100644
--- a/cmake/LLVM.cmake
+++ b/cmake/LLVM.cmake
@@ -122,8 +122,10 @@ string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_LIBDIR "${LLVM_LIBDI
run_llvm_config(LLVM_INCLUDEDIR --includedir)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}")
-run_llvm_config(LLVM_SRC_ROOT --src-root)
-run_llvm_config(LLVM_OBJ_ROOT --obj-root)
+if(LLVM_VERSION_MAJOR LESS 16)
+ run_llvm_config(LLVM_SRC_ROOT --src-root)
+ run_llvm_config(LLVM_OBJ_ROOT --obj-root)
+endif()
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_OBJ_ROOT "${LLVM_OBJ_ROOT}")
run_llvm_config(LLVM_ALL_TARGETS --targets-built)
run_llvm_config(LLVM_HOST_TARGET --host-target)
@@ -772,12 +774,35 @@ endif()
####################################################################
-if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED ${CL_DISABLE_HALF})
- set(CL_DISABLE_HALF 0)
- message(STATUS "Checking fp16 support")
- custom_try_compile_clang_silent("__fp16 callfp16(__fp16 a) { return a * (__fp16)1.8; };" "__fp16 x=callfp16((__fp16)argc);" RESV ${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU})
- if(RESV)
+# Clang documentation on Language Extensions:
+# __fp16 is supported on every target, as it is purely a storage format
+# _Float16 is currently only supported on the following targets... SPIR, x86
+# Disabled for non-x86-64 because of limitations:
+# The _Float16 type requires SSE2 feature and above due to the instruction
+# limitations. When using it on i386 targets, you need to specify -msse2
+# explicitly.
+# For targets without F16C feature or above, please make sure:
+# Use GCC 12.0 and above if you are using libgcc.
+# If you are using compiler-rt, use the same version with the compiler.
+# Early versions provided FP16 builtins in a different ABI. A workaround is
+# to use a small code snippet to check the ABI if you cannot make sure of it.
+
+if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED CL_DISABLE_HALF)
+ # LLVM < 15 does not support FP16 emulation.
+ # LLVM 15 crashes on some code when FP16 emulation is used.
+ # ENABLE_CONFORMANCE disables FP16 because the FP16 support is incomplete.
+ # FP16 is enabled on x86-64 only for now.
+ if((LLVM_VERSION_MAJOR LESS 16) OR ENABLE_CONFORMANCE OR (NOT X86_64))
+ message(STATUS "FP16 support disabled")
set(CL_DISABLE_HALF 1)
+ else()
+ set(CL_DISABLE_HALF 0)
+ message(STATUS "Checking fp16 support")
+ custom_try_compile_clang_silent("_Float16 callfp16(_Float16 a) { return a * 1.8f16; };" "_Float16 x=callfp16((_Float16)argc);"
+ RESV ${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU})
+ if(RESV)
+ set(CL_DISABLE_HALF 1)
+ endif()
endif()
endif()
diff --git a/cmake/bitcode_rules.cmake b/cmake/bitcode_rules.cmake
index 3d1a373f..10a309c2 100644
--- a/cmake/bitcode_rules.cmake
+++ b/cmake/bitcode_rules.cmake
@@ -167,7 +167,7 @@ function(compile_ll_to_bc FILENAME SUBDIR BCLIST)
list(APPEND ${BCLIST} "${BC_FILE}")
set(${BCLIST} ${${BCLIST}} PARENT_SCOPE)
- if(LLVM_VERSION VERSION_EQUAL 15.0)
+ if(LLVM_VERSION VERSION_EQUAL 15.0 OR LLVM_VERSION VERSION_EQUAL 16.0)
# both of these are necesssary. some of the files (like barrier.ll)
# don't contain any pointers and thus cannot be guessed; if llvm-as
# produces the wrong opaque-type file, later llvm-link will fail
diff --git a/include/_kernel_c.h b/include/_kernel_c.h
index e1038ea5..73ad9d18 100644
--- a/include/_kernel_c.h
+++ b/include/_kernel_c.h
@@ -112,7 +112,7 @@ typedef uint uint16 __attribute__((__ext_vector_type__(16)));
/* NOTE: the Clang's __fp16 does not work robustly in C mode,
it might produce invalid code at least with half vectors.
Using the native 'half' type in OpenCL C mode works better. */
-typedef __fp16 half;
+typedef _Float16 half;
typedef half half2 __attribute__((__ext_vector_type__(2)));
typedef half half3 __attribute__((__ext_vector_type__(3)));
diff --git a/lib/CL/pocl_llvm_build.cc b/lib/CL/pocl_llvm_build.cc
index 3ef6e0c5..84413a48 100644
--- a/lib/CL/pocl_llvm_build.cc
+++ b/lib/CL/pocl_llvm_build.cc
@@ -263,7 +263,7 @@ int pocl_llvm_build_program(cl_program program,
size_t fastmath_flag = user_options.find("-cl-fast-relaxed-math");
-#if (CLANG_MAJOR == 15)
+#if (CLANG_MAJOR == 15) || (CLANG_MAJOR == 16)
#ifdef LLVM_OPAQUE_POINTERS
ss << "-opaque-pointers ";
#else
diff --git a/lib/CL/pocl_llvm_utils.cc b/lib/CL/pocl_llvm_utils.cc
index 3cc7fcc5..0690af17 100644
--- a/lib/CL/pocl_llvm_utils.cc
+++ b/lib/CL/pocl_llvm_utils.cc
@@ -273,7 +273,9 @@ void InitializeLLVM() {
initializeAnalysis(Registry);
initializeTransformUtils(Registry);
initializeInstCombine(Registry);
+#ifdef LLVM_OLDER_THAN_16_0
initializeInstrumentation(Registry);
+#endif
initializeTarget(Registry);
}
@@ -390,7 +392,7 @@ void pocl_llvm_create_context(cl_context ctx) {
data->Context = new llvm::LLVMContext();
assert(data->Context);
-#if (CLANG_MAJOR == 15)
+#if (CLANG_MAJOR == 15) || (CLANG_MAJOR == 16)
#ifdef LLVM_OPAQUE_POINTERS
data->Context->setOpaquePointers(true);
#else
diff --git a/lib/llvmopencl/BarrierTailReplication.cc b/lib/llvmopencl/BarrierTailReplication.cc
index 98bc0635..43802f73 100644
--- a/lib/llvmopencl/BarrierTailReplication.cc
+++ b/lib/llvmopencl/BarrierTailReplication.cc
@@ -352,7 +352,11 @@ BarrierTailReplication::ReplicateBasicBlocks(BasicBlockVector &new_graph,
i2 != e2; ++i2) {
Instruction *i = i2->clone();
reference_map.insert(std::make_pair(&*i2, i));
+#ifdef LLVM_OLDER_THAN_16_0
new_b->getInstList().push_back(i);
+#else
+ i->insertInto(new_b, new_b->end());
+#endif
}
// Add predicates to PHINodes of basic blocks the replicated
diff --git a/lib/llvmopencl/ParallelRegion.cc b/lib/llvmopencl/ParallelRegion.cc
index dfe56e03..639bd686 100644
--- a/lib/llvmopencl/ParallelRegion.cc
+++ b/lib/llvmopencl/ParallelRegion.cc
@@ -201,12 +201,17 @@ ParallelRegion::chainAfter(ParallelRegion *region)
#endif
BasicBlock *successor = t->getSuccessor(0);
- Function::BasicBlockListType &bb_list =
- successor->getParent()->getBasicBlockList();
-
- for (iterator i = begin(), e = end(); i != e; ++i)
+ Function *F = successor->getParent();
+#ifdef LLVM_OLDER_THAN_16_0
+ Function::BasicBlockListType &bb_list =
+ F->getBasicBlockList();
+ for (iterator i = begin(), e = end(); i != e; ++i)
bb_list.insertAfter(tail->getIterator(), *i);
+#else
+ for (iterator i = begin(), e = end(); i != e; ++i)
+ F->insert(tail->getIterator(), *i);
+#endif
t->setSuccessor(0, entryBB());
t = exitBB()->getTerminator();
diff --git a/lib/llvmopencl/WorkItemAliasAnalysis.cc b/lib/llvmopencl/WorkItemAliasAnalysis.cc
index 02dcd2ec..8a85b25a 100644
--- a/lib/llvmopencl/WorkItemAliasAnalysis.cc
+++ b/lib/llvmopencl/WorkItemAliasAnalysis.cc
@@ -51,6 +51,12 @@ using namespace llvm;
typedef llvm::MemoryLocation Location;
typedef llvm::AliasResult AliasResult;
+#ifdef LLVM_OLDER_THAN_16_0
+#define AAResultB AAResultBase<WorkItemAAResult>
+#else
+#define AAResultB AAResultBase
+#endif
+
/// WorkItemAliasAnalysis - This is a simple alias analysis
/// implementation that uses pocl metadata to make sure memory accesses from
/// different work items are not aliasing.
@@ -58,8 +64,8 @@ typedef llvm::AliasResult AliasResult;
// LLVM 3.8+
-class WorkItemAAResult : public AAResultBase<WorkItemAAResult> {
- friend AAResultBase<WorkItemAAResult>;
+class WorkItemAAResult : public AAResultB {
+ friend AAResultB;
public:
static char ID;