diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c8741b6..85de3968 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -520,7 +520,7 @@ if(ENABLE_LLVM) # of the code, since this macro adds another layer of complexity. if(LLVM_VERSION VERSION_LESS_EQUAL 14.0) set(ENABLE_LLVM_OPAQUE_POINTERS OFF CACHE INTERNAL "llvm opaque pointers" FORCE) - elseif(LLVM_VERSION VERSION_EQUAL 15.0) + elseif(LLVM_VERSION VERSION_EQUAL 15.0 OR LLVM_VERSION VERSION_EQUAL 16.0) option(ENABLE_LLVM_OPAQUE_POINTERS "Handle the change to llvm opaque pointers." ON) else() set(ENABLE_LLVM_OPAQUE_POINTERS ON CACHE INTERNAL "llvm opaque pointers" FORCE) diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake index 18138f36..00ad2186 100644 --- a/cmake/LLVM.cmake +++ b/cmake/LLVM.cmake @@ -122,8 +122,10 @@ string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_LIBDIR "${LLVM_LIBDI run_llvm_config(LLVM_INCLUDEDIR --includedir) string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}") -run_llvm_config(LLVM_SRC_ROOT --src-root) -run_llvm_config(LLVM_OBJ_ROOT --obj-root) +if(LLVM_VERSION_MAJOR LESS 16) + run_llvm_config(LLVM_SRC_ROOT --src-root) + run_llvm_config(LLVM_OBJ_ROOT --obj-root) +endif() string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_OBJ_ROOT "${LLVM_OBJ_ROOT}") run_llvm_config(LLVM_ALL_TARGETS --targets-built) run_llvm_config(LLVM_HOST_TARGET --host-target) @@ -772,12 +774,35 @@ endif() #################################################################### -if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED ${CL_DISABLE_HALF}) - set(CL_DISABLE_HALF 0) - message(STATUS "Checking fp16 support") - custom_try_compile_clang_silent("__fp16 callfp16(__fp16 a) { return a * (__fp16)1.8; };" "__fp16 x=callfp16((__fp16)argc);" RESV ${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU}) - if(RESV) +# Clang documentation on Language Extensions: +# __fp16 is supported on every target, as it is purely a storage format +# _Float16 is currently only 
supported on the following targets... SPIR, x86 +# Disabled for non-x86-64 because of limitations: +# The _Float16 type requires SSE2 feature and above due to the instruction +# limitations. When using it on i386 targets, you need to specify -msse2 +# explicitly. +# For targets without F16C feature or above, please make sure: +# Use GCC 12.0 and above if you are using libgcc. +# If you are using compiler-rt, use the same version with the compiler. +# Early versions provided FP16 builtins in a different ABI. A workaround is +# to use a small code snippet to check the ABI if you cannot make sure of it. + +if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED CL_DISABLE_HALF) + # LLVM <15 doesn't support FP16 emulation + # LLVM 15 crashes on some code, with the FP16 emulation + # CONFORMANCE disables FP16 b/c it's incomplete + # enabled on x86-64 only for now + if((LLVM_VERSION_MAJOR LESS 16) OR ENABLE_CONFORMANCE OR (NOT X86_64)) + message(STATUS "FP16 support disabled") set(CL_DISABLE_HALF 1) + else() + set(CL_DISABLE_HALF 0) + message(STATUS "Checking fp16 support") + custom_try_compile_clang_silent("_Float16 callfp16(_Float16 a) { return a * 1.8f16; };" "_Float16 x=callfp16((_Float16)argc);" + RESV ${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU}) + if(RESV) + set(CL_DISABLE_HALF 1) + endif() endif() endif() diff --git a/cmake/bitcode_rules.cmake b/cmake/bitcode_rules.cmake index 3d1a373f..10a309c2 100644 --- a/cmake/bitcode_rules.cmake +++ b/cmake/bitcode_rules.cmake @@ -167,7 +167,7 @@ function(compile_ll_to_bc FILENAME SUBDIR BCLIST) list(APPEND ${BCLIST} "${BC_FILE}") set(${BCLIST} ${${BCLIST}} PARENT_SCOPE) - if(LLVM_VERSION VERSION_EQUAL 15.0) + if(LLVM_VERSION VERSION_EQUAL 15.0 OR LLVM_VERSION VERSION_EQUAL 16.0) # both of these are necesssary. 
some of the files (like barrier.ll) # don't contain any pointers and thus cannot be guessed; if llvm-as # produces the wrong opaque-type file, later llvm-link will fail diff --git a/include/_kernel_c.h b/include/_kernel_c.h index e1038ea5..73ad9d18 100644 --- a/include/_kernel_c.h +++ b/include/_kernel_c.h @@ -112,7 +112,7 @@ typedef uint uint16 __attribute__((__ext_vector_type__(16))); /* NOTE: the Clang's __fp16 does not work robustly in C mode, it might produce invalid code at least with half vectors. Using the native 'half' type in OpenCL C mode works better. */ -typedef __fp16 half; +typedef _Float16 half; typedef half half2 __attribute__((__ext_vector_type__(2))); typedef half half3 __attribute__((__ext_vector_type__(3))); diff --git a/lib/CL/pocl_llvm_build.cc b/lib/CL/pocl_llvm_build.cc index 3ef6e0c5..84413a48 100644 --- a/lib/CL/pocl_llvm_build.cc +++ b/lib/CL/pocl_llvm_build.cc @@ -263,7 +263,7 @@ int pocl_llvm_build_program(cl_program program, size_t fastmath_flag = user_options.find("-cl-fast-relaxed-math"); -#if (CLANG_MAJOR == 15) +#if (CLANG_MAJOR == 15) || (CLANG_MAJOR == 16) #ifdef LLVM_OPAQUE_POINTERS ss << "-opaque-pointers "; #else diff --git a/lib/CL/pocl_llvm_utils.cc b/lib/CL/pocl_llvm_utils.cc index 3cc7fcc5..0690af17 100644 --- a/lib/CL/pocl_llvm_utils.cc +++ b/lib/CL/pocl_llvm_utils.cc @@ -273,7 +273,9 @@ void InitializeLLVM() { initializeAnalysis(Registry); initializeTransformUtils(Registry); initializeInstCombine(Registry); +#ifdef LLVM_OLDER_THAN_16_0 initializeInstrumentation(Registry); +#endif initializeTarget(Registry); } @@ -390,7 +392,7 @@ void pocl_llvm_create_context(cl_context ctx) { data->Context = new llvm::LLVMContext(); assert(data->Context); -#if (CLANG_MAJOR == 15) +#if (CLANG_MAJOR == 15) || (CLANG_MAJOR == 16) #ifdef LLVM_OPAQUE_POINTERS data->Context->setOpaquePointers(true); #else diff --git a/lib/llvmopencl/BarrierTailReplication.cc b/lib/llvmopencl/BarrierTailReplication.cc index 98bc0635..43802f73 100644 --- 
a/lib/llvmopencl/BarrierTailReplication.cc +++ b/lib/llvmopencl/BarrierTailReplication.cc @@ -352,7 +352,11 @@ BarrierTailReplication::ReplicateBasicBlocks(BasicBlockVector &new_graph, i2 != e2; ++i2) { Instruction *i = i2->clone(); reference_map.insert(std::make_pair(&*i2, i)); +#ifdef LLVM_OLDER_THAN_16_0 new_b->getInstList().push_back(i); +#else + i->insertInto(new_b, new_b->end()); +#endif } // Add predicates to PHINodes of basic blocks the replicated diff --git a/lib/llvmopencl/ParallelRegion.cc b/lib/llvmopencl/ParallelRegion.cc index dfe56e03..639bd686 100644 --- a/lib/llvmopencl/ParallelRegion.cc +++ b/lib/llvmopencl/ParallelRegion.cc @@ -201,12 +201,17 @@ ParallelRegion::chainAfter(ParallelRegion *region) #endif BasicBlock *successor = t->getSuccessor(0); - Function::BasicBlockListType &bb_list = - successor->getParent()->getBasicBlockList(); - - for (iterator i = begin(), e = end(); i != e; ++i) + Function *F = successor->getParent(); +#ifdef LLVM_OLDER_THAN_16_0 + Function::BasicBlockListType &bb_list = + F->getBasicBlockList(); + for (iterator i = begin(), e = end(); i != e; ++i) bb_list.insertAfter(tail->getIterator(), *i); +#else + for (iterator i = begin(), e = end(); i != e; ++i) + F->insert(tail->getIterator(), *i); +#endif t->setSuccessor(0, entryBB()); t = exitBB()->getTerminator(); diff --git a/lib/llvmopencl/WorkItemAliasAnalysis.cc b/lib/llvmopencl/WorkItemAliasAnalysis.cc index 02dcd2ec..8a85b25a 100644 --- a/lib/llvmopencl/WorkItemAliasAnalysis.cc +++ b/lib/llvmopencl/WorkItemAliasAnalysis.cc @@ -51,6 +51,12 @@ using namespace llvm; typedef llvm::MemoryLocation Location; typedef llvm::AliasResult AliasResult; +#ifdef LLVM_OLDER_THAN_16_0 +#define AAResultB AAResultBase<WorkItemAAResult> +#else +#define AAResultB AAResultBase +#endif + /// WorkItemAliasAnalysis - This is a simple alias analysis /// implementation that uses pocl metadata to make sure memory accesses from /// different work items are not aliasing. 
@@ -58,8 +64,8 @@ typedef llvm::AliasResult AliasResult; // LLVM 3.8+ -class WorkItemAAResult : public AAResultBase<WorkItemAAResult> { - friend AAResultBase<WorkItemAAResult>; +class WorkItemAAResult : public AAResultB { + friend AAResultB; public: static char ID;