diff --git a/pocl-llvm16.patch b/pocl-llvm16.patch new file mode 100644 index 0000000..43245a6 --- /dev/null +++ b/pocl-llvm16.patch @@ -0,0 +1,203 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 8c8741b6..85de3968 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -520,7 +520,7 @@ if(ENABLE_LLVM) + # of the code, since this macro adds another layer of complexity. + if(LLVM_VERSION VERSION_LESS_EQUAL 14.0) + set(ENABLE_LLVM_OPAQUE_POINTERS OFF CACHE INTERNAL "llvm opaque pointers" FORCE) +- elseif(LLVM_VERSION VERSION_EQUAL 15.0) ++ elseif(LLVM_VERSION VERSION_EQUAL 15.0 OR LLVM_VERSION VERSION_EQUAL 16.0) + option(ENABLE_LLVM_OPAQUE_POINTERS "Handle the change to llvm opaque pointers." ON) + else() + set(ENABLE_LLVM_OPAQUE_POINTERS ON CACHE INTERNAL "llvm opaque pointers" FORCE) +diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake +index 18138f36..00ad2186 100644 +--- a/cmake/LLVM.cmake ++++ b/cmake/LLVM.cmake +@@ -122,8 +122,10 @@ string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_LIBDIR "${LLVM_LIBDI + run_llvm_config(LLVM_INCLUDEDIR --includedir) + string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}") + +-run_llvm_config(LLVM_SRC_ROOT --src-root) +-run_llvm_config(LLVM_OBJ_ROOT --obj-root) ++if(LLVM_VERSION_MAJOR LESS 16) ++ run_llvm_config(LLVM_SRC_ROOT --src-root) ++ run_llvm_config(LLVM_OBJ_ROOT --obj-root) ++endif() + string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_OBJ_ROOT "${LLVM_OBJ_ROOT}") + run_llvm_config(LLVM_ALL_TARGETS --targets-built) + run_llvm_config(LLVM_HOST_TARGET --host-target) +@@ -772,12 +774,35 @@ endif() + + #################################################################### + +-if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED ${CL_DISABLE_HALF}) +- set(CL_DISABLE_HALF 0) +- message(STATUS "Checking fp16 support") +- custom_try_compile_clang_silent("__fp16 callfp16(__fp16 a) { return a * (__fp16)1.8; };" "__fp16 x=callfp16((__fp16)argc);" RESV 
${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU}) +- if(RESV) ++# Clang documentation on Language Extensions: ++# __fp16 is supported on every target, as it is purely a storage format ++# _Float16 is currently only supported on the following targets... SPIR, x86 ++# Disabled for non-x86-64 because of limitations: ++# The _Float16 type requires SSE2 feature and above due to the instruction ++# limitations. When using it on i386 targets, you need to specify -msse2 ++# explicitly. ++# For targets without F16C feature or above, please make sure: ++# Use GCC 12.0 and above if you are using libgcc. ++# If you are using compiler-rt, use the same version with the compiler. ++# Early versions provided FP16 builtins in a different ABI. A workaround is ++# to use a small code snippet to check the ABI if you cannot make sure of it. ++ ++if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED CL_DISABLE_HALF) ++ # LLVM <15 doesn't support FP16 emulation ++ # LLVM 15 crashes on some code, with the FP16 emulation ++ # CONFORMANCE disables FP16 b/c it's incomplete ++ # enabled on x86-64 only for now ++ if((LLVM_VERSION_MAJOR LESS 16) OR ENABLE_CONFORMANCE OR (NOT X86_64)) ++ message(STATUS "FP16 support disabled") + set(CL_DISABLE_HALF 1) ++ else() ++ set(CL_DISABLE_HALF 0) ++ message(STATUS "Checking fp16 support") ++ custom_try_compile_clang_silent("_Float16 callfp16(_Float16 a) { return a * 1.8f16; };" "_Float16 x=callfp16((_Float16)argc);" ++ RESV ${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU}) ++ if(RESV) ++ set(CL_DISABLE_HALF 1) ++ endif() + endif() + endif() + +diff --git a/cmake/bitcode_rules.cmake b/cmake/bitcode_rules.cmake +index 3d1a373f..10a309c2 100644 +--- a/cmake/bitcode_rules.cmake ++++ b/cmake/bitcode_rules.cmake +@@ -167,7 +167,7 @@ function(compile_ll_to_bc FILENAME SUBDIR BCLIST) + list(APPEND ${BCLIST} "${BC_FILE}") + set(${BCLIST} ${${BCLIST}} PARENT_SCOPE) + +- if(LLVM_VERSION 
VERSION_EQUAL 15.0 OR LLVM_VERSION VERSION_EQUAL 16.0) + # both of these are necesssary. some of the files (like barrier.ll) + # don't contain any pointers and thus cannot be guessed; if llvm-as + # produces the wrong opaque-type file, later llvm-link will fail +diff --git a/include/_kernel_c.h b/include/_kernel_c.h +index e1038ea5..73ad9d18 100644 +--- a/include/_kernel_c.h ++++ b/include/_kernel_c.h +@@ -112,7 +112,7 @@ typedef uint uint16 __attribute__((__ext_vector_type__(16))); + /* NOTE: the Clang's __fp16 does not work robustly in C mode, + it might produce invalid code at least with half vectors. + Using the native 'half' type in OpenCL C mode works better. */ +-typedef __fp16 half; ++typedef _Float16 half; + + typedef half half2 __attribute__((__ext_vector_type__(2))); + typedef half half3 __attribute__((__ext_vector_type__(3))); +diff --git a/lib/CL/pocl_llvm_build.cc b/lib/CL/pocl_llvm_build.cc +index 3ef6e0c5..84413a48 100644 +--- a/lib/CL/pocl_llvm_build.cc ++++ b/lib/CL/pocl_llvm_build.cc +@@ -263,7 +263,7 @@ int pocl_llvm_build_program(cl_program program, + + size_t fastmath_flag = user_options.find("-cl-fast-relaxed-math"); + +-#if (CLANG_MAJOR == 15) ++#if (CLANG_MAJOR == 15) || (CLANG_MAJOR == 16) + #ifdef LLVM_OPAQUE_POINTERS + ss << "-opaque-pointers "; + #else +diff --git a/lib/CL/pocl_llvm_utils.cc b/lib/CL/pocl_llvm_utils.cc +index 3cc7fcc5..0690af17 100644 +--- a/lib/CL/pocl_llvm_utils.cc ++++ b/lib/CL/pocl_llvm_utils.cc +@@ -273,7 +273,9 @@ void InitializeLLVM() { + initializeAnalysis(Registry); + initializeTransformUtils(Registry); + initializeInstCombine(Registry); ++#ifdef LLVM_OLDER_THAN_16_0 + initializeInstrumentation(Registry); ++#endif + initializeTarget(Registry); + } + +@@ -390,7 +392,7 @@ void pocl_llvm_create_context(cl_context ctx) { + + data->Context = new llvm::LLVMContext(); + assert(data->Context); +-#if (CLANG_MAJOR == 15) ++#if (CLANG_MAJOR == 15) || (CLANG_MAJOR == 16) + #ifdef LLVM_OPAQUE_POINTERS + 
data->Context->setOpaquePointers(true); + #else +diff --git a/lib/llvmopencl/BarrierTailReplication.cc b/lib/llvmopencl/BarrierTailReplication.cc +index 98bc0635..43802f73 100644 +--- a/lib/llvmopencl/BarrierTailReplication.cc ++++ b/lib/llvmopencl/BarrierTailReplication.cc +@@ -352,7 +352,11 @@ BarrierTailReplication::ReplicateBasicBlocks(BasicBlockVector &new_graph, + i2 != e2; ++i2) { + Instruction *i = i2->clone(); + reference_map.insert(std::make_pair(&*i2, i)); ++#ifdef LLVM_OLDER_THAN_16_0 + new_b->getInstList().push_back(i); ++#else ++ i->insertInto(new_b, new_b->end()); ++#endif + } + + // Add predicates to PHINodes of basic blocks the replicated +diff --git a/lib/llvmopencl/ParallelRegion.cc b/lib/llvmopencl/ParallelRegion.cc +index dfe56e03..639bd686 100644 +--- a/lib/llvmopencl/ParallelRegion.cc ++++ b/lib/llvmopencl/ParallelRegion.cc +@@ -201,12 +201,17 @@ ParallelRegion::chainAfter(ParallelRegion *region) + #endif + + BasicBlock *successor = t->getSuccessor(0); +- Function::BasicBlockListType &bb_list = +- successor->getParent()->getBasicBlockList(); +- +- for (iterator i = begin(), e = end(); i != e; ++i) ++ Function *F = successor->getParent(); + ++#ifdef LLVM_OLDER_THAN_16_0 ++ Function::BasicBlockListType &bb_list = ++ F->getBasicBlockList(); ++ for (iterator i = begin(), e = end(); i != e; ++i) + bb_list.insertAfter(tail->getIterator(), *i); ++#else ++ for (iterator i = begin(), e = end(); i != e; ++i) ++ F->insert(tail->getIterator(), *i); ++#endif + t->setSuccessor(0, entryBB()); + + t = exitBB()->getTerminator(); +diff --git a/lib/llvmopencl/WorkItemAliasAnalysis.cc b/lib/llvmopencl/WorkItemAliasAnalysis.cc +index 02dcd2ec..8a85b25a 100644 +--- a/lib/llvmopencl/WorkItemAliasAnalysis.cc ++++ b/lib/llvmopencl/WorkItemAliasAnalysis.cc +@@ -51,6 +51,12 @@ using namespace llvm; + typedef llvm::MemoryLocation Location; + typedef llvm::AliasResult AliasResult; + ++#ifdef LLVM_OLDER_THAN_16_0 ++#define AAResultB AAResultBase ++#else ++#define 
AAResultB AAResultBase ++#endif ++ + /// WorkItemAliasAnalysis - This is a simple alias analysis + /// implementation that uses pocl metadata to make sure memory accesses from + /// different work items are not aliasing. +@@ -58,8 +64,8 @@ typedef llvm::AliasResult AliasResult; + + // LLVM 3.8+ + +-class WorkItemAAResult : public AAResultBase { +- friend AAResultBase; ++class WorkItemAAResult : public AAResultB { ++ friend AAResultB; + + public: + static char ID; diff --git a/pocl.changes b/pocl.changes index 0e2ced6..6289b53 100644 --- a/pocl.changes +++ b/pocl.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Sat May 13 05:43:35 UTC 2023 - Carsten Ziepke + +- Add pocl-llvm16.patch, enables support for LLVM 16 +- Use BuildRequires ocl-icd-devel instead of pkgconfig(OpenCL) to + fix "have choice for pkgconfig(OpenCL)" on SLE + ------------------------------------------------------------------- Sat Dec 24 12:27:06 UTC 2022 - Andreas Schwab diff --git a/pocl.spec b/pocl.spec index 2e48d02..b84d336 100644 --- a/pocl.spec +++ b/pocl.spec @@ -1,7 +1,7 @@ # # spec file for package pocl # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2023 SUSE LLC # Copyright (c) 2014 Guillaume GARDET # # All modifications and additions to the file contributed by third parties @@ -30,13 +30,17 @@ URL: http://portablecl.org/ Source0: https://github.com/pocl/pocl/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz Source99: pocl-rpmlintrc Patch0: link_against_libclang-cpp_so.patch +# PATCH-FIX-UPSTREAM - pocl-llvm16.patch - enables support for LLVM 16 +# https://github.com/pocl/pocl/commit/20d1bfa9bfd301964f7b2fc6d7f4589dd04e1b5c +# https://github.com/pocl/pocl/commit/bf50f0052e4248cd1acfaaa8da95c5e4ca52f815 +Patch1: pocl-llvm16.patch +BuildRequires: (clang-devel >= 6.0.0 with clang-devel < 17) BuildRequires: cmake BuildRequires: gcc-c++ BuildRequires: ninja +BuildRequires: ocl-icd-devel BuildRequires: opencl-headers BuildRequires: pkgconfig 
-BuildRequires: (clang-devel >= 6.0.0 with clang-devel < 16) -BuildRequires: pkgconfig(OpenCL) BuildRequires: pkgconfig(hwloc) # PPC has limited support/testing from upstream # s390(x) is also not supported, so use ExclusiveArch @@ -84,6 +88,7 @@ This subpackage provides the development files needed for pocl. %prep %setup -q %patch0 -p1 +%patch1 -p1 %build %define __builder ninja