From cdc7a4110282d30caa25666855928848fc87031cb137e454b3605ec8890bb384 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Thu, 19 Jan 2023 20:24:57 +0000 Subject: [PATCH] - Add llvm-workaround-superfluous-branches.patch: hints LLVM to eliminate branches until gh#llvm/llvm-project#28804 is solved. OBS-URL: https://build.opensuse.org/package/show/devel:tools:compiler/llvm15?expand=0&rev=18 --- llvm-workaround-superfluous-branches.patch | 14 ++++++++++++++ llvm15.changes | 11 ++++++----- llvm15.spec | 15 +++++++++------ 3 files changed, 29 insertions(+), 11 deletions(-) create mode 100644 llvm-workaround-superfluous-branches.patch diff --git a/llvm-workaround-superfluous-branches.patch b/llvm-workaround-superfluous-branches.patch new file mode 100644 index 0000000..8f0c9dc --- /dev/null +++ b/llvm-workaround-superfluous-branches.patch @@ -0,0 +1,14 @@ +diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h +index b6bbff8..1b68640 100644 +--- a/llvm/include/llvm/Support/Casting.h ++++ b/llvm/include/llvm/Support/Casting.h +@@ -605,6 +605,9 @@ LLVM_NODISCARD inline decltype(auto) dyn_cast(From &Val) { + + template <typename To, typename From> + LLVM_NODISCARD inline decltype(auto) dyn_cast(From *Val) { ++#if defined(__clang__) && defined(NDEBUG) ++ __builtin_assume(Val); ++#endif + return CastInfo<To, From *>::doCastIfPossible(Val); + } + diff --git a/llvm15.changes b/llvm15.changes index adf010c..a152056 100644 --- a/llvm15.changes +++ b/llvm15.changes @@ -5,11 +5,12 @@ Sat Jan 14 14:06:38 UTC 2023 - Aaron Puchert * This release contains bug-fixes for the LLVM 15.0.0 release. This release is API and ABI compatible with 15.0.0. - Rebase llvm-do-not-install-static-libraries.patch. -- Build stage 2 with -fno-plt if we're using LTO: since building - with -Wl,-z,now the PLT stubs are basically dead code, and - eliminating the indirection should improve code locality and - reduce BTB pressure for the quite frequent cross-DSO calls. - With LTO we should not need linker relaxation. 
+- Build stage 2 with -fno-plt on x86_64: since we build with + -Wl,-z,now, the PLT stubs are basically dead code, so eliminating + the indirection reduces the number of branches and improves code + locality for the quite frequent cross-DSO calls. +- Add llvm-workaround-superfluous-branches.patch: hints LLVM to + eliminate branches until gh#llvm/llvm-project#28804 is solved. ------------------------------------------------------------------- Sun Dec 4 21:43:38 UTC 2022 - Aaron Puchert diff --git a/llvm15.spec b/llvm15.spec index 5c20842..262346a 100644 --- a/llvm15.spec +++ b/llvm15.spec @@ -367,6 +367,8 @@ Patch13: llvm-normally-versioned-libllvm.patch Patch14: llvm-do-not-install-static-libraries.patch # PATCH-FIX-OPENSUSE (or -UPSTREAM?): we disable RPATHs, but the test driver drops LD_LIBRARY_PATH. Patch15: libcxx-test-library-path.patch +# PATCH-FIX-UPSTREAM (?): Work around gh#llvm/llvm-project#28804 by hinting with __builtin_assume. +Patch16: llvm-workaround-superfluous-branches.patch Patch20: llvm_build_tablegen_component_as_shared_library.patch Patch21: tests-use-python3.patch Patch22: llvm-better-detect-64bit-atomics-support.patch @@ -807,6 +809,7 @@ This package contains the development files for Polly. %patch5 -p1 %patch13 -p1 %patch14 -p1 +%patch16 -p2 %patch20 -p1 %patch21 -p1 %patch22 -p1 @@ -979,12 +982,12 @@ if ! ./stage1/bin/clang -c -xc -Werror -fstack-clash-protection -o /dev/null /de then flags=$(echo -n %flags | sed 's/-fstack-clash-protection//'); fi -# 4) Add -fno-plt: With -Wl,-z,now we don't need the PLT anymore, allowing us to -# reduce the number of branches for the quite frequent cross-DSO calls. This -# is good for code locality and reduces the pressure on the BTB. -# However, do this only when we're using LTO, since otherwise indirect -# branches have to be relaxed by the linker, which might cause regressions. 
-%if %{with thin_lto} +# 4) Add -fno-plt: With -Wl,-z,now, the PLT is basically dead code, so we can +# now go the direct route for quite frequent cross-DSO calls. This reduces +# branches in a typical execution by ~5 percent, instructions/cycles +# by ~4 percent, and reduces pressure on the instruction cache. We do this +# only on x86_64 where it doesn't increase the code size significantly. +%ifarch x86_64 flags="$flags -fno-plt" %endif