julia/new-pass-manager.patch
Soc Virnyl Estela 916acc0f00 - Rename and/or add more description to the following patches:
  * libblastrampoline-hardcoded-libs.patch
  * llvm-link-shared.patch
  * llvm-set-of-custom-patches.patch
  * mbedtls-hardcoded-libs.patch
  * new-pass-manager.patch
  * openlibm.patch
  * support-float16-depending-on-llvm-and-platform.patch
  * use-newpm-asan.patch
  * use-system-libuv-correctly.patch
- Renamed/removed patches
  * 21d4c2f1.patch
  * 959902f1.patch
  * e08e1444.patch
  * f11bfc6c.patch

OBS-URL: https://build.opensuse.org/package/show/science/julia?expand=0&rev=115
2023-12-09 08:46:17 +00:00

305 lines
13 KiB
Diff

From e08e14449fdec30d83ae2b9f0d6d1f4a9acf0b75 Mon Sep 17 00:00:00 2001
From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com>
Date: Mon, 17 Apr 2023 19:37:59 +0000
Subject: [PATCH] Bring in newpm (new pass manager) updates to master (#47038)
* Workaround missing ASAN global
* Add alias analysis at O2 instead of O3
* Disable runtime unrolling
* Make SimpleLoopUnswitch act like LoopUnswitch
* Add --time-passes support
* Only add verification passes in debug mode
* Hide assertion function
---
src/codegen.cpp | 11 ++++++++++-
src/jitlayers.cpp | 49 ++++++++++++++++++++++++++++++++++++-----------
src/jitlayers.h | 17 ++++++++++++----
src/pipeline.cpp | 27 ++++++++++++++------------
4 files changed, 76 insertions(+), 28 deletions(-)
diff --git a/src/codegen.cpp b/src/codegen.cpp
index b6b86ba4442e1..fb8cefe5eb44f 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -8838,6 +8838,15 @@ extern "C" void jl_init_llvm(void)
clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922
if (clopt->getNumOccurrences() == 0)
cl::ProvidePositionalOption(clopt, "0", 1);
+#ifdef JL_USE_NEW_PM
+ // For parity with LoopUnswitch
+ clopt = llvmopts.lookup("unswitch-threshold");
+ if (clopt->getNumOccurrences() == 0)
+ cl::ProvidePositionalOption(clopt, "100", 1);
+ clopt = llvmopts.lookup("enable-unswitch-cost-multiplier");
+ if (clopt->getNumOccurrences() == 0)
+ cl::ProvidePositionalOption(clopt, "false", 1);
+#endif
// if the patch adding this option has been applied, lower its limit to provide
// better DAGCombiner performance.
clopt = llvmopts.lookup("combiner-store-merge-dependence-limit");
@@ -8916,7 +8925,7 @@ extern "C" JL_DLLEXPORT void jl_init_codegen_impl(void)
extern "C" JL_DLLEXPORT void jl_teardown_codegen_impl() JL_NOTSAFEPOINT
{
// output LLVM timings and statistics
- reportAndResetTimings();
+ jl_ExecutionEngine->printTimers();
PrintStatistics();
}
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index c7e202b98efab..29665d4e420b9 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -1103,6 +1103,8 @@ namespace {
std::unique_ptr<TargetMachine> TM;
int optlevel;
PMCreator(TargetMachine &TM, int optlevel) : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}
+ // overload for newpm compatibility
+ PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &) : PMCreator(TM, optlevel) {}
PMCreator(const PMCreator &other) : PMCreator(*other.TM, other.optlevel) {}
PMCreator(PMCreator &&other) : TM(std::move(other.TM)), optlevel(other.optlevel) {}
friend void swap(PMCreator &self, PMCreator &other) {
@@ -1128,16 +1131,21 @@ namespace {
struct PMCreator {
orc::JITTargetMachineBuilder JTMB;
OptimizationLevel O;
- PMCreator(TargetMachine &TM, int optlevel) : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)) {}
+ std::vector<std::function<void()>> &printers;
+ PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers) {}
auto operator()() {
- return std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
+ auto NPM = std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
+ printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT {
+ NPM->printTimers();
+ });
+ return NPM;
}
};
#endif
struct OptimizerT {
- OptimizerT(TargetMachine &TM, int optlevel) : optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {}
+ OptimizerT(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) : optlevel(optlevel), PMs(PMCreator(TM, optlevel, printers)) {}
OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
TSM.withModuleDo([&](Module &M) {
@@ -1247,10 +1255,14 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
return jl_data_layout;
}
-JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel)
+JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers)
: CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
- OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, OptimizerT(TM, optlevel)) {}
+ OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, OptimizerT(TM, optlevel, PrintLLVMTimers)) {}
+
+#ifdef _COMPILER_ASAN_ENABLED_
+int64_t ___asan_globals_registered;
+#endif
JuliaOJIT::JuliaOJIT()
: TM(createTargetMachine()),
@@ -1285,10 +1297,10 @@ JuliaOJIT::JuliaOJIT()
),
#endif
Pipelines{
- std::make_unique<PipelineT>(ObjectLayer, *TM, 0),
- std::make_unique<PipelineT>(ObjectLayer, *TM, 1),
- std::make_unique<PipelineT>(ObjectLayer, *TM, 2),
- std::make_unique<PipelineT>(ObjectLayer, *TM, 3),
+ std::make_unique<PipelineT>(ObjectLayer, *TM, 0, PrintLLVMTimers),
+ std::make_unique<PipelineT>(ObjectLayer, *TM, 1, PrintLLVMTimers),
+ std::make_unique<PipelineT>(ObjectLayer, *TM, 2, PrintLLVMTimers),
+ std::make_unique<PipelineT>(ObjectLayer, *TM, 3, PrintLLVMTimers),
},
OptSelLayer(Pipelines)
{
@@ -1393,6 +1405,11 @@ JuliaOJIT::JuliaOJIT()
reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported);
cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt)));
#endif
+#ifdef _COMPILER_ASAN_ENABLED_
+ orc::SymbolMap asan_crt;
+ asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported);
+ cantFail(JD.define(orc::absoluteSymbols(asan_crt)));
+#endif
}
JuliaOJIT::~JuliaOJIT() = default;
@@ -1583,6 +1600,16 @@ size_t JuliaOJIT::getTotalBytes() const
}
#endif
+void JuliaOJIT::printTimers()
+{
+#ifdef JL_USE_NEW_PM
+ for (auto &printer : PrintLLVMTimers) {
+ printer();
+ }
+#endif
+ reportAndResetTimings();
+}
+
JuliaOJIT *jl_ExecutionEngine;
// destructively move the contents of src into dest
diff --git a/src/jitlayers.h b/src/jitlayers.h
index d8c06df44176f..7f07034586c80 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -42,9 +42,7 @@
// and feature support (e.g. Windows, JITEventListeners for various profilers,
// etc.). Thus, we currently only use JITLink where absolutely required, that is,
// for Mac/aarch64.
-// #define JL_FORCE_JITLINK
-
-#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(JL_FORCE_JITLINK)
+#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(_COMPILER_ASAN_ENABLED_) || defined(JL_FORCE_JITLINK)
# if JL_LLVM_VERSION < 130000
# pragma message("On aarch64-darwin, LLVM version >= 13 is required for JITLink; fallback suffers from occasional segfaults")
# endif
@@ -91,6 +89,12 @@ struct OptimizationOptions {
}
};
+// LLVM's new pass manager is scheduled to replace the legacy pass manager
+// for middle-end IR optimizations. However, we have not qualified the new
+// pass manager on our optimization pipeline yet, so this remains an optional
+// define
+// #define JL_USE_NEW_PM
+
struct NewPM {
std::unique_ptr<TargetMachine> TM;
StandardInstrumentations SI;
@@ -103,6 +107,8 @@ struct NewPM {
NewPM(std::unique_ptr<TargetMachine> TM, OptimizationLevel O, OptimizationOptions options = OptimizationOptions::defaults());
void run(Module &M);
+
+ void printTimers();
};
struct AnalysisManagers {
@@ -420,7 +426,7 @@ class JuliaOJIT {
std::unique_ptr<WNMutex> mutex;
};
struct PipelineT {
- PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel);
+ PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers);
CompileLayerT CompileLayer;
OptimizeLayerT OptimizeLayer;
};
@@ -490,6 +496,7 @@ class JuliaOJIT {
TargetIRAnalysis getTargetIRAnalysis() const;
size_t getTotalBytes() const;
+ void printTimers();
JITDebugInfoRegistry &getDebugInfoRegistry() JL_NOTSAFEPOINT {
return DebugRegistry;
@@ -522,6 +529,8 @@ class JuliaOJIT {
jl_locked_stream dump_compiles_stream;
jl_locked_stream dump_llvm_opt_stream;
+ std::vector<std::function<void()>> PrintLLVMTimers;
+
ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;
#ifndef JL_USE_JITLINK
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index ae2b1c3202f04..4403653a9d8e4 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -146,7 +146,7 @@ namespace {
// Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
// Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
// Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
- MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
+ // MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
// MPM.addPass(ModuleAddressSanitizerPass(
// Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
//Let's assume the defaults are actually fine for our purposes
@@ -173,11 +173,13 @@ namespace {
// }
}
- void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) {
+#ifdef JL_DEBUG_BUILD
+ static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) {
if (!llvm_only)
MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass()));
MPM.addPass(VerifierPass());
}
+#endif
auto basicSimplifyCFGOptions() {
return SimplifyCFGOptions()
@@ -244,9 +246,9 @@ namespace {
//Use for O1 and below
void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) {
-// #ifdef JL_DEBUG_BUILD
+#ifdef JL_DEBUG_BUILD
addVerificationPasses(MPM, options.llvm_only);
-// #endif
+#endif
invokePipelineStartCallbacks(MPM, PB, O);
MPM.addPass(ConstantMergePass());
if (!options.dump_native) {
@@ -320,9 +322,9 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza
//Use for O2 and above
void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) {
-// #ifdef JL_DEBUG_BUILD
+#ifdef JL_DEBUG_BUILD
addVerificationPasses(MPM, options.llvm_only);
-// #endif
+#endif
invokePipelineStartCallbacks(MPM, PB, O);
MPM.addPass(ConstantMergePass());
{
@@ -382,7 +384,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
#endif
LPM2.addPass(LICMPass(LICMOptions()));
JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
- LPM2.addPass(SimpleLoopUnswitchPass());
+ LPM2.addPass(SimpleLoopUnswitchPass(true, true));
LPM2.addPass(LICMPass(LICMOptions()));
JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
//LICM needs MemorySSA now, so we must use it
@@ -399,7 +401,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
//We don't know if the loop end callbacks support MSSA
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
}
- FPM.addPass(LoopUnrollPass());
+ FPM.addPass(LoopUnrollPass(LoopUnrollOptions().setRuntime(false)));
JULIA_PASS(FPM.addPass(AllocOptPass()));
FPM.addPass(SROAPass());
FPM.addPass(InstSimplifyPass());
@@ -541,11 +543,8 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] JL_NOTSAFEPOINT {
AAManager AA;
- // TODO: Why are we only doing this for -O3?
- if (O.getSpeedupLevel() >= 3) {
- AA.registerFunctionAnalysis<BasicAA>();
- }
if (O.getSpeedupLevel() >= 2) {
+ AA.registerFunctionAnalysis<BasicAA>();
AA.registerFunctionAnalysis<ScopedNoAliasAA>();
AA.registerFunctionAnalysis<TypeBasedAA>();
}
@@ -603,6 +602,10 @@ void NewPM::run(Module &M) {
#endif
}
+void NewPM::printTimers() {
+ SI.getTimePasses().print();
+}
+
OptimizationLevel getOptLevel(int optlevel) {
switch (std::min(std::max(optlevel, 0), 3)) {
case 0: