diff --git a/9_4_4-bootstrap-sources.tar.gz b/9_4_4-bootstrap-sources.tar.gz deleted file mode 100644 index 0b5cf96..0000000 --- a/9_4_4-bootstrap-sources.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3de754bb3e45d4b3980ed238781cfd808f4e7c84ceae73213662f1bf5ea2d8fa -size 1526663 diff --git a/9_8_2-bootstrap-sources.tar.gz b/9_8_2-bootstrap-sources.tar.gz new file mode 100644 index 0000000..adf4dab --- /dev/null +++ b/9_8_2-bootstrap-sources.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b8e6fae1aa500de67fcc88aa6fe49690f0dd5c87f6710592db4cda54c8086f7 +size 1446589 diff --git a/cabal-riscv64.patch b/cabal-riscv64.patch deleted file mode 100644 index c9278db..0000000 --- a/cabal-riscv64.patch +++ /dev/null @@ -1,41 +0,0 @@ -Index: ghc-9.4.5/libraries/Cabal/Cabal-syntax/src/Distribution/System.hs -=================================================================== ---- ghc-9.4.5.orig/libraries/Cabal/Cabal-syntax/src/Distribution/System.hs -+++ ghc-9.4.5/libraries/Cabal/Cabal-syntax/src/Distribution/System.hs -@@ -159,7 +159,7 @@ buildOS = classifyOS Permissive System.I - - -- | These are the known Arches: I386, X86_64, PPC, PPC64, Sparc, - -- Arm, AArch64, Mips, SH, IA64, S390, S390X, Alpha, Hppa, Rs6000, ---- M68k, Vax, JavaScript and Wasm32. -+-- M68k, Vax, RISCV64, JavaScript and Wasm32. - -- - -- The following aliases can also be used: - -- * PPC alias: powerpc -@@ -174,6 +174,7 @@ data Arch = I386 | X86_64 | PPC | PPC - | IA64 | S390 | S390X - | Alpha | Hppa | Rs6000 - | M68k | Vax -+ | RISCV64 - | JavaScript - | Wasm32 - | OtherArch String -@@ -189,6 +190,7 @@ knownArches = [I386, X86_64, PPC, PPC64, - ,IA64, S390, S390X - ,Alpha, Hppa, Rs6000 - ,M68k, Vax -+ ,RISCV64 - ,JavaScript - ,Wasm32] - -Index: ghc-9.4.5/libraries/Cabal/Cabal/src/Distribution/Simple/PreProcess.hs -=================================================================== ---- ghc-9.4.5.orig/libraries/Cabal/Cabal/src/Distribution/Simple/PreProcess.hs -+++ ghc-9.4.5/libraries/Cabal/Cabal/src/Distribution/Simple/PreProcess.hs -@@ -729,6 +729,7 @@ platformDefines lbi = - Rs6000 -> ["rs6000"] - M68k -> ["m68k"] - Vax -> ["vax"] -+ RISCV64 -> ["riscv64"] - JavaScript -> ["javascript"] - Wasm32 -> ["wasm32"] - OtherArch _ -> [] diff --git a/ghc-9.10.1-src.tar.xz b/ghc-9.10.1-src.tar.xz new file mode 100644 index 0000000..d0165a9 --- /dev/null +++ b/ghc-9.10.1-src.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf386a302d4ee054791ffd51748900f15d71760fd199157922d120cc1f89e2f7 +size 32828552 diff --git a/ghc-9.8.3-src.tar.xz b/ghc-9.8.3-src.tar.xz deleted file mode 100644 index 60d6354..0000000 --- a/ghc-9.8.3-src.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99e40d729ec8831a633b75fd85d65dd10e31a0133dec9d198d686a273679ab70 -size 32565948 diff --git a/ghc-Cabal-install-PATH-warning.patch b/ghc-Cabal-install-PATH-warning.patch index e59d86d..87f0448 100644 --- a/ghc-Cabal-install-PATH-warning.patch +++ b/ghc-Cabal-install-PATH-warning.patch @@ -1,14 +1,18 @@ -Index: ghc-9.2.0.20210331/libraries/Cabal/Cabal/src/Distribution/Simple/Install.hs +Index: ghc-9.10.1/libraries/Cabal/Cabal/src/Distribution/Simple/Install.hs =================================================================== ---- ghc-9.2.0.20210331.orig/libraries/Cabal/Cabal/src/Distribution/Simple/Install.hs -+++ ghc-9.2.0.20210331/libraries/Cabal/Cabal/src/Distribution/Simple/Install.hs -@@ -206,8 +206,7 @@ copyComponent verbosity pkg_descr lbi (C - ++ " in " ++ binPref) - inPath <- isInSearchPath binPref - when (not inPath) $ -- warn verbosity ("The directory " ++ binPref -- ++ " is not in the system search path.") -+ warn verbosity ("Executable installed in " ++ binPref) - case compilerFlavor (compiler lbi) of - GHC -> GHC.installExe verbosity lbi binPref buildPref progFix pkg_descr exe - GHCJS -> GHCJS.installExe verbosity lbi binPref buildPref progFix pkg_descr exe +--- ghc-9.10.1.orig/libraries/Cabal/Cabal/src/Distribution/Simple/Install.hs ++++ ghc-9.10.1/libraries/Cabal/Cabal/src/Distribution/Simple/Install.hs +@@ -261,12 +261,7 @@ copyComponent verbosity pkg_descr lbi (C + ) + inPath <- isInSearchPath binPref + when (not inPath) $ +- warn +- verbosity +- ( "The directory " +- ++ binPref +- ++ " is not in the system search path." +- ) ++ warn verbosity ("Executable installed in " ++ binPref) + case compilerFlavor (compiler lbi) of + GHC -> GHC.installExe verbosity lbi binPref buildPref progFix pkg_descr exe + GHCJS -> GHCJS.installExe verbosity lbi binPref buildPref progFix pkg_descr exe diff --git a/ghc-with-interpreter.patch b/ghc-with-interpreter.patch deleted file mode 100644 index ad2dd2e..0000000 --- a/ghc-with-interpreter.patch +++ /dev/null @@ -1,14 +0,0 @@ -Index: ghc-9.4.5/hadrian/src/Oracles/Setting.hs -=================================================================== ---- ghc-9.4.5.orig/hadrian/src/Oracles/Setting.hs -+++ ghc-9.4.5/hadrian/src/Oracles/Setting.hs -@@ -292,7 +292,8 @@ ghcWithInterpreter = do - , "darwin", "kfreebsdgnu" ] - goodArch <- anyTargetArch [ "i386", "x86_64", "powerpc" - , "arm", "aarch64", "s390x" -- , "powerpc64", "powerpc64le" ] -+ , "powerpc64", "powerpc64le" -+ , "riscv64" ] - return $ goodOs && goodArch - - -- | Variants of the ARM architecture. diff --git a/ghc.changes b/ghc.changes index 5870b0a..2b7135b 100644 --- a/ghc.changes +++ b/ghc.changes @@ -1,3 +1,26 @@ +------------------------------------------------------------------- +Mon Nov 25 14:46:09 UTC 2024 - Andreas Schwab + +- riscv64-ncg.patch: Backport of MR 13105, NCG for RISCV64 +- Don't require on llvm on riscv64 + +------------------------------------------------------------------- +Fri Nov 22 14:54:22 UTC 2024 - Ondřej Súkup + +- Update to version 9.10.1. The change log for this release is at: + https://downloads.haskell.org/~ghc/9.10.1/docs/users_guide/9.10.1-notes.html +- use ghc-bootstrap 9.8.2 +- enable numa on all architectures +- cleanup dead parts of specfile +- patch changes: + * added: + - hadrian-9.10-deps.patch + - os-string-be.patch + - ppc64le-miscompilation.patch + * dropped: + - cabal-riscv64.patch + - ghc-with-interpreter.patch + ------------------------------------------------------------------- Mon Oct 21 10:36:19 UTC 2024 - Ondřej Súkup diff --git a/ghc.spec b/ghc.spec index 61a887f..5a446e7 100644 --- a/ghc.spec +++ b/ghc.spec @@ -1,7 +1,7 @@ # # spec file for package ghc # -# Copyright (c) 2024 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -11,31 +11,19 @@ # case the license is the MIT License). An "Open Source License" is a # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. - # Please submit bugfixes or comments via https://bugs.opensuse.org/ # +%define full_version 9.10.1 +%define short_version 9.10.1 -%define full_version 9.8.3 -%define short_version 9.8.3 - -%ifnarch s390x -%define with_libnuma 1 -%else -%define with_libnuma 0 -%endif - -%global llvm_major 14 +%global llvm_major 15 # conditionals - # disable prof, docs, perf build # bcond_with for production builds: disable quick build %bcond_with quickbuild -# make sure ghc libraries' ABI hashes unchanged (ghcX.Y not supported yet) -%bcond_with abicheck - # bcond_without for production builds: use Hadrian buildsystem %bcond_without hadrian @@ -70,14 +58,15 @@ %{?with_haddock:%bcond_without manual} %endif -%global ghc_llvm_archs s390x riscv64 +%global ghc_llvm_archs s390x %global ghc_unregisterized_arches noarch -%global base_ver 4.19.2.0 +%global base_ver 4.20.0.0 %global ghc_compact_ver 0.1.0.0 -%global hpc_ver 0.7.0.0 +%global hpc_ver 0.7.0.1 %global hsc2hs_ver 0.68.8 %global ghc_bignum_ver 1.3 +%global xhtml_ver 3000.2.2.1 Name: ghc Version: %{short_version} @@ -87,24 +76,31 @@ License: BSD-3-Clause URL: https://www.haskell.org/ghc/ Source: https://downloads.haskell.org/~ghc/%{full_version}/ghc-%{version}-src.tar.xz Source2: ghc-rpmlintrc -Source4: 9_4_4-bootstrap-sources.tar.gz +Source4: 9_8_2-bootstrap-sources.tar.gz Source5: ghc-pkg.man Source6: haddock.man Source7: runghc.man + Patch1: ghc-gen_contents_index-haddock-path.patch # https://ghc.haskell.org/trac/ghc/ticket/15689 Patch2: ghc-Cabal-install-PATH-warning.patch + # PATCH-FIX-UPSTREAM Disable-unboxed-arrays.patch ptrommler@icloud.com -- Do not use unboxed arrays on big-endian platforms. See Haskell Trac #15411. Patch3: Disable-unboxed-arrays.patch -# PATCH-FIX-UPSTREAM Hadrian: enable GHCi support on riscv64 (dd38aca95a) -Patch100: ghc-with-interpreter.patch -# PATCH-FIX-UPSTREAM libraries/Cabal: Add support for the 64-bit RISC-V architecture (d89526f98) -Patch101: cabal-riscv64.patch + +Patch5: ppc64le-miscompilation.patch + +Patch100: os-string-be.patch Patch200: ghc-hadrian-s390x-rts--qg.patch +Patch300: hadrian-9.10-deps.patch + +# Backport of MR 13105 (NCG for RISCV64) +Patch243: riscv64-ncg.patch + BuildRequires: binutils-devel BuildRequires: gcc-PIE BuildRequires: gcc-c++ -BuildRequires: ghc-bootstrap >= 9.4 +BuildRequires: ghc-bootstrap >= 9.8 BuildRequires: ghc-bootstrap-helpers >= 1.3 BuildRequires: ghc-rpm-macros-extra => 2.6.1 BuildRequires: glibc-devel @@ -112,6 +108,7 @@ BuildRequires: gmp-devel BuildRequires: libdw-devel BuildRequires: libelf-devel BuildRequires: libffi-devel +BuildRequires: libdwarf-devel BuildRequires: libtool %ifarch %{ghc_llvm_archs} BuildRequires: clang%{llvm_major} @@ -138,9 +135,7 @@ Recommends: %{name}-compiler-default = %{version}-%{release} %if %{with manual} BuildRequires: python3-Sphinx %endif -%if %{with_libnuma} BuildRequires: libnuma-devel -%endif %if %{with haddock} Suggests: %{name}-doc = %{version}-%{release} Suggests: %{name}-doc-index = %{version}-%{release} @@ -172,11 +167,11 @@ Haskell home page at . %package compiler Summary: GHC compiler and utilities License: BSD-3-Clause -Requires: %{name}-filesystem = %{version}-%{release} Requires: gcc Requires: ghc-base-devel = %{base_ver}-%{release} +Requires: %{name}-filesystem = %{version}-%{release} Provides: hsc2hs = %{hsc2hs_ver}-%{release} -%ifarch riscv64 s390x +%ifarch %{ghc_llvm_archs} Requires: clang%{llvm_major} Requires: llvm%{llvm_major} %endif @@ -216,6 +211,7 @@ This package provides some common directories used for Haskell libraries documentation. %endif + %if %{with manual} %package manual Summary: GHC manual @@ -227,48 +223,51 @@ BuildArch: noarch This package provides the User Guide and Haddock manual. %endif + %global ghc_version_override %{version} %global ghc_pkg_c_deps ghc-compiler = %{ghc_version_override}-%{release} %global version %{ghc_version_override} -%if %{with_libnuma} -%define libnuma_dep ,libnuma-devel -%else -%define libnuma_dep %{nil} -%endif #!ForceMultiversion -%ghc_lib_subpackage -d Cabal-3.10.3.0 -%ghc_lib_subpackage -d Cabal-syntax-3.10.3.0 -%ghc_lib_subpackage -d array-0.5.8.0 -%ghc_lib_subpackage -d -c gmp-devel,libffi-devel,libdw-devel,libelf-devel%{libnuma_dep} base-%{base_ver} -%ghc_lib_subpackage -d binary-0.8.9.1 -%ghc_lib_subpackage -d -x ghc-bignum-%{ghc_bignum_ver} +%ghc_lib_subpackage -d Cabal-3.12.0.0 +%ghc_lib_subpackage -d Cabal-syntax-3.12.0.0 +%ghc_lib_subpackage -d array-0.5.7.0 +%ghc_lib_subpackage -d -c gmp-devel,libffi-devel,libdw-devel,libelf-devel,libnuma-devel base-%{base_ver} +%ghc_lib_subpackage -d binary-0.8.9.2 %ghc_lib_subpackage -d bytestring-0.12.1.0 -%ghc_lib_subpackage -d containers-0.6.8 -%ghc_lib_subpackage -d deepseq-1.5.1.0 -%ghc_lib_subpackage -d directory-1.3.8.5 +%ghc_lib_subpackage -d containers-0.7 +%ghc_lib_subpackage -d deepseq-1.5.0.0 +%ghc_lib_subpackage -d directory-1.3.8.3 %ghc_lib_subpackage -d exceptions-0.10.7 -%ghc_lib_subpackage -d filepath-1.4.200.1 +%ghc_lib_subpackage -d filepath-1.5.2.0 %ghc_lib_subpackage -d -x ghc-%{ghc_version_override} +%ghc_lib_subpackage -d -x ghc-bignum-%{ghc_bignum_ver} %ghc_lib_subpackage -d -x ghc-boot-%{ghc_version_override} %ghc_lib_subpackage -d ghc-boot-th-%{ghc_version_override} %ghc_lib_subpackage -d -x ghc-compact-%{ghc_compact_ver} +%ghc_lib_subpackage -d ghc-experimental-0.1.0.0 %ghc_lib_subpackage -d -x ghc-heap-%{ghc_version_override} +%ghc_lib_subpackage -d ghc-internal-9.1001.0 %ghc_lib_subpackage -d -x ghci-%{ghc_version_override} %ghc_lib_subpackage -d haskeline-0.8.2.1 %ghc_lib_subpackage -d -x hpc-%{hpc_ver} %ghc_lib_subpackage -d mtl-2.3.1 %ghc_lib_subpackage -d parsec-3.1.17.0 %ghc_lib_subpackage -d pretty-1.1.3.6 -%ghc_lib_subpackage -d process-1.6.25.0 -%ghc_lib_subpackage -d semaphore-compat-1.0.0 +%ghc_lib_subpackage -d process-1.6.19.0 %ghc_lib_subpackage -d stm-2.5.3.1 -%ghc_lib_subpackage -d template-haskell-2.21.0.0 +%ghc_lib_subpackage -d semaphore-compat-1.0.0 +%ghc_lib_subpackage -d template-haskell-2.22.0.0 %ghc_lib_subpackage -d -c ncurses-devel terminfo-0.4.1.6 %ghc_lib_subpackage -d text-2.1.1 %ghc_lib_subpackage -d time-1.12.2 -%ghc_lib_subpackage -d transformers-0.6.1.0 -%ghc_lib_subpackage -d unix-2.8.4.0 -%ghc_lib_subpackage -d xhtml-3000.2.2.1 +%ghc_lib_subpackage -d transformers-0.6.1.1 +%ghc_lib_subpackage -d unix-2.8.5.1 +%ghc_lib_subpackage -d xhtml-%{xhtml_ver} + +# new in 9.10 +%ghc_lib_subpackage -d os-string-2.0.2 +%ghc_lib_subpackage -d ghc-toolchain-0.1.0.0 +%ghc_lib_subpackage -d ghc-platform-0.1.0.0 %global version %{ghc_version_override} @@ -301,17 +300,24 @@ Installing this package causes %{name}-*-prof packages corresponding to %ifarch s390x %patch -P 3 -p1 %endif + +%patch -P 5 -p1 + %patch -P 100 -p1 -%patch -P 101 -p1 + %ifarch ppc64le s390x riscv64 %patch -P 200 -p1 %endif +%patch -P 300 -p1 + +%patch -P 243 -p1 + rm libffi-tarballs/libffi-*.tar.gz %build cp %{SOURCE4} ./ -hadrian/bootstrap/bootstrap.py --bootstrap-sources 9_4_4-bootstrap-sources.tar.gz +hadrian/bootstrap/bootstrap.py --bootstrap-sources 9_8_2-bootstrap-sources.tar.gz %global hadrian _build/bin/hadrian @@ -352,7 +358,7 @@ python3 boot.source --hadrian %endif %global jobs_nr %{?_smp_mflags} %else -%global jobs_nr -j1 +%global jobs_nr -j1 %endif %{hadrian} %{jobs_nr} --flavour=%{?with_quickbuild:quick+no_profiled_libs}%{!?with_quickbuild:perf%{!?with_ghc_prof:+no_profiled_libs}}%{?hadrian_llvm} %{hadrian_docs} binary-dist-dir --hash-unit-ids @@ -371,21 +377,25 @@ for i in $(find %{buildroot} -type f -executable -exec sh -c "file {} | grep -q chrpath -d $i done + %if %{with haddock} # remove short hashes for d in %{buildroot}%{ghc_html_libraries_dir}/*/; do mv $d $(echo $d | sed -e "s/\(.*\)-.*/\\1/") done %endif - + # containers src moved to a subdir cp -p libraries/containers/containers/LICENSE libraries/containers/LICENSE # hack for Cabal-syntax/LICENSE mkdir -p libraries/Cabal-syntax cp -p libraries/Cabal/Cabal-syntax/LICENSE libraries/Cabal-syntax - +# hack for ghc-toolchain +mkdir -p libraries/ghc-toolchain +cp -p LICENSE libraries/ghc-toolchain rm -f %{name}-*.files + # FIXME replace with ghc_subpackages_list for i in %{ghc_packages_list}; do name=$(echo $i | sed -e "s/\(.*\)-.*/\1/") @@ -397,8 +407,8 @@ done echo "%%dir %{ghclibdir}" >> %{name}-base%{?_ghcdynlibdir:-devel}.files %ghc_gen_filelists ghc %{ghc_version_override} -%ghc_gen_filelists ghc-boot %{ghc_version_override} %ghc_gen_filelists ghc-bignum %{ghc_bignum_ver} +%ghc_gen_filelists ghc-boot %{ghc_version_override} %ghc_gen_filelists ghc-compact %{ghc_compact_ver} %ghc_gen_filelists ghc-heap %{ghc_version_override} %ghc_gen_filelists ghci %{ghc_version_override} @@ -423,13 +433,13 @@ fi %if %{with ghc_prof} ls %{buildroot}%{ghclibdir}/bin/ghc-iserv-prof* >> %{name}-base-prof.files -ls %{buildroot}%{ghclibdir}/lib/bin/ghc-iserv-prof >> %{name}-base-prof.files echo "%%dir %{ghcliblib}/bin" %endif sed -i -e "s|^%{buildroot}||g" %{name}-base*.files sed -i -e "s|%{buildroot}||g" %{buildroot}%{_bindir}/* + %if %{with haddock} rm %{buildroot}%{_docdir}/ghc-%{version}/archives/libraries.html.tar.xz %endif @@ -438,25 +448,27 @@ rm %{buildroot}%{_docdir}/ghc-%{version}/archives/Haddock.html.tar.xz rm %{buildroot}%{_docdir}/ghc-%{version}/archives/users_guide.html.tar.xz %endif + mkdir -p %{buildroot}%{_mandir}/man1 install -p -m 0644 %{SOURCE5} %{buildroot}%{_mandir}/man1/ghc-pkg.1 install -p -m 0644 %{SOURCE6} %{buildroot}%{_mandir}/man1/haddock.1 install -p -m 0644 %{SOURCE7} %{buildroot}%{_mandir}/man1/runghc.1 + rm %{buildroot}%{ghclibdir}/lib/package.conf.d/.stamp rm %{buildroot}%{ghclibdir}/lib/package.conf.d/*.conf.copy # https://gitlab.haskell.org/ghc/ghc/-/issues/24121 rm %{buildroot}%{ghclibdir}/share/doc/%{ghcplatform}/*/LICENSE -( -cd %{buildroot}%{ghclibdir}/lib/bin -for i in *; do - if [ -f %{buildroot}%{ghclibdir}/bin/$i ]; then - ln -sf ../../bin/$i - fi -done -) +#( +#cd %{buildroot}%{ghclibdir}/lib/bin +#for i in *; do +# if [ -f %{buildroot}%{ghclibdir}/bin/$i ]; then +# ln -sf ../../bin/$i +# fi +#done +#) %check # Actually, I took this from Jens Petersen's Fedora package @@ -518,17 +530,18 @@ $GHC --info %dir %{ghcliblib} %dir %{ghcliblib}/%{ghcplatform} %dir %{ghclibdir}/bin -%dir %{ghcliblib}/bin %{ghclibdir}/bin/ghc %{ghclibdir}/bin/ghc-iserv %{ghclibdir}/bin/ghc-iserv-dyn %{ghclibdir}/bin/ghc-pkg +%{ghclibdir}/bin/ghc-toolchain-bin %{ghclibdir}/bin/hpc %{ghclibdir}/bin/hsc2hs %{ghclibdir}/bin/runghc %{ghclibdir}/bin/hp2ps %{ghclibdir}/bin/unlit %{ghclibdir}/bin/ghc-%{version} +%{ghclibdir}/bin/ghc-toolchain-bin-ghc-%{version} %{ghclibdir}/bin/ghc-iserv-ghc-%{version} %{ghclibdir}/bin/ghc-iserv-dyn-ghc-%{version} %{ghclibdir}/bin/ghc-pkg-%{version} @@ -541,14 +554,13 @@ $GHC --info %{ghclibdir}/bin/runhaskell %{ghclibdir}/bin/runhaskell-%{version} %{ghclibdir}/bin/unlit-ghc-%{version} -%{ghclibdir}/lib/bin/ghc-iserv -%{ghclibdir}/lib/bin/ghc-iserv-dyn -%{ghclibdir}/lib/bin/unlit %{ghcliblib}/ghc-interp.js %{ghcliblib}/ghc-usage.txt %{ghcliblib}/ghci-usage.txt %{ghcliblib}/llvm-passes %{ghcliblib}/llvm-targets +%{ghcliblib}/post-link.mjs +%{ghcliblib}/prelude.js %dir %{ghcliblib}/package.conf.d %ghost %{ghcliblib}/package.conf.d/package.cache %{ghcliblib}/package.conf.d/package.cache.lock diff --git a/hadrian-9.10-deps.patch b/hadrian-9.10-deps.patch new file mode 100644 index 0000000..e9dc0d6 --- /dev/null +++ b/hadrian-9.10-deps.patch @@ -0,0 +1,74 @@ +--- ghc-9.10.0.20240313/hadrian/hadrian.cabal.orig 2024-03-13 21:23:13.000000000 +0800 ++++ ghc-9.10.0.20240313/hadrian/hadrian.cabal 2024-03-14 21:53:00.180444498 +0800 +@@ -36,6 +36,8 @@ + main-is: Main.hs + hs-source-dirs: . + , src ++ , ../libraries/ghc-platform/src ++ , ../utils/ghc-toolchain/src + other-modules: Base + , Builder + , CommandLine +@@ -152,6 +154,7 @@ + , directory >= 1.3.1.0 && < 1.4 + , extra >= 1.4.7 + , filepath ++ , process + , time + , mtl >= 2.2 && < 2.4 + , parsec >= 3.1 && < 3.2 +@@ -168,8 +171,6 @@ + , text >= 1.2 && < 3 + , cryptohash-sha256 >= 0.11 && < 0.12 + , base16-bytestring >= 0.1.1 && < 1.1.0.0 +- , ghc-platform +- , ghc-toolchain + ghc-options: -Wall + -Wincomplete-record-updates + -Wredundant-constraints +--- ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Program.hs.orig 2024-03-13 21:23:14.000000000 +0800 ++++ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Program.hs 2024-03-14 22:06:00.298622371 +0800 +@@ -1,3 +1,5 @@ ++{-# LANGUAGE NoImplicitPrelude #-} ++ + module GHC.Toolchain.Program + ( Program(..) + , shProgram +--- ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Cc.hs~ 2024-03-13 21:23:14.000000000 +0800 ++++ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Cc.hs 2024-03-14 22:06:23.356740644 +0800 +@@ -1,3 +1,4 @@ ++{-# LANGUAGE NoImplicitPrelude #-} + {-# LANGUAGE NamedFieldPuns #-} + {-# LANGUAGE ViewPatterns #-} + +diff -up ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Ar.hs~ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Ar.hs +--- ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Ar.hs~ 2024-03-13 21:23:14.000000000 +0800 ++++ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Ar.hs 2024-03-14 22:09:48.872794751 +0800 +@@ -1,3 +1,4 @@ ++{-# LANGUAGE NoImplicitPrelude #-} + {-# LANGUAGE NamedFieldPuns #-} + {-# LANGUAGE RecordWildCards #-} + +diff -up ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Cpp.hs~ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Cpp.hs +--- ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Cpp.hs~ 2024-03-13 21:23:14.000000000 +0800 ++++ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Cpp.hs 2024-03-14 22:10:18.872948622 +0800 +@@ -1,3 +1,4 @@ ++{-# LANGUAGE NoImplicitPrelude #-} + {-# LANGUAGE NamedFieldPuns #-} + + module GHC.Toolchain.Tools.Cpp (HsCpp(..), findHsCpp, Cpp(..), findCpp) where +diff -up ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Link.hs~ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Link.hs +--- ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Link.hs~ 2024-03-13 21:23:14.000000000 +0800 ++++ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/Link.hs 2024-03-14 22:10:12.954918270 +0800 +@@ -1,3 +1,4 @@ ++{-# LANGUAGE NoImplicitPrelude #-} + {-# OPTIONS_GHC -Wno-name-shadowing #-} + {-# LANGUAGE NamedFieldPuns #-} + {-# LANGUAGE RecordWildCards #-} +--- ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/MergeObjs.hs~ 2024-03-13 21:23:14.000000000 +0800 ++++ ghc-9.10.0.20240313/utils/ghc-toolchain/src/GHC/Toolchain/Tools/MergeObjs.hs 2024-03-14 22:13:08.399807784 +0800 +@@ -1,3 +1,4 @@ ++{-# LANGUAGE NoImplicitPrelude #-} + {-# LANGUAGE NamedFieldPuns #-} + {-# LANGUAGE CPP #-} + diff --git a/os-string-be.patch b/os-string-be.patch new file mode 100644 index 0000000..105a155 --- /dev/null +++ b/os-string-be.patch @@ -0,0 +1,25 @@ +From bafe87d871399b58ce4a50592b980c990a3eac39 Mon Sep 17 00:00:00 2001 +From: Bodigrim +Date: Thu, 9 May 2024 01:02:44 +0100 +Subject: [PATCH] Fix compilation on big-endian arches + +--- + libraries/os-string/System/OsString/Data/ByteString/Short/Internal.hs | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/libraries/os-string/System/OsString/Data/ByteString/Short/Internal.hs b/libraries/os-string/System/OsString/Data/ByteString/Short/Internal.hs +index fedc199..f7ddcd8 100644 +--- a/libraries/os-string/System/OsString/Data/ByteString/Short/Internal.hs ++++ b/libraries/os-string/System/OsString/Data/ByteString/Short/Internal.hs +@@ -311,7 +311,11 @@ word16ToLE#, word16FromLE# :: Word16# -> Word16# + word16ToLE#, word16FromLE# :: Word# -> Word# + #endif + #ifdef WORDS_BIGENDIAN ++#if MIN_VERSION_base(4,16,0) ++word16ToLE# w = wordToWord16# (byteSwap16# (word16ToWord# w)) ++#else + word16ToLE# = byteSwap16# ++#endif + #else + word16ToLE# w# = w# + #endif diff --git a/ppc64le-miscompilation.patch b/ppc64le-miscompilation.patch new file mode 100644 index 0000000..539acce --- /dev/null +++ b/ppc64le-miscompilation.patch @@ -0,0 +1,25 @@ +From 3f128c7d6c145985e3e12fda173e7e9a5a9c03f7 Mon Sep 17 00:00:00 2001 +From: Peter Trommler +Date: Sat, 15 Jun 2024 08:55:30 +0200 +Subject: [PATCH 4/4] PPC NCG: Fix sign hints in C calls + +--- + compiler/GHC/CmmToAsm/PPC/CodeGen.hs | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +index cca47f7bac2..060bd7159eb 100644 +--- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs ++++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +@@ -1770,7 +1770,7 @@ genCCall' config gcp target dest_regs args + _ -> panic "genCall': unknown calling conv." + + argReps = map (cmmExprType platform) args +- (argHints, _) = foreignTargetHints target ++ (_, argHints) = foreignTargetHints target + + roundTo a x | x `mod` a == 0 = x + | otherwise = x + a - (x `mod` a) +-- +GitLab + diff --git a/riscv64-ncg.patch b/riscv64-ncg.patch new file mode 100644 index 0000000..b71a015 --- /dev/null +++ b/riscv64-ncg.patch @@ -0,0 +1,6217 @@ +Index: ghc-9.10.1/CODEOWNERS +=================================================================== +--- ghc-9.10.1.orig/CODEOWNERS ++++ ghc-9.10.1/CODEOWNERS +@@ -40,6 +40,7 @@ + /compiler/GHC/HsToCore/Foreign/Wasm.hs @TerrorJack + /compiler/GHC/Tc/Deriv/ @RyanGlScott + /compiler/GHC/CmmToAsm/ @simonmar @bgamari @AndreasK ++/compiler/GHC/CmmToAsm/RV64/ @supersven @angerman + /compiler/GHC/CmmToAsm/Wasm/ @TerrorJack + /compiler/GHC/CmmToLlvm/ @angerman + /compiler/GHC/StgToCmm/ @simonmar @osa1 +Index: ghc-9.10.1/compiler/CodeGen.Platform.h +=================================================================== +--- ghc-9.10.1.orig/compiler/CodeGen.Platform.h ++++ ghc-9.10.1/compiler/CodeGen.Platform.h +@@ -1,7 +1,8 @@ + + import GHC.Cmm.Expr + #if !(defined(MACHREGS_i386) || defined(MACHREGS_x86_64) \ +- || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64)) ++ || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64) \ ++ || defined(MACHREGS_riscv64)) + import GHC.Utils.Panic.Plain + #endif + import GHC.Platform.Reg +@@ -1041,6 +1042,105 @@ freeReg 18 = False + + # if defined(REG_Base) + freeReg REG_Base = False ++# endif ++# if defined(REG_Sp) ++freeReg REG_Sp = False ++# endif ++# if defined(REG_SpLim) ++freeReg REG_SpLim = False ++# endif ++# if defined(REG_Hp) ++freeReg REG_Hp = False ++# endif ++# if defined(REG_HpLim) ++freeReg REG_HpLim = False ++# endif ++ ++# if defined(REG_R1) ++freeReg REG_R1 = False ++# endif ++# if defined(REG_R2) ++freeReg REG_R2 = False ++# endif ++# if defined(REG_R3) ++freeReg REG_R3 = False ++# endif ++# if defined(REG_R4) ++freeReg REG_R4 = False ++# endif ++# if defined(REG_R5) ++freeReg REG_R5 = False ++# endif ++# if defined(REG_R6) ++freeReg REG_R6 = False ++# endif ++# if defined(REG_R7) ++freeReg REG_R7 = False ++# endif ++# if defined(REG_R8) ++freeReg REG_R8 = False ++# endif ++ ++# if defined(REG_F1) ++freeReg REG_F1 = False ++# endif ++# if defined(REG_F2) ++freeReg REG_F2 = False ++# endif ++# if defined(REG_F3) ++freeReg REG_F3 = False ++# endif ++# if defined(REG_F4) ++freeReg REG_F4 = False ++# endif ++# if defined(REG_F5) ++freeReg REG_F5 = False ++# endif ++# if defined(REG_F6) ++freeReg REG_F6 = False ++# endif ++ ++# if defined(REG_D1) ++freeReg REG_D1 = False ++# endif ++# if defined(REG_D2) ++freeReg REG_D2 = False ++# endif ++# if defined(REG_D3) ++freeReg REG_D3 = False ++# endif ++# if defined(REG_D4) ++freeReg REG_D4 = False ++# endif ++# if defined(REG_D5) ++freeReg REG_D5 = False ++# endif ++# if defined(REG_D6) ++freeReg REG_D6 = False ++# endif ++ ++freeReg _ = True ++ ++#elif defined(MACHREGS_riscv64) ++ ++-- zero reg ++freeReg 0 = False ++-- link register ++freeReg 1 = False ++-- stack pointer ++freeReg 2 = False ++-- global pointer ++freeReg 3 = False ++-- thread pointer ++freeReg 4 = False ++-- frame pointer ++freeReg 8 = False ++-- made-up inter-procedural (ip) register ++-- See Note [The made-up RISCV64 TMP (IP) register] ++freeReg 31 = False ++ ++# if defined(REG_Base) ++freeReg REG_Base = False + # endif + # if defined(REG_Sp) + freeReg REG_Sp = False +Index: ghc-9.10.1/compiler/GHC/Cmm/CLabel.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/Cmm/CLabel.hs ++++ ghc-9.10.1/compiler/GHC/Cmm/CLabel.hs +@@ -1720,6 +1720,8 @@ pprDynamicLinkerAsmLabel !platform dllIn + | platformArch platform == ArchAArch64 + = ppLbl + ++ | platformArch platform == ArchRISCV64 ++ = ppLbl + + | platformArch platform == ArchX86_64 + = case dllInfo of +Index: ghc-9.10.1/compiler/GHC/CmmToAsm.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm.hs ++++ ghc-9.10.1/compiler/GHC/CmmToAsm.hs +@@ -67,6 +67,7 @@ import qualified GHC.CmmToAsm.X86 as X + import qualified GHC.CmmToAsm.PPC as PPC + import qualified GHC.CmmToAsm.AArch64 as AArch64 + import qualified GHC.CmmToAsm.Wasm as Wasm32 ++import qualified GHC.CmmToAsm.RV64 as RV64 + + import GHC.CmmToAsm.Reg.Liveness + import qualified GHC.CmmToAsm.Reg.Linear as Linear +@@ -148,7 +149,7 @@ nativeCodeGen logger ts config modLoc h + ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha" + ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb" + ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel" +- ArchRISCV64 -> panic "nativeCodeGen: No NCG for RISCV64" ++ ArchRISCV64 -> nCG' (RV64.ncgRV64 config) + ArchLoongArch64->panic "nativeCodeGen: No NCG for LoongArch64" + ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch" + ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript" +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Dwarf/Constants.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Dwarf/Constants.hs ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/Dwarf/Constants.hs +@@ -240,6 +240,7 @@ dwarfRegNo p r = case platformArch p of + | r == xmm15 -> 32 + ArchPPC_64 _ -> fromIntegral $ toRegNo r + ArchAArch64 -> fromIntegral $ toRegNo r ++ ArchRISCV64 -> fromIntegral $ toRegNo r + _other -> error "dwarfRegNo: Unsupported platform or unknown register!" + + -- | Virtual register number to use for return address. +@@ -252,5 +253,6 @@ dwarfReturnRegNo p + ArchX86 -> 8 -- eip + ArchX86_64 -> 16 -- rip + ArchPPC_64 ELF_V2 -> 65 -- lr (link register) +- ArchAArch64-> 30 ++ ArchAArch64 -> 30 ++ ArchRISCV64 -> 1 -- ra (return address) + _other -> error "dwarfReturnRegNo: Unsupported platform!" +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/PIC.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/PIC.hs ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/PIC.hs +@@ -132,6 +132,11 @@ cmmMakeDynamicReference config reference + addImport symbolPtr + return $ cmmMakePicReference config symbolPtr + ++ AccessViaSymbolPtr | ArchRISCV64 <- platformArch platform -> do ++ let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl ++ addImport symbolPtr ++ return $ cmmMakePicReference config symbolPtr ++ + AccessViaSymbolPtr -> do + let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl + addImport symbolPtr +@@ -164,6 +169,10 @@ cmmMakePicReference config lbl + | ArchAArch64 <- platformArch platform + = CmmLit $ CmmLabel lbl + ++ -- as on AArch64, there's no pic base register. ++ | ArchRISCV64 <- platformArch platform ++ = CmmLit $ CmmLabel lbl ++ + | OSAIX <- platformOS platform + = CmmMachOp (MO_Add W32) + [ CmmReg (CmmGlobal $ GlobalRegUse PicBaseReg (bWord platform)) +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64.hs +@@ -0,0 +1,57 @@ ++{-# OPTIONS_GHC -fno-warn-orphans #-} ++ ++-- | Native code generator for RiscV64 architectures ++module GHC.CmmToAsm.RV64 (ncgRV64) where ++ ++import GHC.CmmToAsm.Config ++import GHC.CmmToAsm.Instr ++import GHC.CmmToAsm.Monad ++import GHC.CmmToAsm.RV64.CodeGen qualified as RV64 ++import GHC.CmmToAsm.RV64.Instr qualified as RV64 ++import GHC.CmmToAsm.RV64.Ppr qualified as RV64 ++import GHC.CmmToAsm.RV64.RegInfo qualified as RV64 ++import GHC.CmmToAsm.RV64.Regs qualified as RV64 ++import GHC.CmmToAsm.Types ++import GHC.Prelude ++import GHC.Utils.Outputable (ftext) ++ ++ncgRV64 :: NCGConfig -> NcgImpl RawCmmStatics RV64.Instr RV64.JumpDest ++ncgRV64 config = ++ NcgImpl ++ { ncgConfig = config, ++ cmmTopCodeGen = RV64.cmmTopCodeGen, ++ generateJumpTableForInstr = RV64.generateJumpTableForInstr config, ++ getJumpDestBlockId = RV64.getJumpDestBlockId, ++ canShortcut = RV64.canShortcut, ++ shortcutStatics = RV64.shortcutStatics, ++ shortcutJump = RV64.shortcutJump, ++ pprNatCmmDeclS = RV64.pprNatCmmDecl config, ++ pprNatCmmDeclH = RV64.pprNatCmmDecl config, ++ maxSpillSlots = RV64.maxSpillSlots config, ++ allocatableRegs = RV64.allocatableRegs platform, ++ ncgAllocMoreStack = RV64.allocMoreStack platform, ++ ncgMakeFarBranches = RV64.makeFarBranches, ++ extractUnwindPoints = const [], ++ invertCondBranches = \_ _ -> id ++ } ++ where ++ platform = ncgPlatform config ++ ++-- | `Instruction` instance for RV64 ++instance Instruction RV64.Instr where ++ regUsageOfInstr = RV64.regUsageOfInstr ++ patchRegsOfInstr = RV64.patchRegsOfInstr ++ isJumpishInstr = RV64.isJumpishInstr ++ jumpDestsOfInstr = RV64.jumpDestsOfInstr ++ patchJumpInstr = RV64.patchJumpInstr ++ mkSpillInstr = RV64.mkSpillInstr ++ mkLoadInstr = RV64.mkLoadInstr ++ takeDeltaInstr = RV64.takeDeltaInstr ++ isMetaInstr = RV64.isMetaInstr ++ mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr ++ takeRegRegMoveInstr = RV64.takeRegRegMoveInstr ++ mkJumpInstr = RV64.mkJumpInstr ++ mkStackAllocInstr = RV64.mkStackAllocInstr ++ mkStackDeallocInstr = RV64.mkStackDeallocInstr ++ mkComment = pure . RV64.COMMENT . ftext ++ pprInstr = RV64.pprInstr +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/CodeGen.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/CodeGen.hs +@@ -0,0 +1,2207 @@ ++{-# LANGUAGE BangPatterns #-} ++{-# LANGUAGE BinaryLiterals #-} ++{-# LANGUAGE GADTs #-} ++{-# LANGUAGE OverloadedStrings #-} ++ ++module GHC.CmmToAsm.RV64.CodeGen ++ ( cmmTopCodeGen, ++ generateJumpTableForInstr, ++ makeFarBranches, ++ ) ++where ++ ++import Control.Monad ++import Data.Maybe ++import Data.Word ++import GHC.Cmm ++import GHC.Cmm.BlockId ++import GHC.Cmm.CLabel ++import GHC.Cmm.Dataflow.Block ++import GHC.Cmm.Dataflow.Graph ++import GHC.Cmm.Dataflow.Label ++import GHC.Cmm.DebugBlock ++import GHC.Cmm.Switch ++import GHC.Cmm.Utils ++import GHC.CmmToAsm.CPrim ++import GHC.CmmToAsm.Config ++import GHC.CmmToAsm.Format ++import GHC.CmmToAsm.Monad ++ ( NatM, ++ getBlockIdNat, ++ getConfig, ++ getDebugBlock, ++ getFileId, ++ getNewLabelNat, ++ getNewRegNat, ++ getPicBaseMaybeNat, ++ getPlatform, ++ ) ++import GHC.CmmToAsm.PIC ++import GHC.CmmToAsm.RV64.Cond ++import GHC.CmmToAsm.RV64.Instr ++import GHC.CmmToAsm.RV64.Regs ++import GHC.CmmToAsm.Types ++import GHC.Data.FastString ++import GHC.Data.OrdList ++import GHC.Float ++import GHC.Platform ++import GHC.Platform.Reg ++import GHC.Platform.Regs ++import GHC.Prelude hiding (EQ) ++import GHC.Types.Basic ++import GHC.Types.ForeignCall ++import GHC.Types.SrcLoc (srcSpanFile, srcSpanStartCol, srcSpanStartLine) ++import GHC.Types.Tickish (GenTickish (..)) ++import GHC.Types.Unique.Supply ++import GHC.Utils.Constants (debugIsOn) ++import GHC.Utils.Misc ++import GHC.Utils.Monad ++import GHC.Utils.Outputable ++import GHC.Utils.Panic ++ ++-- For an overview of an NCG's structure, see Note [General layout of an NCG] ++ ++cmmTopCodeGen :: ++ RawCmmDecl -> ++ NatM [NatCmmDecl RawCmmStatics Instr] ++-- Thus we'll have to deal with either CmmProc ... ++cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do ++ picBaseMb <- getPicBaseMaybeNat ++ when (isJust picBaseMb) $ panic "RV64.cmmTopCodeGen: Unexpected PIC base register (RISCV ISA does not define one)" ++ ++ let blocks = toBlockListEntryFirst graph ++ (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen blocks ++ ++ let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) ++ tops = proc : concat statics ++ ++ pure tops ++ ++-- ... or CmmData. ++cmmTopCodeGen (CmmData sec dat) = pure [CmmData sec dat] -- no translation, we just use CmmStatic ++ ++basicBlockCodeGen :: ++ Block CmmNode C C -> ++ NatM ++ ( [NatBasicBlock Instr], ++ [NatCmmDecl RawCmmStatics Instr] ++ ) ++basicBlockCodeGen block = do ++ config <- getConfig ++ let (_, nodes, tail) = blockSplit block ++ id = entryLabel block ++ stmts = blockToList nodes ++ ++ header_comment_instr ++ | debugIsOn = ++ unitOL ++ $ MULTILINE_COMMENT ++ ( text "-- --------------------------- basicBlockCodeGen --------------------------- --\n" ++ $+$ withPprStyle defaultDumpStyle (pdoc (ncgPlatform config) block) ++ ) ++ | otherwise = nilOL ++ ++ -- Generate location directive `.loc` (DWARF debug location info) ++ loc_instrs <- genLocInstrs ++ ++ -- Generate other instructions ++ mid_instrs <- stmtsToInstrs stmts ++ (!tail_instrs) <- stmtToInstrs tail ++ ++ let instrs = header_comment_instr `appOL` loc_instrs `appOL` mid_instrs `appOL` tail_instrs ++ ++ -- TODO: Then x86 backend runs @verifyBasicBlock@ here. How important it is to ++ -- have a valid CFG is an open question: This and the AArch64 and PPC NCGs ++ -- work fine without it. ++ ++ -- Code generation may introduce new basic block boundaries, which are ++ -- indicated by the NEWBLOCK instruction. We must split up the instruction ++ -- stream into basic blocks again. Also, we extract LDATAs here too. ++ (top, other_blocks, statics) = foldrOL mkBlocks ([], [], []) instrs ++ ++ return (BasicBlock id top : other_blocks, statics) ++ where ++ genLocInstrs :: NatM (OrdList Instr) ++ genLocInstrs = do ++ dbg <- getDebugBlock (entryLabel block) ++ case dblSourceTick =<< dbg of ++ Just (SourceNote span name) -> ++ do ++ fileId <- getFileId (srcSpanFile span) ++ let line = srcSpanStartLine span; col = srcSpanStartCol span ++ pure $ unitOL $ LOCATION fileId line col name ++ _ -> pure nilOL ++ ++mkBlocks :: ++ Instr -> ++ ([Instr], [GenBasicBlock Instr], [GenCmmDecl RawCmmStatics h g]) -> ++ ([Instr], [GenBasicBlock Instr], [GenCmmDecl RawCmmStatics h g]) ++mkBlocks (NEWBLOCK id) (instrs, blocks, statics) = ++ ([], BasicBlock id instrs : blocks, statics) ++mkBlocks (LDATA sec dat) (instrs, blocks, statics) = ++ (instrs, blocks, CmmData sec dat : statics) ++mkBlocks instr (instrs, blocks, statics) = ++ (instr : instrs, blocks, statics) ++ ++-- ----------------------------------------------------------------------------- ++ ++-- | Utilities ++ ++-- | Annotate an `Instr` with a `SDoc` comment ++ann :: SDoc -> Instr -> Instr ++ann doc instr {- debugIsOn -} = ANN doc instr ++{-# INLINE ann #-} ++ ++-- Using pprExpr will hide the AST, @ANN@ will end up in the assembly with ++-- -dppr-debug. The idea is that we can trivially see how a cmm expression ++-- ended up producing the assembly we see. By having the verbatim AST printed ++-- we can simply check the patterns that were matched to arrive at the assembly ++-- we generated. ++-- ++-- pprExpr will hide a lot of noise of the underlying data structure and print ++-- the expression into something that can be easily read by a human. However ++-- going back to the exact CmmExpr representation can be laborious and adds ++-- indirections to find the matches that lead to the assembly. ++-- ++-- An improvement could be to have ++-- ++-- (pprExpr genericPlatform e) <> parens (text. show e) ++-- ++-- to have the best of both worlds. ++-- ++-- Note: debugIsOn is too restrictive, it only works for debug compilers. ++-- However, we do not only want to inspect this for debug compilers. Ideally ++-- we'd have a check for -dppr-debug here already, such that we don't even ++-- generate the ANN expressions. However, as they are lazy, they shouldn't be ++-- forced until we actually force them, and without -dppr-debug they should ++-- never end up being forced. ++annExpr :: CmmExpr -> Instr -> Instr ++annExpr e {- debugIsOn -} = ANN (text . show $ e) ++-- annExpr e instr {- debugIsOn -} = ANN (pprExpr genericPlatform e) instr ++-- annExpr _ instr = instr ++{-# INLINE annExpr #-} ++ ++-- ----------------------------------------------------------------------------- ++-- Generating a table-branch ++ ++-- Note [RISCV64 Jump Tables] ++-- ~~~~~~~~~~~~~~~~~~~~~~~~~~ ++-- ++-- Jump tables are implemented by generating a table of relative addresses, ++-- where each entry is the relative offset to the target block from the first ++-- entry / table label (`generateJumpTableForInstr`). Using the jump table means ++-- loading the entry's value and jumping to the calculated absolute address ++-- (`genSwitch`). ++-- ++-- For example, this Cmm switch ++-- ++-- switch [1 .. 10] _s2wn::I64 { ++-- case 1 : goto c347; ++-- case 2 : goto c348; ++-- case 3 : goto c349; ++-- case 4 : goto c34a; ++-- case 5 : goto c34b; ++-- case 6 : goto c34c; ++-- case 7 : goto c34d; ++-- case 8 : goto c34e; ++-- case 9 : goto c34f; ++-- case 10 : goto c34g; ++-- } // CmmSwitch ++-- ++-- leads to this jump table in Assembly ++-- ++-- .section .rodata ++-- .balign 8 ++-- .Ln34G: ++-- .quad 0 ++-- .quad .Lc347-(.Ln34G)+0 ++-- .quad .Lc348-(.Ln34G)+0 ++-- .quad .Lc349-(.Ln34G)+0 ++-- .quad .Lc34a-(.Ln34G)+0 ++-- .quad .Lc34b-(.Ln34G)+0 ++-- .quad .Lc34c-(.Ln34G)+0 ++-- .quad .Lc34d-(.Ln34G)+0 ++-- .quad .Lc34e-(.Ln34G)+0 ++-- .quad .Lc34f-(.Ln34G)+0 ++-- .quad .Lc34g-(.Ln34G)+0 ++-- ++-- and this indexing code where the jump should be done (register t0 contains ++-- the index) ++-- ++-- addi t0, t0, 0 // silly move (ignore it) ++-- la t1, .Ln34G // load the table's address ++-- sll t0, t0, 3 // index * 8 -> offset in bytes ++-- add t0, t0, t1 // address of the table's entry ++-- ld t0, 0(t0) // load entry ++-- add t0, t0, t1 // relative to absolute address ++-- jalr zero, t0, 0 // jump to the block ++-- ++-- In object code (disassembled) the table looks like ++-- ++-- 0000000000000000 <.Ln34G>: ++-- ... ++-- 8: R_RISCV_ADD64 .Lc347 ++-- 8: R_RISCV_SUB64 .Ln34G ++-- 10: R_RISCV_ADD64 .Lc348 ++-- 10: R_RISCV_SUB64 .Ln34G ++-- 18: R_RISCV_ADD64 .Lc349 ++-- 18: R_RISCV_SUB64 .Ln34G ++-- 20: R_RISCV_ADD64 .Lc34a ++-- 20: R_RISCV_SUB64 .Ln34G ++-- 28: R_RISCV_ADD64 .Lc34b ++-- 28: R_RISCV_SUB64 .Ln34G ++-- 30: R_RISCV_ADD64 .Lc34c ++-- 30: R_RISCV_SUB64 .Ln34G ++-- 38: R_RISCV_ADD64 .Lc34d ++-- 38: R_RISCV_SUB64 .Ln34G ++-- 40: R_RISCV_ADD64 .Lc34e ++-- 40: R_RISCV_SUB64 .Ln34G ++-- 48: R_RISCV_ADD64 .Lc34f ++-- 48: R_RISCV_SUB64 .Ln34G ++-- 50: R_RISCV_ADD64 .Lc34g ++-- 50: R_RISCV_SUB64 .Ln34G ++-- ++-- I.e. the relative offset calculations are done by the linker via relocations. ++-- This seems to be PIC compatible; at least `scanelf` (pax-utils) does not ++-- complain. ++ ++ ++-- | Generate jump to jump table target ++-- ++-- The index into the jump table is calulated by evaluating @expr@. The ++-- corresponding table entry contains the relative address to jump to (relative ++-- to the jump table's first entry / the table's own label). ++genSwitch :: NCGConfig -> CmmExpr -> SwitchTargets -> NatM InstrBlock ++genSwitch config expr targets = do ++ (reg, fmt1, e_code) <- getSomeReg indexExpr ++ let fmt = II64 ++ targetReg <- getNewRegNat fmt ++ lbl <- getNewLabelNat ++ dynRef <- cmmMakeDynamicReference config DataReference lbl ++ (tableReg, fmt2, t_code) <- getSomeReg dynRef ++ let code = ++ toOL ++ [ COMMENT (text "indexExpr" <+> (text . show) indexExpr), ++ COMMENT (text "dynRef" <+> (text . show) dynRef) ++ ] ++ `appOL` e_code ++ `appOL` t_code ++ `appOL` toOL ++ [ COMMENT (ftext "Jump table for switch"), ++ -- index to offset into the table (relative to tableReg) ++ annExpr expr (SLL (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt1) reg) (OpImm (ImmInt 3))), ++ -- calculate table entry address ++ ADD (OpReg W64 targetReg) (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt2) tableReg), ++ -- load table entry (relative offset from tableReg (first entry) to target label) ++ LDRU II64 (OpReg W64 targetReg) (OpAddr (AddrRegImm targetReg (ImmInt 0))), ++ -- calculate absolute address of the target label ++ ADD (OpReg W64 targetReg) (OpReg W64 targetReg) (OpReg W64 tableReg), ++ -- prepare jump to target label ++ J_TBL ids (Just lbl) targetReg ++ ] ++ return code ++ where ++ -- See Note [Sub-word subtlety during jump-table indexing] in ++ -- GHC.CmmToAsm.X86.CodeGen for why we must first offset, then widen. ++ indexExpr0 = cmmOffset platform expr offset ++ -- We widen to a native-width register to sanitize the high bits ++ indexExpr = ++ CmmMachOp ++ (MO_UU_Conv expr_w (platformWordWidth platform)) ++ [indexExpr0] ++ expr_w = cmmExprWidth platform expr ++ (offset, ids) = switchTargetsToTable targets ++ platform = ncgPlatform config ++ ++-- | Generate jump table data (if required) ++-- ++-- The idea is to emit one table entry per case. The entry is the relative ++-- address of the block to jump to (relative to the table's first entry / ++-- table's own label.) The calculation itself is done by the linker. ++generateJumpTableForInstr :: ++ NCGConfig -> ++ Instr -> ++ Maybe (NatCmmDecl RawCmmStatics Instr) ++generateJumpTableForInstr config (J_TBL ids (Just lbl) _) = ++ let jumpTable = ++ map jumpTableEntryRel ids ++ where ++ jumpTableEntryRel Nothing = ++ CmmStaticLit (CmmInt 0 (ncgWordWidth config)) ++ jumpTableEntryRel (Just blockid) = ++ CmmStaticLit ++ ( CmmLabelDiffOff ++ blockLabel ++ lbl ++ 0 ++ (ncgWordWidth config) ++ ) ++ where ++ blockLabel = blockLbl blockid ++ in Just (CmmData (Section ReadOnlyData lbl) (CmmStaticsRaw lbl jumpTable)) ++generateJumpTableForInstr _ _ = Nothing ++ ++-- ----------------------------------------------------------------------------- ++-- Top-level of the instruction selector ++ ++stmtsToInstrs :: ++ -- | Cmm Statements ++ [CmmNode O O] -> ++ -- | Resulting instruction ++ NatM InstrBlock ++stmtsToInstrs stmts = concatOL <$> mapM stmtToInstrs stmts ++ ++stmtToInstrs :: ++ CmmNode e x -> ++ -- | Resulting instructions ++ NatM InstrBlock ++stmtToInstrs stmt = do ++ config <- getConfig ++ platform <- getPlatform ++ case stmt of ++ CmmUnsafeForeignCall target result_regs args -> ++ genCCall target result_regs args ++ CmmComment s -> pure (unitOL (COMMENT (ftext s))) ++ CmmTick {} -> pure nilOL ++ CmmAssign reg src ++ | isFloatType ty -> assignReg_FltCode format reg src ++ | otherwise -> assignReg_IntCode format reg src ++ where ++ ty = cmmRegType reg ++ format = cmmTypeFormat ty ++ CmmStore addr src _alignment ++ | isFloatType ty -> assignMem_FltCode format addr src ++ | otherwise -> assignMem_IntCode format addr src ++ where ++ ty = cmmExprType platform src ++ format = cmmTypeFormat ty ++ CmmBranch id -> genBranch id ++ -- We try to arrange blocks such that the likely branch is the fallthrough ++ -- in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here. ++ CmmCondBranch arg true false _prediction -> ++ genCondBranch true false arg ++ CmmSwitch arg ids -> genSwitch config arg ids ++ CmmCall {cml_target = arg} -> genJump arg ++ CmmUnwind _regs -> pure nilOL ++ -- Intentionally not have a default case here: If anybody adds a ++ -- constructor, the compiler should force them to think about this here. ++ CmmForeignCall {} -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt) ++ CmmEntry {} -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt) ++ ++-------------------------------------------------------------------------------- ++ ++-- | 'InstrBlock's are the insn sequences generated by the insn selectors. ++-- ++-- They are really trees of insns to facilitate fast appending, where a ++-- left-to-right traversal yields the insns in the correct order. ++type InstrBlock = ++ OrdList Instr ++ ++-- | Register's passed up the tree. ++-- ++-- If the stix code forces the register to live in a pre-decided machine ++-- register, it comes out as @Fixed@; otherwise, it comes out as @Any@, and the ++-- parent can decide which register to put it in. ++data Register ++ = Fixed Format Reg InstrBlock ++ | Any Format (Reg -> InstrBlock) ++ ++-- | Sometimes we need to change the Format of a register. Primarily during ++-- conversion. ++swizzleRegisterRep :: Format -> Register -> Register ++swizzleRegisterRep format' (Fixed _format reg code) = Fixed format' reg code ++swizzleRegisterRep format' (Any _format codefn) = Any format' codefn ++ ++-- | Grab a `Reg` for a `CmmReg` ++-- ++-- `LocalReg`s are assigned virtual registers (`RegVirtual`), `GlobalReg`s are ++-- assigned real registers (`RegReal`). It is an error if a `GlobalReg` is not a ++-- STG register. ++getRegisterReg :: Platform -> CmmReg -> Reg ++getRegisterReg _ (CmmLocal (LocalReg u pk)) = ++ RegVirtual $ mkVirtualReg u (cmmTypeFormat pk) ++getRegisterReg platform (CmmGlobal mid) = ++ case globalRegMaybe platform (globalRegUseGlobalReg mid) of ++ Just reg -> RegReal reg ++ Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) ++ ++-- ----------------------------------------------------------------------------- ++-- General things for putting together code sequences ++ ++-- | Compute an expression into any register ++getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock) ++getSomeReg expr = do ++ r <- getRegister expr ++ case r of ++ Any rep code -> do ++ newReg <- getNewRegNat rep ++ return (newReg, rep, code newReg) ++ Fixed rep reg code -> ++ return (reg, rep, code) ++ ++-- | Compute an expression into any floating-point register ++-- ++-- If the initial expression is not a floating-point expression, finally move ++-- the result into a floating-point register. ++getFloatReg :: (HasCallStack) => CmmExpr -> NatM (Reg, Format, InstrBlock) ++getFloatReg expr = do ++ r <- getRegister expr ++ case r of ++ Any rep code | isFloatFormat rep -> do ++ newReg <- getNewRegNat rep ++ return (newReg, rep, code newReg) ++ Any II32 code -> do ++ newReg <- getNewRegNat FF32 ++ return (newReg, FF32, code newReg) ++ Any II64 code -> do ++ newReg <- getNewRegNat FF64 ++ return (newReg, FF64, code newReg) ++ Any _w _code -> do ++ config <- getConfig ++ pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr) ++ -- can't do much for fixed. ++ Fixed rep reg code -> ++ return (reg, rep, code) ++ ++-- | Map `CmmLit` to `OpImm` ++-- ++-- N.B. this is a partial function, because not all `CmmLit`s have an immediate ++-- representation. ++litToImm' :: CmmLit -> Operand ++litToImm' = OpImm . litToImm ++ ++-- | Compute a `CmmExpr` into a `Register` ++getRegister :: CmmExpr -> NatM Register ++getRegister e = do ++ config <- getConfig ++ getRegister' config (ncgPlatform config) e ++ ++-- | The register width to be used for an operation on the given width ++-- operand. ++opRegWidth :: Width -> Width ++opRegWidth W64 = W64 ++opRegWidth W32 = W32 ++opRegWidth W16 = W32 ++opRegWidth W8 = W32 ++opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w) ++ ++-- Note [Signed arithmetic on RISCV64] ++-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++-- Handling signed arithmetic on sub-word-size values on RISCV64 is a bit ++-- tricky as Cmm's type system does not capture signedness. While 32-bit values ++-- are fairly easy to handle due to RISCV64's 32-bit instruction variants ++-- (denoted by use of %wN registers), 16- and 8-bit values require quite some ++-- care. ++-- ++-- We handle 16-and 8-bit values by using the 32-bit operations and ++-- sign-/zero-extending operands and truncate results as necessary. For ++-- simplicity we maintain the invariant that a register containing a ++-- sub-word-size value always contains the zero-extended form of that value ++-- in between operations. ++-- ++-- For instance, consider the program, ++-- ++-- test(bits64 buffer) ++-- bits8 a = bits8[buffer]; ++-- bits8 b = %mul(a, 42); ++-- bits8 c = %not(b); ++-- bits8 d = %shrl(c, 4::bits8); ++-- return (d); ++-- } ++-- ++-- This program begins by loading `a` from memory, for which we use a ++-- zero-extended byte-size load. We next sign-extend `a` to 32-bits, and use a ++-- 32-bit multiplication to compute `b`, and truncate the result back down to ++-- 8-bits. ++-- ++-- Next we compute `c`: The `%not` requires no extension of its operands, but ++-- we must still truncate the result back down to 8-bits. Finally the `%shrl` ++-- requires no extension and no truncate since we can assume that ++-- `c` is zero-extended. ++-- ++-- The "RISC-V Sign Extension Optimizations" LLVM tech talk presentation by ++-- Craig Topper covers possible future improvements ++-- (https://llvm.org/devmtg/2022-11/slides/TechTalk21-RISC-VSignExtensionOptimizations.pdf) ++-- ++-- ++-- Note [Handling PIC on RV64] ++-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++-- RV64 does not have a special PIC register, the general approach is to simply ++-- do PC-relative addressing or go through the GOT. There is assembly support ++-- for both. ++-- ++-- rv64 assembly has a `la` (load address) pseudo-instruction, that allows ++-- loading a label's address into a register. The instruction is desugared into ++-- different addressing modes, e.g. PC-relative addressing: ++-- ++-- 1: lui rd1, %pcrel_hi(label) ++-- addi rd1, %pcrel_lo(1b) ++-- ++-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dModifiers.html, ++-- PIC can be enabled/disabled through ++-- ++-- .option pic ++-- ++-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dDirectives.html#RISC_002dV_002dDirectives ++-- ++-- CmmGlobal @PicBaseReg@'s are generated in @GHC.CmmToAsm.PIC@ in the ++-- @cmmMakePicReference@. This is in turn called from @cmmMakeDynamicReference@ ++-- also in @Cmm.CmmToAsm.PIC@ from where it is also exported. There are two ++-- callsites for this. One is in this module to produce the @target@ in @genCCall@ ++-- the other is in @GHC.CmmToAsm@ in @cmmExprNative@. ++-- ++-- Conceptually we do not want any special PicBaseReg to be used on RV64. If ++-- we want to distinguish between symbol loading, we need to address this through ++-- the way we load it, not through a register. ++-- ++ ++getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register ++-- OPTIMIZATION WARNING: CmmExpr rewrites ++-- 1. Rewrite: Reg + (-n) => Reg - n ++-- TODO: this expression shouldn't even be generated to begin with. ++getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) ++ | i < 0 = ++ getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)]) ++getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) ++ | i < 0 = ++ getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)]) ++-- Generic case. ++getRegister' config plat expr = ++ case expr of ++ CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)) -> ++ -- See Note [Handling PIC on RV64] ++ pprPanic "getRegister': There's no PIC base register on RISCV" (ppr PicBaseReg) ++ CmmLit lit -> ++ case lit of ++ CmmInt 0 w -> pure $ Fixed (intFormat w) zeroReg nilOL ++ CmmInt i w -> ++ -- narrowU is important: Negative immediates may be ++ -- sign-extended on load! ++ let imm = OpImm . ImmInteger $ narrowU w i ++ in pure (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) imm))) ++ CmmFloat 0 w -> do ++ let op = litToImm' lit ++ pure (Any (floatFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) op))) ++ CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr) ++ CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr) ++ CmmFloat f W32 -> do ++ let word = castFloatToWord32 (fromRational f) :: Word32 ++ intReg <- getNewRegNat (intFormat W32) ++ return ++ ( Any ++ (floatFormat W32) ++ ( \dst -> ++ toOL ++ [ annExpr expr ++ $ MOV (OpReg W32 intReg) (OpImm (ImmInteger (fromIntegral word))), ++ MOV (OpReg W32 dst) (OpReg W32 intReg) ++ ] ++ ) ++ ) ++ CmmFloat f W64 -> do ++ let word = castDoubleToWord64 (fromRational f) :: Word64 ++ intReg <- getNewRegNat (intFormat W64) ++ return ++ ( Any ++ (floatFormat W64) ++ ( \dst -> ++ toOL ++ [ annExpr expr ++ $ MOV (OpReg W64 intReg) (OpImm (ImmInteger (fromIntegral word))), ++ MOV (OpReg W64 dst) (OpReg W64 intReg) ++ ] ++ ) ++ ) ++ CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr) ++ CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr) ++ CmmLabel lbl -> do ++ let op = OpImm (ImmCLbl lbl) ++ rep = cmmLitType plat lit ++ format = cmmTypeFormat rep ++ return (Any format (\dst -> unitOL $ annExpr expr (LDR format (OpReg (formatToWidth format) dst) op))) ++ CmmLabelOff lbl off | isNbitEncodeable 12 (fromIntegral off) -> do ++ let op = OpImm (ImmIndex lbl off) ++ rep = cmmLitType plat lit ++ format = cmmTypeFormat rep ++ return (Any format (\dst -> unitOL $ LDR format (OpReg (formatToWidth format) dst) op)) ++ CmmLabelOff lbl off -> do ++ let op = litToImm' (CmmLabel lbl) ++ rep = cmmLitType plat lit ++ format = cmmTypeFormat rep ++ width = typeWidth rep ++ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width) ++ return ++ ( Any ++ format ++ ( \dst -> ++ off_code ++ `snocOL` LDR format (OpReg (formatToWidth format) dst) op ++ `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r) ++ ) ++ ) ++ CmmLabelDiffOff {} -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) ++ CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) ++ CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) ++ CmmLoad mem rep _ -> do ++ let format = cmmTypeFormat rep ++ width = typeWidth rep ++ Amode addr addr_code <- getAmode plat width mem ++ case width of ++ w ++ | w <= W64 -> ++ -- Load without sign-extension. See Note [Signed arithmetic on RISCV64] ++ pure ++ ( Any ++ format ++ ( \dst -> ++ addr_code ++ `snocOL` LDRU format (OpReg width dst) (OpAddr addr) ++ ) ++ ) ++ _ -> ++ pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr) ++ CmmStackSlot _ _ -> ++ pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr) ++ CmmReg reg -> ++ return ++ ( Fixed ++ (cmmTypeFormat (cmmRegType reg)) ++ (getRegisterReg plat reg) ++ nilOL ++ ) ++ CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do ++ getRegister' config plat ++ $ CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] ++ where ++ width = typeWidth (cmmRegType reg) ++ CmmRegOff reg off -> do ++ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width) ++ (reg, _format, code) <- getSomeReg $ CmmReg reg ++ return ++ $ Any ++ (intFormat width) ++ ( \dst -> ++ off_code ++ `appOL` code ++ `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r) ++ ) ++ where ++ width = typeWidth (cmmRegType reg) ++ ++ -- Handle MO_RelaxedRead as a normal CmmLoad, to allow ++ -- non-trivial addressing modes to be used. ++ CmmMachOp (MO_RelaxedRead w) [e] -> ++ getRegister (CmmLoad e (cmmBits w) NaturallyAligned) ++ -- for MachOps, see GHC.Cmm.MachOp ++ -- For CmmMachOp, see GHC.Cmm.Expr ++ CmmMachOp op [e] -> do ++ (reg, _format, code) <- getSomeReg e ++ case op of ++ MO_Not w -> return $ Any (intFormat w) $ \dst -> ++ let w' = opRegWidth w ++ in code ++ `snocOL` ++ -- pseudo instruction `not` is `xori rd, rs, -1` ++ ann (text "not") (XORI (OpReg w' dst) (OpReg w' reg) (OpImm (ImmInt (-1)))) ++ `appOL` truncateReg w' w dst -- See Note [Signed arithmetic on RISCV64] ++ MO_S_Neg w -> negate code w reg ++ MO_F_Neg w -> ++ return ++ $ Any ++ (floatFormat w) ++ ( \dst -> ++ code ++ `snocOL` NEG (OpReg w dst) (OpReg w reg) ++ ) ++ -- TODO: Can this case happen? ++ MO_SF_Conv from to | from < W32 -> do ++ -- extend to the smallest available representation ++ (reg_x, code_x) <- signExtendReg from W32 reg ++ pure ++ $ Any ++ (floatFormat to) ++ ( \dst -> ++ code ++ `appOL` code_x ++ `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg_x)) -- (Signed ConVerT Float) ++ ) ++ MO_SF_Conv from to -> ++ pure ++ $ Any ++ (floatFormat to) ++ ( \dst -> ++ code ++ `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float) ++ ) ++ MO_FS_Conv from to ++ | to < W32 -> ++ pure ++ $ Any ++ (intFormat to) ++ ( \dst -> ++ code ++ `snocOL` ++ -- W32 is the smallest width to convert to. Decrease width afterwards. ++ annExpr expr (FCVT FloatToInt (OpReg W32 dst) (OpReg from reg)) ++ `appOL` signExtendAdjustPrecission W32 to dst dst -- (float convert (-> zero) signed) ++ ) ++ MO_FS_Conv from to -> ++ pure ++ $ Any ++ (intFormat to) ++ ( \dst -> ++ code ++ `snocOL` annExpr expr (FCVT FloatToInt (OpReg to dst) (OpReg from reg)) ++ `appOL` truncateReg from to dst -- (float convert (-> zero) signed) ++ ) ++ MO_UU_Conv from to ++ | from <= to -> ++ pure ++ $ Any ++ (intFormat to) ++ ( \dst -> ++ code ++ `snocOL` annExpr e (MOV (OpReg to dst) (OpReg from reg)) ++ ) ++ MO_UU_Conv from to -> ++ pure ++ $ Any ++ (intFormat to) ++ ( \dst -> ++ code ++ `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg)) ++ `appOL` truncateReg from to dst ++ ) ++ MO_SS_Conv from to -> ss_conv from to reg code ++ MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` annExpr e (FCVT FloatToFloat (OpReg to dst) (OpReg from reg))) ++ -- Conversions ++ -- TODO: Duplication with MO_UU_Conv ++ MO_XX_Conv from to ++ | to < from -> ++ pure ++ $ Any ++ (intFormat to) ++ ( \dst -> ++ code ++ `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg)) ++ `appOL` truncateReg from to dst ++ ) ++ MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e ++ MO_AlignmentCheck align wordWidth -> do ++ reg <- getRegister' config plat e ++ addAlignmentCheck align wordWidth reg ++ x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr) ++ where ++ -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits ++ -- See Note [Signed arithmetic on RISCV64]. ++ negate code w reg = do ++ let w' = opRegWidth w ++ (reg', code_sx) <- signExtendReg w w' reg ++ return $ Any (intFormat w) $ \dst -> ++ code ++ `appOL` code_sx ++ `snocOL` NEG (OpReg w' dst) (OpReg w' reg') ++ `appOL` truncateReg w' w dst ++ ++ ss_conv from to reg code ++ | from < to = do ++ pure $ Any (intFormat to) $ \dst -> ++ code ++ `appOL` signExtend from to reg dst ++ `appOL` truncateReg from to dst ++ | from > to = ++ pure $ Any (intFormat to) $ \dst -> ++ code ++ `appOL` toOL ++ [ ann ++ (text "MO_SS_Conv: narrow register signed" <+> ppr reg <+> ppr from <> text "->" <> ppr to) ++ (SLL (OpReg to dst) (OpReg from reg) (OpImm (ImmInt shift))), ++ -- signed right shift ++ SRA (OpReg to dst) (OpReg to dst) (OpImm (ImmInt shift)) ++ ] ++ `appOL` truncateReg from to dst ++ | otherwise = ++ -- No conversion necessary: Just copy. ++ pure $ Any (intFormat from) $ \dst -> ++ code `snocOL` MOV (OpReg from dst) (OpReg from reg) ++ where ++ shift = 64 - (widthInBits from - widthInBits to) ++ ++ -- Dyadic machops: ++ -- ++ -- The general idea is: ++ -- compute x <- x ++ -- compute x <- y ++ -- OP x, x, x ++ -- ++ -- TODO: for now we'll only implement the 64bit versions. And rely on the ++ -- fallthrough to alert us if things go wrong! ++ -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring ++ -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg ++ CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr' ++ CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr' ++ -- 1. Compute Reg +/- n directly. ++ -- For Add/Sub we can directly encode 12bits, or 12bits lsl #12. ++ CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt n _)] ++ | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) ++ where ++ -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12. ++ w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) ++ r' = getRegisterReg plat reg ++ CmmMachOp (MO_Sub w) [CmmReg reg, CmmLit (CmmInt n _)] ++ | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) ++ where ++ -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12. ++ w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) ++ r' = getRegisterReg plat reg ++ CmmMachOp (MO_U_Quot w) [x, y] | w == W8 || w == W16 -> do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, format_y, code_y) <- getSomeReg y ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` truncateReg (formatToWidth format_x) w reg_x ++ `appOL` code_y ++ `appOL` truncateReg (formatToWidth format_y) w reg_y ++ `snocOL` annExpr expr (DIVU (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)) ++ ) ++ ++ -- 2. Shifts. x << n, x >> n. ++ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] ++ | w == W32, ++ 0 <= n, ++ n < 32 -> do ++ (reg_x, _format_x, code_x) <- getSomeReg x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) ++ `appOL` truncateReg w w dst ++ ) ++ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] ++ | w == W64, ++ 0 <= n, ++ n < 64 -> do ++ (reg_x, _format_x, code_x) <- getSomeReg x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) ++ `appOL` truncateReg w w dst ++ ) ++ CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` code_x' ++ `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n))) ++ ) ++ CmmMachOp (MO_S_Shr w) [x, y] -> do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, _format_y, code_y) <- getSomeReg y ++ (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` code_x' ++ `appOL` code_y ++ `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y)) ++ ) ++ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] ++ | w == W8, ++ 0 <= n, ++ n < 8 -> do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` truncateReg (formatToWidth format_x) w reg_x ++ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) ++ ) ++ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] ++ | w == W16, ++ 0 <= n, ++ n < 16 -> do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` truncateReg (formatToWidth format_x) w reg_x ++ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) ++ ) ++ CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, _format_y, code_y) <- getSomeReg y ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` code_y ++ `appOL` truncateReg (formatToWidth format_x) w reg_x ++ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)) ++ ) ++ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] ++ | w == W32, ++ 0 <= n, ++ n < 32 -> do ++ (reg_x, _format_x, code_x) <- getSomeReg x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) ++ ) ++ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] ++ | w == W64, ++ 0 <= n, ++ n < 64 -> do ++ (reg_x, _format_x, code_x) <- getSomeReg x ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) ++ ) ++ ++ -- 3. Logic &&, || ++ CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)] ++ | fitsIn12bitImm n -> ++ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) ++ where ++ w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) ++ r' = getRegisterReg plat reg ++ CmmMachOp (MO_Or w) [CmmReg reg, CmmLit (CmmInt n _)] ++ | fitsIn12bitImm n -> ++ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORI (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) ++ where ++ w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) ++ r' = getRegisterReg plat reg ++ ++ -- Generic binary case. ++ CmmMachOp op [x, y] -> do ++ let -- A "plain" operation. ++ bitOp w op = do ++ -- compute x <- x ++ -- compute x <- y ++ -- x, x, x ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, format_y, code_y) <- getSomeReg y ++ massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOp: incompatible" ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` code_y ++ `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y) ++ ) ++ ++ -- A (potentially signed) integer operation. ++ -- In the case of 8- and 16-bit signed arithmetic we must first ++ -- sign-extend both arguments to 32-bits. ++ -- See Note [Signed arithmetic on RISCV64]. ++ intOp is_signed w op = do ++ -- compute x <- x ++ -- compute x <- y ++ -- x, x, x ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, format_y, code_y) <- getSomeReg y ++ massertPpr (isIntFormat format_x && isIntFormat format_y) $ text "intOp: non-int" ++ -- This is the width of the registers on which the operation ++ -- should be performed. ++ let w' = opRegWidth w ++ signExt r ++ | not is_signed = return (r, nilOL) ++ | otherwise = signExtendReg w w' r ++ (reg_x_sx, code_x_sx) <- signExt reg_x ++ (reg_y_sx, code_y_sx) <- signExt reg_y ++ return $ Any (intFormat w) $ \dst -> ++ code_x ++ `appOL` code_y ++ `appOL` ++ -- sign-extend both operands ++ code_x_sx ++ `appOL` code_y_sx ++ `appOL` op (OpReg w' dst) (OpReg w' reg_x_sx) (OpReg w' reg_y_sx) ++ `appOL` truncateReg w' w dst -- truncate back to the operand's original width ++ floatOp w op = do ++ (reg_fx, format_x, code_fx) <- getFloatReg x ++ (reg_fy, format_y, code_fy) <- getFloatReg y ++ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatOp: non-float" ++ return ++ $ Any ++ (floatFormat w) ++ ( \dst -> ++ code_fx ++ `appOL` code_fy ++ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy) ++ ) ++ ++ -- need a special one for conditionals, as they return ints ++ floatCond w op = do ++ (reg_fx, format_x, code_fx) <- getFloatReg x ++ (reg_fy, format_y, code_fy) <- getFloatReg y ++ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatCond: non-float" ++ return ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_fx ++ `appOL` code_fy ++ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy) ++ ) ++ ++ case op of ++ -- Integer operations ++ -- Add/Sub should only be Integer Options. ++ MO_Add w -> intOp False w (\d x y -> unitOL $ annExpr expr (ADD d x y)) ++ -- TODO: Handle sub-word case ++ MO_Sub w -> intOp False w (\d x y -> unitOL $ annExpr expr (SUB d x y)) ++ -- N.B. We needn't sign-extend sub-word size (in)equality comparisons ++ -- since we don't care about ordering. ++ MO_Eq w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ)) ++ MO_Ne w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y NE)) ++ -- Signed multiply/divide ++ MO_Mul w -> intOp True w (\d x y -> unitOL $ annExpr expr (MUL d x y)) ++ MO_S_MulMayOflo w -> do_mul_may_oflo w x y ++ MO_S_Quot w -> intOp True w (\d x y -> unitOL $ annExpr expr (DIV d x y)) ++ MO_S_Rem w -> intOp True w (\d x y -> unitOL $ annExpr expr (REM d x y)) ++ -- Unsigned multiply/divide ++ MO_U_Quot w -> intOp False w (\d x y -> unitOL $ annExpr expr (DIVU d x y)) ++ MO_U_Rem w -> intOp False w (\d x y -> unitOL $ annExpr expr (REMU d x y)) ++ -- Signed comparisons ++ MO_S_Ge w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGE)) ++ MO_S_Le w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLE)) ++ MO_S_Gt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGT)) ++ MO_S_Lt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLT)) ++ -- Unsigned comparisons ++ MO_U_Ge w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGE)) ++ MO_U_Le w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULE)) ++ MO_U_Gt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGT)) ++ MO_U_Lt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULT)) ++ -- Floating point arithmetic ++ MO_F_Add w -> floatOp w (\d x y -> unitOL $ annExpr expr (ADD d x y)) ++ MO_F_Sub w -> floatOp w (\d x y -> unitOL $ annExpr expr (SUB d x y)) ++ MO_F_Mul w -> floatOp w (\d x y -> unitOL $ annExpr expr (MUL d x y)) ++ MO_F_Quot w -> floatOp w (\d x y -> unitOL $ annExpr expr (DIV d x y)) ++ -- Floating point comparison ++ MO_F_Eq w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ)) ++ MO_F_Ne w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y NE)) ++ MO_F_Ge w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGE)) ++ MO_F_Le w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FLE)) -- x <= y <=> y > x ++ MO_F_Gt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGT)) ++ MO_F_Lt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FLT)) -- x < y <=> y >= x ++ ++ -- Bitwise operations ++ MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y)) ++ MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y)) ++ MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y)) ++ MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y)) ++ MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y)) ++ MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y)) ++ op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr ++ ++ -- Generic ternary case. ++ CmmMachOp op [x, y, z] -> ++ case op of ++ -- Floating-point fused multiply-add operations ++ -- ++ -- x86 fmadd x * y + z <=> RISCV64 fmadd : d = r1 * r2 + r3 ++ -- x86 fmsub x * y - z <=> RISCV64 fnmsub: d = r1 * r2 - r3 ++ -- x86 fnmadd - x * y + z <=> RISCV64 fmsub : d = - r1 * r2 + r3 ++ -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd: d = - r1 * r2 - r3 ++ MO_FMA var w -> case var of ++ FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a) ++ FMSub -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a) ++ FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a) ++ FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a) ++ _ -> ++ pprPanic "getRegister' (unhandled ternary CmmMachOp): " ++ $ pprMachOp op ++ <+> text "in" ++ <+> pdoc plat expr ++ where ++ float3Op w op = do ++ (reg_fx, format_x, code_fx) <- getFloatReg x ++ (reg_fy, format_y, code_fy) <- getFloatReg y ++ (reg_fz, format_z, code_fz) <- getFloatReg z ++ massertPpr (isFloatFormat format_x && isFloatFormat format_y && isFloatFormat format_z) ++ $ text "float3Op: non-float" ++ pure ++ $ Any (floatFormat w) ++ $ \dst -> ++ code_fx ++ `appOL` code_fy ++ `appOL` code_fz ++ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy) (OpReg w reg_fz) ++ CmmMachOp _op _xs -> ++ pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr) ++ where ++ isNbitEncodeable :: Int -> Integer -> Bool ++ isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift) ++ -- N.B. MUL does not set the overflow flag. ++ -- Return 0 when the operation cannot overflow, /= 0 otherwise ++ do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register ++ do_mul_may_oflo w _x _y | w > W64 = pprPanic "Cannot multiply larger than 64bit" (ppr w) ++ do_mul_may_oflo w@W64 x y = do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, format_y, code_y) <- getSomeReg y ++ -- TODO: Can't we clobber reg_x and reg_y to save registers? ++ lo <- getNewRegNat II64 ++ hi <- getNewRegNat II64 ++ -- TODO: Overhaul CSET: 3rd operand isn't needed for SNEZ ++ let nonSense = OpImm (ImmInt 0) ++ pure ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x ++ `appOL` code_y ++ `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y ++ `appOL` toOL ++ [ annExpr expr (MULH (OpReg w hi) (OpReg w reg_x) (OpReg w reg_y)), ++ MUL (OpReg w lo) (OpReg w reg_x) (OpReg w reg_y), ++ SRA (OpReg w lo) (OpReg w lo) (OpImm (ImmInt (widthInBits W64 - 1))), ++ ann ++ (text "Set flag if result of MULH contains more than sign bits.") ++ (XOR (OpReg w hi) (OpReg w hi) (OpReg w lo)), ++ CSET (OpReg w dst) (OpReg w hi) nonSense NE ++ ] ++ ) ++ do_mul_may_oflo w x y = do ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, format_y, code_y) <- getSomeReg y ++ let width_x = formatToWidth format_x ++ width_y = formatToWidth format_y ++ if w > width_x && w > width_y ++ then ++ pure ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ -- 8bit * 8bit cannot overflow 16bit ++ -- 16bit * 16bit cannot overflow 32bit ++ -- 32bit * 32bit cannot overflow 64bit ++ unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 0))) ++ ) ++ else do ++ let use32BitMul = w <= W32 && width_x <= W32 && width_y <= W32 ++ nonSense = OpImm (ImmInt 0) ++ if use32BitMul ++ then do ++ narrowedReg <- getNewRegNat II64 ++ pure ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ code_x ++ `appOL` signExtend (formatToWidth format_x) W32 reg_x reg_x ++ `appOL` code_y ++ `appOL` signExtend (formatToWidth format_y) W32 reg_y reg_y ++ `snocOL` annExpr expr (MUL (OpReg W32 dst) (OpReg W32 reg_x) (OpReg W32 reg_y)) ++ `appOL` signExtendAdjustPrecission W32 w dst narrowedReg ++ `appOL` toOL ++ [ ann ++ (text "Check if the multiplied value fits in the narrowed register") ++ (SUB (OpReg w dst) (OpReg w dst) (OpReg w narrowedReg)), ++ CSET (OpReg w dst) (OpReg w dst) nonSense NE ++ ] ++ ) ++ else ++ pure ++ $ Any ++ (intFormat w) ++ ( \dst -> ++ -- Do not handle this unlikely case. Just tell that it may overflow. ++ unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 1))) ++ ) ++ ++-- | Instructions to sign-extend the value in the given register from width @w@ ++-- up to width @w'@. ++signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr) ++signExtendReg w _w' r | w == W64 = pure (r, nilOL) ++signExtendReg w w' r = do ++ r' <- getNewRegNat (intFormat w') ++ let instrs = signExtend w w' r r' ++ pure (r', instrs) ++ ++-- | Sign extends to 64bit, if needed ++-- ++-- Source `Reg` @r@ stays untouched, while the conversion happens on destination ++-- `Reg` @r'@. ++signExtend :: Width -> Width -> Reg -> Reg -> OrdList Instr ++signExtend w w' _r _r' | w > w' = pprPanic "This is not a sign extension, but a truncation." $ ppr w <> text "->" <+> ppr w' ++signExtend w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w' ++signExtend w w' r r' | w == W64 && w' == W64 && r == r' = nilOL ++signExtend w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r) ++signExtend w w' r r' ++ | w == W32 && w' == W64 = ++ unitOL ++ $ ann ++ (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w') ++ -- `ADDIW r r 0` is the pseudo-op SEXT.W ++ (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0))) ++signExtend w w' r r' = ++ toOL ++ [ ann ++ (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w') ++ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))), ++ -- signed (arithmetic) right shift ++ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift)) ++ ] ++ where ++ shift = 64 - widthInBits w ++ ++-- | Sign extends to 64bit, if needed and reduces the precission to the target `Width` (@w'@) ++-- ++-- Source `Reg` @r@ stays untouched, while the conversion happens on destination ++-- `Reg` @r'@. ++signExtendAdjustPrecission :: Width -> Width -> Reg -> Reg -> OrdList Instr ++signExtendAdjustPrecission w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w' ++signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 && r == r' = nilOL ++signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r) ++signExtendAdjustPrecission w w' r r' ++ | w == W32 && w' == W64 = ++ unitOL ++ $ ann ++ (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w') ++ -- `ADDIW r r 0` is the pseudo-op SEXT.W ++ (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0))) ++signExtendAdjustPrecission w w' r r' ++ | w > w' = ++ toOL ++ [ ann ++ (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w') ++ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))), ++ -- signed (arithmetic) right shift ++ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift)) ++ ] ++ where ++ shift = 64 - widthInBits w' ++signExtendAdjustPrecission w w' r r' = ++ toOL ++ [ ann ++ (text "sign extend register" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w') ++ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))), ++ -- signed (arithmetic) right shift ++ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift)) ++ ] ++ where ++ shift = 64 - widthInBits w ++ ++-- | Instructions to truncate the value in the given register from width @w@ ++-- to width @w'@. ++-- ++-- In other words, it just cuts the width out of the register. N.B.: This ++-- ignores signedness (no sign extension takes place)! ++truncateReg :: Width -> Width -> Reg -> OrdList Instr ++truncateReg _w w' _r | w' == W64 = nilOL ++truncateReg _w w' r | w' > W64 = pprPanic "Cannot truncate to width bigger than register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w' ++truncateReg w _w' r | w > W64 = pprPanic "Unexpected register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w ++truncateReg w w' r = ++ toOL ++ [ ann ++ (text "truncate register" <+> ppr r <+> ppr w <> text "->" <> ppr w') ++ (SLL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))), ++ -- SHL ignores signedness! ++ SRL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift)) ++ ] ++ where ++ shift = 64 - widthInBits w' ++ ++-- | Given a 'Register', produce a new 'Register' with an instruction block ++-- which will check the value for alignment. Used for @-falignment-sanitisation@. ++addAlignmentCheck :: Int -> Width -> Register -> NatM Register ++addAlignmentCheck align wordWidth reg = do ++ jumpReg <- getNewRegNat II64 ++ cmpReg <- getNewRegNat II64 ++ okayLblId <- getBlockIdNat ++ ++ pure $ case reg of ++ Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt jumpReg cmpReg okayLblId reg) ++ Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt jumpReg cmpReg okayLblId reg) ++ where ++ check :: Format -> Reg -> Reg -> BlockId -> Reg -> InstrBlock ++ check fmt jumpReg cmpReg okayLblId reg = ++ let width = formatToWidth fmt ++ in assert (not $ isFloatFormat fmt) ++ $ toOL ++ [ ann ++ (text "Alignment check - alignment: " <> int align <> text ", word width: " <> text (show wordWidth)) ++ (AND (OpReg width cmpReg) (OpReg width reg) (OpImm $ ImmInt $ align - 1)), ++ BCOND EQ (OpReg width cmpReg) zero (TBlock okayLblId), ++ COMMENT (text "Alignment check failed"), ++ LDR II64 (OpReg W64 jumpReg) (OpImm $ ImmCLbl mkBadAlignmentLabel), ++ B (TReg jumpReg), ++ NEWBLOCK okayLblId ++ ] ++ ++-- ----------------------------------------------------------------------------- ++-- The 'Amode' type: Memory addressing modes passed up the tree. ++data Amode = Amode AddrMode InstrBlock ++ ++-- | Provide the value of a `CmmExpr` with an `Amode` ++-- ++-- N.B. this function should be used to provide operands to load and store ++-- instructions with signed 12bit wide immediates (S & I types). For other ++-- immediate sizes and formats (e.g. B type uses multiples of 2) this function ++-- would need to be adjusted. ++getAmode :: ++ Platform -> ++ -- | width of loaded value ++ Width -> ++ CmmExpr -> ++ NatM Amode ++-- TODO: Specialize stuff we can destructure here. ++ ++-- LDR/STR: Immediate can be represented with 12bits ++getAmode platform w (CmmRegOff reg off) ++ | w <= W64, ++ fitsIn12bitImm off = ++ return $ Amode (AddrRegImm reg' off') nilOL ++ where ++ reg' = getRegisterReg platform reg ++ off' = ImmInt off ++ ++-- For Stores we often see something like this: ++-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2) ++-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n ] ++-- for `n` in range. ++getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')]) ++ | fitsIn12bitImm off = ++ do ++ (reg, _format, code) <- getSomeReg expr ++ return $ Amode (AddrRegImm reg (ImmInteger off)) code ++getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')]) ++ | fitsIn12bitImm (-off) = ++ do ++ (reg, _format, code) <- getSomeReg expr ++ return $ Amode (AddrRegImm reg (ImmInteger (-off))) code ++ ++-- Generic case ++getAmode _platform _ expr = ++ do ++ (reg, _format, code) <- getSomeReg expr ++ return $ Amode (AddrReg reg) code ++ ++-- ----------------------------------------------------------------------------- ++-- Generating assignments ++ ++-- Assignments are really at the heart of the whole code generation ++-- business. Almost all top-level nodes of any real importance are ++-- assignments, which correspond to loads, stores, or register ++-- transfers. If we're really lucky, some of the register transfers ++-- will go away, because we can use the destination register to ++-- complete the code generation for the right hand side. This only ++-- fails when the right hand side is forced into a fixed register ++-- (e.g. the result of a call). ++ ++assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock ++assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock ++assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock ++assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock ++assignMem_IntCode rep addrE srcE = ++ do ++ (src_reg, _format, code) <- getSomeReg srcE ++ platform <- getPlatform ++ let w = formatToWidth rep ++ Amode addr addr_code <- getAmode platform w addrE ++ return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE))) ++ `consOL` ( code ++ `appOL` addr_code ++ `snocOL` STR rep (OpReg w src_reg) (OpAddr addr) ++ ) ++ ++assignReg_IntCode _ reg src = ++ do ++ platform <- getPlatform ++ let dst = getRegisterReg platform reg ++ r <- getRegister src ++ return $ case r of ++ Any _ code -> ++ COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) ++ `consOL` code dst ++ Fixed format freg fcode -> ++ COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) ++ `consOL` ( fcode ++ `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg) ++ ) ++ ++-- Let's treat Floating point stuff ++-- as integer code for now. Opaque. ++assignMem_FltCode = assignMem_IntCode ++ ++assignReg_FltCode = assignReg_IntCode ++ ++-- ----------------------------------------------------------------------------- ++-- Jumps ++-- AArch64 has 26bits for targets, whereas RiscV only has 20. ++-- Thus we need to distinguish between far (outside of the) ++-- current compilation unit. And regular branches. ++-- RiscV has ±2MB of displacement, whereas AArch64 has ±128MB. ++-- Thus for most branches we can get away with encoding it ++-- directly in the instruction rather than always loading the ++-- address into a register and then using that to jump. ++-- Under the assumption that our linked build product is less than ++-- ~2*128MB of TEXT, and there are no jump that span the whole ++-- TEXT segment. ++-- Something where riscv's compressed instruction might come in ++-- handy. ++genJump :: CmmExpr {-the branch target-} -> NatM InstrBlock ++genJump expr = do ++ (target, _format, code) <- getSomeReg expr ++ return (code `appOL` unitOL (annExpr expr (B (TReg target)))) ++ ++-- ----------------------------------------------------------------------------- ++-- Unconditional branches ++genBranch :: BlockId -> NatM InstrBlock ++genBranch = return . toOL . mkJumpInstr ++ ++-- ----------------------------------------------------------------------------- ++-- Conditional branches ++genCondJump :: ++ BlockId -> ++ CmmExpr -> ++ NatM InstrBlock ++genCondJump bid expr = do ++ case expr of ++ -- Optimized == 0 case. ++ CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do ++ (reg_x, _format_x, code_x) <- getSomeReg x ++ return $ code_x `snocOL` annExpr expr (BCOND EQ zero (OpReg w reg_x) (TBlock bid)) ++ ++ -- Optimized /= 0 case. ++ CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do ++ (reg_x, _format_x, code_x) <- getSomeReg x ++ return $ code_x `snocOL` annExpr expr (BCOND NE zero (OpReg w reg_x) (TBlock bid)) ++ ++ -- Generic case. ++ CmmMachOp mop [x, y] -> do ++ let ubcond w cmp = do ++ -- compute both sides. ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, format_y, code_y) <- getSomeReg y ++ let x' = OpReg w reg_x ++ y' = OpReg w reg_y ++ return $ case w of ++ w ++ | w == W8 || w == W16 -> ++ code_x ++ `appOL` truncateReg (formatToWidth format_x) w reg_x ++ `appOL` code_y ++ `appOL` truncateReg (formatToWidth format_y) w reg_y ++ `appOL` code_y ++ `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid)) ++ _ -> ++ code_x ++ `appOL` code_y ++ `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid)) ++ ++ sbcond w cmp = do ++ -- compute both sides. ++ (reg_x, format_x, code_x) <- getSomeReg x ++ (reg_y, format_y, code_y) <- getSomeReg y ++ let x' = OpReg w reg_x ++ y' = OpReg w reg_y ++ return $ case w of ++ w ++ | w `elem` [W8, W16, W32] -> ++ code_x ++ `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x ++ `appOL` code_y ++ `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y ++ `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid))) ++ _ -> code_x `appOL` code_y `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid))) ++ ++ fbcond w cmp = do ++ -- ensure we get float regs ++ (reg_fx, _format_fx, code_fx) <- getFloatReg x ++ (reg_fy, _format_fy, code_fy) <- getFloatReg y ++ condOpReg <- OpReg W64 <$> getNewRegNat II64 ++ oneReg <- getNewRegNat II64 ++ return $ code_fx ++ `appOL` code_fy ++ `snocOL` annExpr expr (CSET condOpReg (OpReg w reg_fx) (OpReg w reg_fy) cmp) ++ `snocOL` MOV (OpReg W64 oneReg) (OpImm (ImmInt 1)) ++ `snocOL` BCOND EQ condOpReg (OpReg w oneReg) (TBlock bid) ++ ++ case mop of ++ MO_F_Eq w -> fbcond w EQ ++ MO_F_Ne w -> fbcond w NE ++ MO_F_Gt w -> fbcond w FGT ++ MO_F_Ge w -> fbcond w FGE ++ MO_F_Lt w -> fbcond w FLT ++ MO_F_Le w -> fbcond w FLE ++ MO_Eq w -> sbcond w EQ ++ MO_Ne w -> sbcond w NE ++ MO_S_Gt w -> sbcond w SGT ++ MO_S_Ge w -> sbcond w SGE ++ MO_S_Lt w -> sbcond w SLT ++ MO_S_Le w -> sbcond w SLE ++ MO_U_Gt w -> ubcond w UGT ++ MO_U_Ge w -> ubcond w UGE ++ MO_U_Lt w -> ubcond w ULT ++ MO_U_Le w -> ubcond w ULE ++ _ -> pprPanic "RV64.genCondJump:case mop: " (text $ show expr) ++ _ -> pprPanic "RV64.genCondJump: " (text $ show expr) ++ ++-- | Generate conditional branching instructions ++-- ++-- This is basically an "if with else" statement. ++genCondBranch :: ++ -- | the true branch target ++ BlockId -> ++ -- | the false branch target ++ BlockId -> ++ -- | the condition on which to branch ++ CmmExpr -> ++ -- | Instructions ++ NatM InstrBlock ++genCondBranch true false expr = ++ appOL ++ <$> genCondJump true expr ++ <*> genBranch false ++ ++-- ----------------------------------------------------------------------------- ++-- Generating C calls ++ ++-- | Generate a call to a C function. ++-- ++-- - Integer values are passed in GP registers a0-a7. ++-- - Floating point values are passed in FP registers fa0-fa7. ++-- - If there are no free floating point registers, the FP values are passed in GP registers. ++-- - If all GP registers are taken, the values are spilled as whole words (!) onto the stack. ++-- - For integers/words, the return value is in a0. ++-- - The return value is in fa0 if the return type is a floating point value. ++genCCall :: ++ ForeignTarget -> -- function to call ++ [CmmFormal] -> -- where to put the result ++ [CmmActual] -> -- arguments (of mixed type) ++ NatM InstrBlock ++-- TODO: Specialize where we can. ++-- Generic impl ++genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do ++ -- we want to pass arg_regs into allArgRegs ++ -- The target :: ForeignTarget call can either ++ -- be a foreign procedure with an address expr ++ -- and a calling convention. ++ (call_target_reg, call_target_code) <- ++ -- Compute the address of the call target into a register. This ++ -- addressing enables us to jump through the whole address space ++ -- without further ado. PC-relative addressing would involve ++ -- instructions to do similar, though. ++ do ++ (reg, _format, reg_code) <- getSomeReg expr ++ pure (reg, reg_code) ++ -- compute the code and register logic for all arg_regs. ++ -- this will give us the format information to match on. ++ arg_regs' <- mapM getSomeReg arg_regs ++ ++ -- Now this is stupid. Our Cmm expressions doesn't carry the proper sizes ++ -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in ++ -- STG; this then breaks packing of stack arguments, if we need to pack ++ -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type ++ -- in Cmm proper. Option two, which we choose here is to use extended Hint ++ -- information to contain the size information and use that when packing ++ -- arguments, spilled onto the stack. ++ let (_res_hints, arg_hints) = foreignTargetHints target ++ arg_regs'' = zipWith (\(r, f, c) h -> (r, f, h, c)) arg_regs' arg_hints ++ ++ (stackSpaceWords, passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL ++ ++ readResultsCode <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL ++ ++ let moveStackDown 0 = ++ toOL ++ [ PUSH_STACK_FRAME, ++ DELTA (-16) ++ ] ++ moveStackDown i | odd i = moveStackDown (i + 1) ++ moveStackDown i = ++ toOL ++ [ PUSH_STACK_FRAME, ++ SUB (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))), ++ DELTA (-8 * i - 16) ++ ] ++ moveStackUp 0 = ++ toOL ++ [ POP_STACK_FRAME, ++ DELTA 0 ++ ] ++ moveStackUp i | odd i = moveStackUp (i + 1) ++ moveStackUp i = ++ toOL ++ [ ADD (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))), ++ POP_STACK_FRAME, ++ DELTA 0 ++ ] ++ ++ let code = ++ call_target_code -- compute the label (possibly into a register) ++ `appOL` moveStackDown stackSpaceWords ++ `appOL` passArgumentsCode -- put the arguments into x0, ... ++ `snocOL` BL call_target_reg passRegs -- branch and link (C calls aren't tail calls, but return) ++ `appOL` readResultsCode -- parse the results into registers ++ `appOL` moveStackUp stackSpaceWords ++ return code ++ where ++ -- Implementiation of the RISCV ABI calling convention. ++ -- https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/948463cd5dbebea7c1869e20146b17a2cc8fda2f/riscv-cc.adoc#integer-calling-convention ++ passArguments :: [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock) ++ -- Base case: no more arguments to pass (left) ++ passArguments _ _ [] stackSpaceWords accumRegs accumCode = return (stackSpaceWords, accumRegs, accumCode) ++ -- Still have GP regs, and we want to pass an GP argument. ++ passArguments (gpReg : gpRegs) fpRegs ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode | isIntFormat format = do ++ -- RISCV64 Integer Calling Convention: "When passed in registers or on the ++ -- stack, integer scalars narrower than XLEN bits are widened according to ++ -- the sign of their type up to 32 bits, then sign-extended to XLEN bits." ++ let w = formatToWidth format ++ assignArg = ++ if hint == SignedHint ++ then ++ COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r) ++ `consOL` signExtend w W64 r gpReg ++ else ++ toOL ++ [ COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r), ++ MOV (OpReg w gpReg) (OpReg w r) ++ ] ++ accumCode' = ++ accumCode ++ `appOL` code_r ++ `appOL` assignArg ++ passArguments gpRegs fpRegs args stackSpaceWords (gpReg : accumRegs) accumCode' ++ ++ -- Still have FP regs, and we want to pass an FP argument. ++ passArguments gpRegs (fpReg : fpRegs) ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do ++ let w = formatToWidth format ++ mov = MOV (OpReg w fpReg) (OpReg w r) ++ accumCode' = ++ accumCode ++ `appOL` code_r ++ `snocOL` ann (text "Pass fp argument: " <> ppr r) mov ++ passArguments gpRegs fpRegs args stackSpaceWords (fpReg : accumRegs) accumCode' ++ ++ -- No mor regs left to pass. Must pass on stack. ++ passArguments [] [] ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode = do ++ let w = formatToWidth format ++ spOffet = 8 * stackSpaceWords ++ str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet))) ++ stackCode = ++ if hint == SignedHint ++ then ++ code_r ++ `appOL` signExtend w W64 r tmpReg ++ `snocOL` ann (text "Pass signed argument (size " <> ppr w <> text ") on the stack: " <> ppr tmpReg) str ++ else ++ code_r ++ `snocOL` ann (text "Pass unsigned argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str ++ passArguments [] [] args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode) ++ ++ -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then. ++ passArguments [] fpRegs ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isIntFormat format = do ++ let w = formatToWidth format ++ spOffet = 8 * stackSpaceWords ++ str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet))) ++ stackCode = ++ code_r ++ `snocOL` ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str ++ passArguments [] fpRegs args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode) ++ ++ -- Still have gpRegs left, but want to pass a FP argument. Must be passed in gpReg then. ++ passArguments (gpReg : gpRegs) [] ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do ++ let w = formatToWidth format ++ mov = MOV (OpReg w gpReg) (OpReg w r) ++ accumCode' = ++ accumCode ++ `appOL` code_r ++ `snocOL` ann (text "Pass fp argument in gpReg: " <> ppr r) mov ++ passArguments gpRegs [] args stackSpaceWords (gpReg : accumRegs) accumCode' ++ passArguments _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state") ++ ++ readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg] -> InstrBlock -> NatM InstrBlock ++ readResults _ _ [] _ accumCode = return accumCode ++ readResults [] _ _ _ _ = do ++ platform <- getPlatform ++ pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target) ++ readResults _ [] _ _ _ = do ++ platform <- getPlatform ++ pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target) ++ readResults (gpReg : gpRegs) (fpReg : fpRegs) (dst : dsts) accumRegs accumCode = do ++ -- gp/fp reg -> dst ++ platform <- getPlatform ++ let rep = cmmRegType (CmmLocal dst) ++ format = cmmTypeFormat rep ++ w = cmmRegWidth (CmmLocal dst) ++ r_dst = getRegisterReg platform (CmmLocal dst) ++ if isFloatFormat format ++ then readResults (gpReg : gpRegs) fpRegs dsts (fpReg : accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg)) ++ else ++ readResults gpRegs (fpReg : fpRegs) dsts (gpReg : accumRegs) ++ $ accumCode ++ `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg) ++ `appOL` ++ -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations ++ truncateReg W64 w r_dst ++genCCall (PrimTarget mop) dest_regs arg_regs = do ++ case mop of ++ MO_F32_Fabs ++ | [arg_reg] <- arg_regs, ++ [dest_reg] <- dest_regs -> ++ unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg ++ MO_F64_Fabs ++ | [arg_reg] <- arg_regs, ++ [dest_reg] <- dest_regs -> ++ unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg ++ -- 64 bit float ops ++ MO_F64_Pwr -> mkCCall "pow" ++ MO_F64_Sin -> mkCCall "sin" ++ MO_F64_Cos -> mkCCall "cos" ++ MO_F64_Tan -> mkCCall "tan" ++ MO_F64_Sinh -> mkCCall "sinh" ++ MO_F64_Cosh -> mkCCall "cosh" ++ MO_F64_Tanh -> mkCCall "tanh" ++ MO_F64_Asin -> mkCCall "asin" ++ MO_F64_Acos -> mkCCall "acos" ++ MO_F64_Atan -> mkCCall "atan" ++ MO_F64_Asinh -> mkCCall "asinh" ++ MO_F64_Acosh -> mkCCall "acosh" ++ MO_F64_Atanh -> mkCCall "atanh" ++ MO_F64_Log -> mkCCall "log" ++ MO_F64_Log1P -> mkCCall "log1p" ++ MO_F64_Exp -> mkCCall "exp" ++ MO_F64_ExpM1 -> mkCCall "expm1" ++ MO_F64_Fabs -> mkCCall "fabs" ++ MO_F64_Sqrt -> mkCCall "sqrt" ++ -- 32 bit float ops ++ MO_F32_Pwr -> mkCCall "powf" ++ MO_F32_Sin -> mkCCall "sinf" ++ MO_F32_Cos -> mkCCall "cosf" ++ MO_F32_Tan -> mkCCall "tanf" ++ MO_F32_Sinh -> mkCCall "sinhf" ++ MO_F32_Cosh -> mkCCall "coshf" ++ MO_F32_Tanh -> mkCCall "tanhf" ++ MO_F32_Asin -> mkCCall "asinf" ++ MO_F32_Acos -> mkCCall "acosf" ++ MO_F32_Atan -> mkCCall "atanf" ++ MO_F32_Asinh -> mkCCall "asinhf" ++ MO_F32_Acosh -> mkCCall "acoshf" ++ MO_F32_Atanh -> mkCCall "atanhf" ++ MO_F32_Log -> mkCCall "logf" ++ MO_F32_Log1P -> mkCCall "log1pf" ++ MO_F32_Exp -> mkCCall "expf" ++ MO_F32_ExpM1 -> mkCCall "expm1f" ++ MO_F32_Fabs -> mkCCall "fabsf" ++ MO_F32_Sqrt -> mkCCall "sqrtf" ++ -- 64-bit primops ++ MO_I64_ToI -> mkCCall "hs_int64ToInt" ++ MO_I64_FromI -> mkCCall "hs_intToInt64" ++ MO_W64_ToW -> mkCCall "hs_word64ToWord" ++ MO_W64_FromW -> mkCCall "hs_wordToWord64" ++ MO_x64_Neg -> mkCCall "hs_neg64" ++ MO_x64_Add -> mkCCall "hs_add64" ++ MO_x64_Sub -> mkCCall "hs_sub64" ++ MO_x64_Mul -> mkCCall "hs_mul64" ++ MO_I64_Quot -> mkCCall "hs_quotInt64" ++ MO_I64_Rem -> mkCCall "hs_remInt64" ++ MO_W64_Quot -> mkCCall "hs_quotWord64" ++ MO_W64_Rem -> mkCCall "hs_remWord64" ++ MO_x64_And -> mkCCall "hs_and64" ++ MO_x64_Or -> mkCCall "hs_or64" ++ MO_x64_Xor -> mkCCall "hs_xor64" ++ MO_x64_Not -> mkCCall "hs_not64" ++ MO_x64_Shl -> mkCCall "hs_uncheckedShiftL64" ++ MO_I64_Shr -> mkCCall "hs_uncheckedIShiftRA64" ++ MO_W64_Shr -> mkCCall "hs_uncheckedShiftRL64" ++ MO_x64_Eq -> mkCCall "hs_eq64" ++ MO_x64_Ne -> mkCCall "hs_ne64" ++ MO_I64_Ge -> mkCCall "hs_geInt64" ++ MO_I64_Gt -> mkCCall "hs_gtInt64" ++ MO_I64_Le -> mkCCall "hs_leInt64" ++ MO_I64_Lt -> mkCCall "hs_ltInt64" ++ MO_W64_Ge -> mkCCall "hs_geWord64" ++ MO_W64_Gt -> mkCCall "hs_gtWord64" ++ MO_W64_Le -> mkCCall "hs_leWord64" ++ MO_W64_Lt -> mkCCall "hs_ltWord64" ++ -- Conversion ++ MO_UF_Conv w -> mkCCall (word2FloatLabel w) ++ -- Optional MachOps ++ -- These are enabled/disabled by backend flags: GHC.StgToCmm.Config ++ MO_S_Mul2 _w -> unsupported mop ++ MO_S_QuotRem _w -> unsupported mop ++ MO_U_QuotRem _w -> unsupported mop ++ MO_U_QuotRem2 _w -> unsupported mop ++ MO_Add2 _w -> unsupported mop ++ MO_AddWordC _w -> unsupported mop ++ MO_SubWordC _w -> unsupported mop ++ MO_AddIntC _w -> unsupported mop ++ MO_SubIntC _w -> unsupported mop ++ MO_U_Mul2 _w -> unsupported mop ++ -- Memory Ordering ++ -- The related C functions are: ++ -- #include ++ -- atomic_thread_fence(memory_order_acquire); ++ -- atomic_thread_fence(memory_order_release); ++ -- atomic_thread_fence(memory_order_seq_cst); ++ MO_AcquireFence -> pure (unitOL (FENCE FenceRead FenceReadWrite)) ++ MO_ReleaseFence -> pure (unitOL (FENCE FenceReadWrite FenceWrite)) ++ MO_SeqCstFence -> pure (unitOL (FENCE FenceReadWrite FenceReadWrite)) ++ MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers) ++ -- Prefetch ++ MO_Prefetch_Data _n -> pure nilOL -- Prefetch hint. ++ ++ -- Memory copy/set/move/cmp, with alignment for optimization ++ MO_Memcpy _align -> mkCCall "memcpy" ++ MO_Memset _align -> mkCCall "memset" ++ MO_Memmove _align -> mkCCall "memmove" ++ MO_Memcmp _align -> mkCCall "memcmp" ++ MO_SuspendThread -> mkCCall "suspendThread" ++ MO_ResumeThread -> mkCCall "resumeThread" ++ MO_PopCnt w -> mkCCall (popCntLabel w) ++ MO_Pdep w -> mkCCall (pdepLabel w) ++ MO_Pext w -> mkCCall (pextLabel w) ++ MO_Clz w -> mkCCall (clzLabel w) ++ MO_Ctz w -> mkCCall (ctzLabel w) ++ MO_BSwap w -> mkCCall (bSwapLabel w) ++ MO_BRev w -> mkCCall (bRevLabel w) ++ -- Atomic read-modify-write. ++ mo@(MO_AtomicRead w ord) ++ | [p_reg] <- arg_regs, ++ [dst_reg] <- dest_regs -> do ++ (p, _fmt_p, code_p) <- getSomeReg p_reg ++ platform <- getPlatform ++ -- Analog to the related MachOps (above) ++ -- The related C functions are: ++ -- #include ++ -- __atomic_load_n(&a, __ATOMIC_ACQUIRE); ++ -- __atomic_load_n(&a, __ATOMIC_SEQ_CST); ++ let instrs = case ord of ++ MemOrderRelaxed -> unitOL $ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)) ++ MemOrderAcquire -> ++ toOL ++ [ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)), ++ FENCE FenceRead FenceReadWrite ++ ] ++ MemOrderSeqCst -> ++ toOL ++ [ ann moDescr (FENCE FenceReadWrite FenceReadWrite), ++ LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p), ++ FENCE FenceRead FenceReadWrite ++ ] ++ MemOrderRelease -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo ++ dst = getRegisterReg platform (CmmLocal dst_reg) ++ moDescr = (text . show) mo ++ code = code_p `appOL` instrs ++ return code ++ | otherwise -> panic "mal-formed AtomicRead" ++ mo@(MO_AtomicWrite w ord) ++ | [p_reg, val_reg] <- arg_regs -> do ++ (p, _fmt_p, code_p) <- getSomeReg p_reg ++ (val, fmt_val, code_val) <- getSomeReg val_reg ++ -- Analog to the related MachOps (above) ++ -- The related C functions are: ++ -- #include ++ -- __atomic_store_n(&a, 23, __ATOMIC_SEQ_CST); ++ -- __atomic_store_n(&a, 23, __ATOMIC_RELEASE); ++ let instrs = case ord of ++ MemOrderRelaxed -> unitOL $ ann moDescr (STR fmt_val (OpReg w val) (OpAddr $ AddrReg p)) ++ MemOrderSeqCst -> ++ toOL ++ [ ann moDescr (FENCE FenceReadWrite FenceWrite), ++ STR fmt_val (OpReg w val) (OpAddr $ AddrReg p), ++ FENCE FenceReadWrite FenceReadWrite ++ ] ++ MemOrderRelease -> ++ toOL ++ [ ann moDescr (FENCE FenceReadWrite FenceWrite), ++ STR fmt_val (OpReg w val) (OpAddr $ AddrReg p) ++ ] ++ MemOrderAcquire -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo ++ moDescr = (text . show) mo ++ code = ++ code_p ++ `appOL` code_val ++ `appOL` instrs ++ pure code ++ | otherwise -> panic "mal-formed AtomicWrite" ++ MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop) ++ MO_Cmpxchg w -> mkCCall (cmpxchgLabel w) ++ -- -- Should be an AtomicRMW variant eventually. ++ -- -- Sequential consistent. ++ -- TODO: this should be implemented properly! ++ MO_Xchg w -> mkCCall (xchgLabel w) ++ where ++ unsupported :: (Show a) => a -> b ++ unsupported mop = ++ panic ++ ( "outOfLineCmmOp: " ++ ++ show mop ++ ++ " not supported here" ++ ) ++ mkCCall :: FastString -> NatM InstrBlock ++ mkCCall name = do ++ config <- getConfig ++ target <- ++ cmmMakeDynamicReference config CallReference ++ $ mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction ++ let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn ++ genCCall (ForeignTarget target cconv) dest_regs arg_regs ++ ++ unaryFloatOp w op arg_reg dest_reg = do ++ platform <- getPlatform ++ (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg ++ let dst = getRegisterReg platform (CmmLocal dest_reg) ++ let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx) ++ pure code ++ ++{- Note [RISCV64 far jumps] ++~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++RISCV64 conditional jump instructions can only encode an offset of +/-4KiB ++(12bits) which is usually enough but can be exceeded in edge cases. In these ++cases we will replace: ++ ++ b.cond foo ++ ++with the sequence: ++ ++ b.cond ++ b ++ : ++ la reg foo ++ b reg ++ : ++ ++and ++ ++ b foo ++ ++with the sequence: ++ ++ la reg foo ++ b reg ++ ++Compared to AArch64 the target label is loaded to a register, because ++unconditional jump instructions can only address +/-1MiB. The LA ++pseudo-instruction will be replaced by up to two real instructions, ensuring ++correct addressing. ++ ++One could surely find more efficient replacements, taking PC-relative addressing ++into account. This could be a future improvement. (As far branches are pretty ++rare, one might question and measure the value of such improvement.) ++ ++RISCV has many pseudo-instructions which emit more than one real instructions. ++Thus, we count the real instructions after the Assembler has seen them. ++ ++We make some simplifications in the name of performance which can result in ++overestimating jump <-> label offsets: ++ ++\* To avoid having to recalculate the label offsets once we replaced a jump we simply ++ assume all label jumps will be expanded to a three instruction far jump sequence. ++\* For labels associated with a info table we assume the info table is 64byte large. ++ Most info tables are smaller than that but it means we don't have to distinguish ++ between multiple types of info tables. ++ ++In terms of implementation we walk the instruction stream at least once calculating ++label offsets, and if we determine during this that the functions body is big enough ++to potentially contain out of range jumps we walk the instructions a second time, replacing ++out of range jumps with the sequence of instructions described above. ++ ++-} ++ ++-- | A conditional jump to a far target ++-- ++-- By loading the far target into a register for the jump, we can address the ++-- whole memory range. ++genCondFarJump :: (MonadUnique m) => Cond -> Operand -> Operand -> BlockId -> m InstrBlock ++genCondFarJump cond op1 op2 far_target = do ++ skip_lbl_id <- newBlockId ++ jmp_lbl_id <- newBlockId ++ ++ -- TODO: We can improve this by inverting the condition ++ -- but it's not quite trivial since we don't know if we ++ -- need to consider float orderings. ++ -- So we take the hit of the additional jump in the false ++ -- case for now. ++ return ++ $ toOL ++ [ ann (text "Conditional far jump to: " <> ppr far_target) ++ $ BCOND cond op1 op2 (TBlock jmp_lbl_id), ++ B (TBlock skip_lbl_id), ++ NEWBLOCK jmp_lbl_id, ++ LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))), ++ B (TReg tmpReg), ++ NEWBLOCK skip_lbl_id ++ ] ++ ++-- | An unconditional jump to a far target ++-- ++-- By loading the far target into a register for the jump, we can address the ++-- whole memory range. ++genFarJump :: (MonadUnique m) => BlockId -> m InstrBlock ++genFarJump far_target = ++ return ++ $ toOL ++ [ ann (text "Unconditional far jump to: " <> ppr far_target) ++ $ LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))), ++ B (TReg tmpReg) ++ ] ++ ++-- See Note [RISCV64 far jumps] ++data BlockInRange = InRange | NotInRange BlockId ++ ++-- See Note [RISCV64 far jumps] ++makeFarBranches :: ++ Platform -> ++ LabelMap RawCmmStatics -> ++ [NatBasicBlock Instr] -> ++ UniqSM [NatBasicBlock Instr] ++makeFarBranches {- only used when debugging -} _platform statics basic_blocks = do ++ -- All offsets/positions are counted in multiples of 4 bytes (the size of RISCV64 instructions) ++ -- That is an offset of 1 represents a 4-byte/one instruction offset. ++ let (func_size, lblMap) = foldl' calc_lbl_positions (0, mapEmpty) basic_blocks ++ if func_size < max_jump_dist ++ then pure basic_blocks ++ else do ++ (_, blocks) <- mapAccumLM (replace_blk lblMap) 0 basic_blocks ++ pure $ concat blocks ++ where ++ -- pprTrace "lblMap" (ppr lblMap) $ basic_blocks ++ ++ -- 2^11, 12 bit immediate with one bit is reserved for the sign ++ max_jump_dist = 2 ^ (11 :: Int) - 1 :: Int ++ -- Currently all inline info tables fit into 64 bytes. ++ max_info_size = 16 :: Int ++ long_bc_jump_size = 5 :: Int ++ long_b_jump_size = 2 :: Int ++ ++ -- Replace out of range conditional jumps with unconditional jumps. ++ replace_blk :: LabelMap Int -> Int -> GenBasicBlock Instr -> UniqSM (Int, [GenBasicBlock Instr]) ++ replace_blk !m !pos (BasicBlock lbl instrs) = do ++ -- Account for a potential info table before the label. ++ let !block_pos = pos + infoTblSize_maybe lbl ++ (!pos', instrs') <- mapAccumLM (replace_jump m) block_pos instrs ++ let instrs'' = concat instrs' ++ -- We might have introduced new labels, so split the instructions into basic blocks again if neccesary. ++ let (top, split_blocks, no_data) = foldr mkBlocks ([], [], []) instrs'' ++ -- There should be no data in the instruction stream at this point ++ massert (null no_data) ++ ++ let final_blocks = BasicBlock lbl top : split_blocks ++ pure (pos', final_blocks) ++ ++ replace_jump :: LabelMap Int -> Int -> Instr -> UniqSM (Int, [Instr]) ++ replace_jump !m !pos instr = do ++ case instr of ++ ANN ann instr -> do ++ (idx, instr' : instrs') <- replace_jump m pos instr ++ pure (idx, ANN ann instr' : instrs') ++ BCOND cond op1 op2 t -> ++ case target_in_range m t pos of ++ InRange -> pure (pos + instr_size instr, [instr]) ++ NotInRange far_target -> do ++ jmp_code <- genCondFarJump cond op1 op2 far_target ++ pure (pos + instr_size instr, fromOL jmp_code) ++ B t -> ++ case target_in_range m t pos of ++ InRange -> pure (pos + instr_size instr, [instr]) ++ NotInRange far_target -> do ++ jmp_code <- genFarJump far_target ++ pure (pos + instr_size instr, fromOL jmp_code) ++ _ -> pure (pos + instr_size instr, [instr]) ++ ++ target_in_range :: LabelMap Int -> Target -> Int -> BlockInRange ++ target_in_range m target src = ++ case target of ++ (TReg {}) -> InRange ++ (TBlock bid) -> block_in_range m src bid ++ ++ block_in_range :: LabelMap Int -> Int -> BlockId -> BlockInRange ++ block_in_range m src_pos dest_lbl = ++ case mapLookup dest_lbl m of ++ Nothing -> ++ pprTrace "not in range" (ppr dest_lbl) ++ $ NotInRange dest_lbl ++ Just dest_pos -> ++ if abs (dest_pos - src_pos) < max_jump_dist ++ then InRange ++ else NotInRange dest_lbl ++ ++ calc_lbl_positions :: (Int, LabelMap Int) -> GenBasicBlock Instr -> (Int, LabelMap Int) ++ calc_lbl_positions (pos, m) (BasicBlock lbl instrs) = ++ let !pos' = pos + infoTblSize_maybe lbl ++ in foldl' instr_pos (pos', mapInsert lbl pos' m) instrs ++ ++ instr_pos :: (Int, LabelMap Int) -> Instr -> (Int, LabelMap Int) ++ instr_pos (pos, m) instr = (pos + instr_size instr, m) ++ ++ infoTblSize_maybe bid = ++ case mapLookup bid statics of ++ Nothing -> 0 :: Int ++ Just _info_static -> max_info_size ++ ++ instr_size :: Instr -> Int ++ instr_size i = case i of ++ COMMENT {} -> 0 ++ MULTILINE_COMMENT {} -> 0 ++ ANN _ instr -> instr_size instr ++ LOCATION {} -> 0 ++ DELTA {} -> 0 ++ -- At this point there should be no NEWBLOCK in the instruction stream (pos, mapInsert bid pos m) ++ NEWBLOCK {} -> panic "mkFarBranched - Unexpected" ++ LDATA {} -> panic "mkFarBranched - Unexpected" ++ PUSH_STACK_FRAME -> 4 ++ POP_STACK_FRAME -> 4 ++ ADD {} -> 1 ++ MUL {} -> 1 ++ MULH {} -> 1 ++ NEG {} -> 1 ++ DIV {} -> 1 ++ REM {} -> 1 ++ REMU {} -> 1 ++ SUB {} -> 1 ++ DIVU {} -> 1 ++ AND {} -> 1 ++ OR {} -> 1 ++ SRA {} -> 1 ++ XOR {} -> 1 ++ SLL {} -> 1 ++ SRL {} -> 1 ++ MOV {} -> 2 ++ ORI {} -> 1 ++ XORI {} -> 1 ++ CSET {} -> 2 ++ STR {} -> 1 ++ LDR {} -> 3 ++ LDRU {} -> 1 ++ FENCE {} -> 1 ++ FCVT {} -> 1 ++ FABS {} -> 1 ++ FMA {} -> 1 ++ -- estimate the subsituted size for jumps to lables ++ -- jumps to registers have size 1 ++ BCOND {} -> long_bc_jump_size ++ B (TBlock _) -> long_b_jump_size ++ B (TReg _) -> 1 ++ BL _ _ -> 1 ++ J_TBL {} -> 1 +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Cond.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Cond.hs +@@ -0,0 +1,42 @@ ++module GHC.CmmToAsm.RV64.Cond ++ ( Cond (..), ++ ) ++where ++ ++import GHC.Prelude hiding (EQ) ++ ++-- | Condition codes. ++-- ++-- Used in conditional branches and bit setters. According to the available ++-- instruction set, some conditions are encoded as their negated opposites. I.e. ++-- these are logical things that don't necessarily map 1:1 to hardware/ISA. ++data Cond ++ = -- | int and float ++ EQ ++ | -- | int and float ++ NE ++ | -- | signed less than ++ SLT ++ | -- | signed less than or equal ++ SLE ++ | -- | signed greater than or equal ++ SGE ++ | -- | signed greater than ++ SGT ++ | -- | unsigned less than ++ ULT ++ | -- | unsigned less than or equal ++ ULE ++ | -- | unsigned greater than or equal ++ UGE ++ | -- | unsigned greater than ++ UGT ++ | -- | floating point instruction @flt@ ++ FLT ++ | -- | floating point instruction @fle@ ++ FLE ++ | -- | floating point instruction @fge@ ++ FGE ++ | -- | floating point instruction @fgt@ ++ FGT ++ deriving (Eq, Show) +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Instr.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Instr.hs +@@ -0,0 +1,823 @@ ++-- All instructions will be rendered eventually. Thus, there's no benefit in ++-- being lazy in data types. ++{-# LANGUAGE StrictData #-} ++{-# OPTIONS_GHC -fno-warn-orphans #-} ++ ++module GHC.CmmToAsm.RV64.Instr where ++ ++import Data.Maybe ++import GHC.Cmm ++import GHC.Cmm.BlockId ++import GHC.Cmm.CLabel ++import GHC.Cmm.Dataflow.Label ++import GHC.CmmToAsm.Config ++import GHC.CmmToAsm.Format ++import GHC.CmmToAsm.Instr (RegUsage (..)) ++import GHC.CmmToAsm.RV64.Cond ++import GHC.CmmToAsm.RV64.Regs ++import GHC.CmmToAsm.Types ++import GHC.CmmToAsm.Utils ++import GHC.Data.FastString (LexicalFastString) ++import GHC.Platform ++import GHC.Platform.Reg ++import GHC.Platform.Regs ++import GHC.Prelude ++import GHC.Stack ++import GHC.Types.Unique.Supply ++import GHC.Utils.Outputable ++import GHC.Utils.Panic ++ ++-- | Stack frame header size in bytes. ++-- ++-- The stack frame header is made of the values that are always saved ++-- (regardless of the context.) It consists of the saved return address and a ++-- pointer to the previous frame. Thus, its size is two stack frame slots which ++-- equals two addresses/words (2 * 8 byte). ++stackFrameHeaderSize :: Int ++stackFrameHeaderSize = 2 * spillSlotSize ++ ++-- | All registers are 8 byte wide. ++spillSlotSize :: Int ++spillSlotSize = 8 ++ ++-- | The number of bytes that the stack pointer should be aligned to. ++stackAlign :: Int ++stackAlign = 16 ++ ++-- | The number of spill slots available without allocating more. ++maxSpillSlots :: NCGConfig -> Int ++maxSpillSlots config = ++ ( (ncgSpillPreallocSize config - stackFrameHeaderSize) ++ `div` spillSlotSize ++ ) ++ - 1 ++ ++-- | Convert a spill slot number to a *byte* offset. ++spillSlotToOffset :: Int -> Int ++spillSlotToOffset slot = ++ stackFrameHeaderSize + spillSlotSize * slot ++ ++instance Outputable RegUsage where ++ ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')' ++ ++-- | Get the registers that are being used by this instruction. ++-- regUsage doesn't need to do any trickery for jumps and such. ++-- Just state precisely the regs read and written by that insn. ++-- The consequences of control flow transfers, as far as register ++-- allocation goes, are taken care of by the register allocator. ++-- ++-- RegUsage = RU [] [] ++regUsageOfInstr :: Platform -> Instr -> RegUsage ++regUsageOfInstr platform instr = case instr of ++ ANN _ i -> regUsageOfInstr platform i ++ COMMENT {} -> usage ([], []) ++ MULTILINE_COMMENT {} -> usage ([], []) ++ PUSH_STACK_FRAME -> usage ([], []) ++ POP_STACK_FRAME -> usage ([], []) ++ LOCATION {} -> usage ([], []) ++ DELTA {} -> usage ([], []) ++ ADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ MUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ NEG dst src -> usage (regOp src, regOp dst) ++ MULH dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ DIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ REM dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ REMU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ SUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ DIVU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ SRA dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ SLL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ SRL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ++ MOV dst src -> usage (regOp src, regOp dst) ++ -- ORI's third operand is always an immediate ++ ORI dst src1 _ -> usage (regOp src1, regOp dst) ++ XORI dst src1 _ -> usage (regOp src1, regOp dst) ++ J_TBL _ _ t -> usage ([t], []) ++ B t -> usage (regTarget t, []) ++ BCOND _ l r t -> usage (regTarget t ++ regOp l ++ regOp r, []) ++ BL t ps -> usage (t : ps, callerSavedRegisters) ++ CSET dst l r _ -> usage (regOp l ++ regOp r, regOp dst) ++ STR _ src dst -> usage (regOp src ++ regOp dst, []) ++ LDR _ dst src -> usage (regOp src, regOp dst) ++ LDRU _ dst src -> usage (regOp src, regOp dst) ++ FENCE _ _ -> usage ([], []) ++ FCVT _variant dst src -> usage (regOp src, regOp dst) ++ FABS dst src -> usage (regOp src, regOp dst) ++ FMA _ dst src1 src2 src3 -> ++ usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst) ++ _ -> panic $ "regUsageOfInstr: " ++ instrCon instr ++ where ++ -- filtering the usage is necessary, otherwise the register ++ -- allocator will try to allocate pre-defined fixed stg ++ -- registers as well, as they show up. ++ usage :: ([Reg], [Reg]) -> RegUsage ++ usage (srcRegs, dstRegs) = ++ RU ++ (filter (interesting platform) srcRegs) ++ (filter (interesting platform) dstRegs) ++ ++ regAddr :: AddrMode -> [Reg] ++ regAddr (AddrRegImm r1 _imm) = [r1] ++ regAddr (AddrReg r1) = [r1] ++ ++ regOp :: Operand -> [Reg] ++ regOp (OpReg _w r1) = [r1] ++ regOp (OpAddr a) = regAddr a ++ regOp (OpImm _imm) = [] ++ ++ regTarget :: Target -> [Reg] ++ regTarget (TBlock _bid) = [] ++ regTarget (TReg r1) = [r1] ++ ++ -- Is this register interesting for the register allocator? ++ interesting :: Platform -> Reg -> Bool ++ interesting _ (RegVirtual _) = True ++ interesting platform (RegReal (RealRegSingle i)) = freeReg platform i ++ ++-- | Caller-saved registers (according to calling convention) ++-- ++-- These registers may be clobbered after a jump. ++callerSavedRegisters :: [Reg] ++callerSavedRegisters = ++ [regSingle raRegNo] ++ ++ map regSingle [t0RegNo .. t2RegNo] ++ ++ map regSingle [a0RegNo .. a7RegNo] ++ ++ map regSingle [t3RegNo .. t6RegNo] ++ ++ map regSingle [ft0RegNo .. ft7RegNo] ++ ++ map regSingle [fa0RegNo .. fa7RegNo] ++ ++-- | Apply a given mapping to all the register references in this instruction. ++patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr ++patchRegsOfInstr instr env = case instr of ++ ANN d i -> ANN d (patchRegsOfInstr i env) ++ COMMENT {} -> instr ++ MULTILINE_COMMENT {} -> instr ++ PUSH_STACK_FRAME -> instr ++ POP_STACK_FRAME -> instr ++ LOCATION {} -> instr ++ DELTA {} -> instr ++ ADD o1 o2 o3 -> ADD (patchOp o1) (patchOp o2) (patchOp o3) ++ MUL o1 o2 o3 -> MUL (patchOp o1) (patchOp o2) (patchOp o3) ++ NEG o1 o2 -> NEG (patchOp o1) (patchOp o2) ++ MULH o1 o2 o3 -> MULH (patchOp o1) (patchOp o2) (patchOp o3) ++ DIV o1 o2 o3 -> DIV (patchOp o1) (patchOp o2) (patchOp o3) ++ REM o1 o2 o3 -> REM (patchOp o1) (patchOp o2) (patchOp o3) ++ REMU o1 o2 o3 -> REMU (patchOp o1) (patchOp o2) (patchOp o3) ++ SUB o1 o2 o3 -> SUB (patchOp o1) (patchOp o2) (patchOp o3) ++ DIVU o1 o2 o3 -> DIVU (patchOp o1) (patchOp o2) (patchOp o3) ++ AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3) ++ OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3) ++ SRA o1 o2 o3 -> SRA (patchOp o1) (patchOp o2) (patchOp o3) ++ XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3) ++ SLL o1 o2 o3 -> SLL (patchOp o1) (patchOp o2) (patchOp o3) ++ SRL o1 o2 o3 -> SRL (patchOp o1) (patchOp o2) (patchOp o3) ++ MOV o1 o2 -> MOV (patchOp o1) (patchOp o2) ++ -- o3 cannot be a register for ORI (always an immediate) ++ ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3) ++ XORI o1 o2 o3 -> XORI (patchOp o1) (patchOp o2) (patchOp o3) ++ J_TBL ids mbLbl t -> J_TBL ids mbLbl (env t) ++ B t -> B (patchTarget t) ++ BL t ps -> BL (patchReg t) ps ++ BCOND c o1 o2 t -> BCOND c (patchOp o1) (patchOp o2) (patchTarget t) ++ CSET o l r c -> CSET (patchOp o) (patchOp l) (patchOp r) c ++ STR f o1 o2 -> STR f (patchOp o1) (patchOp o2) ++ LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2) ++ LDRU f o1 o2 -> LDRU f (patchOp o1) (patchOp o2) ++ FENCE o1 o2 -> FENCE o1 o2 ++ FCVT variant o1 o2 -> FCVT variant (patchOp o1) (patchOp o2) ++ FABS o1 o2 -> FABS (patchOp o1) (patchOp o2) ++ FMA s o1 o2 o3 o4 -> ++ FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4) ++ _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr ++ where ++ patchOp :: Operand -> Operand ++ patchOp (OpReg w r) = OpReg w (env r) ++ patchOp (OpAddr a) = OpAddr (patchAddr a) ++ patchOp opImm = opImm ++ ++ patchTarget :: Target -> Target ++ patchTarget (TReg r) = TReg (env r) ++ patchTarget tBlock = tBlock ++ ++ patchAddr :: AddrMode -> AddrMode ++ patchAddr (AddrRegImm r1 imm) = AddrRegImm (env r1) imm ++ patchAddr (AddrReg r) = AddrReg (env r) ++ ++ patchReg :: Reg -> Reg ++ patchReg = env ++ ++-- | Checks whether this instruction is a jump/branch instruction. ++-- ++-- One that can change the flow of control in a way that the ++-- register allocator needs to worry about. ++isJumpishInstr :: Instr -> Bool ++isJumpishInstr instr = case instr of ++ ANN _ i -> isJumpishInstr i ++ J_TBL {} -> True ++ B {} -> True ++ BL {} -> True ++ BCOND {} -> True ++ _ -> False ++ ++-- | Get the `BlockId`s of the jump destinations (if any) ++jumpDestsOfInstr :: Instr -> [BlockId] ++jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i ++jumpDestsOfInstr (J_TBL ids _mbLbl _r) = catMaybes ids ++jumpDestsOfInstr (B t) = [id | TBlock id <- [t]] ++jumpDestsOfInstr (BCOND _ _ _ t) = [id | TBlock id <- [t]] ++jumpDestsOfInstr _ = [] ++ ++-- | Change the destination of this (potential) jump instruction. ++-- ++-- Used in the linear allocator when adding fixup blocks for join ++-- points. ++patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr ++patchJumpInstr instr patchF = ++ case instr of ++ ANN d i -> ANN d (patchJumpInstr i patchF) ++ J_TBL ids mbLbl r -> J_TBL (map (fmap patchF) ids) mbLbl r ++ B (TBlock bid) -> B (TBlock (patchF bid)) ++ BCOND c o1 o2 (TBlock bid) -> BCOND c o1 o2 (TBlock (patchF bid)) ++ _ -> panic $ "patchJumpInstr: " ++ instrCon instr ++ ++-- ----------------------------------------------------------------------------- ++-- Note [RISCV64 Spills and Reloads] ++-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++-- ++-- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading ++-- registers. The load and store instructions of RISCV64 address with a signed ++-- 12-bit immediate + a register; machine stackpointer (sp/x2) in this case. ++-- ++-- The @RESERVED_C_STACK_BYTES@ is 16k, so we can't always address into it in a ++-- single load/store instruction. There are offsets to sp (not to be confused ++-- with STG's SP!) which need a register to be calculated. ++-- ++-- Using sp to compute the offset would violate assumptions about the stack pointer ++-- pointing to the top of the stack during signal handling. As we can't force ++-- every signal to use its own stack, we have to ensure that the stack pointer ++-- always points to the top of the stack, and we can't use it for computation. ++-- ++-- So, we reserve one register (TMP) for this purpose (and other, unrelated ++-- intermediate operations.) See Note [The made-up RISCV64 TMP (IP) register] ++ ++-- | Generate instructions to spill a register into a spill slot. ++mkSpillInstr :: ++ (HasCallStack) => ++ NCGConfig -> ++ -- | register to spill ++ Reg -> ++ -- | current stack delta ++ Int -> ++ -- | spill slot to use ++ Int -> ++ [Instr] ++mkSpillInstr _config reg delta slot = ++ case off - delta of ++ imm | fitsIn12bitImm imm -> [mkStrSpImm imm] ++ imm -> ++ [ movImmToTmp imm, ++ addSpToTmp, ++ mkStrTmp ++ ] ++ where ++ fmt = case reg of ++ RegReal (RealRegSingle n) | n < d0RegNo -> II64 ++ _ -> FF64 ++ mkStrSpImm imm = ++ ANN (text "Spill@" <> int (off - delta)) ++ $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm))) ++ movImmToTmp imm = ++ ANN (text "Spill: TMP <- " <> int imm) ++ $ MOV tmp (OpImm (ImmInt imm)) ++ addSpToTmp = ++ ANN (text "Spill: TMP <- SP + TMP ") ++ $ ADD tmp tmp sp ++ mkStrTmp = ++ ANN (text "Spill@" <> int (off - delta)) ++ $ STR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg)) ++ ++ off = spillSlotToOffset slot ++ ++-- | Generate instructions to load a register from a spill slot. ++mkLoadInstr :: ++ NCGConfig -> ++ -- | register to load ++ Reg -> ++ -- | current stack delta ++ Int -> ++ -- | spill slot to use ++ Int -> ++ [Instr] ++mkLoadInstr _config reg delta slot = ++ case off - delta of ++ imm | fitsIn12bitImm imm -> [mkLdrSpImm imm] ++ imm -> ++ [ movImmToTmp imm, ++ addSpToTmp, ++ mkLdrTmp ++ ] ++ where ++ fmt = case reg of ++ RegReal (RealRegSingle n) | n < d0RegNo -> II64 ++ _ -> FF64 ++ mkLdrSpImm imm = ++ ANN (text "Reload@" <> int (off - delta)) ++ $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm))) ++ movImmToTmp imm = ++ ANN (text "Reload: TMP <- " <> int imm) ++ $ MOV tmp (OpImm (ImmInt imm)) ++ addSpToTmp = ++ ANN (text "Reload: TMP <- SP + TMP ") ++ $ ADD tmp tmp sp ++ mkLdrTmp = ++ ANN (text "Reload@" <> int (off - delta)) ++ $ LDR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg)) ++ ++ off = spillSlotToOffset slot ++ ++-- | See if this instruction is telling us the current C stack delta ++takeDeltaInstr :: Instr -> Maybe Int ++takeDeltaInstr (ANN _ i) = takeDeltaInstr i ++takeDeltaInstr (DELTA i) = Just i ++takeDeltaInstr _ = Nothing ++ ++-- | Not real instructions. Just meta data ++isMetaInstr :: Instr -> Bool ++isMetaInstr instr = ++ case instr of ++ ANN _ i -> isMetaInstr i ++ COMMENT {} -> True ++ MULTILINE_COMMENT {} -> True ++ LOCATION {} -> True ++ LDATA {} -> True ++ NEWBLOCK {} -> True ++ DELTA {} -> True ++ PUSH_STACK_FRAME -> True ++ POP_STACK_FRAME -> True ++ _ -> False ++ ++-- | Copy the value in a register to another one. ++-- ++-- Must work for all register classes. ++mkRegRegMoveInstr :: Reg -> Reg -> Instr ++mkRegRegMoveInstr src dst = ANN desc instr ++ where ++ desc = text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst ++ instr = MOV (operandFromReg dst) (operandFromReg src) ++ ++-- | Take the source and destination from this (potential) reg -> reg move instruction ++-- ++-- We have to be a bit careful here: A `MOV` can also mean an implicit ++-- conversion. This case is filtered out. ++takeRegRegMoveInstr :: Instr -> Maybe (Reg, Reg) ++takeRegRegMoveInstr (MOV (OpReg width dst) (OpReg width' src)) ++ | width == width' && (isFloatReg dst == isFloatReg src) = pure (src, dst) ++takeRegRegMoveInstr _ = Nothing ++ ++-- | Make an unconditional jump instruction. ++mkJumpInstr :: BlockId -> [Instr] ++mkJumpInstr = pure . B . TBlock ++ ++-- | Decrement @sp@ to allocate stack space. ++-- ++-- The stack grows downwards, so we decrement the stack pointer by @n@ (bytes). ++-- This is dual to `mkStackDeallocInstr`. @sp@ is the RISCV stack pointer, not ++-- to be confused with the STG stack pointer. ++mkStackAllocInstr :: Platform -> Int -> [Instr] ++mkStackAllocInstr _platform = moveSp . negate ++ ++-- | Increment SP to deallocate stack space. ++-- ++-- The stack grows downwards, so we increment the stack pointer by @n@ (bytes). ++-- This is dual to `mkStackAllocInstr`. @sp@ is the RISCV stack pointer, not to ++-- be confused with the STG stack pointer. ++mkStackDeallocInstr :: Platform -> Int -> [Instr] ++mkStackDeallocInstr _platform = moveSp ++ ++moveSp :: Int -> [Instr] ++moveSp n ++ | n == 0 = [] ++ | n /= 0 && fitsIn12bitImm n = pure . ANN desc $ ADD sp sp (OpImm (ImmInt n)) ++ | otherwise = ++ -- This ends up in three effective instructions. We could get away with ++ -- two for intMax12bit < n < 3 * intMax12bit by recursing once. However, ++ -- this way is likely less surprising. ++ [ ANN desc (MOV tmp (OpImm (ImmInt n))), ++ ADD sp sp tmp ++ ] ++ where ++ desc = text "Move SP:" <+> int n ++ ++-- ++-- See Note [extra spill slots] in X86/Instr.hs ++-- ++allocMoreStack :: ++ Platform -> ++ Int -> ++ NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr -> ++ UniqSM (NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr, [(BlockId, BlockId)]) ++allocMoreStack _ _ top@(CmmData _ _) = return (top, []) ++allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do ++ let entries = entryBlocks proc ++ ++ uniqs <- getUniquesM ++ ++ let delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up ++ where ++ x = slots * spillSlotSize -- sp delta ++ alloc = mkStackAllocInstr platform delta ++ dealloc = mkStackDeallocInstr platform delta ++ ++ retargetList = zip entries (map mkBlockId uniqs) ++ ++ new_blockmap :: LabelMap BlockId ++ new_blockmap = mapFromList retargetList ++ ++ insert_stack_insn (BasicBlock id insns) ++ | Just new_blockid <- mapLookup id new_blockmap = ++ [ BasicBlock id $ alloc ++ [B (TBlock new_blockid)], ++ BasicBlock new_blockid block' ++ ] ++ | otherwise = ++ [BasicBlock id block'] ++ where ++ block' = foldr insert_dealloc [] insns ++ ++ insert_dealloc insn r = case insn of ++ J_TBL {} -> dealloc ++ (insn : r) ++ ANN _ e -> insert_dealloc e r ++ _other ++ | jumpDestsOfInstr insn /= [] -> ++ patchJumpInstr insn retarget : r ++ _other -> insn : r ++ where ++ retarget b = fromMaybe b (mapLookup b new_blockmap) ++ ++ new_code = concatMap insert_stack_insn code ++ return (CmmProc info lbl live (ListGraph new_code), retargetList) ++ ++data Instr ++ = -- | Comment pseudo-op ++ COMMENT SDoc ++ | -- | Multi-line comment pseudo-op ++ MULTILINE_COMMENT SDoc ++ | -- | Annotated instruction. Should print # ++ ANN SDoc Instr ++ | -- | Location pseudo-op @.loc@ (file, line, col, name) ++ LOCATION Int Int Int LexicalFastString ++ | -- | Static data spat out during code generation. ++ LDATA Section RawCmmStatics ++ | -- | Start a new basic block. ++ -- ++ -- Useful during codegen, removed later. Preceding instruction should be a ++ -- jump, as per the invariants for a BasicBlock (see Cmm). ++ NEWBLOCK BlockId ++ | -- | Specify current stack offset for benefit of subsequent passes ++ DELTA Int ++ | -- | Push a minimal stack frame consisting of the return address (RA) and the frame pointer (FP). ++ PUSH_STACK_FRAME ++ | -- | Pop the minimal stack frame of prior `PUSH_STACK_FRAME`. ++ POP_STACK_FRAME ++ | -- | Arithmetic addition (both integer and floating point) ++ -- ++ -- @rd = rs1 + rs2@ ++ ADD Operand Operand Operand ++ | -- | Arithmetic subtraction (both integer and floating point) ++ -- ++ -- @rd = rs1 - rs2@ ++ SUB Operand Operand Operand ++ | -- | Logical AND (integer only) ++ -- ++ -- @rd = rs1 & rs2@ ++ AND Operand Operand Operand ++ | -- | Logical OR (integer only) ++ -- ++ -- @rd = rs1 | rs2@ ++ OR Operand Operand Operand ++ | -- | Logical left shift (zero extened, integer only) ++ -- ++ -- @rd = rs1 << rs2@ ++ SLL Operand Operand Operand ++ | -- | Logical right shift (zero extened, integer only) ++ -- ++ -- @rd = rs1 >> rs2@ ++ SRL Operand Operand Operand ++ | -- | Arithmetic right shift (sign-extened, integer only) ++ -- ++ -- @rd = rs1 >> rs2@ ++ SRA Operand Operand Operand ++ | -- | Store to memory (both, integer and floating point) ++ STR Format Operand Operand ++ | -- | Load from memory (sign-extended, integer and floating point) ++ LDR Format Operand Operand ++ | -- | Load from memory (unsigned, integer and floating point) ++ LDRU Format Operand Operand ++ | -- | Arithmetic multiplication (both, integer and floating point) ++ -- ++ -- @rd = rn × rm@ ++ MUL Operand Operand Operand ++ | -- | Negation (both, integer and floating point) ++ -- ++ -- @rd = -op2@ ++ NEG Operand Operand ++ | -- | Division (both, integer and floating point) ++ -- ++ -- @rd = rn ÷ rm@ ++ DIV Operand Operand Operand ++ | -- | Remainder (integer only, signed) ++ -- ++ -- @rd = rn % rm@ ++ REM Operand Operand Operand -- ++ | -- | Remainder (integer only, unsigned) ++ -- ++ -- @rd = |rn % rm|@ ++ REMU Operand Operand Operand ++ | -- | High part of a multiplication that doesn't fit into 64bits (integer only) ++ -- ++ -- E.g. for a multiplication with 64bits width: @rd = (rs1 * rs2) >> 64@. ++ MULH Operand Operand Operand ++ | -- | Unsigned division (integer only) ++ -- ++ -- @rd = |rn ÷ rm|@ ++ DIVU Operand Operand Operand ++ | -- | XOR (integer only) ++ -- ++ -- @rd = rn ⊕ op2@ ++ XOR Operand Operand Operand ++ | -- | ORI with immediate (integer only) ++ -- ++ -- @rd = rn | op2@ ++ ORI Operand Operand Operand ++ | -- | OR with immediate (integer only) ++ -- ++ -- @rd = rn ⊕ op2@ ++ XORI Operand Operand Operand ++ | -- | Move to register (integer and floating point) ++ -- ++ -- @rd = rn@ or @rd = #imm@ ++ MOV Operand Operand ++ | -- | Pseudo-op for conditional setting of a register. ++ -- ++ -- @if(o2 cond o3) op <- 1 else op <- 0@ ++ CSET Operand Operand Operand Cond ++ | -- | A jump instruction with data for switch/jump tables ++ J_TBL [Maybe BlockId] (Maybe CLabel) Reg ++ | -- | Unconditional jump (no linking) ++ B Target ++ | -- | Unconditional jump, links return address (sets @ra@/@x1@) ++ BL Reg [Reg] ++ | -- | branch with condition (integer only) ++ BCOND Cond Operand Operand Target ++ | -- | Fence instruction ++ -- ++ -- Memory barrier. ++ FENCE FenceType FenceType ++ | -- | Floating point conversion ++ FCVT FcvtVariant Operand Operand ++ | -- | Floating point ABSolute value ++ FABS Operand Operand ++ | -- | Floating-point fused multiply-add instructions ++ -- ++ -- - fmadd : d = r1 * r2 + r3 ++ -- - fnmsub: d = r1 * r2 - r3 ++ -- - fmsub : d = - r1 * r2 + r3 ++ -- - fnmadd: d = - r1 * r2 - r3 ++ FMA FMASign Operand Operand Operand Operand ++ ++-- | Operand of a FENCE instruction (@r@, @w@ or @rw@) ++data FenceType = FenceRead | FenceWrite | FenceReadWrite ++ ++-- | Variant of a floating point conversion instruction ++data FcvtVariant = FloatToFloat | IntToFloat | FloatToInt ++ ++instrCon :: Instr -> String ++instrCon i = ++ case i of ++ COMMENT {} -> "COMMENT" ++ MULTILINE_COMMENT {} -> "COMMENT" ++ ANN {} -> "ANN" ++ LOCATION {} -> "LOCATION" ++ LDATA {} -> "LDATA" ++ NEWBLOCK {} -> "NEWBLOCK" ++ DELTA {} -> "DELTA" ++ PUSH_STACK_FRAME {} -> "PUSH_STACK_FRAME" ++ POP_STACK_FRAME {} -> "POP_STACK_FRAME" ++ ADD {} -> "ADD" ++ OR {} -> "OR" ++ MUL {} -> "MUL" ++ NEG {} -> "NEG" ++ DIV {} -> "DIV" ++ REM {} -> "REM" ++ REMU {} -> "REMU" ++ MULH {} -> "MULH" ++ SUB {} -> "SUB" ++ DIVU {} -> "DIVU" ++ AND {} -> "AND" ++ SRA {} -> "SRA" ++ XOR {} -> "XOR" ++ SLL {} -> "SLL" ++ SRL {} -> "SRL" ++ MOV {} -> "MOV" ++ ORI {} -> "ORI" ++ XORI {} -> "ORI" ++ STR {} -> "STR" ++ LDR {} -> "LDR" ++ LDRU {} -> "LDRU" ++ CSET {} -> "CSET" ++ J_TBL {} -> "J_TBL" ++ B {} -> "B" ++ BL {} -> "BL" ++ BCOND {} -> "BCOND" ++ FENCE {} -> "FENCE" ++ FCVT {} -> "FCVT" ++ FABS {} -> "FABS" ++ FMA variant _ _ _ _ -> ++ case variant of ++ FMAdd -> "FMADD" ++ FMSub -> "FMSUB" ++ FNMAdd -> "FNMADD" ++ FNMSub -> "FNMSUB" ++ ++data Target ++ = TBlock BlockId ++ | TReg Reg ++ ++data Operand ++ = -- | register ++ OpReg Width Reg ++ | -- | immediate value ++ OpImm Imm ++ | -- | memory reference ++ OpAddr AddrMode ++ deriving (Eq, Show) ++ ++operandFromReg :: Reg -> Operand ++operandFromReg = OpReg W64 ++ ++operandFromRegNo :: RegNo -> Operand ++operandFromRegNo = operandFromReg . regSingle ++ ++zero, ra, sp, gp, tp, fp, tmp :: Operand ++zero = operandFromReg zeroReg ++ra = operandFromReg raReg ++sp = operandFromReg spMachReg ++gp = operandFromRegNo 3 ++tp = operandFromRegNo 4 ++fp = operandFromRegNo 8 ++tmp = operandFromReg tmpReg ++ ++x0, x1, x2, x3, x4, x5, x6, x7 :: Operand ++x8, x9, x10, x11, x12, x13, x14, x15 :: Operand ++x16, x17, x18, x19, x20, x21, x22, x23 :: Operand ++x24, x25, x26, x27, x28, x29, x30, x31 :: Operand ++x0 = operandFromRegNo x0RegNo ++x1 = operandFromRegNo 1 ++x2 = operandFromRegNo 2 ++x3 = operandFromRegNo 3 ++x4 = operandFromRegNo 4 ++x5 = operandFromRegNo x5RegNo ++x6 = operandFromRegNo 6 ++x7 = operandFromRegNo x7RegNo ++ ++x8 = operandFromRegNo 8 ++ ++x9 = operandFromRegNo 9 ++ ++x10 = operandFromRegNo x10RegNo ++ ++x11 = operandFromRegNo 11 ++ ++x12 = operandFromRegNo 12 ++ ++x13 = operandFromRegNo 13 ++ ++x14 = operandFromRegNo 14 ++ ++x15 = operandFromRegNo 15 ++ ++x16 = operandFromRegNo 16 ++ ++x17 = operandFromRegNo x17RegNo ++ ++x18 = operandFromRegNo 18 ++ ++x19 = operandFromRegNo 19 ++ ++x20 = operandFromRegNo 20 ++ ++x21 = operandFromRegNo 21 ++ ++x22 = operandFromRegNo 22 ++ ++x23 = operandFromRegNo 23 ++ ++x24 = operandFromRegNo 24 ++ ++x25 = operandFromRegNo 25 ++ ++x26 = operandFromRegNo 26 ++ ++x27 = operandFromRegNo 27 ++ ++x28 = operandFromRegNo x28RegNo ++ ++x29 = operandFromRegNo 29 ++ ++x30 = operandFromRegNo 30 ++ ++x31 = operandFromRegNo x31RegNo ++ ++d0, d1, d2, d3, d4, d5, d6, d7 :: Operand ++d8, d9, d10, d11, d12, d13, d14, d15 :: Operand ++d16, d17, d18, d19, d20, d21, d22, d23 :: Operand ++d24, d25, d26, d27, d28, d29, d30, d31 :: Operand ++d0 = operandFromRegNo d0RegNo ++d1 = operandFromRegNo 33 ++d2 = operandFromRegNo 34 ++d3 = operandFromRegNo 35 ++d4 = operandFromRegNo 36 ++d5 = operandFromRegNo 37 ++d6 = operandFromRegNo 38 ++d7 = operandFromRegNo d7RegNo ++ ++d8 = operandFromRegNo 40 ++ ++d9 = operandFromRegNo 41 ++ ++d10 = operandFromRegNo d10RegNo ++ ++d11 = operandFromRegNo 43 ++ ++d12 = operandFromRegNo 44 ++ ++d13 = operandFromRegNo 45 ++ ++d14 = operandFromRegNo 46 ++ ++d15 = operandFromRegNo 47 ++ ++d16 = operandFromRegNo 48 ++ ++d17 = operandFromRegNo d17RegNo ++ ++d18 = operandFromRegNo 50 ++ ++d19 = operandFromRegNo 51 ++ ++d20 = operandFromRegNo 52 ++ ++d21 = operandFromRegNo 53 ++ ++d22 = operandFromRegNo 54 ++ ++d23 = operandFromRegNo 55 ++ ++d24 = operandFromRegNo 56 ++ ++d25 = operandFromRegNo 57 ++ ++d26 = operandFromRegNo 58 ++ ++d27 = operandFromRegNo 59 ++ ++d28 = operandFromRegNo 60 ++ ++d29 = operandFromRegNo 61 ++ ++d30 = operandFromRegNo 62 ++ ++d31 = operandFromRegNo d31RegNo ++ ++fitsIn12bitImm :: (Num a, Ord a) => a -> Bool ++fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit ++ ++intMin12bit :: (Num a) => a ++intMin12bit = -2048 ++ ++intMax12bit :: (Num a) => a ++intMax12bit = 2047 ++ ++fitsIn32bits :: (Num a, Ord a, Bits a) => a -> Bool ++fitsIn32bits i = (-1 `shiftL` 31) <= i && i <= (1 `shiftL` 31 - 1) ++ ++isNbitEncodeable :: Int -> Integer -> Bool ++isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift) ++ ++isEncodeableInWidth :: Width -> Integer -> Bool ++isEncodeableInWidth = isNbitEncodeable . widthInBits ++ ++isIntOp :: Operand -> Bool ++isIntOp = not . isFloatOp ++ ++isFloatOp :: Operand -> Bool ++isFloatOp (OpReg _ reg) | isFloatReg reg = True ++isFloatOp _ = False ++ ++isFloatReg :: Reg -> Bool ++isFloatReg (RegReal (RealRegSingle i)) | i > 31 = True ++isFloatReg (RegVirtual (VirtualRegF _)) = True ++isFloatReg (RegVirtual (VirtualRegD _)) = True ++isFloatReg _ = False +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Ppr.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Ppr.hs +@@ -0,0 +1,715 @@ ++{-# LANGUAGE ScopedTypeVariables #-} ++ ++module GHC.CmmToAsm.RV64.Ppr (pprNatCmmDecl, pprInstr) where ++ ++import GHC.Cmm hiding (topInfoTable) ++import GHC.Cmm.BlockId ++import GHC.Cmm.CLabel ++import GHC.Cmm.Dataflow.Label ++import GHC.CmmToAsm.Config ++import GHC.CmmToAsm.Format ++import GHC.CmmToAsm.Ppr ++import GHC.CmmToAsm.RV64.Cond ++import GHC.CmmToAsm.RV64.Instr ++import GHC.CmmToAsm.RV64.Regs ++import GHC.CmmToAsm.Types ++import GHC.CmmToAsm.Utils ++import GHC.Platform ++import GHC.Platform.Reg ++import GHC.Prelude hiding (EQ) ++import GHC.Types.Basic (Alignment, alignmentBytes, mkAlignment) ++import GHC.Types.Unique (getUnique, pprUniqueAlways) ++import GHC.Utils.Outputable ++import GHC.Utils.Panic ++ ++pprNatCmmDecl :: forall doc. (IsDoc doc) => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc ++pprNatCmmDecl config (CmmData section dats) = ++ pprSectionAlign config section $$ pprDatas config dats ++pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) = ++ let platform = ncgPlatform config ++ ++ pprProcAlignment :: doc ++ pprProcAlignment = maybe empty (pprAlign . mkAlignment) (ncgProcAlignment config) ++ in pprProcAlignment ++ $$ case topInfoTable proc of ++ Nothing -> ++ -- special case for code without info table: ++ pprSectionAlign config (Section Text lbl) ++ $$ ++ -- do not ++ -- pprProcAlignment config $$ ++ pprLabel platform lbl ++ $$ vcat (map (pprBasicBlock config top_info) blocks) -- blocks guaranteed not null, so label needed ++ $$ ppWhen ++ (ncgDwarfEnabled config) ++ (line (pprBlockEndLabel platform lbl) $$ line (pprProcEndLabel platform lbl)) ++ $$ pprSizeDecl platform lbl ++ Just (CmmStaticsRaw info_lbl _) -> ++ pprSectionAlign config (Section Text info_lbl) ++ $$ ++ -- pprProcAlignment config $$ ++ ( if platformHasSubsectionsViaSymbols platform ++ then line (pprAsmLabel platform (mkDeadStripPreventer info_lbl) <> char ':') ++ else empty ++ ) ++ $$ vcat (map (pprBasicBlock config top_info) blocks) ++ $$ ppWhen (ncgDwarfEnabled config) (line (pprProcEndLabel platform info_lbl)) ++ $$ ++ -- above: Even the first block gets a label, because with branch-chain ++ -- elimination, it might be the target of a goto. ++ ( if platformHasSubsectionsViaSymbols platform ++ then -- See Note [Subsections Via Symbols] ++ ++ line ++ $ text "\t.long " ++ <+> pprAsmLabel platform info_lbl ++ <+> char '-' ++ <+> pprAsmLabel platform (mkDeadStripPreventer info_lbl) ++ else empty ++ ) ++ $$ pprSizeDecl platform info_lbl ++{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> SDoc #-} ++{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> HDoc #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable ++ ++pprLabel :: (IsDoc doc) => Platform -> CLabel -> doc ++pprLabel platform lbl = ++ pprGloblDecl platform lbl ++ $$ pprTypeDecl platform lbl ++ $$ line (pprAsmLabel platform lbl <> char ':') ++ ++pprAlign :: (IsDoc doc) => Alignment -> doc ++pprAlign alignment = ++ -- "The .align directive for RISC-V is an alias to .p2align, which aligns to a ++ -- power of two, so .align 2 means align to 4 bytes. Because the definition of ++ -- the .align directive varies by architecture, it is recommended to use the ++ -- unambiguous .p2align or .balign directives instead." ++ -- (https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/riscv-asm.md#-align) ++ line $ text "\t.balign " <> int (alignmentBytes alignment) ++ ++-- | Print appropriate alignment for the given section type. ++-- ++-- Currently, this always aligns to a full machine word (8 byte.) A future ++-- improvement could be to really do this per section type (though, it's ++-- probably not a big gain.) ++pprAlignForSection :: (IsDoc doc) => SectionType -> doc ++pprAlignForSection _seg = pprAlign . mkAlignment $ 8 ++ ++-- | Print section header and appropriate alignment for that section. ++-- ++-- This will e.g. emit a header like: ++-- ++-- .section .text ++-- .balign 8 ++pprSectionAlign :: (IsDoc doc) => NCGConfig -> Section -> doc ++pprSectionAlign _config (Section (OtherSection _) _) = ++ panic "RV64.Ppr.pprSectionAlign: unknown section" ++pprSectionAlign config sec@(Section seg _) = ++ line (pprSectionHeader config sec) ++ $$ pprAlignForSection seg ++ ++pprProcEndLabel :: ++ (IsLine doc) => ++ Platform -> ++ -- | Procedure name ++ CLabel -> ++ doc ++pprProcEndLabel platform lbl = ++ pprAsmLabel platform (mkAsmTempProcEndLabel lbl) <> colon ++ ++pprBlockEndLabel :: ++ (IsLine doc) => ++ Platform -> ++ -- | Block name ++ CLabel -> ++ doc ++pprBlockEndLabel platform lbl = ++ pprAsmLabel platform (mkAsmTempEndLabel lbl) <> colon ++ ++-- | Output the ELF .size directive (if needed.) ++pprSizeDecl :: (IsDoc doc) => Platform -> CLabel -> doc ++pprSizeDecl platform lbl ++ | osElfTarget (platformOS platform) = ++ line $ text "\t.size" <+> asmLbl <> text ", .-" <> asmLbl ++ where ++ asmLbl = pprAsmLabel platform lbl ++pprSizeDecl _ _ = empty ++ ++pprBasicBlock :: ++ (IsDoc doc) => ++ NCGConfig -> ++ LabelMap RawCmmStatics -> ++ NatBasicBlock Instr -> ++ doc ++pprBasicBlock config info_env (BasicBlock blockid instrs) = ++ maybe_infotable ++ $ pprLabel platform asmLbl ++ $$ vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} optInstrs)) ++ $$ ppWhen ++ (ncgDwarfEnabled config) ++ ( -- Emit both end labels since this may end up being a standalone ++ -- top-level block ++ line ++ ( pprBlockEndLabel platform asmLbl ++ <> pprProcEndLabel platform asmLbl ++ ) ++ ) ++ where ++ -- TODO: Check if we can filter more instructions here. ++ -- TODO: Shouldn't this be a more general check on a higher level? ++ -- Filter out identity moves. E.g. mov x18, x18 will be dropped. ++ optInstrs = filter f instrs ++ where ++ f (MOV o1 o2) | o1 == o2 = False ++ f _ = True ++ ++ asmLbl = blockLbl blockid ++ platform = ncgPlatform config ++ maybe_infotable c = case mapLookup blockid info_env of ++ Nothing -> c ++ Just (CmmStaticsRaw info_lbl info) -> ++ -- pprAlignForSection platform Text $$ ++ infoTableLoc ++ $$ vcat (map (pprData config) info) ++ $$ pprLabel platform info_lbl ++ $$ c ++ $$ ppWhen ++ (ncgDwarfEnabled config) ++ (line (pprBlockEndLabel platform info_lbl)) ++ -- Make sure the info table has the right .loc for the block ++ -- coming right after it. See Note [Info Offset] ++ infoTableLoc = case instrs of ++ (l@LOCATION {} : _) -> pprInstr platform l ++ _other -> empty ++ ++pprDatas :: (IsDoc doc) => NCGConfig -> RawCmmStatics -> doc ++-- See Note [emit-time elimination of static indirections] in "GHC.Cmm.CLabel". ++pprDatas config (CmmStaticsRaw alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) ++ | lbl == mkIndStaticInfoLabel, ++ let labelInd (CmmLabelOff l _) = Just l ++ labelInd (CmmLabel l) = Just l ++ labelInd _ = Nothing, ++ Just ind' <- labelInd ind, ++ alias `mayRedirectTo` ind' = ++ pprGloblDecl (ncgPlatform config) alias ++ $$ line (text ".equiv" <+> pprAsmLabel (ncgPlatform config) alias <> comma <> pprAsmLabel (ncgPlatform config) ind') ++pprDatas config (CmmStaticsRaw lbl dats) = ++ vcat (pprLabel platform lbl : map (pprData config) dats) ++ where ++ platform = ncgPlatform config ++ ++pprData :: (IsDoc doc) => NCGConfig -> CmmStatic -> doc ++pprData _config (CmmString str) = line (pprString str) ++pprData _config (CmmFileEmbed path _) = line (pprFileEmbed path) ++-- TODO: AFAIK there no Darwin for RISCV, so we may consider to simplify this. ++pprData config (CmmUninitialised bytes) = ++ line ++ $ let platform = ncgPlatform config ++ in if platformOS platform == OSDarwin ++ then text ".space " <> int bytes ++ else text ".skip " <> int bytes ++pprData config (CmmStaticLit lit) = pprDataItem config lit ++ ++pprGloblDecl :: (IsDoc doc) => Platform -> CLabel -> doc ++pprGloblDecl platform lbl ++ | not (externallyVisibleCLabel lbl) = empty ++ | otherwise = line (text "\t.globl " <> pprAsmLabel platform lbl) ++ ++-- Note [Always use objects for info tables] ++-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++-- See discussion in X86.Ppr for why this is necessary. Essentially we need to ++-- ensure that we never pass function symbols when we might want to lookup the ++-- info table. If we did, we could end up with procedure linking tables ++-- (PLT)s, and thus the lookup wouldn't point to the function, but into the ++-- jump table. ++-- ++-- Fun fact: The LLVMMangler exists to patch this issue on the LLVM side as ++-- well. ++pprLabelType' :: (IsLine doc) => Platform -> CLabel -> doc ++pprLabelType' platform lbl = ++ if isCFunctionLabel lbl || functionOkInfoTable ++ then text "@function" ++ else text "@object" ++ where ++ functionOkInfoTable = ++ platformTablesNextToCode platform ++ && isInfoTableLabel lbl ++ && not (isCmmInfoTableLabel lbl) ++ && not (isConInfoTableLabel lbl) ++ ++-- this is called pprTypeAndSizeDecl in PPC.Ppr ++pprTypeDecl :: (IsDoc doc) => Platform -> CLabel -> doc ++pprTypeDecl platform lbl = ++ if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl ++ then line (text ".type " <> pprAsmLabel platform lbl <> text ", " <> pprLabelType' platform lbl) ++ else empty ++ ++pprDataItem :: (IsDoc doc) => NCGConfig -> CmmLit -> doc ++pprDataItem config lit = ++ lines_ (ppr_item (cmmTypeFormat $ cmmLitType platform lit) lit) ++ where ++ platform = ncgPlatform config ++ ++ imm = litToImm lit ++ ++ ppr_item II8 _ = [text "\t.byte\t" <> pprDataImm platform imm] ++ ppr_item II16 _ = [text "\t.short\t" <> pprDataImm platform imm] ++ ppr_item II32 _ = [text "\t.long\t" <> pprDataImm platform imm] ++ ppr_item II64 _ = [text "\t.quad\t" <> pprDataImm platform imm] ++ ppr_item FF32 (CmmFloat r _) = ++ let bs = floatToBytes (fromRational r) ++ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs ++ ppr_item FF64 (CmmFloat r _) = ++ let bs = doubleToBytes (fromRational r) ++ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs ++ ppr_item _ _ = pprPanic "pprDataItem:ppr_item" (text $ show lit) ++ ++-- | Pretty print an immediate value in the @data@ section ++-- ++-- This does not include any checks. We rely on the Assembler to check for ++-- errors. Use `pprOpImm` for immediates in instructions (operands.) ++pprDataImm :: (IsLine doc) => Platform -> Imm -> doc ++pprDataImm _ (ImmInt i) = int i ++pprDataImm _ (ImmInteger i) = integer i ++pprDataImm p (ImmCLbl l) = pprAsmLabel p l ++pprDataImm p (ImmIndex l i) = pprAsmLabel p l <> char '+' <> int i ++pprDataImm _ (ImmLit s) = ftext s ++pprDataImm _ (ImmFloat f) = float (fromRational f) ++pprDataImm _ (ImmDouble d) = double (fromRational d) ++pprDataImm p (ImmConstantSum a b) = pprDataImm p a <> char '+' <> pprDataImm p b ++pprDataImm p (ImmConstantDiff a b) = ++ pprDataImm p a ++ <> char '-' ++ <> lparen ++ <> pprDataImm p b ++ <> rparen ++ ++-- | Comment @c@ with @# c@ ++asmComment :: SDoc -> SDoc ++asmComment c = text "#" <+> c ++ ++-- | Commen @c@ with @// c@ ++asmDoubleslashComment :: SDoc -> SDoc ++asmDoubleslashComment c = text "//" <+> c ++ ++-- | Comment @c@ with @/* c */@ (multiline comment) ++asmMultilineComment :: SDoc -> SDoc ++asmMultilineComment c = text "/*" $+$ c $+$ text "*/" ++ ++-- | Pretty print an immediate operand of an instruction ++-- ++-- The kinds of immediates we can use here is pretty limited: RISCV doesn't ++-- support index expressions (as e.g. Aarch64 does.) Floating points need to ++-- fit in range. As we don't need them, forbit them to save us from future ++-- troubles. ++pprOpImm :: (IsLine doc) => Platform -> Imm -> doc ++pprOpImm platform im = case im of ++ ImmInt i -> int i ++ ImmInteger i -> integer i ++ ImmCLbl l -> char '=' <> pprAsmLabel platform l ++ _ -> pprPanic "RV64.Ppr.pprOpImm" (text "Unsupported immediate for instruction operands" <> colon <+> (text . show) im) ++ ++-- | Negate integer immediate operand ++-- ++-- This function is partial and will panic if the operand is not an integer. ++negOp :: Operand -> Operand ++negOp (OpImm (ImmInt i)) = OpImm (ImmInt (negate i)) ++negOp (OpImm (ImmInteger i)) = OpImm (ImmInteger (negate i)) ++negOp op = pprPanic "RV64.negOp" (text $ show op) ++ ++-- | Pretty print an operand ++pprOp :: (IsLine doc) => Platform -> Operand -> doc ++pprOp plat op = case op of ++ OpReg w r -> pprReg w r ++ OpImm im -> pprOpImm plat im ++ OpAddr (AddrRegImm r1 im) -> pprOpImm plat im <> char '(' <> pprReg W64 r1 <> char ')' ++ OpAddr (AddrReg r1) -> text "0(" <+> pprReg W64 r1 <+> char ')' ++ ++-- | Pretty print register with calling convention name ++-- ++-- This representation makes it easier to reason about the emitted assembly ++-- code. ++pprReg :: forall doc. (IsLine doc) => Width -> Reg -> doc ++pprReg w r = case r of ++ RegReal (RealRegSingle i) -> ppr_reg_no i ++ -- virtual regs should not show up, but this is helpful for debugging. ++ RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u ++ RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u ++ RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u ++ _ -> pprPanic "RiscV64.pprReg" (text (show r) <+> ppr w) ++ where ++ ppr_reg_no :: Int -> doc ++ -- General Purpose Registers ++ ppr_reg_no 0 = text "zero" ++ ppr_reg_no 1 = text "ra" ++ ppr_reg_no 2 = text "sp" ++ ppr_reg_no 3 = text "gp" ++ ppr_reg_no 4 = text "tp" ++ ppr_reg_no 5 = text "t0" ++ ppr_reg_no 6 = text "t1" ++ ppr_reg_no 7 = text "t2" ++ ppr_reg_no 8 = text "s0" ++ ppr_reg_no 9 = text "s1" ++ ppr_reg_no 10 = text "a0" ++ ppr_reg_no 11 = text "a1" ++ ppr_reg_no 12 = text "a2" ++ ppr_reg_no 13 = text "a3" ++ ppr_reg_no 14 = text "a4" ++ ppr_reg_no 15 = text "a5" ++ ppr_reg_no 16 = text "a6" ++ ppr_reg_no 17 = text "a7" ++ ppr_reg_no 18 = text "s2" ++ ppr_reg_no 19 = text "s3" ++ ppr_reg_no 20 = text "s4" ++ ppr_reg_no 21 = text "s5" ++ ppr_reg_no 22 = text "s6" ++ ppr_reg_no 23 = text "s7" ++ ppr_reg_no 24 = text "s8" ++ ppr_reg_no 25 = text "s9" ++ ppr_reg_no 26 = text "s10" ++ ppr_reg_no 27 = text "s11" ++ ppr_reg_no 28 = text "t3" ++ ppr_reg_no 29 = text "t4" ++ ppr_reg_no 30 = text "t5" ++ ppr_reg_no 31 = text "t6" ++ -- Floating Point Registers ++ ppr_reg_no 32 = text "ft0" ++ ppr_reg_no 33 = text "ft1" ++ ppr_reg_no 34 = text "ft2" ++ ppr_reg_no 35 = text "ft3" ++ ppr_reg_no 36 = text "ft4" ++ ppr_reg_no 37 = text "ft5" ++ ppr_reg_no 38 = text "ft6" ++ ppr_reg_no 39 = text "ft7" ++ ppr_reg_no 40 = text "fs0" ++ ppr_reg_no 41 = text "fs1" ++ ppr_reg_no 42 = text "fa0" ++ ppr_reg_no 43 = text "fa1" ++ ppr_reg_no 44 = text "fa2" ++ ppr_reg_no 45 = text "fa3" ++ ppr_reg_no 46 = text "fa4" ++ ppr_reg_no 47 = text "fa5" ++ ppr_reg_no 48 = text "fa6" ++ ppr_reg_no 49 = text "fa7" ++ ppr_reg_no 50 = text "fs2" ++ ppr_reg_no 51 = text "fs3" ++ ppr_reg_no 52 = text "fs4" ++ ppr_reg_no 53 = text "fs5" ++ ppr_reg_no 54 = text "fs6" ++ ppr_reg_no 55 = text "fs7" ++ ppr_reg_no 56 = text "fs8" ++ ppr_reg_no 57 = text "fs9" ++ ppr_reg_no 58 = text "fs10" ++ ppr_reg_no 59 = text "fs11" ++ ppr_reg_no 60 = text "ft8" ++ ppr_reg_no 61 = text "ft9" ++ ppr_reg_no 62 = text "ft10" ++ ppr_reg_no 63 = text "ft11" ++ ppr_reg_no i ++ | i < 0 = pprPanic "Unexpected register number (min is 0)" (ppr w <+> int i) ++ | i > 63 = pprPanic "Unexpected register number (max is 63)" (ppr w <+> int i) ++ -- no support for widths > W64. ++ | otherwise = pprPanic "Unsupported width in register (max is 64)" (ppr w <+> int i) ++ ++-- | Single precission `Operand` (floating-point) ++isSingleOp :: Operand -> Bool ++isSingleOp (OpReg W32 _) = True ++isSingleOp _ = False ++ ++-- | Double precission `Operand` (floating-point) ++isDoubleOp :: Operand -> Bool ++isDoubleOp (OpReg W64 _) = True ++isDoubleOp _ = False ++ ++-- | `Operand` is an immediate value ++isImmOp :: Operand -> Bool ++isImmOp (OpImm _) = True ++isImmOp _ = False ++ ++-- | `Operand` is an immediate @0@ value ++isImmZero :: Operand -> Bool ++isImmZero (OpImm (ImmFloat 0)) = True ++isImmZero (OpImm (ImmDouble 0)) = True ++isImmZero (OpImm (ImmInt 0)) = True ++isImmZero _ = False ++ ++-- | `Target` represents a label ++isLabel :: Target -> Bool ++isLabel (TBlock _) = True ++isLabel _ = False ++ ++-- | Get the pretty-printed label from a `Target` ++-- ++-- This function is partial and will panic if the `Target` is not a label. ++getLabel :: (IsLine doc) => Platform -> Target -> doc ++getLabel platform (TBlock bid) = pprBlockId platform bid ++ where ++ pprBlockId :: (IsLine doc) => Platform -> BlockId -> doc ++ pprBlockId platform bid = pprAsmLabel platform (mkLocalBlockLabel (getUnique bid)) ++getLabel _platform _other = panic "Cannot turn this into a label" ++ ++-- | Pretty-print an `Instr` ++-- ++-- This function is partial and will panic if the `Instr` is not supported. This ++-- can happen due to invalid operands or unexpected meta instructions. ++pprInstr :: (IsDoc doc) => Platform -> Instr -> doc ++pprInstr platform instr = case instr of ++ -- see Note [dualLine and dualDoc] in GHC.Utils.Outputable ++ COMMENT s -> dualDoc (asmComment s) empty ++ MULTILINE_COMMENT s -> dualDoc (asmMultilineComment s) empty ++ ANN d i -> dualDoc (pprInstr platform i <+> asmDoubleslashComment d) (pprInstr platform i) ++ LOCATION file line' col _name -> ++ line (text "\t.loc" <+> int file <+> int line' <+> int col) ++ DELTA d -> dualDoc (asmComment $ text "\tdelta = " <> int d) empty ++ NEWBLOCK _ -> panic "PprInstr: NEWBLOCK" ++ LDATA _ _ -> panic "pprInstr: LDATA" ++ PUSH_STACK_FRAME -> ++ lines_ ++ [ text "\taddi sp, sp, -16", ++ text "\tsd x1, 8(sp)", -- store RA ++ text "\tsd x8, 0(sp)", -- store FP/s0 ++ text "\taddi x8, sp, 16" ++ ] ++ POP_STACK_FRAME -> ++ lines_ ++ [ text "\tld x8, 0(sp)", -- restore FP/s0 ++ text "\tld x1, 8(sp)", -- restore RA ++ text "\taddi sp, sp, 16" ++ ] ++ ADD o1 o2 o3 ++ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfadd." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 ++ -- This case is used for sign extension: SEXT.W op ++ | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3 -> op3 (text "\taddiw") o1 o2 o3 ++ | otherwise -> op3 (text "\tadd") o1 o2 o3 ++ MUL o1 o2 o3 ++ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfmul." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 ++ | otherwise -> op3 (text "\tmul") o1 o2 o3 ++ MULH o1 o2 o3 -> op3 (text "\tmulh") o1 o2 o3 ++ NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfneg.s") o1 o2 ++ NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfneg.d") o1 o2 ++ NEG o1 o2 -> op2 (text "\tneg") o1 o2 ++ DIV o1 o2 o3 ++ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> ++ -- TODO: This must (likely) be refined regarding width ++ op3 (text "\tfdiv." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 ++ DIV o1 o2 o3 -> op3 (text "\tdiv") o1 o2 o3 ++ REM o1 o2 o3 ++ | isFloatOp o1 || isFloatOp o2 || isFloatOp o3 -> ++ panic "pprInstr - REM not implemented for floats (yet)" ++ REM o1 o2 o3 -> op3 (text "\trem") o1 o2 o3 ++ REMU o1 o2 o3 -> op3 (text "\tremu") o1 o2 o3 ++ SUB o1 o2 o3 ++ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfsub." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 ++ | isImmOp o3 -> op3 (text "\taddi") o1 o2 (negOp o3) ++ | otherwise -> op3 (text "\tsub") o1 o2 o3 ++ DIVU o1 o2 o3 -> op3 (text "\tdivu") o1 o2 o3 ++ AND o1 o2 o3 ++ | isImmOp o3 -> op3 (text "\tandi") o1 o2 o3 ++ | otherwise -> op3 (text "\tand") o1 o2 o3 ++ OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3 ++ SRA o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3 ++ SRA o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3 ++ XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3 ++ SLL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3 ++ SRL o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3 ++ MOV o1 o2 ++ | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs ++ | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs ++ | isFloatOp o1 && isImmZero o2 && isDoubleOp o1 -> op2 (text "\tfcvt.d.w") o1 zero ++ | isFloatOp o1 && isImmZero o2 && isSingleOp o1 -> op2 (text "\tfcvt.s.w") o1 zero ++ | isFloatOp o1 && not (isFloatOp o2) && isSingleOp o1 -> op2 (text "\tfmv.w.x") o1 o2 ++ | isFloatOp o1 && not (isFloatOp o2) && isDoubleOp o1 -> op2 (text "\tfmv.d.x") o1 o2 ++ | not (isFloatOp o1) && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.x.w") o1 o2 ++ | not (isFloatOp o1) && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.x.d") o1 o2 ++ | (OpImm (ImmInteger i)) <- o2, ++ fitsIn12bitImm i -> ++ lines_ [text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2] ++ | (OpImm (ImmInt i)) <- o2, ++ fitsIn12bitImm i -> ++ lines_ [text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2] ++ | (OpImm (ImmInteger i)) <- o2, ++ fitsIn32bits i -> ++ lines_ ++ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")", ++ text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")" ++ ] ++ | (OpImm (ImmInt i)) <- o2, ++ fitsIn32bits i -> ++ lines_ ++ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")", ++ text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")" ++ ] ++ | isImmOp o2 -> ++ -- Surrender! Let the assembler figure out the right expressions with pseudo-op LI. ++ lines_ [text "\tli" <+> pprOp platform o1 <> comma <+> pprOp platform o2] ++ | otherwise -> op3 (text "\taddi") o1 o2 (OpImm (ImmInt 0)) ++ ORI o1 o2 o3 -> op3 (text "\tori") o1 o2 o3 ++ XORI o1 o2 o3 -> op3 (text "\txori") o1 o2 o3 ++ J_TBL _ _ r -> pprInstr platform (B (TReg r)) ++ B l | isLabel l -> line $ text "\tjal" <+> pprOp platform x0 <> comma <+> getLabel platform l ++ B (TReg r) -> line $ text "\tjalr" <+> pprOp platform x0 <> comma <+> pprReg W64 r <> comma <+> text "0" ++ BL r _ -> line $ text "\tjalr" <+> text "x1" <> comma <+> pprReg W64 r <> comma <+> text "0" ++ BCOND c l r t ++ | isLabel t -> ++ line $ text "\t" <> pprBcond c <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t ++ BCOND _ _ _ (TReg _) -> panic "RV64.ppr: No conditional branching to registers!" ++ CSET o l r c -> case c of ++ EQ ++ | isIntOp l && isIntOp r -> ++ lines_ ++ [ subFor l r, ++ text "\tseqz" <+> pprOp platform o <> comma <+> pprOp platform o ++ ] ++ EQ | isFloatOp l && isFloatOp r -> line $ binOp ("\tfeq." ++ floatOpPrecision platform l r) ++ NE ++ | isIntOp l && isIntOp r -> ++ lines_ ++ [ subFor l r, ++ text "\tsnez" <+> pprOp platform o <> comma <+> pprOp platform o ++ ] ++ NE ++ | isFloatOp l && isFloatOp r -> ++ lines_ ++ [ binOp ("\tfeq." ++ floatOpPrecision platform l r), ++ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ++ ] ++ SLT -> lines_ [sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r] ++ SLE -> ++ lines_ ++ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l, ++ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ++ ] ++ SGE -> ++ lines_ ++ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r, ++ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ++ ] ++ SGT -> lines_ [sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l] ++ ULT -> lines_ [sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r] ++ ULE -> ++ lines_ ++ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l, ++ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ++ ] ++ UGE -> ++ lines_ ++ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r, ++ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ++ ] ++ UGT -> lines_ [sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l] ++ FLT | isFloatOp l && isFloatOp r -> line $ binOp ("\tflt." ++ floatOpPrecision platform l r) ++ FLE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfle." ++ floatOpPrecision platform l r) ++ FGT | isFloatOp l && isFloatOp r -> line $ binOp ("\tfgt." ++ floatOpPrecision platform l r) ++ FGE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfge." ++ floatOpPrecision platform l r) ++ x -> pprPanic "RV64.ppr: unhandled CSET conditional" (text (show x) <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l) ++ where ++ subFor l r ++ | (OpImm _) <- r = text "\taddi" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform (negOp r) ++ | (OpImm _) <- l = panic "RV64.ppr: Cannot SUB IMM _" ++ | otherwise = text "\tsub" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r ++ sltFor l r ++ | (OpImm _) <- r = text "\tslti" ++ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLT IMM _" ++ | otherwise = text "\tslt" ++ sltuFor l r ++ | (OpImm _) <- r = text "\tsltui" ++ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLTU IMM _" ++ | otherwise = text "\tsltu" ++ binOp :: (IsLine doc) => String -> doc ++ binOp op = text op <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r ++ STR II8 o1 o2 -> op2 (text "\tsb") o1 o2 ++ STR II16 o1 o2 -> op2 (text "\tsh") o1 o2 ++ STR II32 o1 o2 -> op2 (text "\tsw") o1 o2 ++ STR II64 o1 o2 -> op2 (text "\tsd") o1 o2 ++ STR FF32 o1 o2 -> op2 (text "\tfsw") o1 o2 ++ STR FF64 o1 o2 -> op2 (text "\tfsd") o1 o2 ++ LDR _f o1 (OpImm (ImmIndex lbl off)) -> ++ lines_ ++ [ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl, ++ text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off ++ ] ++ LDR _f o1 (OpImm (ImmCLbl lbl)) -> ++ line $ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl ++ LDR II8 o1 o2 -> op2 (text "\tlb") o1 o2 ++ LDR II16 o1 o2 -> op2 (text "\tlh") o1 o2 ++ LDR II32 o1 o2 -> op2 (text "\tlw") o1 o2 ++ LDR II64 o1 o2 -> op2 (text "\tld") o1 o2 ++ LDR FF32 o1 o2 -> op2 (text "\tflw") o1 o2 ++ LDR FF64 o1 o2 -> op2 (text "\tfld") o1 o2 ++ LDRU II8 o1 o2 -> op2 (text "\tlbu") o1 o2 ++ LDRU II16 o1 o2 -> op2 (text "\tlhu") o1 o2 ++ LDRU II32 o1 o2 -> op2 (text "\tlwu") o1 o2 ++ -- double words (64bit) cannot be sign extended by definition ++ LDRU II64 o1 o2 -> op2 (text "\tld") o1 o2 ++ LDRU FF32 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tflw") o1 o2 ++ LDRU FF32 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tflw") o1 o2 ++ LDRU FF64 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tfld") o1 o2 ++ LDRU FF64 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tfld") o1 o2 ++ LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2) ++ FENCE r w -> line $ text "\tfence" <+> pprFenceType r <> char ',' <+> pprFenceType w ++ FCVT FloatToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2 ++ FCVT FloatToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.s") o1 o2 ++ FCVT FloatToFloat o1 o2 -> ++ pprPanic "RV64.pprInstr - impossible float to float conversion" ++ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2) ++ FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.s.w") o1 o2 ++ FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.l") o1 o2 ++ FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.w") o1 o2 ++ FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.d.l") o1 o2 ++ FCVT IntToFloat o1 o2 -> ++ pprPanic "RV64.pprInstr - impossible integer to float conversion" ++ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2) ++ FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.w.s") o1 o2 ++ FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.w.d") o1 o2 ++ FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.l.s") o1 o2 ++ FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.l.d") o1 o2 ++ FCVT FloatToInt o1 o2 -> ++ pprPanic "RV64.pprInstr - impossible float to integer conversion" ++ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2) ++ FABS o1 o2 | isSingleOp o2 -> op2 (text "\tfabs.s") o1 o2 ++ FABS o1 o2 | isDoubleOp o2 -> op2 (text "\tfabs.d") o1 o2 ++ FMA variant d r1 r2 r3 -> ++ let fma = case variant of ++ FMAdd -> text "\tfmadd" <> dot <> floatPrecission d ++ FMSub -> text "\tfmsub" <> dot <> floatPrecission d ++ FNMAdd -> text "\tfnmadd" <> dot <> floatPrecission d ++ FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d ++ in op4 fma d r1 r2 r3 ++ instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr ++ where ++ op2 op o1 o2 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 ++ op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 ++ op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4 ++ pprFenceType FenceRead = text "r" ++ pprFenceType FenceWrite = text "w" ++ pprFenceType FenceReadWrite = text "rw" ++ floatPrecission o ++ | isSingleOp o = text "s" ++ | isDoubleOp o = text "d" ++ | otherwise = pprPanic "Impossible floating point precission: " (pprOp platform o) ++ ++floatOpPrecision :: Platform -> Operand -> Operand -> String ++floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isSingleOp l && isSingleOp r = "s" -- single precision ++floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isDoubleOp l && isDoubleOp r = "d" -- double precision ++floatOpPrecision p l r = pprPanic "Cannot determine floating point precission" (text "op1" <+> pprOp p l <+> text "op2" <+> pprOp p r) ++ ++-- | Pretty print a conditional branch ++-- ++-- This function is partial and will panic if the conditional is not supported; ++-- i.e. if its floating point related. ++pprBcond :: (IsLine doc) => Cond -> doc ++pprBcond c = text "b" <> pprCond c ++ where ++ pprCond :: (IsLine doc) => Cond -> doc ++ pprCond c = case c of ++ EQ -> text "eq" ++ NE -> text "ne" ++ SLT -> text "lt" ++ SLE -> text "le" ++ SGE -> text "ge" ++ SGT -> text "gt" ++ ULT -> text "ltu" ++ ULE -> text "leu" ++ UGE -> text "geu" ++ UGT -> text "gtu" ++ -- BCOND cannot handle floating point comparisons / registers ++ _ -> panic $ "RV64.ppr: unhandled BCOND conditional: " ++ show c +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/RegInfo.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/RegInfo.hs +@@ -0,0 +1,41 @@ ++-- | Minimum viable implementation of jump short-cutting: No short-cutting. ++-- ++-- The functions here simply implement the no-short-cutting case. Implementing ++-- the real behaviour would be a great optimization in future. ++module GHC.CmmToAsm.RV64.RegInfo ++ ( getJumpDestBlockId, ++ canShortcut, ++ shortcutStatics, ++ shortcutJump, ++ JumpDest (..), ++ ) ++where ++ ++import GHC.Cmm ++import GHC.Cmm.BlockId ++import GHC.CmmToAsm.RV64.Instr ++import GHC.Prelude ++import GHC.Utils.Outputable ++ ++newtype JumpDest = DestBlockId BlockId ++ ++instance Outputable JumpDest where ++ ppr (DestBlockId bid) = text "jd:" <> ppr bid ++ ++-- | Extract BlockId ++-- ++-- Never `Nothing` for Riscv64 NCG. ++getJumpDestBlockId :: JumpDest -> Maybe BlockId ++getJumpDestBlockId (DestBlockId bid) = Just bid ++ ++-- No `Instr`s can bet shortcut (for now) ++canShortcut :: Instr -> Maybe JumpDest ++canShortcut _ = Nothing ++ ++-- Identity of the provided `RawCmmStatics` ++shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics ++shortcutStatics _ other_static = other_static ++ ++-- Identity of the provided `Instr` ++shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr ++shortcutJump _ other = other +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Regs.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Regs.hs +@@ -0,0 +1,230 @@ ++module GHC.CmmToAsm.RV64.Regs where ++ ++import GHC.Cmm ++import GHC.Cmm.CLabel (CLabel) ++import GHC.CmmToAsm.Format ++import GHC.Data.FastString ++import GHC.Platform ++import GHC.Platform.Reg ++import GHC.Platform.Reg.Class ++import GHC.Platform.Regs ++import GHC.Prelude ++import GHC.Types.Unique ++import GHC.Utils.Outputable ++import GHC.Utils.Panic ++ ++-- * Registers ++ ++-- | First integer register number. @zero@ register. ++x0RegNo :: RegNo ++x0RegNo = 0 ++ ++-- | return address register ++x1RegNo, raRegNo :: RegNo ++x1RegNo = 1 ++raRegNo = x1RegNo ++ ++x5RegNo, t0RegNo :: RegNo ++x5RegNo = 5 ++t0RegNo = x5RegNo ++ ++x7RegNo, t2RegNo :: RegNo ++x7RegNo = 7 ++t2RegNo = x7RegNo ++ ++x28RegNo, t3RegNo :: RegNo ++x28RegNo = 28 ++t3RegNo = x28RegNo ++ ++-- | Last integer register number. Used as TMP (IP) register. ++x31RegNo, t6RegNo, tmpRegNo :: RegNo ++x31RegNo = 31 ++t6RegNo = x31RegNo ++tmpRegNo = x31RegNo ++ ++-- | First floating point register. ++d0RegNo, ft0RegNo :: RegNo ++d0RegNo = 32 ++ft0RegNo = d0RegNo ++ ++d7RegNo, ft7RegNo :: RegNo ++d7RegNo = 39 ++ft7RegNo = d7RegNo ++ ++-- | Last floating point register. ++d31RegNo :: RegNo ++d31RegNo = 63 ++ ++a0RegNo, x10RegNo :: RegNo ++x10RegNo = 10 ++a0RegNo = x10RegNo ++ ++a7RegNo, x17RegNo :: RegNo ++x17RegNo = 17 ++a7RegNo = x17RegNo ++ ++fa0RegNo, d10RegNo :: RegNo ++d10RegNo = 42 ++fa0RegNo = d10RegNo ++ ++fa7RegNo, d17RegNo :: RegNo ++d17RegNo = 49 ++fa7RegNo = d17RegNo ++ ++-- Note [The made-up RISCV64 TMP (IP) register] ++-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++-- ++-- RISCV64 has no inter-procedural register in its ABI. However, we need one to ++-- make register spills/loads to/from high number slots. I.e. slot numbers that ++-- do not fit in a 12bit integer which is used as immediate in the arithmetic ++-- operations. Thus, we're marking one additional register (x31) as permanently ++-- non-free and call it TMP. ++-- ++-- TMP can be used as temporary register in all operations. Just be aware that ++-- it may be clobbered as soon as you loose direct control over it (i.e. using ++-- TMP by-passes the register allocation/spilling mechanisms.) It should be fine ++-- to use it as temporary register in a MachOp translation as long as you don't ++-- rely on its value beyond this limited scope. ++-- ++-- X31 is a caller-saved register. I.e. there are no guarantees about what the ++-- callee does with it. That's exactly what we want here. ++ ++zeroReg, raReg, spMachReg, tmpReg :: Reg ++zeroReg = regSingle x0RegNo ++raReg = regSingle 1 ++ ++-- | Not to be confused with the `CmmReg` `spReg` ++spMachReg = regSingle 2 ++ ++tmpReg = regSingle tmpRegNo ++ ++-- | All machine register numbers. ++allMachRegNos :: [RegNo] ++allMachRegNos = intRegs ++ fpRegs ++ where ++ intRegs = [x0RegNo .. x31RegNo] ++ fpRegs = [d0RegNo .. d31RegNo] ++ ++-- | Registers available to the register allocator. ++-- ++-- These are all registers minus those with a fixed role in RISCV ABI (zero, lr, ++-- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6, ++-- D1..D6.) ++allocatableRegs :: Platform -> [RealReg] ++allocatableRegs platform = ++ let isFree = freeReg platform ++ in map RealRegSingle $ filter isFree allMachRegNos ++ ++-- | Integer argument registers according to the calling convention ++allGpArgRegs :: [Reg] ++allGpArgRegs = map regSingle [a0RegNo .. a7RegNo] ++ ++-- | Floating point argument registers according to the calling convention ++allFpArgRegs :: [Reg] ++allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo] ++ ++-- * Addressing modes ++ ++-- | Addressing modes ++data AddrMode ++ = -- | A register plus some immediate integer, e.g. @8(sp)@ or @-16(sp)@. The ++ -- offset needs to fit into 12bits. ++ AddrRegImm Reg Imm ++ | -- | A register ++ AddrReg Reg ++ deriving (Eq, Show) ++ ++-- * Immediates ++ ++data Imm ++ = ImmInt Int ++ | ImmInteger Integer -- Sigh. ++ | ImmCLbl CLabel -- AbstractC Label (with baggage) ++ | ImmLit FastString ++ | ImmIndex CLabel Int ++ | ImmFloat Rational ++ | ImmDouble Rational ++ | ImmConstantSum Imm Imm ++ | ImmConstantDiff Imm Imm ++ deriving (Eq, Show) ++ ++-- | Map `CmmLit` to `Imm` ++-- ++-- N.B. this is a partial function, because not all `CmmLit`s have an immediate ++-- representation. ++litToImm :: CmmLit -> Imm ++litToImm (CmmInt i w) = ImmInteger (narrowS w i) ++-- narrow to the width: a CmmInt might be out of ++-- range, but we assume that ImmInteger only contains ++-- in-range values. A signed value should be fine here. ++litToImm (CmmFloat f W32) = ImmFloat f ++litToImm (CmmFloat f W64) = ImmDouble f ++litToImm (CmmLabel l) = ImmCLbl l ++litToImm (CmmLabelOff l off) = ImmIndex l off ++litToImm (CmmLabelDiffOff l1 l2 off _) = ++ ImmConstantSum ++ (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) ++ (ImmInt off) ++litToImm l = panic $ "RV64.Regs.litToImm: no match for " ++ show l ++ ++-- == To satisfy GHC.CmmToAsm.Reg.Target ======================================= ++ ++-- squeese functions for the graph allocator ----------------------------------- ++ ++-- | regSqueeze_class reg ++-- Calculate the maximum number of register colors that could be ++-- denied to a node of this class due to having this reg ++-- as a neighbour. ++{-# INLINE virtualRegSqueeze #-} ++virtualRegSqueeze :: RegClass -> VirtualReg -> Int ++virtualRegSqueeze cls vr = ++ case cls of ++ RcInteger -> ++ case vr of ++ VirtualRegI {} -> 1 ++ VirtualRegHi {} -> 1 ++ _other -> 0 ++ RcDouble -> ++ case vr of ++ VirtualRegD {} -> 1 ++ VirtualRegF {} -> 0 ++ _other -> 0 ++ _other -> 0 ++ ++{-# INLINE realRegSqueeze #-} ++realRegSqueeze :: RegClass -> RealReg -> Int ++realRegSqueeze cls rr = ++ case cls of ++ RcInteger -> ++ case rr of ++ RealRegSingle regNo ++ | regNo < d0RegNo -> 1 ++ | otherwise -> 0 ++ RcDouble -> ++ case rr of ++ RealRegSingle regNo ++ | regNo < d0RegNo -> 0 ++ | otherwise -> 1 ++ _other -> 0 ++ ++mkVirtualReg :: Unique -> Format -> VirtualReg ++mkVirtualReg u format ++ | not (isFloatFormat format) = VirtualRegI u ++ | otherwise = ++ case format of ++ FF32 -> VirtualRegD u ++ FF64 -> VirtualRegD u ++ _ -> panic "RV64.mkVirtualReg" ++ ++{-# INLINE classOfRealReg #-} ++classOfRealReg :: RealReg -> RegClass ++classOfRealReg (RealRegSingle i) ++ | i < d0RegNo = RcInteger ++ | otherwise = RcDouble ++ ++regDotColor :: RealReg -> SDoc ++regDotColor reg = ++ case classOfRealReg reg of ++ RcInteger -> text "blue" ++ RcFloat -> text "red" ++ RcDouble -> text "green" +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs +@@ -119,7 +119,7 @@ trivColorable platform virtualRegSqueeze + ArchMipseb -> panic "trivColorable ArchMipseb" + ArchMipsel -> panic "trivColorable ArchMipsel" + ArchS390X -> panic "trivColorable ArchS390X" +- ArchRISCV64 -> panic "trivColorable ArchRISCV64" ++ ArchRISCV64 -> 14 + ArchLoongArch64->panic "trivColorable ArchLoongArch64" + ArchJavaScript-> panic "trivColorable ArchJavaScript" + ArchWasm32 -> panic "trivColorable ArchWasm32" +@@ -154,7 +154,7 @@ trivColorable platform virtualRegSqueeze + ArchMipseb -> panic "trivColorable ArchMipseb" + ArchMipsel -> panic "trivColorable ArchMipsel" + ArchS390X -> panic "trivColorable ArchS390X" +- ArchRISCV64 -> panic "trivColorable ArchRISCV64" ++ ArchRISCV64 -> 0 + ArchLoongArch64->panic "trivColorable ArchLoongArch64" + ArchJavaScript-> panic "trivColorable ArchJavaScript" + ArchWasm32 -> panic "trivColorable ArchWasm32" +@@ -188,7 +188,7 @@ trivColorable platform virtualRegSqueeze + ArchMipseb -> panic "trivColorable ArchMipseb" + ArchMipsel -> panic "trivColorable ArchMipsel" + ArchS390X -> panic "trivColorable ArchS390X" +- ArchRISCV64 -> panic "trivColorable ArchRISCV64" ++ ArchRISCV64 -> 20 + ArchLoongArch64->panic "trivColorable ArchLoongArch64" + ArchJavaScript-> panic "trivColorable ArchJavaScript" + ArchWasm32 -> panic "trivColorable ArchWasm32" +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Linear.hs ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear.hs +@@ -112,6 +112,7 @@ import qualified GHC.CmmToAsm.Reg.Linear + import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 + import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 + import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 ++import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64 + import GHC.CmmToAsm.Reg.Target + import GHC.CmmToAsm.Reg.Liveness + import GHC.CmmToAsm.Reg.Utils +@@ -221,7 +222,7 @@ linearRegAlloc config entry_ids block_li + ArchAlpha -> panic "linearRegAlloc ArchAlpha" + ArchMipseb -> panic "linearRegAlloc ArchMipseb" + ArchMipsel -> panic "linearRegAlloc ArchMipsel" +- ArchRISCV64 -> panic "linearRegAlloc ArchRISCV64" ++ ArchRISCV64 -> go (frInitFreeRegs platform :: RV64.FreeRegs) + ArchLoongArch64-> panic "linearRegAlloc ArchLoongArch64" + ArchJavaScript -> panic "linearRegAlloc ArchJavaScript" + ArchWasm32 -> panic "linearRegAlloc ArchWasm32" +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs +@@ -29,10 +29,12 @@ import qualified GHC.CmmToAsm.Reg.Linear + import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 + import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 + import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 ++import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64 + + import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr + import qualified GHC.CmmToAsm.X86.Instr as X86.Instr + import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr ++import qualified GHC.CmmToAsm.RV64.Instr as RV64.Instr + + class Show freeRegs => FR freeRegs where + frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs +@@ -64,6 +66,12 @@ instance FR AArch64.FreeRegs where + frInitFreeRegs = AArch64.initFreeRegs + frReleaseReg = \_ -> AArch64.releaseReg + ++instance FR RV64.FreeRegs where ++ frAllocateReg = const RV64.allocateReg ++ frGetFreeRegs = const RV64.getFreeRegs ++ frInitFreeRegs = RV64.initFreeRegs ++ frReleaseReg = const RV64.releaseReg ++ + maxSpillSlots :: NCGConfig -> Int + maxSpillSlots config = case platformArch (ncgPlatform config) of + ArchX86 -> X86.Instr.maxSpillSlots config +@@ -76,7 +84,7 @@ maxSpillSlots config = case platformArch + ArchAlpha -> panic "maxSpillSlots ArchAlpha" + ArchMipseb -> panic "maxSpillSlots ArchMipseb" + ArchMipsel -> panic "maxSpillSlots ArchMipsel" +- ArchRISCV64 -> panic "maxSpillSlots ArchRISCV64" ++ ArchRISCV64 -> RV64.Instr.maxSpillSlots config + ArchLoongArch64->panic "maxSpillSlots ArchLoongArch64" + ArchJavaScript-> panic "maxSpillSlots ArchJavaScript" + ArchWasm32 -> panic "maxSpillSlots ArchWasm32" +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs +@@ -0,0 +1,96 @@ ++-- | Functions to implement the @FR@ (as in "free regs") type class. ++-- ++-- For LLVM GHC calling convention (used registers), see ++-- https://github.com/llvm/llvm-project/blob/6ab900f8746e7d8e24afafb5886a40801f6799f4/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L13638-L13685 ++module GHC.CmmToAsm.Reg.Linear.RV64 ++ ( allocateReg, ++ getFreeRegs, ++ initFreeRegs, ++ releaseReg, ++ FreeRegs (..), ++ ) ++where ++ ++import Data.Word ++import GHC.CmmToAsm.RV64.Regs ++import GHC.Platform ++import GHC.Platform.Reg ++import GHC.Platform.Reg.Class ++import GHC.Prelude ++import GHC.Stack ++import GHC.Utils.Outputable ++import GHC.Utils.Panic ++ ++-- | Bitmaps to indicate which registers are free (currently unused) ++-- ++-- The bit index represents the `RegNo`, in case of floating point registers ++-- with an offset of 32. The register is free when the bit is set. ++data FreeRegs ++ = FreeRegs ++ -- | integer/general purpose registers (`RcInteger`) ++ !Word32 ++ -- | floating point registers (`RcDouble`) ++ !Word32 ++ ++instance Show FreeRegs where ++ show (FreeRegs g f) = "FreeRegs 0b" ++ showBits g ++ " 0b" ++ showBits f ++ ++-- | Show bits as a `String` of @1@s and @0@s ++showBits :: Word32 -> String ++showBits w = map (\i -> if testBit w i then '1' else '0') [0 .. 31] ++ ++instance Outputable FreeRegs where ++ ppr (FreeRegs g f) = ++ text " " ++ <+> foldr (\i x -> pad_int i <+> x) (text "") [0 .. 31] ++ $$ text "GPR" ++ <+> foldr (\i x -> show_bit g i <+> x) (text "") [0 .. 31] ++ $$ text "FPR" ++ <+> foldr (\i x -> show_bit f i <+> x) (text "") [0 .. 31] ++ where ++ pad_int i | i < 10 = char ' ' <> int i ++ pad_int i = int i ++ -- remember bit = 1 means it's available. ++ show_bit bits bit | testBit bits bit = text " " ++ show_bit _ _ = text " x" ++ ++-- | Set bits of all allocatable registers to 1 ++initFreeRegs :: Platform -> FreeRegs ++initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) ++ where ++ noFreeRegs :: FreeRegs ++ noFreeRegs = FreeRegs 0 0 ++ ++-- | Get all free `RealReg`s (i.e. those where the corresponding bit is 1) ++getFreeRegs :: RegClass -> FreeRegs -> [RealReg] ++getFreeRegs cls (FreeRegs g f) ++ | RcFloat <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted. ++ | RcDouble <- cls = go 32 f allocatableDoubleRegs ++ | RcInteger <- cls = go 0 g allocatableIntRegs ++ where ++ go _ _ [] = [] ++ go off x (i : is) ++ | testBit x i = RealRegSingle (off + i) : (go off x $! is) ++ | otherwise = go off x $! is ++ -- The lists of allocatable registers are manually crafted: Register ++ -- allocation is pretty hot code. We don't want to iterate and map like ++ -- `initFreeRegs` all the time! (The register mappings aren't supposed to ++ -- change often.) ++ allocatableIntRegs = [5 .. 7] ++ [10 .. 17] ++ [28 .. 30] ++ allocatableDoubleRegs = [0 .. 7] ++ [10 .. 17] ++ [28 .. 31] ++ ++-- | Set corresponding register bit to 0 ++allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs ++allocateReg (RealRegSingle r) (FreeRegs g f) ++ | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) ++ | r < 32 && testBit g r = FreeRegs (clearBit g r) f ++ | r > 31 = panic $ "Linear.RV64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f ++ | otherwise = pprPanic "Linear.RV64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g) ++ ++-- | Set corresponding register bit to 1 ++releaseReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs ++releaseReg (RealRegSingle r) (FreeRegs g f) ++ | r > 31 && testBit f (r - 32) = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32)) ++ | r < 32 && testBit g r = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg x" <> int r) ++ | r > 31 = FreeRegs g (setBit f (r - 32)) ++ | otherwise = FreeRegs (setBit g r) f +Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Target.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Target.hs ++++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Target.hs +@@ -34,7 +34,7 @@ import qualified GHC.CmmToAsm.X86.Regs + import qualified GHC.CmmToAsm.X86.RegInfo as X86 + import qualified GHC.CmmToAsm.PPC.Regs as PPC + import qualified GHC.CmmToAsm.AArch64.Regs as AArch64 +- ++import qualified GHC.CmmToAsm.RV64.Regs as RV64 + + targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int + targetVirtualRegSqueeze platform +@@ -49,7 +49,7 @@ targetVirtualRegSqueeze platform + ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" + ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" + ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel" +- ArchRISCV64 -> panic "targetVirtualRegSqueeze ArchRISCV64" ++ ArchRISCV64 -> RV64.virtualRegSqueeze + ArchLoongArch64->panic "targetVirtualRegSqueeze ArchLoongArch64" + ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript" + ArchWasm32 -> panic "targetVirtualRegSqueeze ArchWasm32" +@@ -69,7 +69,7 @@ targetRealRegSqueeze platform + ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" + ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" + ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel" +- ArchRISCV64 -> panic "targetRealRegSqueeze ArchRISCV64" ++ ArchRISCV64 -> RV64.realRegSqueeze + ArchLoongArch64->panic "targetRealRegSqueeze ArchLoongArch64" + ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript" + ArchWasm32 -> panic "targetRealRegSqueeze ArchWasm32" +@@ -88,7 +88,7 @@ targetClassOfRealReg platform + ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" + ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" + ArchMipsel -> panic "targetClassOfRealReg ArchMipsel" +- ArchRISCV64 -> panic "targetClassOfRealReg ArchRISCV64" ++ ArchRISCV64 -> RV64.classOfRealReg + ArchLoongArch64->panic "targetClassOfRealReg ArchLoongArch64" + ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript" + ArchWasm32 -> panic "targetClassOfRealReg ArchWasm32" +@@ -107,7 +107,7 @@ targetMkVirtualReg platform + ArchAlpha -> panic "targetMkVirtualReg ArchAlpha" + ArchMipseb -> panic "targetMkVirtualReg ArchMipseb" + ArchMipsel -> panic "targetMkVirtualReg ArchMipsel" +- ArchRISCV64 -> panic "targetMkVirtualReg ArchRISCV64" ++ ArchRISCV64 -> RV64.mkVirtualReg + ArchLoongArch64->panic "targetMkVirtualReg ArchLoongArch64" + ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript" + ArchWasm32 -> panic "targetMkVirtualReg ArchWasm32" +@@ -126,7 +126,7 @@ targetRegDotColor platform + ArchAlpha -> panic "targetRegDotColor ArchAlpha" + ArchMipseb -> panic "targetRegDotColor ArchMipseb" + ArchMipsel -> panic "targetRegDotColor ArchMipsel" +- ArchRISCV64 -> panic "targetRegDotColor ArchRISCV64" ++ ArchRISCV64 -> RV64.regDotColor + ArchLoongArch64->panic "targetRegDotColor ArchLoongArch64" + ArchJavaScript-> panic "targetRegDotColor ArchJavaScript" + ArchWasm32 -> panic "targetRegDotColor ArchWasm32" +Index: ghc-9.10.1/compiler/GHC/Driver/Backend.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/Driver/Backend.hs ++++ ghc-9.10.1/compiler/GHC/Driver/Backend.hs +@@ -213,6 +213,7 @@ platformNcgSupported platform = if + ArchPPC_64 {} -> True + ArchAArch64 -> True + ArchWasm32 -> True ++ ArchRISCV64 -> True + _ -> False + + -- | Is the platform supported by the JS backend? +Index: ghc-9.10.1/compiler/GHC/Driver/DynFlags.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/Driver/DynFlags.hs ++++ ghc-9.10.1/compiler/GHC/Driver/DynFlags.hs +@@ -1325,6 +1325,7 @@ default_PIC platform = + (OSDarwin, ArchAArch64) -> [Opt_PIC] + (OSLinux, ArchAArch64) -> [Opt_PIC, Opt_ExternalDynamicRefs] + (OSLinux, ArchARM {}) -> [Opt_PIC, Opt_ExternalDynamicRefs] ++ (OSLinux, ArchRISCV64 {}) -> [Opt_PIC, Opt_ExternalDynamicRefs] + (OSOpenBSD, ArchX86_64) -> [Opt_PIC] -- Due to PIE support in + -- OpenBSD since 5.3 release + -- (1 May 2013) we need to +Index: ghc-9.10.1/compiler/GHC/Platform.hs +=================================================================== +--- ghc-9.10.1.orig/compiler/GHC/Platform.hs ++++ ghc-9.10.1/compiler/GHC/Platform.hs +@@ -250,7 +250,6 @@ platformHasRTSLinker p = case archOS_arc + ArchPPC_64 ELF_V1 -> False -- powerpc64 + ArchPPC_64 ELF_V2 -> False -- powerpc64le + ArchS390X -> False +- ArchRISCV64 -> False + ArchLoongArch64 -> False + ArchJavaScript -> False + ArchWasm32 -> False +Index: ghc-9.10.1/compiler/ghc.cabal.in +=================================================================== +--- ghc-9.10.1.orig/compiler/ghc.cabal.in ++++ ghc-9.10.1/compiler/ghc.cabal.in +@@ -290,6 +290,7 @@ Library + GHC.CmmToAsm.Reg.Linear.FreeRegs + GHC.CmmToAsm.Reg.Linear.JoinToTargets + GHC.CmmToAsm.Reg.Linear.PPC ++ GHC.CmmToAsm.Reg.Linear.RV64 + GHC.CmmToAsm.Reg.Linear.StackMap + GHC.CmmToAsm.Reg.Linear.State + GHC.CmmToAsm.Reg.Linear.Stats +@@ -298,6 +299,13 @@ Library + GHC.CmmToAsm.Reg.Liveness + GHC.CmmToAsm.Reg.Target + GHC.CmmToAsm.Reg.Utils ++ GHC.CmmToAsm.RV64 ++ GHC.CmmToAsm.RV64.CodeGen ++ GHC.CmmToAsm.RV64.Cond ++ GHC.CmmToAsm.RV64.Instr ++ GHC.CmmToAsm.RV64.Ppr ++ GHC.CmmToAsm.RV64.RegInfo ++ GHC.CmmToAsm.RV64.Regs + GHC.CmmToAsm.Types + GHC.CmmToAsm.Utils + GHC.CmmToAsm.X86 +Index: ghc-9.10.1/hadrian/bindist/config.mk.in +=================================================================== +--- ghc-9.10.1.orig/hadrian/bindist/config.mk.in ++++ ghc-9.10.1/hadrian/bindist/config.mk.in +@@ -152,7 +152,7 @@ GhcWithSMP := $(strip $(if $(filter YESN + # Whether to include GHCi in the compiler. Depends on whether the RTS linker + # has support for this OS/ARCH combination. + OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu))) +-ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64))) ++ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64 riscv64))) + + ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES" + GhcWithInterpreter=YES +Index: ghc-9.10.1/hadrian/src/Settings/Builders/RunTest.hs +=================================================================== +--- ghc-9.10.1.orig/hadrian/src/Settings/Builders/RunTest.hs ++++ ghc-9.10.1/hadrian/src/Settings/Builders/RunTest.hs +@@ -118,7 +118,7 @@ inTreeCompilerArgs stg = do + + os <- queryHostTarget queryOS + arch <- queryTargetTarget queryArch +- let codegen_arches = ["x86_64", "i386", "powerpc", "powerpc64", "powerpc64le", "aarch64", "wasm32"] ++ let codegen_arches = ["x86_64", "i386", "powerpc", "powerpc64", "powerpc64le", "aarch64", "wasm32", "riscv64"] + let withNativeCodeGen + | unregisterised = False + | arch `elem` codegen_arches = True +@@ -139,7 +139,7 @@ inTreeCompilerArgs stg = do + -- For this information, we need to query ghc --info, however, that would + -- require building ghc, which we don't want to do here. Therefore, the + -- logic from `platformHasRTSLinker` is duplicated here. +- let rtsLinker = not $ arch `elem` ["powerpc", "powerpc64", "powerpc64le", "s390x", "riscv64", "loongarch64", "javascript", "wasm32"] ++ let rtsLinker = not $ arch `elem` ["powerpc", "powerpc64", "powerpc64le", "s390x", "loongarch64", "javascript", "wasm32"] + + return TestCompilerArgs{..} + +Index: ghc-9.10.1/rts/LinkerInternals.h +=================================================================== +--- ghc-9.10.1.orig/rts/LinkerInternals.h ++++ ghc-9.10.1/rts/LinkerInternals.h +@@ -208,7 +208,7 @@ typedef struct _Segment { + int n_sections; + } Segment; + +-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) ++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH) + #define NEED_SYMBOL_EXTRAS 1 + #endif + +@@ -220,8 +220,9 @@ typedef struct _Segment { + #define NEED_M32 1 + #endif + +-/* Jump Islands are sniplets of machine code required for relative +- * address relocations on the PowerPC, x86_64 and ARM. ++/* Jump Islands are sniplets of machine code required for relative address ++ * relocations on the PowerPC, x86_64 and ARM. On RISCV64 we use symbolextras ++ * like a GOT for locals where SymbolExtra represents one entry. + */ + typedef struct { + #if defined(powerpc_HOST_ARCH) +@@ -237,6 +238,8 @@ typedef struct { + uint8_t jumpIsland[8]; + #elif defined(arm_HOST_ARCH) + uint8_t jumpIsland[16]; ++#elif defined(riscv64_HOST_ARCH) ++ uint64_t addr; + #endif + } SymbolExtra; + +Index: ghc-9.10.1/rts/RtsSymbols.c +=================================================================== +--- ghc-9.10.1.orig/rts/RtsSymbols.c ++++ ghc-9.10.1/rts/RtsSymbols.c +@@ -980,6 +980,17 @@ extern char **environ; + #define RTS_LIBGCC_SYMBOLS + #endif + ++#if defined(riscv64_HOST_ARCH) ++// See https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html as ++// reference for the following built-ins. __clzdi2 and __ctzdi2 probably relate ++// to __builtin-s in libraries/ghc-prim/cbits/ctz.c. ++#define RTS_ARCH_LIBGCC_SYMBOLS \ ++ SymI_NeedsProto(__clzdi2) \ ++ SymI_NeedsProto(__ctzdi2) ++#else ++#define RTS_ARCH_LIBGCC_SYMBOLS ++#endif ++ + // Symbols defined by libgcc/compiler-rt for AArch64's outline atomics. + #if defined(HAVE_ARM_OUTLINE_ATOMICS) + #include "ARMOutlineAtomicsSymbols.h" +@@ -1032,6 +1043,7 @@ RTS_DARWIN_ONLY_SYMBOLS + RTS_OPENBSD_ONLY_SYMBOLS + RTS_LIBC_SYMBOLS + RTS_LIBGCC_SYMBOLS ++RTS_ARCH_LIBGCC_SYMBOLS + RTS_FINI_ARRAY_SYMBOLS + RTS_LIBFFI_SYMBOLS + RTS_ARM_OUTLINE_ATOMIC_SYMBOLS +@@ -1074,6 +1086,7 @@ RtsSymbolVal rtsSyms[] = { + RTS_DARWIN_ONLY_SYMBOLS + RTS_OPENBSD_ONLY_SYMBOLS + RTS_LIBGCC_SYMBOLS ++ RTS_ARCH_LIBGCC_SYMBOLS + RTS_FINI_ARRAY_SYMBOLS + RTS_LIBFFI_SYMBOLS + RTS_ARM_OUTLINE_ATOMIC_SYMBOLS +Index: ghc-9.10.1/rts/adjustor/LibffiAdjustor.c +=================================================================== +--- ghc-9.10.1.orig/rts/adjustor/LibffiAdjustor.c ++++ ghc-9.10.1/rts/adjustor/LibffiAdjustor.c +@@ -12,6 +12,7 @@ + #include "Adjustor.h" + + #include "rts/ghc_ffi.h" ++#include + #include + + // Note that ffi_alloc_prep_closure is a non-standard libffi closure +@@ -187,5 +188,21 @@ createAdjustor (int cconv, + barf("createAdjustor: failed to allocate memory"); + } + +- return (void*)code; ++#if defined(riscv64_HOST_ARCH) ++ // Synchronize the memory and instruction cache to prevent illegal ++ // instruction exceptions. ++ ++ // We expect two instructions for address loading, one for the jump. ++ int instrCount = 3; ++ // On Linux the parameters of __builtin___clear_cache are currently unused. ++ // Add them anyways for future compatibility. (I.e. the parameters couldn't ++ // be checked during development.) ++ // TODO: Check the upper boundary e.g. with a debugger. ++ __builtin___clear_cache((void *)code, ++ (void *)((uint64_t *) code + instrCount)); ++ // Memory barrier to ensure nothing circumvents the fence.i / cache flush. ++ SEQ_CST_FENCE(); ++#endif ++ ++ return (void *)code; + } +Index: ghc-9.10.1/rts/linker/Elf.c +=================================================================== +--- ghc-9.10.1.orig/rts/linker/Elf.c ++++ ghc-9.10.1/rts/linker/Elf.c +@@ -103,7 +103,8 @@ + + #include "elf_got.h" + +-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) ++#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH) ++# define NEED_GOT + # define NEED_PLT + # include "elf_plt.h" + # include "elf_reloc.h" +@@ -430,10 +431,7 @@ ocVerifyImage_ELF ( ObjectCode* oc ) + case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break; + #endif + #if defined(EM_RISCV) +- case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); +- errorBelch("%s: RTS linker not implemented on riscv", +- oc->fileName); +- return 0; ++ case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); break; + #endif + #if defined(EM_LOONGARCH) + case EM_LOONGARCH: IF_DEBUG(linker,debugBelch( "loongarch64" )); +@@ -1130,9 +1128,10 @@ end: + return result; + } + +-// the aarch64 linker uses relocacteObjectCodeAarch64, +-// see elf_reloc_aarch64.{h,c} +-#if !defined(aarch64_HOST_ARCH) ++// the aarch64 and riscv64 linkers use relocateObjectCodeAarch64() and ++// relocateObjectCodeRISCV64() (respectively), see elf_reloc_aarch64.{h,c} and ++// elf_reloc_riscv64.{h,c} ++#if !defined(aarch64_HOST_ARCH) && !defined(riscv64_HOST_ARCH) + + /* Do ELF relocations which lack an explicit addend. All x86-linux + and arm-linux relocations appear to be of this form. */ +@@ -1359,7 +1358,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, + /* try to locate an existing stub for this target */ + if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) { + /* didn't find any. Need to create one */ +- if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) { ++ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 0)) { + errorBelch("Unable to create veneer for ARM_CALL\n"); + return 0; + } +@@ -1451,7 +1450,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, + /* try to locate an existing stub for this target */ + if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) { + /* didn't find any. Need to create one */ +- if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) { ++ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 1)) { + errorBelch("Unable to create veneer for ARM_THM_CALL\n"); + return 0; + } +@@ -1991,7 +1990,7 @@ ocResolve_ELF ( ObjectCode* oc ) + (void) shnum; + (void) shdr; + +-#if defined(aarch64_HOST_ARCH) ++#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH) + /* use new relocation design */ + if(relocateObjectCode( oc )) + return 0; +@@ -2014,6 +2013,9 @@ ocResolve_ELF ( ObjectCode* oc ) + + #if defined(powerpc_HOST_ARCH) + ocFlushInstructionCache( oc ); ++#elif defined(riscv64_HOST_ARCH) ++ /* New-style pseudo-polymorph (by architecture) call */ ++ flushInstructionCache( oc ); + #endif + + return ocMprotect_Elf(oc); +Index: ghc-9.10.1/rts/linker/ElfTypes.h +=================================================================== +--- ghc-9.10.1.orig/rts/linker/ElfTypes.h ++++ ghc-9.10.1/rts/linker/ElfTypes.h +@@ -150,6 +150,7 @@ typedef + struct _Stub { + void * addr; + void * target; ++ void* got_addr; + /* flags can hold architecture specific information they are used during + * lookup of stubs as well. Thus two stubs for the same target with + * different flags are considered unequal. +Index: ghc-9.10.1/rts/linker/SymbolExtras.c +=================================================================== +--- ghc-9.10.1.orig/rts/linker/SymbolExtras.c ++++ ghc-9.10.1/rts/linker/SymbolExtras.c +@@ -153,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc) + } + + +-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) ++#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH) + SymbolExtra* makeSymbolExtra( ObjectCode const* oc, + unsigned long symbolNumber, + unsigned long target ) +@@ -189,9 +189,12 @@ SymbolExtra* makeSymbolExtra( ObjectCode + extra->addr = target; + memcpy(extra->jumpIsland, jmp, 8); + #endif /* x86_64_HOST_ARCH */ +- ++#if defined(riscv64_HOST_ARCH) ++ // Fake GOT entry (used like GOT, but located in symbol extras) ++ extra->addr = target; ++#endif + return extra; + } +-#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */ ++#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH || riscv64_HOST_ARCH */ + #endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */ + #endif // NEED_SYMBOL_EXTRAS +Index: ghc-9.10.1/rts/linker/SymbolExtras.h +=================================================================== +--- ghc-9.10.1.orig/rts/linker/SymbolExtras.h ++++ ghc-9.10.1/rts/linker/SymbolExtras.h +@@ -16,7 +16,7 @@ SymbolExtra* makeArmSymbolExtra( ObjectC + unsigned long target, + bool fromThumb, + bool toThumb ); +-#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) ++#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH) + SymbolExtra* makeSymbolExtra( ObjectCode const* oc, + unsigned long symbolNumber, + unsigned long target ); +Index: ghc-9.10.1/rts/linker/elf_plt.c +=================================================================== +--- ghc-9.10.1.orig/rts/linker/elf_plt.c ++++ ghc-9.10.1/rts/linker/elf_plt.c +@@ -5,7 +5,7 @@ + #include + #include + +-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) ++#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH) + #if defined(OBJFORMAT_ELF) + + #define STRINGIFY(x) #x +@@ -49,11 +49,13 @@ findStub(Section * section, + bool + makeStub(Section * section, + void* * addr, ++ void* got_addr, + uint8_t flags) { + + Stub * s = calloc(1, sizeof(Stub)); + ASSERT(s != NULL); + s->target = *addr; ++ s->got_addr = got_addr; + s->flags = flags; + s->next = NULL; + s->addr = (uint8_t *)section->info->stub_offset + 8 +Index: ghc-9.10.1/rts/linker/elf_plt.h +=================================================================== +--- ghc-9.10.1.orig/rts/linker/elf_plt.h ++++ ghc-9.10.1/rts/linker/elf_plt.h +@@ -4,8 +4,9 @@ + + #include "elf_plt_arm.h" + #include "elf_plt_aarch64.h" ++#include "elf_plt_riscv64.h" + +-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) ++#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH) + + #if defined(OBJFORMAT_ELF) + +@@ -21,6 +22,8 @@ + #define __suffix__ Arm + #elif defined(__mips__) + #define __suffix__ Mips ++#elif defined(__riscv) ++#define __suffix__ RISCV64 + #else + #error "unknown architecture" + #endif +@@ -34,10 +37,10 @@ unsigned numberOfStubsForSection( Objec + #define STUB_SIZE ADD_SUFFIX(stubSize) + + bool findStub(Section * section, void* * addr, uint8_t flags); +-bool makeStub(Section * section, void* * addr, uint8_t flags); ++bool makeStub(Section * section, void* * addr, void* got_addr, uint8_t flags); + + void freeStubs(Section * section); + + #endif // OBJECTFORMAT_ELF + +-#endif // arm/aarch64_HOST_ARCH ++#endif // arm/aarch64_HOST_ARCH/riscv64_HOST_ARCH +Index: ghc-9.10.1/rts/linker/elf_plt_riscv64.c +=================================================================== +--- /dev/null ++++ ghc-9.10.1/rts/linker/elf_plt_riscv64.c +@@ -0,0 +1,90 @@ ++#include "Rts.h" ++#include "elf_compat.h" ++#include "elf_plt_riscv64.h" ++#include "rts/Messages.h" ++#include "linker/ElfTypes.h" ++ ++#include ++#include ++ ++#if defined(riscv64_HOST_ARCH) ++ ++#if defined(OBJFORMAT_ELF) ++ ++const size_t instSizeRISCV64 = 4; ++const size_t stubSizeRISCV64 = 3 * instSizeRISCV64; ++ ++bool needStubForRelRISCV64(Elf_Rel *rel) { ++ switch (ELF64_R_TYPE(rel->r_info)) { ++ case R_RISCV_CALL: ++ case R_RISCV_CALL_PLT: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool needStubForRelaRISCV64(Elf_Rela *rela) { ++ switch (ELF64_R_TYPE(rela->r_info)) { ++ case R_RISCV_CALL: ++ case R_RISCV_CALL_PLT: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++// After the global offset table (GOT) has been set up, we can use these three ++// instructions to jump to the target address / function: ++// ++// 1. AUIPC ip, %pcrel_hi(addr) ++// 2. LD ip, %pcrel_lo(addr)(ip) ++// 3. JARL x0, ip, 0 ++// ++// We could use the absolute address of the target (because we know it), but ++// that would require loading a 64-bit constant which is a nightmare to do in ++// riscv64 assembly. (See ++// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/5ffe5b5aeedb37b1c1c0c3d94641267d9ad4795a/riscv-elf.adoc#procedure-linkage-table) ++// ++// So far, PC-relative addressing seems to be good enough. If it ever turns out ++// to be not, one could (additionally for out-of-range cases?) encode absolute ++// addressing here. ++bool makeStubRISCV64(Stub *s) { ++ uint32_t *P = (uint32_t *)s->addr; ++ int32_t addr = (uint64_t)s->got_addr - (uint64_t)P; ++ ++ uint64_t hi = (addr + 0x800) >> 12; ++ uint64_t lo = addr - (hi << 12); ++ ++ IF_DEBUG( ++ linker, ++ debugBelch( ++ "makeStubRISCV64: P = %p, got_addr = %p, target = %p, addr = 0x%x " ++ ", hi = 0x%lx, lo = 0x%lx\n", ++ P, s->got_addr, s->target, addr, hi, lo)); ++ ++ // AUIPC ip, %pcrel_hi(addr) ++ uint32_t auipcInst = 0b0010111; // opcode ++ auipcInst |= 0x1f << 7; // rd = ip (x31) ++ auipcInst |= hi << 12; // imm[31:12] ++ ++ // LD ip, %pcrel_lo(addr)(ip) ++ uint32_t ldInst = 0b0000011; // opcode ++ ldInst |= 0x1f << 7; // rd = ip (x31) ++ ldInst |= 0x1f << 15; // rs = ip (x31) ++ ldInst |= 0b11 << 12; // funct3 = 0x3 (LD) ++ ldInst |= lo << 20; // imm[11:0] ++ ++ // JARL x0, ip, 0 ++ uint32_t jalrInst = 0b1100111; // opcode ++ jalrInst |= 0x1f << 15; // rs = ip (x31) ++ ++ P[0] = auipcInst; ++ P[1] = ldInst; ++ P[2] = jalrInst; ++ ++ return EXIT_SUCCESS; ++} ++ ++#endif ++#endif +Index: ghc-9.10.1/rts/linker/elf_plt_riscv64.h +=================================================================== +--- /dev/null ++++ ghc-9.10.1/rts/linker/elf_plt_riscv64.h +@@ -0,0 +1,12 @@ ++#pragma once ++ ++#include "LinkerInternals.h" ++ ++#if defined(OBJFORMAT_ELF) ++ ++extern const size_t stubSizeRISCV64; ++bool needStubForRelRISCV64(Elf_Rel * rel); ++bool needStubForRelaRISCV64(Elf_Rela * rel); ++bool makeStubRISCV64(Stub * s); ++ ++#endif +Index: ghc-9.10.1/rts/linker/elf_reloc.c +=================================================================== +--- ghc-9.10.1.orig/rts/linker/elf_reloc.c ++++ ghc-9.10.1/rts/linker/elf_reloc.c +@@ -4,13 +4,18 @@ + + #if defined(OBJFORMAT_ELF) + +-/* we currently only use this abstraction for elf/aarch64 */ +-#if defined(aarch64_HOST_ARCH) ++/* we currently only use this abstraction for elf/aarch64 and elf/riscv64 */ ++#if defined(aarch64_HOST_ARCH) | defined(riscv64_HOST_ARCH) + + bool + relocateObjectCode(ObjectCode * oc) { + return ADD_SUFFIX(relocateObjectCode)(oc); + } ++ ++ ++void flushInstructionCache(ObjectCode * oc){ ++ return ADD_SUFFIX(flushInstructionCache)(oc); ++} + #endif + + #endif +Index: ghc-9.10.1/rts/linker/elf_reloc.h +=================================================================== +--- ghc-9.10.1.orig/rts/linker/elf_reloc.h ++++ ghc-9.10.1/rts/linker/elf_reloc.h +@@ -5,9 +5,10 @@ + #if defined(OBJFORMAT_ELF) + + #include "elf_reloc_aarch64.h" ++#include "elf_reloc_riscv64.h" + + bool + relocateObjectCode(ObjectCode * oc); + +- ++void flushInstructionCache(ObjectCode *oc); + #endif /* OBJETFORMAT_ELF */ +Index: ghc-9.10.1/rts/linker/elf_reloc_aarch64.c +=================================================================== +--- ghc-9.10.1.orig/rts/linker/elf_reloc_aarch64.c ++++ ghc-9.10.1/rts/linker/elf_reloc_aarch64.c +@@ -240,7 +240,7 @@ computeAddend(Section * section, Elf_Rel + /* check if we already have that stub */ + if(findStub(section, (void**)&S, 0)) { + /* did not find it. Crete a new stub. */ +- if(makeStub(section, (void**)&S, 0)) { ++ if(makeStub(section, (void**)&S, NULL, 0)) { + abort(/* could not find or make stub */); + } + } +@@ -339,5 +339,10 @@ relocateObjectCodeAarch64(ObjectCode * o + return EXIT_SUCCESS; + } + ++void flushInstructionCacheAarch64(ObjectCode * oc STG_UNUSED) { ++ // Looks like we don't need this on Aarch64. ++ /* no-op */ ++} ++ + #endif /* OBJECTFORMAT_ELF */ + #endif /* aarch64_HOST_ARCH */ +Index: ghc-9.10.1/rts/linker/elf_reloc_aarch64.h +=================================================================== +--- ghc-9.10.1.orig/rts/linker/elf_reloc_aarch64.h ++++ ghc-9.10.1/rts/linker/elf_reloc_aarch64.h +@@ -7,4 +7,5 @@ + bool + relocateObjectCodeAarch64(ObjectCode * oc); + ++void flushInstructionCacheAarch64(ObjectCode *oc); + #endif /* OBJETFORMAT_ELF */ +Index: ghc-9.10.1/rts/linker/elf_reloc_riscv64.c +=================================================================== +--- /dev/null ++++ ghc-9.10.1/rts/linker/elf_reloc_riscv64.c +@@ -0,0 +1,693 @@ ++#include "elf_reloc_riscv64.h" ++#include "LinkerInternals.h" ++#include "Rts.h" ++#include "Stg.h" ++#include "SymbolExtras.h" ++#include "linker/ElfTypes.h" ++#include "elf_plt.h" ++#include "elf_util.h" ++#include "rts/Messages.h" ++#include "util.h" ++ ++#include ++#include ++ ++#if defined(riscv64_HOST_ARCH) ++ ++#if defined(OBJFORMAT_ELF) ++ ++typedef uint64_t addr_t; ++ ++/* regular instructions are 32bit */ ++typedef uint32_t inst_t; ++ ++/* compressed instructions are 16bit */ ++typedef uint16_t cinst_t; ++ ++// TODO: These instances could be static. They are not yet, because we might ++// need their debugging symbols. ++char *relocationTypeToString(Elf64_Xword type); ++int32_t decodeAddendRISCV64(Section *section, Elf_Rel *rel); ++bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend); ++void write8le(uint8_t *p, uint8_t v); ++uint8_t read8le(const uint8_t *P); ++void write16le(cinst_t *p, uint16_t v); ++uint16_t read16le(const cinst_t *P); ++uint32_t read32le(const inst_t *P); ++void write32le(inst_t *p, uint32_t v); ++uint64_t read64le(const uint64_t *P); ++void write64le(uint64_t *p, uint64_t v); ++uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end); ++void setCJType(cinst_t *loc, uint32_t val); ++void setCBType(cinst_t *loc, uint32_t val); ++void setBType(inst_t *loc, uint32_t val); ++void setSType(inst_t *loc, uint32_t val); ++int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol, ++ int64_t addend, ObjectCode *oc); ++void setJType(inst_t *loc, uint32_t val); ++void setIType(inst_t *loc, int32_t val); ++void checkInt(inst_t *loc, int32_t v, int n); ++void setUType(inst_t *loc, int32_t val); ++ ++ ++char *relocationTypeToString(Elf64_Xword type) { ++ switch (ELF64_R_TYPE(type)) { ++ case R_RISCV_NONE: ++ return "R_RISCV_NONE"; ++ case R_RISCV_32: ++ return "R_RISCV_32"; ++ case R_RISCV_64: ++ return "R_RISCV_64"; ++ case R_RISCV_RELATIVE: ++ return "R_RISCV_RELATIVE"; ++ case R_RISCV_COPY: ++ return "R_RISCV_COPY"; ++ case R_RISCV_JUMP_SLOT: ++ return "R_RISCV_JUMP_SLOT"; ++ case R_RISCV_TLS_DTPMOD32: ++ return "R_RISCV_TLS_DTPMOD32"; ++ case R_RISCV_TLS_DTPMOD64: ++ return "R_RISCV_TLS_DTPMOD64"; ++ case R_RISCV_TLS_DTPREL32: ++ return "R_RISCV_TLS_DTPREL32"; ++ case R_RISCV_TLS_DTPREL64: ++ return "R_RISCV_TLS_DTPREL64"; ++ case R_RISCV_TLS_TPREL32: ++ return "R_RISCV_TLS_TPREL32"; ++ case R_RISCV_TLS_TPREL64: ++ return "R_RISCV_TLS_TPREL64"; ++ case R_RISCV_BRANCH: ++ return "R_RISCV_BRANCH"; ++ case R_RISCV_JAL: ++ return "R_RISCV_JAL"; ++ case R_RISCV_CALL: ++ return "R_RISCV_CALL"; ++ case R_RISCV_CALL_PLT: ++ return "R_RISCV_CALL_PLT"; ++ case R_RISCV_GOT_HI20: ++ return "R_RISCV_GOT_HI20"; ++ case R_RISCV_PCREL_HI20: ++ return "R_RISCV_PCREL_HI20"; ++ case R_RISCV_LO12_I: ++ return "R_RISCV_LO12_I"; ++ case R_RISCV_PCREL_LO12_I: ++ return "R_RISCV_PCREL_LO12_I"; ++ case R_RISCV_HI20: ++ return "R_RISCV_HI20"; ++ case R_RISCV_LO12_S: ++ return "R_RISCV_LO12_S"; ++ case R_RISCV_PCREL_LO12_S: ++ return "R_RISCV_PCREL_LO12_S"; ++ case R_RISCV_RELAX: ++ return "R_RISCV_RELAX"; ++ case R_RISCV_RVC_BRANCH: ++ return "R_RISCV_RVC_BRANCH"; ++ case R_RISCV_RVC_JUMP: ++ return "R_RISCV_RVC_JUMP"; ++ default: ++ return "Unknown relocation type"; ++ } ++} ++ ++STG_NORETURN ++int32_t decodeAddendRISCV64(Section *section STG_UNUSED, ++ Elf_Rel *rel STG_UNUSED) { ++ barf("decodeAddendRISCV64: Relocations with explicit addend are not supported." ++ " Please open a ticket; providing the causing code/binary."); ++} ++ ++// Make sure that V can be represented as an N bit signed integer. ++void checkInt(inst_t *loc, int32_t v, int n) { ++ if (!isInt(n, v)) { ++ barf("Relocation at 0x%x is out of range. value: 0x%x (%d), " ++ "sign-extended value: 0x%x (%d), max bits 0x%x (%d)\n", ++ *loc, v, v, signExtend32(v, n), signExtend32(v, n), n, n); ++ } ++} ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++void write8le(uint8_t *p, uint8_t v) { *p = v; } ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++uint8_t read8le(const uint8_t *p) { return *p; } ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++void write16le(cinst_t *p, uint16_t v) { *p = v; } ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++uint16_t read16le(const cinst_t *p) { return *p; } ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++uint32_t read32le(const inst_t *p) { return *p; } ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++void write32le(inst_t *p, uint32_t v) { *p = v; } ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++uint64_t read64le(const uint64_t *p) { return *p; } ++ ++// RISCV is little-endian by definition: We can rely on (implicit) casts. ++void write64le(uint64_t *p, uint64_t v) { *p = v; } ++ ++uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { ++ return (v & ((1ULL << (begin + 1)) - 1)) >> end; ++} ++ ++// Set immediate val in the instruction at *loc. In U-type instructions the ++// upper 20bits carry the upper 20bits of the immediate. ++void setUType(inst_t *loc, int32_t val) { ++ const unsigned bits = 32; ++ uint32_t hi = val + 0x800; ++ checkInt(loc, signExtend32(hi, bits) >> 12, 20); ++ IF_DEBUG(linker, debugBelch("setUType: hi 0x%x val 0x%x\n", hi, val)); ++ ++ uint32_t imm = hi & 0xFFFFF000; ++ write32le(loc, (read32le(loc) & 0xFFF) | imm); ++} ++ ++// Set immediate val in the instruction at *loc. In I-type instructions the ++// upper 12bits carry the lower 12bit of the immediate. ++void setIType(inst_t *loc, int32_t val) { ++ uint64_t hi = (val + 0x800) >> 12; ++ uint64_t lo = val - (hi << 12); ++ ++ IF_DEBUG(linker, debugBelch("setIType: hi 0x%lx lo 0x%lx\n", hi, lo)); ++ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc 0x%x val 0x%x\n", loc, ++ *loc, val)); ++ ++ uint32_t imm = lo & 0xfff; ++ uint32_t instr = (read32le(loc) & 0xfffff) | (imm << 20); ++ ++ IF_DEBUG(linker, debugBelch("setIType: insn 0x%x\n", instr)); ++ write32le(loc, instr); ++ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc' 0x%x val 0x%x\n", loc, ++ *loc, val)); ++} ++ ++// Set immediate val in the instruction at *loc. In S-type instructions the ++// lower 12 bits of the immediate are at bits 7 to 11 ([0:4]) and 25 to 31 ++// ([5:11]). ++void setSType(inst_t *loc, uint32_t val) { ++ uint64_t hi = (val + 0x800) >> 12; ++ uint64_t lo = val - (hi << 12); ++ ++ uint32_t imm = lo; ++ uint32_t instr = (read32le(loc) & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) | ++ (extractBits(imm, 4, 0) << 7); ++ ++ write32le(loc, instr); ++} ++ ++// Set immediate val in the instruction at *loc. In J-type instructions the ++// immediate has 20bits which are pretty scattered: ++// instr bit -> imm bit ++// 31 -> 20 ++// [30:21] -> [10:1] ++// 20 -> 11 ++// [19:12] -> [19:12] ++// ++// N.B. bit 0 of the immediate is missing! ++void setJType(inst_t *loc, uint32_t val) { ++ checkInt(loc, val, 21); ++ ++ uint32_t insn = read32le(loc) & 0xFFF; ++ uint32_t imm20 = extractBits(val, 20, 20) << 31; ++ uint32_t imm10_1 = extractBits(val, 10, 1) << 21; ++ uint32_t imm11 = extractBits(val, 11, 11) << 20; ++ uint32_t imm19_12 = extractBits(val, 19, 12) << 12; ++ insn |= imm20 | imm10_1 | imm11 | imm19_12; ++ ++ write32le(loc, insn); ++} ++ ++// Set immediate val in the instruction at *loc. In B-type instructions the ++// immediate has 12bits which are pretty scattered: ++// instr bit -> imm bit ++// 31 -> 12 ++// [30:25] -> [10:5] ++// [11:8] -> [4:1] ++// 7 -> 11 ++// ++// N.B. bit 0 of the immediate is missing! ++void setBType(inst_t *loc, uint32_t val) { ++ checkInt(loc, val, 13); ++ ++ uint32_t insn = read32le(loc) & 0x1FFF07F; ++ uint32_t imm12 = extractBits(val, 12, 12) << 31; ++ uint32_t imm10_5 = extractBits(val, 10, 5) << 25; ++ uint32_t imm4_1 = extractBits(val, 4, 1) << 8; ++ uint32_t imm11 = extractBits(val, 11, 11) << 7; ++ insn |= imm12 | imm10_5 | imm4_1 | imm11; ++ ++ write32le(loc, insn); ++} ++ ++ ++// Set immediate val in the instruction at *loc. CB-type instructions have a ++// lenght of 16 bits (half-word, compared to the usual 32bit/word instructions.) ++// The immediate has 8bits which are pretty scattered: ++// instr bit -> imm bit ++// 12 -> 8 ++// [11:10] -> [4:3] ++// [6:5] -> [7:6] ++// [4:3] -> [2:1] ++// 2 -> 5 ++// ++// N.B. bit 0 of the immediate is missing! ++void setCBType(cinst_t *loc, uint32_t val) { ++ checkInt((inst_t *)loc, val, 9); ++ uint16_t insn = read16le(loc) & 0xE383; ++ uint16_t imm8 = extractBits(val, 8, 8) << 12; ++ uint16_t imm4_3 = extractBits(val, 4, 3) << 10; ++ uint16_t imm7_6 = extractBits(val, 7, 6) << 5; ++ uint16_t imm2_1 = extractBits(val, 2, 1) << 3; ++ uint16_t imm5 = extractBits(val, 5, 5) << 2; ++ insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5; ++ ++ write16le(loc, insn); ++} ++ ++// Set immediate val in the instruction at *loc. CJ-type instructions have a ++// lenght of 16 bits (half-word, compared to the usual 32bit/word instructions.) ++// The immediate has 11bits which are pretty scattered: ++// instr bit -> imm bit ++// 12 -> 11 ++// 11 -> 4 ++// [10:9] ->[9:8] ++// 8 -> 10 ++// 7 -> 6 ++// 6 -> 7 ++// [5:3] -> [3:1] ++// 2 -> 5 ++// ++// N.B. bit 0 of the immediate is missing! ++void setCJType(cinst_t *loc, uint32_t val) { ++ checkInt((inst_t *)loc, val, 12); ++ uint16_t insn = read16le(loc) & 0xE003; ++ uint16_t imm11 = extractBits(val, 11, 11) << 12; ++ uint16_t imm4 = extractBits(val, 4, 4) << 11; ++ uint16_t imm9_8 = extractBits(val, 9, 8) << 9; ++ uint16_t imm10 = extractBits(val, 10, 10) << 8; ++ uint16_t imm6 = extractBits(val, 6, 6) << 7; ++ uint16_t imm7 = extractBits(val, 7, 7) << 6; ++ uint16_t imm3_1 = extractBits(val, 3, 1) << 3; ++ uint16_t imm5 = extractBits(val, 5, 5) << 2; ++ insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5; ++ ++ write16le(loc, insn); ++} ++ ++// Encode the addend according to the relocaction into the instruction. ++bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend) { ++ // instruction to rewrite (P: Position of the relocation) ++ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset); ++ IF_DEBUG(linker, ++ debugBelch( ++ "Relocation type %s 0x%lx (%lu) symbol 0x%lx addend 0x%x (%u / " ++ "%d) P 0x%lx\n", ++ relocationTypeToString(rel->r_info), ELF64_R_TYPE(rel->r_info), ++ ELF64_R_TYPE(rel->r_info), ELF64_R_SYM(rel->r_info), addend, ++ addend, addend, P)); ++ switch (ELF64_R_TYPE(rel->r_info)) { ++ case R_RISCV_32_PCREL: ++ case R_RISCV_32: ++ write32le((inst_t *)P, addend); ++ break; ++ case R_RISCV_64: ++ write64le((uint64_t *)P, addend); ++ break; ++ case R_RISCV_GOT_HI20: ++ case R_RISCV_PCREL_HI20: ++ case R_RISCV_HI20: { ++ setUType((inst_t *)P, addend); ++ break; ++ } ++ case R_RISCV_PCREL_LO12_I: ++ case R_RISCV_LO12_I: { ++ setIType((inst_t *)P, addend); ++ break; ++ } ++ case R_RISCV_RVC_JUMP: { ++ setCJType((cinst_t *)P, addend); ++ break; ++ } ++ case R_RISCV_RVC_BRANCH: { ++ setCBType((cinst_t *)P, addend); ++ break; ++ } ++ case R_RISCV_BRANCH: { ++ setBType((inst_t *)P, addend); ++ break; ++ } ++ case R_RISCV_CALL: ++ case R_RISCV_CALL_PLT: { ++ // We could relax more (in some cases) but right now most important is to ++ // make it work. ++ setUType((inst_t *)P, addend); ++ setIType(((inst_t *)P) + 1, addend); ++ break; ++ } ++ case R_RISCV_JAL: { ++ setJType((inst_t *)P, addend); ++ break; ++ } ++ case R_RISCV_ADD8: ++ write8le((uint8_t *)P, read8le((uint8_t *)P) + addend); ++ break; ++ case R_RISCV_ADD16: ++ write16le((cinst_t *)P, read16le((cinst_t *)P) + addend); ++ break; ++ case R_RISCV_ADD32: ++ write32le((inst_t *)P, read32le((inst_t *)P) + addend); ++ break; ++ case R_RISCV_ADD64: ++ write64le((uint64_t *)P, read64le((uint64_t *)P) + addend); ++ break; ++ case R_RISCV_SUB6: { ++ uint8_t keep = *((uint8_t *)P) & 0xc0; ++ uint8_t imm = (((*(uint8_t *)P) & 0x3f) - addend) & 0x3f; ++ ++ write8le((uint8_t *)P, keep | imm); ++ break; ++ } ++ case R_RISCV_SUB8: ++ write8le((uint8_t *)P, read8le((uint8_t *)P) - addend); ++ break; ++ case R_RISCV_SUB16: ++ write16le((cinst_t *)P, read16le((cinst_t *)P) - addend); ++ break; ++ case R_RISCV_SUB32: ++ write32le((inst_t *)P, read32le((inst_t *)P) - addend); ++ break; ++ case R_RISCV_SUB64: ++ write64le((uint64_t *)P, read64le((uint64_t *)P) - addend); ++ break; ++ case R_RISCV_SET6: { ++ uint8_t keep = *((uint8_t *)P) & 0xc0; ++ uint8_t imm = (addend & 0x3f) & 0x3f; ++ ++ write8le((uint8_t *)P, keep | imm); ++ break; ++ } ++ case R_RISCV_SET8: ++ write8le((uint8_t *)P, addend); ++ break; ++ case R_RISCV_SET16: ++ write16le((cinst_t *)P, addend); ++ break; ++ case R_RISCV_SET32: ++ write32le((inst_t *)P, addend); ++ break; ++ case R_RISCV_PCREL_LO12_S: ++ case R_RISCV_TPREL_LO12_S: ++ case R_RISCV_LO12_S: { ++ setSType((inst_t *)P, addend); ++ break; ++ } ++ case R_RISCV_RELAX: ++ case R_RISCV_ALIGN: ++ // Implementing relaxations (rewriting instructions to more efficient ones) ++ // could be implemented in future. As the code already is aligned and we do ++ // not change the instruction sizes, we should get away with not aligning ++ // (though, that is cheating.) To align or change the instruction count, we ++ // would need machinery to squeeze or extend memory at the current location. ++ break; ++ default: ++ barf("Missing relocation 0x%lx\n", ELF64_R_TYPE(rel->r_info)); ++ } ++ return EXIT_SUCCESS; ++} ++ ++/** ++ * Compute the *new* addend for a relocation, given a pre-existing addend. ++ * @param section The section the relocation is in. ++ * @param rel The Relocation struct. ++ * @param symbol The target symbol. ++ * @param addend The existing addend. Either explicit or implicit. ++ * @return The new computed addend. ++ */ ++int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol, ++ int64_t addend, ObjectCode *oc) { ++ Section * section = &oc->sections[relaTab->targetSectionIndex]; ++ ++ // instruction to rewrite (P: Position of the relocation) ++ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset); ++ ++ CHECK(0x0 != P); ++ CHECK((uint64_t)section->start <= P); ++ CHECK(P <= (uint64_t)section->start + section->size); ++ // S: Value of the symbol in the symbol table ++ addr_t S = (addr_t)symbol->addr; ++ /* GOT slot for the symbol (G + GOT) */ ++ addr_t GOT_S = (addr_t)symbol->got_addr; ++ ++ // A: Addend field in the relocation entry associated with the symbol ++ int64_t A = addend; ++ ++ IF_DEBUG(linker, debugBelch("%s: P 0x%lx S 0x%lx %s GOT_S 0x%lx A 0x%lx relNo %u\n", ++ relocationTypeToString(rel->r_info), P, S, ++ symbol->name, GOT_S, A, relNo)); ++ switch (ELF64_R_TYPE(rel->r_info)) { ++ case R_RISCV_32: ++ return S + A; ++ case R_RISCV_64: ++ return S + A; ++ case R_RISCV_HI20: ++ return S + A; ++ case R_RISCV_JUMP_SLOT: ++ return S; ++ case R_RISCV_JAL: ++ return S + A - P; ++ case R_RISCV_PCREL_HI20: ++ return S + A - P; ++ case R_RISCV_LO12_I: ++ return S + A; ++ // Quoting LLVM docs: For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), ++ // the symbol actually points the corresponding R_RISCV_PCREL_HI20 ++ // relocation, and the target VA is calculated using PCREL_HI20's symbol. ++ case R_RISCV_PCREL_LO12_S: ++ FALLTHROUGH; ++ case R_RISCV_PCREL_LO12_I: { ++ // Lookup related HI20 relocation and use that value. I'm still confused why ++ // relocations aren't self-contained, but this is how LLVM does it. And, ++ // calculating the lower 12 bit without any relationship to the GOT entry's ++ // address makes no sense either. ++ for (int64_t i = relNo; i >= 0 ; i--) { ++ Elf_Rela *rel_prime = &relaTab->relocations[i]; ++ ++ addr_t P_prime = ++ (addr_t)((uint8_t *)section->start + rel_prime->r_offset); ++ ++ if (P_prime != S) { ++ // S points to the P of the corresponding *_HI20 relocation. ++ continue; ++ } ++ ++ ElfSymbol *symbol_prime = ++ findSymbol(oc, relaTab->sectionHeader->sh_link, ++ ELF64_R_SYM((Elf64_Xword)rel_prime->r_info)); ++ ++ CHECK(0x0 != symbol_prime); ++ ++ /* take explicit addend */ ++ int64_t addend_prime = rel_prime->r_addend; ++ ++ uint64_t type_prime = ELF64_R_TYPE(rel_prime->r_info); ++ ++ if (type_prime == R_RISCV_PCREL_HI20 || ++ type_prime == R_RISCV_GOT_HI20 || ++ type_prime == R_RISCV_TLS_GD_HI20 || ++ type_prime == R_RISCV_TLS_GOT_HI20) { ++ IF_DEBUG(linker, ++ debugBelch( ++ "Found matching relocation: %s (P: 0x%lx, S: 0x%lx, " ++ "sym-name: %s) -> %s (P: 0x%lx, S: %p, sym-name: %s, relNo: %ld)", ++ relocationTypeToString(rel->r_info), P, S, symbol->name, ++ relocationTypeToString(rel_prime->r_info), P_prime, ++ symbol_prime->addr, symbol_prime->name, i)); ++ int32_t result = computeAddend(relaTab, i, (Elf_Rel *)rel_prime, ++ symbol_prime, addend_prime, oc); ++ IF_DEBUG(linker, debugBelch("Result of computeAddend: 0x%x (%d)\n", ++ result, result)); ++ return result; ++ } ++ } ++ debugBelch("Missing HI relocation for %s: P 0x%lx S 0x%lx %s\n", ++ relocationTypeToString(rel->r_info), P, S, symbol->name); ++ abort(); ++ } ++ ++ case R_RISCV_RVC_JUMP: ++ return S + A - P; ++ case R_RISCV_RVC_BRANCH: ++ return S + A - P; ++ case R_RISCV_BRANCH: ++ return S + A - P; ++ case R_RISCV_CALL: ++ case R_RISCV_CALL_PLT: { ++ addr_t GOT_Target; ++ if (GOT_S != 0) { ++ // 1. Public symbol with GOT entry. ++ GOT_Target = GOT_S; ++ } else { ++ // 2. Fake GOT entry with symbol extra entry. ++ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S); ++ addr_t* FAKE_GOT_S = &symbolExtra->addr; ++ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT w/ SymbolExtra = %p , " ++ "entry = %p\n", ++ symbolExtra, FAKE_GOT_S)); ++ GOT_Target = (addr_t) FAKE_GOT_S; ++ } ++ ++ if (findStub(section, (void **)&S, 0)) { ++ /* did not find it. Crete a new stub. */ ++ if (makeStub(section, (void **)&S, (void *)GOT_Target, 0)) { ++ abort(/* could not find or make stub */); ++ } ++ } ++ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT: S = 0x%lx A = 0x%lx P = " ++ "0x%lx (S + A) - P = 0x%lx \n", ++ S, A, P, (S + A) - P)); ++ return (S + A) - P; ++ } ++ case R_RISCV_ADD8: ++ FALLTHROUGH; ++ case R_RISCV_ADD16: ++ FALLTHROUGH; ++ case R_RISCV_ADD32: ++ FALLTHROUGH; ++ case R_RISCV_ADD64: ++ return S + A; // Add V when the value is set ++ case R_RISCV_SUB6: ++ FALLTHROUGH; ++ case R_RISCV_SUB8: ++ FALLTHROUGH; ++ case R_RISCV_SUB16: ++ FALLTHROUGH; ++ case R_RISCV_SUB32: ++ FALLTHROUGH; ++ case R_RISCV_SUB64: ++ return S + A; // Subtract from V when value is set ++ case R_RISCV_SET6: ++ FALLTHROUGH; ++ case R_RISCV_SET8: ++ FALLTHROUGH; ++ case R_RISCV_SET16: ++ FALLTHROUGH; ++ case R_RISCV_SET32: ++ return S + A; ++ case R_RISCV_RELAX: ++ // This "relocation" has no addend. ++ FALLTHROUGH; ++ case R_RISCV_ALIGN: ++ // I guess we don't need to implement this relaxation. Otherwise, this ++ // should return the number of blank bytes to insert via NOPs. ++ return 0; ++ case R_RISCV_32_PCREL: ++ return S + A - P; ++ case R_RISCV_GOT_HI20: { ++ // TODO: Allocating extra memory for every symbol just to play this trick ++ // seems to be a bit obscene. (GOT relocations hitting local symbols ++ // happens, but not very often.) It would be better to allocate only what we ++ // really need. ++ ++ // There are two cases here: 1. The symbol is public and has an entry in the ++ // GOT. 2. It's local and has no corresponding GOT entry. The first case is ++ // easy: We simply calculate the addend with the GOT address. In the second ++ // case we create a symbol extra entry and pretend it's the GOT. ++ if (GOT_S != 0) { ++ // 1. Public symbol with GOT entry. ++ return GOT_S + A - P; ++ } else { ++ // 2. Fake GOT entry with symbol extra entry. ++ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S); ++ addr_t* FAKE_GOT_S = &symbolExtra->addr; ++ addr_t res = (addr_t) FAKE_GOT_S + A - P; ++ IF_DEBUG(linker, debugBelch("R_RISCV_GOT_HI20 w/ SymbolExtra = %p , " ++ "entry = %p , reloc-addend = 0x%lu ", ++ symbolExtra, FAKE_GOT_S, res)); ++ return res; ++ } ++ } ++ default: ++ barf("Unimplemented relocation: 0x%lx\n (%lu)", ++ ELF64_R_TYPE(rel->r_info), ELF64_R_TYPE(rel->r_info)); ++ } ++ barf("This should never happen!"); ++} ++ ++// Iterate over all relocations and perform them. ++bool relocateObjectCodeRISCV64(ObjectCode *oc) { ++ for (ElfRelocationTable *relTab = oc->info->relTable; relTab != NULL; ++ relTab = relTab->next) { ++ /* only relocate interesting sections */ ++ if (SECTIONKIND_OTHER == oc->sections[relTab->targetSectionIndex].kind) ++ continue; ++ ++ Section *targetSection = &oc->sections[relTab->targetSectionIndex]; ++ ++ for (unsigned i = 0; i < relTab->n_relocations; i++) { ++ Elf_Rel *rel = &relTab->relocations[i]; ++ ++ ElfSymbol *symbol = findSymbol(oc, relTab->sectionHeader->sh_link, ++ ELF64_R_SYM((Elf64_Xword)rel->r_info)); ++ ++ CHECK(0x0 != symbol); ++ ++ // This always fails, because we don't support Rel locations, yet: Do we ++ // need this case? Leaving it in to spot the potential bug when it ++ // appears. ++ /* decode implicit addend */ ++ int64_t addend = decodeAddendRISCV64(targetSection, rel); ++ ++ addend = computeAddend((ElfRelocationATable*) relTab, i, rel, symbol, addend, oc); ++ encodeAddendRISCV64(targetSection, rel, addend); ++ } ++ } ++ for (ElfRelocationATable *relaTab = oc->info->relaTable; relaTab != NULL; ++ relaTab = relaTab->next) { ++ /* only relocate interesting sections */ ++ if (SECTIONKIND_OTHER == oc->sections[relaTab->targetSectionIndex].kind) ++ continue; ++ ++ Section *targetSection = &oc->sections[relaTab->targetSectionIndex]; ++ ++ for (unsigned i = 0; i < relaTab->n_relocations; i++) { ++ ++ Elf_Rela *rel = &relaTab->relocations[i]; ++ ++ ElfSymbol *symbol = findSymbol(oc, relaTab->sectionHeader->sh_link, ++ ELF64_R_SYM((Elf64_Xword)rel->r_info)); ++ ++ CHECK(0x0 != symbol); ++ ++ /* take explicit addend */ ++ int64_t addend = rel->r_addend; ++ ++ addend = computeAddend(relaTab, i, (Elf_Rel *)rel, symbol, addend, oc); ++ encodeAddendRISCV64(targetSection, (Elf_Rel *)rel, addend); ++ } ++ } ++ return EXIT_SUCCESS; ++} ++ ++void flushInstructionCacheRISCV64(ObjectCode *oc) { ++ // Synchronize the memory and instruction cache to prevent illegal instruction ++ // exceptions. On Linux the parameters of __builtin___clear_cache are ++ // currently unused. Add them anyways for future compatibility. (I.e. the ++ // parameters couldn't be checked during development.) ++ ++ /* The main object code */ ++ void *codeBegin = oc->image + oc->misalignment; ++ __builtin___clear_cache(codeBegin, (void*) ((uint64_t*) codeBegin + oc->fileSize)); ++ ++ /* Jump Islands */ ++ __builtin___clear_cache((void *)oc->symbol_extras, ++ (void *)(oc->symbol_extras + oc->n_symbol_extras)); ++ ++ // Memory barrier to ensure nothing circumvents the fence.i / cache flushes. ++ SEQ_CST_FENCE(); ++} ++ ++#endif /* OBJECTFORMAT_ELF */ ++#endif /* riscv64_HOST_ARCH */ +Index: ghc-9.10.1/rts/linker/elf_reloc_riscv64.h +=================================================================== +--- /dev/null ++++ ghc-9.10.1/rts/linker/elf_reloc_riscv64.h +@@ -0,0 +1,11 @@ ++#pragma once ++ ++#include "LinkerInternals.h" ++ ++#if defined(OBJFORMAT_ELF) ++ ++bool ++relocateObjectCodeRISCV64(ObjectCode * oc); ++ ++void flushInstructionCacheRISCV64(ObjectCode *oc); ++#endif /* OBJETFORMAT_ELF */ +Index: ghc-9.10.1/rts/rts.cabal +=================================================================== +--- ghc-9.10.1.orig/rts/rts.cabal ++++ ghc-9.10.1/rts/rts.cabal +@@ -468,9 +468,11 @@ library + linker/elf_got.c + linker/elf_plt.c + linker/elf_plt_aarch64.c ++ linker/elf_plt_riscv64.c + linker/elf_plt_arm.c + linker/elf_reloc.c + linker/elf_reloc_aarch64.c ++ linker/elf_reloc_riscv64.c + linker/elf_tlsgd.c + linker/elf_util.c + sm/BlockAlloc.c +Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.hs +=================================================================== +--- /dev/null ++++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.hs +@@ -0,0 +1,123 @@ ++{-# LANGUAGE ForeignFunctionInterface #-} ++{-# LANGUAGE GHCForeignImportPrim #-} ++{-# LANGUAGE MagicHash #-} ++{-# LANGUAGE UnboxedTuples #-} ++{-# LANGUAGE UnliftedFFITypes #-} ++ ++-- | This test ensures that sub-word signed and unsigned parameters are correctly ++-- handed over to C functions. I.e. it asserts the calling-convention. ++-- ++-- The number of parameters is currently shaped for the RISCV64 calling-convention. ++-- You may need to add more parameters to the C functions in case there are more ++-- registers reserved for parameters in your architecture. ++module Main where ++ ++import Data.Word ++import GHC.Exts ++import GHC.Int ++ ++foreign import ccall "fun8" ++ fun8 :: ++ Int8# -> -- a0 ++ Word8# -> -- a1 ++ Int8# -> -- a2 ++ Int8# -> -- a3 ++ Int8# -> -- a4 ++ Int8# -> -- a5 ++ Int8# -> -- a6 ++ Int8# -> -- a7 ++ Word8# -> -- s0 ++ Int8# -> -- s1 ++ Int64# -- result ++ ++foreign import ccall "fun16" ++ fun16 :: ++ Int16# -> -- a0 ++ Word16# -> -- a1 ++ Int16# -> -- a2 ++ Int16# -> -- a3 ++ Int16# -> -- a4 ++ Int16# -> -- a5 ++ Int16# -> -- a6 ++ Int16# -> -- a7 ++ Word16# -> -- s0 ++ Int16# -> -- s1 ++ Int64# -- result ++ ++foreign import ccall "fun32" ++ fun32 :: ++ Int32# -> -- a0 ++ Word32# -> -- a1 ++ Int32# -> -- a2 ++ Int32# -> -- a3 ++ Int32# -> -- a4 ++ Int32# -> -- a5 ++ Int32# -> -- a6 ++ Int32# -> -- a7 ++ Word32# -> -- s0 ++ Int32# -> -- s1 ++ Int64# -- result ++ ++foreign import ccall "funFloat" ++ funFloat :: ++ Float# -> -- a0 ++ Float# -> -- a1 ++ Float# -> -- a2 ++ Float# -> -- a3 ++ Float# -> -- a4 ++ Float# -> -- a5 ++ Float# -> -- a6 ++ Float# -> -- a7 ++ Float# -> -- s0 ++ Float# -> -- s1 ++ Float# -- result ++ ++foreign import ccall "funDouble" ++ funDouble :: ++ Double# -> -- a0 ++ Double# -> -- a1 ++ Double# -> -- a2 ++ Double# -> -- a3 ++ Double# -> -- a4 ++ Double# -> -- a5 ++ Double# -> -- a6 ++ Double# -> -- a7 ++ Double# -> -- s0 ++ Double# -> -- s1 ++ Double# -- result ++ ++main :: IO () ++main = ++ -- N.B. the values here aren't choosen by accident: -1 means all bits one in ++ -- twos-complement, which is the same as the max word value. ++ let i8 :: Int8# = intToInt8# (-1#) ++ w8 :: Word8# = wordToWord8# (255##) ++ res8 :: Int64# = fun8 i8 w8 i8 i8 i8 i8 i8 i8 w8 i8 ++ expected_res8 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word8) + 8 * (-1) ++ i16 :: Int16# = intToInt16# (-1#) ++ w16 :: Word16# = wordToWord16# (65535##) ++ res16 :: Int64# = fun16 i16 w16 i16 i16 i16 i16 i16 i16 w16 i16 ++ expected_res16 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word16) + 8 * (-1) ++ i32 :: Int32# = intToInt32# (-1#) ++ w32 :: Word32# = wordToWord32# (4294967295##) ++ res32 :: Int64# = fun32 i32 w32 i32 i32 i32 i32 i32 i32 w32 i32 ++ expected_res32 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word32) + 8 * (-1) ++ resFloat :: Float = F# (funFloat 1.0# 1.1# 1.2# 1.3# 1.4# 1.5# 1.6# 1.7# 1.8# 1.9#) ++ resDouble :: Double = D# (funDouble 1.0## 1.1## 1.2## 1.3## 1.4## 1.5## 1.6## 1.7## 1.8## 1.9##) ++ in do ++ print $ "fun8 result:" ++ show (I64# res8) ++ assertEqual expected_res8 (I64# res8) ++ print $ "fun16 result:" ++ show (I64# res16) ++ assertEqual expected_res16 (I64# res16) ++ print $ "fun32 result:" ++ show (I64# res32) ++ assertEqual expected_res32 (I64# res32) ++ print $ "funFloat result:" ++ show resFloat ++ assertEqual (14.5 :: Float) resFloat ++ print $ "funDouble result:" ++ show resDouble ++ assertEqual (14.5 :: Double) resDouble ++ ++assertEqual :: (Eq a, Show a) => a -> a -> IO () ++assertEqual a b = ++ if a == b ++ then pure () ++ else error $ show a ++ " =/= " ++ show b +Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.stdout +=================================================================== +--- /dev/null ++++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.stdout +@@ -0,0 +1,60 @@ ++"fun8 result:502" ++"fun16 result:131062" ++"fun32 result:8589934582" ++"funFloat result:14.5" ++"funDouble result:14.5" ++fun32: ++a0: 0xffffffff -1 ++a1: 0xffffffff 4294967295 ++a2: 0xffffffff -1 ++a3: 0xffffffff -1 ++a4: 0xffffffff -1 ++a5: 0xffffffff -1 ++a6: 0xffffffff -1 ++a7: 0xffffffff -1 ++s0: 0xffffffff -1 ++s1: 0xffffffff 4294967295 ++fun16: ++a0: 0xffffffff -1 ++a1: 0xffff 65535 ++a2: 0xffffffff -1 ++a3: 0xffffffff -1 ++a4: 0xffffffff -1 ++a5: 0xffffffff -1 ++a6: 0xffffffff -1 ++a7: 0xffffffff -1 ++s0: 0xffffffff -1 ++s1: 0xffff 65535 ++fun8: ++a0: 0xffffffff -1 ++a1: 0xff 255 ++a2: 0xffffffff -1 ++a3: 0xffffffff -1 ++a4: 0xffffffff -1 ++a5: 0xffffffff -1 ++a6: 0xffffffff -1 ++a7: 0xffffffff -1 ++s0: 0xffffffff -1 ++s1: 0xff 255 ++funFloat: ++a0: 1.000000 ++a1: 1.100000 ++a2: 1.200000 ++a3: 1.300000 ++a4: 1.400000 ++a5: 1.500000 ++a6: 1.600000 ++a7: 1.700000 ++s0: 1.800000 ++s1: 1.900000 ++funDouble: ++a0: 1.000000 ++a1: 1.100000 ++a2: 1.200000 ++a3: 1.300000 ++a4: 1.400000 ++a5: 1.500000 ++a6: 1.600000 ++a7: 1.700000 ++s0: 1.800000 ++s1: 1.900000 +Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv_c.c +=================================================================== +--- /dev/null ++++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv_c.c +@@ -0,0 +1,91 @@ ++#include "stdint.h" ++#include "stdio.h" ++ ++int64_t fun8(int8_t a0, uint8_t a1, int8_t a2, int8_t a3, int8_t a4, int8_t a5, ++ int8_t a6, int8_t a7, int8_t s0, uint8_t s1) { ++ printf("fun8:\n"); ++ printf("a0: %#x %hhd\n", a0, a0); ++ printf("a1: %#x %hhu\n", a1, a1); ++ printf("a2: %#x %hhd\n", a2, a2); ++ printf("a3: %#x %hhd\n", a3, a3); ++ printf("a4: %#x %hhd\n", a4, a4); ++ printf("a5: %#x %hhd\n", a5, a5); ++ printf("a6: %#x %hhd\n", a6, a6); ++ printf("a7: %#x %hhd\n", a7, a7); ++ printf("s0: %#x %hhd\n", s0, s0); ++ printf("s1: %#x %hhu\n", s1, s1); ++ ++ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; ++} ++ ++int64_t fun16(int16_t a0, uint16_t a1, int16_t a2, int16_t a3, int16_t a4, ++ int16_t a5, int16_t a6, int16_t a7, int16_t s0, uint16_t s1) { ++ printf("fun16:\n"); ++ printf("a0: %#x %hd\n", a0, a0); ++ printf("a1: %#x %hu\n", a1, a1); ++ printf("a2: %#x %hd\n", a2, a2); ++ printf("a3: %#x %hd\n", a3, a3); ++ printf("a4: %#x %hd\n", a4, a4); ++ printf("a5: %#x %hd\n", a5, a5); ++ printf("a6: %#x %hd\n", a6, a6); ++ printf("a7: %#x %hd\n", a7, a7); ++ printf("s0: %#x %hd\n", s0, s0); ++ printf("s1: %#x %hu\n", s1, s1); ++ ++ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; ++} ++ ++int64_t fun32(int32_t a0, uint32_t a1, int32_t a2, int32_t a3, int32_t a4, ++ int32_t a5, int32_t a6, int32_t a7, int32_t s0, uint32_t s1) { ++ printf("fun32:\n"); ++ printf("a0: %#x %d\n", a0, a0); ++ printf("a1: %#x %u\n", a1, a1); ++ printf("a2: %#x %d\n", a2, a2); ++ printf("a3: %#x %d\n", a3, a3); ++ printf("a4: %#x %d\n", a4, a4); ++ printf("a5: %#x %d\n", a5, a5); ++ printf("a6: %#x %d\n", a6, a6); ++ printf("a7: %#x %d\n", a7, a7); ++ printf("s0: %#x %d\n", s0, s0); ++ printf("s1: %#x %u\n", s1, s1); ++ ++ // Ensure the addition happens in long int (not just int) precission. ++ // Otherwise, the result is truncated during the operation. ++ int64_t force_int64_precission = 0; ++ return force_int64_precission + a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + ++ s1; ++} ++ ++float funFloat(float a0, float a1, float a2, float a3, float a4, float a5, ++ float a6, float a7, float s0, float s1) { ++ printf("funFloat:\n"); ++ printf("a0: %f\n", a0); ++ printf("a1: %f\n", a1); ++ printf("a2: %f\n", a2); ++ printf("a3: %f\n", a3); ++ printf("a4: %f\n", a4); ++ printf("a5: %f\n", a5); ++ printf("a6: %f\n", a6); ++ printf("a7: %f\n", a7); ++ printf("s0: %f\n", s0); ++ printf("s1: %f\n", s1); ++ ++ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; ++} ++ ++double funDouble(double a0, double a1, double a2, double a3, double a4, double a5, ++ double a6, double a7, double s0, double s1) { ++ printf("funDouble:\n"); ++ printf("a0: %f\n", a0); ++ printf("a1: %f\n", a1); ++ printf("a2: %f\n", a2); ++ printf("a3: %f\n", a3); ++ printf("a4: %f\n", a4); ++ printf("a5: %f\n", a5); ++ printf("a6: %f\n", a6); ++ printf("a7: %f\n", a7); ++ printf("s0: %f\n", s0); ++ printf("s1: %f\n", s1); ++ ++ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; ++}