ghc/riscv64-ncg.patch

Index: ghc-9.10.1/CODEOWNERS
===================================================================
--- ghc-9.10.1.orig/CODEOWNERS
+++ ghc-9.10.1/CODEOWNERS
@@ -40,6 +40,7 @@
/compiler/GHC/HsToCore/Foreign/Wasm.hs @TerrorJack
/compiler/GHC/Tc/Deriv/ @RyanGlScott
/compiler/GHC/CmmToAsm/ @simonmar @bgamari @AndreasK
+/compiler/GHC/CmmToAsm/RV64/ @supersven @angerman
/compiler/GHC/CmmToAsm/Wasm/ @TerrorJack
/compiler/GHC/CmmToLlvm/ @angerman
/compiler/GHC/StgToCmm/ @simonmar @osa1
Index: ghc-9.10.1/compiler/CodeGen.Platform.h
===================================================================
--- ghc-9.10.1.orig/compiler/CodeGen.Platform.h
+++ ghc-9.10.1/compiler/CodeGen.Platform.h
@@ -1,7 +1,8 @@
import GHC.Cmm.Expr
#if !(defined(MACHREGS_i386) || defined(MACHREGS_x86_64) \
- || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64))
+ || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64) \
+ || defined(MACHREGS_riscv64))
import GHC.Utils.Panic.Plain
#endif
import GHC.Platform.Reg
@@ -1041,6 +1042,105 @@ freeReg 18 = False
# if defined(REG_Base)
freeReg REG_Base = False
+# endif
+# if defined(REG_Sp)
+freeReg REG_Sp = False
+# endif
+# if defined(REG_SpLim)
+freeReg REG_SpLim = False
+# endif
+# if defined(REG_Hp)
+freeReg REG_Hp = False
+# endif
+# if defined(REG_HpLim)
+freeReg REG_HpLim = False
+# endif
+
+# if defined(REG_R1)
+freeReg REG_R1 = False
+# endif
+# if defined(REG_R2)
+freeReg REG_R2 = False
+# endif
+# if defined(REG_R3)
+freeReg REG_R3 = False
+# endif
+# if defined(REG_R4)
+freeReg REG_R4 = False
+# endif
+# if defined(REG_R5)
+freeReg REG_R5 = False
+# endif
+# if defined(REG_R6)
+freeReg REG_R6 = False
+# endif
+# if defined(REG_R7)
+freeReg REG_R7 = False
+# endif
+# if defined(REG_R8)
+freeReg REG_R8 = False
+# endif
+
+# if defined(REG_F1)
+freeReg REG_F1 = False
+# endif
+# if defined(REG_F2)
+freeReg REG_F2 = False
+# endif
+# if defined(REG_F3)
+freeReg REG_F3 = False
+# endif
+# if defined(REG_F4)
+freeReg REG_F4 = False
+# endif
+# if defined(REG_F5)
+freeReg REG_F5 = False
+# endif
+# if defined(REG_F6)
+freeReg REG_F6 = False
+# endif
+
+# if defined(REG_D1)
+freeReg REG_D1 = False
+# endif
+# if defined(REG_D2)
+freeReg REG_D2 = False
+# endif
+# if defined(REG_D3)
+freeReg REG_D3 = False
+# endif
+# if defined(REG_D4)
+freeReg REG_D4 = False
+# endif
+# if defined(REG_D5)
+freeReg REG_D5 = False
+# endif
+# if defined(REG_D6)
+freeReg REG_D6 = False
+# endif
+
+freeReg _ = True
+
+#elif defined(MACHREGS_riscv64)
+
+-- zero reg
+freeReg 0 = False
+-- link register
+freeReg 1 = False
+-- stack pointer
+freeReg 2 = False
+-- global pointer
+freeReg 3 = False
+-- thread pointer
+freeReg 4 = False
+-- frame pointer
+freeReg 8 = False
+-- made-up inter-procedural (ip) register
+-- See Note [The made-up RISCV64 TMP (IP) register]
+freeReg 31 = False
+
+# if defined(REG_Base)
+freeReg REG_Base = False
# endif
# if defined(REG_Sp)
freeReg REG_Sp = False
Index: ghc-9.10.1/compiler/GHC/Cmm/CLabel.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/Cmm/CLabel.hs
+++ ghc-9.10.1/compiler/GHC/Cmm/CLabel.hs
@@ -1720,6 +1720,8 @@ pprDynamicLinkerAsmLabel !platform dllIn
| platformArch platform == ArchAArch64
= ppLbl
+ | platformArch platform == ArchRISCV64
+ = ppLbl
| platformArch platform == ArchX86_64
= case dllInfo of
Index: ghc-9.10.1/compiler/GHC/CmmToAsm.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm.hs
+++ ghc-9.10.1/compiler/GHC/CmmToAsm.hs
@@ -67,6 +67,7 @@ import qualified GHC.CmmToAsm.X86 as X
import qualified GHC.CmmToAsm.PPC as PPC
import qualified GHC.CmmToAsm.AArch64 as AArch64
import qualified GHC.CmmToAsm.Wasm as Wasm32
+import qualified GHC.CmmToAsm.RV64 as RV64
import GHC.CmmToAsm.Reg.Liveness
import qualified GHC.CmmToAsm.Reg.Linear as Linear
@@ -148,7 +149,7 @@ nativeCodeGen logger ts config modLoc h
ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
- ArchRISCV64 -> panic "nativeCodeGen: No NCG for RISCV64"
+ ArchRISCV64 -> nCG' (RV64.ncgRV64 config)
ArchLoongArch64->panic "nativeCodeGen: No NCG for LoongArch64"
ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
@@ -240,6 +240,7 @@ dwarfRegNo p r = case platformArch p of
| r == xmm15 -> 32
ArchPPC_64 _ -> fromIntegral $ toRegNo r
ArchAArch64 -> fromIntegral $ toRegNo r
+ ArchRISCV64 -> fromIntegral $ toRegNo r
_other -> error "dwarfRegNo: Unsupported platform or unknown register!"
-- | Virtual register number to use for return address.
@@ -252,5 +253,6 @@ dwarfReturnRegNo p
ArchX86 -> 8 -- eip
ArchX86_64 -> 16 -- rip
ArchPPC_64 ELF_V2 -> 65 -- lr (link register)
- ArchAArch64-> 30
+ ArchAArch64 -> 30
+ ArchRISCV64 -> 1 -- ra (return address)
_other -> error "dwarfReturnRegNo: Unsupported platform!"
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/PIC.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/PIC.hs
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/PIC.hs
@@ -132,6 +132,11 @@ cmmMakeDynamicReference config reference
addImport symbolPtr
return $ cmmMakePicReference config symbolPtr
+ AccessViaSymbolPtr | ArchRISCV64 <- platformArch platform -> do
+ let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
+ addImport symbolPtr
+ return $ cmmMakePicReference config symbolPtr
+
AccessViaSymbolPtr -> do
let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
addImport symbolPtr
@@ -164,6 +169,10 @@ cmmMakePicReference config lbl
| ArchAArch64 <- platformArch platform
= CmmLit $ CmmLabel lbl
+ -- As on AArch64, there's no PIC base register.
+ | ArchRISCV64 <- platformArch platform
+ = CmmLit $ CmmLabel lbl
+
| OSAIX <- platformOS platform
= CmmMachOp (MO_Add W32)
[ CmmReg (CmmGlobal $ GlobalRegUse PicBaseReg (bWord platform))
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64.hs
@@ -0,0 +1,57 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+-- | Native code generator for the RISC-V 64-bit (RV64) architecture
+module GHC.CmmToAsm.RV64 (ncgRV64) where
+
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.Monad
+import GHC.CmmToAsm.RV64.CodeGen qualified as RV64
+import GHC.CmmToAsm.RV64.Instr qualified as RV64
+import GHC.CmmToAsm.RV64.Ppr qualified as RV64
+import GHC.CmmToAsm.RV64.RegInfo qualified as RV64
+import GHC.CmmToAsm.RV64.Regs qualified as RV64
+import GHC.CmmToAsm.Types
+import GHC.Prelude
+import GHC.Utils.Outputable (ftext)
+
+ncgRV64 :: NCGConfig -> NcgImpl RawCmmStatics RV64.Instr RV64.JumpDest
+ncgRV64 config =
+ NcgImpl
+ { ncgConfig = config,
+ cmmTopCodeGen = RV64.cmmTopCodeGen,
+ generateJumpTableForInstr = RV64.generateJumpTableForInstr config,
+ getJumpDestBlockId = RV64.getJumpDestBlockId,
+ canShortcut = RV64.canShortcut,
+ shortcutStatics = RV64.shortcutStatics,
+ shortcutJump = RV64.shortcutJump,
+ pprNatCmmDeclS = RV64.pprNatCmmDecl config,
+ pprNatCmmDeclH = RV64.pprNatCmmDecl config,
+ maxSpillSlots = RV64.maxSpillSlots config,
+ allocatableRegs = RV64.allocatableRegs platform,
+ ncgAllocMoreStack = RV64.allocMoreStack platform,
+ ncgMakeFarBranches = RV64.makeFarBranches,
+ extractUnwindPoints = const [],
+ invertCondBranches = \_ _ -> id
+ }
+ where
+ platform = ncgPlatform config
+
+-- | `Instruction` instance for RV64
+instance Instruction RV64.Instr where
+ regUsageOfInstr = RV64.regUsageOfInstr
+ patchRegsOfInstr = RV64.patchRegsOfInstr
+ isJumpishInstr = RV64.isJumpishInstr
+ jumpDestsOfInstr = RV64.jumpDestsOfInstr
+ patchJumpInstr = RV64.patchJumpInstr
+ mkSpillInstr = RV64.mkSpillInstr
+ mkLoadInstr = RV64.mkLoadInstr
+ takeDeltaInstr = RV64.takeDeltaInstr
+ isMetaInstr = RV64.isMetaInstr
+ mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
+ takeRegRegMoveInstr = RV64.takeRegRegMoveInstr
+ mkJumpInstr = RV64.mkJumpInstr
+ mkStackAllocInstr = RV64.mkStackAllocInstr
+ mkStackDeallocInstr = RV64.mkStackDeallocInstr
+ mkComment = pure . RV64.COMMENT . ftext
+ pprInstr = RV64.pprInstr
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/CodeGen.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/CodeGen.hs
@@ -0,0 +1,2207 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module GHC.CmmToAsm.RV64.CodeGen
+ ( cmmTopCodeGen,
+ generateJumpTableForInstr,
+ makeFarBranches,
+ )
+where
+
+import Control.Monad
+import Data.Maybe
+import Data.Word
+import GHC.Cmm
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.DebugBlock
+import GHC.Cmm.Switch
+import GHC.Cmm.Utils
+import GHC.CmmToAsm.CPrim
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Format
+import GHC.CmmToAsm.Monad
+ ( NatM,
+ getBlockIdNat,
+ getConfig,
+ getDebugBlock,
+ getFileId,
+ getNewLabelNat,
+ getNewRegNat,
+ getPicBaseMaybeNat,
+ getPlatform,
+ )
+import GHC.CmmToAsm.PIC
+import GHC.CmmToAsm.RV64.Cond
+import GHC.CmmToAsm.RV64.Instr
+import GHC.CmmToAsm.RV64.Regs
+import GHC.CmmToAsm.Types
+import GHC.Data.FastString
+import GHC.Data.OrdList
+import GHC.Float
+import GHC.Platform
+import GHC.Platform.Reg
+import GHC.Platform.Regs
+import GHC.Prelude hiding (EQ)
+import GHC.Types.Basic
+import GHC.Types.ForeignCall
+import GHC.Types.SrcLoc (srcSpanFile, srcSpanStartCol, srcSpanStartLine)
+import GHC.Types.Tickish (GenTickish (..))
+import GHC.Types.Unique.Supply
+import GHC.Utils.Constants (debugIsOn)
+import GHC.Utils.Misc
+import GHC.Utils.Monad
+import GHC.Utils.Outputable
+import GHC.Utils.Panic
+
+-- For an overview of an NCG's structure, see Note [General layout of an NCG]
+
+cmmTopCodeGen ::
+ RawCmmDecl ->
+ NatM [NatCmmDecl RawCmmStatics Instr]
+-- Thus we'll have to deal with either CmmProc ...
+cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do
+ picBaseMb <- getPicBaseMaybeNat
+ when (isJust picBaseMb) $ panic "RV64.cmmTopCodeGen: Unexpected PIC base register (RISCV ISA does not define one)"
+
+ let blocks = toBlockListEntryFirst graph
+ (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen blocks
+
+ let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
+ tops = proc : concat statics
+
+ pure tops
+
+-- ... or CmmData.
+cmmTopCodeGen (CmmData sec dat) = pure [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen ::
+ Block CmmNode C C ->
+ NatM
+ ( [NatBasicBlock Instr],
+ [NatCmmDecl RawCmmStatics Instr]
+ )
+basicBlockCodeGen block = do
+ config <- getConfig
+ let (_, nodes, tail) = blockSplit block
+ id = entryLabel block
+ stmts = blockToList nodes
+
+ header_comment_instr
+ | debugIsOn =
+ unitOL
+ $ MULTILINE_COMMENT
+ ( text "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
+ $+$ withPprStyle defaultDumpStyle (pdoc (ncgPlatform config) block)
+ )
+ | otherwise = nilOL
+
+ -- Generate location directive `.loc` (DWARF debug location info)
+ loc_instrs <- genLocInstrs
+
+ -- Generate other instructions
+ mid_instrs <- stmtsToInstrs stmts
+ (!tail_instrs) <- stmtToInstrs tail
+
+ let instrs = header_comment_instr `appOL` loc_instrs `appOL` mid_instrs `appOL` tail_instrs
+
+ -- TODO: The x86 backend runs @verifyBasicBlock@ here. How important it is to
+ -- have a valid CFG is an open question: this NCG and the AArch64 and PPC NCGs
+ -- work fine without it.
+
+ -- Code generation may introduce new basic block boundaries, which are
+ -- indicated by the NEWBLOCK instruction. We must split up the instruction
+ -- stream into basic blocks again. Also, we extract LDATAs here too.
+ (top, other_blocks, statics) = foldrOL mkBlocks ([], [], []) instrs
+
+ return (BasicBlock id top : other_blocks, statics)
+ where
+ genLocInstrs :: NatM (OrdList Instr)
+ genLocInstrs = do
+ dbg <- getDebugBlock (entryLabel block)
+ case dblSourceTick =<< dbg of
+ Just (SourceNote span name) ->
+ do
+ fileId <- getFileId (srcSpanFile span)
+ let line = srcSpanStartLine span; col = srcSpanStartCol span
+ pure $ unitOL $ LOCATION fileId line col name
+ _ -> pure nilOL
+
+mkBlocks ::
+ Instr ->
+ ([Instr], [GenBasicBlock Instr], [GenCmmDecl RawCmmStatics h g]) ->
+ ([Instr], [GenBasicBlock Instr], [GenCmmDecl RawCmmStatics h g])
+mkBlocks (NEWBLOCK id) (instrs, blocks, statics) =
+ ([], BasicBlock id instrs : blocks, statics)
+mkBlocks (LDATA sec dat) (instrs, blocks, statics) =
+ (instrs, blocks, CmmData sec dat : statics)
+mkBlocks instr (instrs, blocks, statics) =
+ (instr : instrs, blocks, statics)
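+
+-- For example, the instruction stream [i1, NEWBLOCK b, i2, LDATA d, i3]
+-- yields an entry block holding [i1], a new block b holding [i2, i3], and
+-- the data declaration d extracted into the statics list.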
+
+-- -----------------------------------------------------------------------------
+
+-- | Utilities
+
+-- | Annotate an `Instr` with a `SDoc` comment
+ann :: SDoc -> Instr -> Instr
+ann doc instr {- debugIsOn -} = ANN doc instr
+{-# INLINE ann #-}
+
+-- Using pprExpr would hide the AST; with @ANN@ it ends up in the assembly
+-- under -dppr-debug. The idea is that we can trivially see how a Cmm
+-- expression ended up producing the assembly we see. By having the verbatim
+-- AST printed, we can simply check the patterns that were matched to arrive
+-- at the assembly we generated.
+--
+-- pprExpr will hide a lot of noise of the underlying data structure and print
+-- the expression into something that can be easily read by a human. However
+-- going back to the exact CmmExpr representation can be laborious and adds
+-- indirections to find the matches that lead to the assembly.
+--
+-- An improvement could be to have
+--
+-- (pprExpr genericPlatform e) <> parens (text. show e)
+--
+-- to have the best of both worlds.
+--
+-- Note: debugIsOn is too restrictive: it only holds for debug compilers,
+-- but we also want to be able to inspect this output for release compilers.
+-- Ideally we'd have a check for -dppr-debug here already, such that we don't
+-- even generate the ANN expressions. However, as they are lazy, they
+-- shouldn't be forced unless we actually force them, and without -dppr-debug
+-- they should never end up being forced.
+annExpr :: CmmExpr -> Instr -> Instr
+annExpr e {- debugIsOn -} = ANN (text . show $ e)
+-- annExpr e instr {- debugIsOn -} = ANN (pprExpr genericPlatform e) instr
+-- annExpr _ instr = instr
+{-# INLINE annExpr #-}
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+-- Note [RISCV64 Jump Tables]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Jump tables are implemented by generating a table of relative addresses,
+-- where each entry is the relative offset to the target block from the first
+-- entry / table label (`generateJumpTableForInstr`). Using the jump table means
+-- loading the entry's value and jumping to the calculated absolute address
+-- (`genSwitch`).
+--
+-- For example, this Cmm switch
+--
+-- switch [1 .. 10] _s2wn::I64 {
+-- case 1 : goto c347;
+-- case 2 : goto c348;
+-- case 3 : goto c349;
+-- case 4 : goto c34a;
+-- case 5 : goto c34b;
+-- case 6 : goto c34c;
+-- case 7 : goto c34d;
+-- case 8 : goto c34e;
+-- case 9 : goto c34f;
+-- case 10 : goto c34g;
+-- } // CmmSwitch
+--
+-- leads to this jump table in Assembly
+--
+-- .section .rodata
+-- .balign 8
+-- .Ln34G:
+-- .quad 0
+-- .quad .Lc347-(.Ln34G)+0
+-- .quad .Lc348-(.Ln34G)+0
+-- .quad .Lc349-(.Ln34G)+0
+-- .quad .Lc34a-(.Ln34G)+0
+-- .quad .Lc34b-(.Ln34G)+0
+-- .quad .Lc34c-(.Ln34G)+0
+-- .quad .Lc34d-(.Ln34G)+0
+-- .quad .Lc34e-(.Ln34G)+0
+-- .quad .Lc34f-(.Ln34G)+0
+-- .quad .Lc34g-(.Ln34G)+0
+--
+-- and this indexing code where the jump should be done (register t0 contains
+-- the index)
+--
+-- addi t0, t0, 0 // silly move (ignore it)
+-- la t1, .Ln34G // load the table's address
+-- sll t0, t0, 3 // index * 8 -> offset in bytes
+-- add t0, t0, t1 // address of the table's entry
+-- ld t0, 0(t0) // load entry
+-- add t0, t0, t1 // relative to absolute address
+-- jalr zero, t0, 0 // jump to the block
+--
+-- In object code (disassembled) the table looks like
+--
+-- 0000000000000000 <.Ln34G>:
+-- ...
+-- 8: R_RISCV_ADD64 .Lc347
+-- 8: R_RISCV_SUB64 .Ln34G
+-- 10: R_RISCV_ADD64 .Lc348
+-- 10: R_RISCV_SUB64 .Ln34G
+-- 18: R_RISCV_ADD64 .Lc349
+-- 18: R_RISCV_SUB64 .Ln34G
+-- 20: R_RISCV_ADD64 .Lc34a
+-- 20: R_RISCV_SUB64 .Ln34G
+-- 28: R_RISCV_ADD64 .Lc34b
+-- 28: R_RISCV_SUB64 .Ln34G
+-- 30: R_RISCV_ADD64 .Lc34c
+-- 30: R_RISCV_SUB64 .Ln34G
+-- 38: R_RISCV_ADD64 .Lc34d
+-- 38: R_RISCV_SUB64 .Ln34G
+-- 40: R_RISCV_ADD64 .Lc34e
+-- 40: R_RISCV_SUB64 .Ln34G
+-- 48: R_RISCV_ADD64 .Lc34f
+-- 48: R_RISCV_SUB64 .Ln34G
+-- 50: R_RISCV_ADD64 .Lc34g
+-- 50: R_RISCV_SUB64 .Ln34G
+--
+-- I.e. the relative offset calculations are done by the linker via relocations.
+-- This seems to be PIC compatible; at least `scanelf` (pax-utils) does not
+-- complain.
+
+
+-- | Generate jump to jump table target
+--
+-- The index into the jump table is calculated by evaluating @expr@. The
+-- corresponding table entry contains the relative address to jump to (relative
+-- to the jump table's first entry / the table's own label).
+genSwitch :: NCGConfig -> CmmExpr -> SwitchTargets -> NatM InstrBlock
+genSwitch config expr targets = do
+ (reg, fmt1, e_code) <- getSomeReg indexExpr
+ let fmt = II64
+ targetReg <- getNewRegNat fmt
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference config DataReference lbl
+ (tableReg, fmt2, t_code) <- getSomeReg dynRef
+ let code =
+ toOL
+ [ COMMENT (text "indexExpr" <+> (text . show) indexExpr),
+ COMMENT (text "dynRef" <+> (text . show) dynRef)
+ ]
+ `appOL` e_code
+ `appOL` t_code
+ `appOL` toOL
+ [ COMMENT (ftext "Jump table for switch"),
+ -- index to offset into the table (relative to tableReg)
+ annExpr expr (SLL (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt1) reg) (OpImm (ImmInt 3))),
+ -- calculate table entry address
+ ADD (OpReg W64 targetReg) (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt2) tableReg),
+ -- load table entry (relative offset from tableReg (first entry) to target label)
+ LDRU II64 (OpReg W64 targetReg) (OpAddr (AddrRegImm targetReg (ImmInt 0))),
+ -- calculate absolute address of the target label
+ ADD (OpReg W64 targetReg) (OpReg W64 targetReg) (OpReg W64 tableReg),
+ -- prepare jump to target label
+ J_TBL ids (Just lbl) targetReg
+ ]
+ return code
+ where
+ -- See Note [Sub-word subtlety during jump-table indexing] in
+ -- GHC.CmmToAsm.X86.CodeGen for why we must first offset, then widen.
+ indexExpr0 = cmmOffset platform expr offset
+ -- We widen to a native-width register to sanitize the high bits
+ indexExpr =
+ CmmMachOp
+ (MO_UU_Conv expr_w (platformWordWidth platform))
+ [indexExpr0]
+ expr_w = cmmExprWidth platform expr
+ (offset, ids) = switchTargetsToTable targets
+ platform = ncgPlatform config
+
+-- | Generate jump table data (if required)
+--
+-- The idea is to emit one table entry per case. The entry is the relative
+-- address of the block to jump to (relative to the table's first entry /
+-- the table's own label). The calculation itself is done by the linker.
+generateJumpTableForInstr ::
+ NCGConfig ->
+ Instr ->
+ Maybe (NatCmmDecl RawCmmStatics Instr)
+generateJumpTableForInstr config (J_TBL ids (Just lbl) _) =
+ let jumpTable =
+ map jumpTableEntryRel ids
+ where
+ jumpTableEntryRel Nothing =
+ CmmStaticLit (CmmInt 0 (ncgWordWidth config))
+ jumpTableEntryRel (Just blockid) =
+ CmmStaticLit
+ ( CmmLabelDiffOff
+ blockLabel
+ lbl
+ 0
+ (ncgWordWidth config)
+ )
+ where
+ blockLabel = blockLbl blockid
+ in Just (CmmData (Section ReadOnlyData lbl) (CmmStaticsRaw lbl jumpTable))
+generateJumpTableForInstr _ _ = Nothing
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+stmtsToInstrs ::
+ -- | Cmm Statements
+ [CmmNode O O] ->
+ -- | Resulting instruction
+ NatM InstrBlock
+stmtsToInstrs stmts = concatOL <$> mapM stmtToInstrs stmts
+
+stmtToInstrs ::
+ CmmNode e x ->
+ -- | Resulting instructions
+ NatM InstrBlock
+stmtToInstrs stmt = do
+ config <- getConfig
+ platform <- getPlatform
+ case stmt of
+ CmmUnsafeForeignCall target result_regs args ->
+ genCCall target result_regs args
+ CmmComment s -> pure (unitOL (COMMENT (ftext s)))
+ CmmTick {} -> pure nilOL
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode format reg src
+ | otherwise -> assignReg_IntCode format reg src
+ where
+ ty = cmmRegType reg
+ format = cmmTypeFormat ty
+ CmmStore addr src _alignment
+ | isFloatType ty -> assignMem_FltCode format addr src
+ | otherwise -> assignMem_IntCode format addr src
+ where
+ ty = cmmExprType platform src
+ format = cmmTypeFormat ty
+ CmmBranch id -> genBranch id
+ -- We try to arrange blocks such that the likely branch is the fallthrough
+ -- in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
+ CmmCondBranch arg true false _prediction ->
+ genCondBranch true false arg
+ CmmSwitch arg ids -> genSwitch config arg ids
+ CmmCall {cml_target = arg} -> genJump arg
+ CmmUnwind _regs -> pure nilOL
+ -- Intentionally not have a default case here: If anybody adds a
+ -- constructor, the compiler should force them to think about this here.
+ CmmForeignCall {} -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt)
+ CmmEntry {} -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt)
+
+--------------------------------------------------------------------------------
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+--
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+type InstrBlock =
+ OrdList Instr
+
+-- | Registers passed up the tree.
+--
+-- If the stix code forces the register to live in a pre-decided machine
+-- register, it comes out as @Fixed@; otherwise, it comes out as @Any@, and the
+-- parent can decide which register to put it in.
+data Register
+ = Fixed Format Reg InstrBlock
+ | Any Format (Reg -> InstrBlock)
+
+-- | Sometimes we need to change the Format of a register. Primarily during
+-- conversion.
+swizzleRegisterRep :: Format -> Register -> Register
+swizzleRegisterRep format' (Fixed _format reg code) = Fixed format' reg code
+swizzleRegisterRep format' (Any _format codefn) = Any format' codefn
+
+-- | Grab a `Reg` for a `CmmReg`
+--
+-- `LocalReg`s are assigned virtual registers (`RegVirtual`), `GlobalReg`s are
+-- assigned real registers (`RegReal`). It is an error if a `GlobalReg` is not a
+-- STG register.
+getRegisterReg :: Platform -> CmmReg -> Reg
+getRegisterReg _ (CmmLocal (LocalReg u pk)) =
+ RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
+getRegisterReg platform (CmmGlobal mid) =
+ case globalRegMaybe platform (globalRegUseGlobalReg mid) of
+ Just reg -> RegReal reg
+ Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- | Compute an expression into any register
+getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ newReg <- getNewRegNat rep
+ return (newReg, rep, code newReg)
+ Fixed rep reg code ->
+ return (reg, rep, code)
+
+-- | Compute an expression into any floating-point register
+--
+-- If the initial expression is not a floating-point expression, finally move
+-- the result into a floating-point register.
+getFloatReg :: (HasCallStack) => CmmExpr -> NatM (Reg, Format, InstrBlock)
+getFloatReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code | isFloatFormat rep -> do
+ newReg <- getNewRegNat rep
+ return (newReg, rep, code newReg)
+ Any II32 code -> do
+ newReg <- getNewRegNat FF32
+ return (newReg, FF32, code newReg)
+ Any II64 code -> do
+ newReg <- getNewRegNat FF64
+ return (newReg, FF64, code newReg)
+ Any _w _code -> do
+ config <- getConfig
+ pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr)
+ -- can't do much for fixed.
+ Fixed rep reg code ->
+ return (reg, rep, code)
+
+-- | Map `CmmLit` to `OpImm`
+--
+-- N.B. this is a partial function, because not all `CmmLit`s have an immediate
+-- representation.
+litToImm' :: CmmLit -> Operand
+litToImm' = OpImm . litToImm
+
+-- | Compute a `CmmExpr` into a `Register`
+getRegister :: CmmExpr -> NatM Register
+getRegister e = do
+ config <- getConfig
+ getRegister' config (ncgPlatform config) e
+
+-- | The register width to be used for an operation on the given width
+-- operand.
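+--
+-- For example, 8- and 16-bit operations are performed on W32 registers, and
+-- their results are truncated back afterwards; see
+-- Note [Signed arithmetic on RISCV64].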
+opRegWidth :: Width -> Width
+opRegWidth W64 = W64
+opRegWidth W32 = W32
+opRegWidth W16 = W32
+opRegWidth W8 = W32
+opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
+
+-- Note [Signed arithmetic on RISCV64]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Handling signed arithmetic on sub-word-size values on RISCV64 is a bit
+-- tricky as Cmm's type system does not capture signedness. While 32-bit values
+-- are fairly easy to handle due to RISCV64's 32-bit instruction variants
+-- (the W-suffixed instructions such as ADDW), 16- and 8-bit values require quite some
+-- care.
+--
+-- We handle 16- and 8-bit values by using the 32-bit operations,
+-- sign-/zero-extending operands, and truncating results as necessary. For
+-- simplicity we maintain the invariant that a register containing a
+-- sub-word-size value always contains the zero-extended form of that value
+-- in between operations.
+--
+-- For instance, consider the program,
+--
+-- test(bits64 buffer) {
+-- bits8 a = bits8[buffer];
+-- bits8 b = %mul(a, 42);
+-- bits8 c = %not(b);
+-- bits8 d = %shrl(c, 4::bits8);
+-- return (d);
+-- }
+--
+-- This program begins by loading `a` from memory, for which we use a
+-- zero-extended byte-size load. We next sign-extend `a` to 32 bits, use a
+-- 32-bit multiplication to compute `b`, and truncate the result back down
+-- to 8 bits.
+--
+-- Next we compute `c`: the `%not` requires no extension of its operands, but
+-- we must still truncate the result back down to 8 bits. Finally, the `%shrl`
+-- requires no extension and no truncation, since we can assume that
+-- `c` is zero-extended.
+--
+-- The "RISC-V Sign Extension Optimizations" LLVM tech talk presentation by
+-- Craig Topper covers possible future improvements
+-- (https://llvm.org/devmtg/2022-11/slides/TechTalk21-RISC-VSignExtensionOptimizations.pdf)
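+--
+-- As a rough sketch, the `b = %mul(a, 42)` step of the example above could
+-- come out as follows (register choice and exact instruction selection are
+-- up to the backend and may differ):
+--
+--   lbu  t0, 0(a0)     // zero-extended byte-size load of `a`
+--   slli t0, t0, 56    // sign-extend for the multiply ...
+--   srai t0, t0, 56    // ... via shift left / arithmetic shift right
+--   li   t1, 42
+--   mulw t0, t0, t1    // 32-bit multiply
+--   slli t0, t0, 56    // truncate back to 8 bits ...
+--   srli t0, t0, 56    // ... restoring the zero-extension invariant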
+--
+--
+-- Note [Handling PIC on RV64]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- RV64 does not have a special PIC register; the general approach is to simply
+-- do PC-relative addressing or go through the GOT. There is assembly support
+-- for both.
+--
+-- RV64 assembly has a `la` (load address) pseudo-instruction that allows
+-- loading a label's address into a register. The instruction is desugared into
+-- different addressing modes, e.g. PC-relative addressing:
+--
+-- 1: auipc rd1, %pcrel_hi(label)
+-- addi rd1, rd1, %pcrel_lo(1b)
+--
+-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dModifiers.html.
+-- PIC can be enabled/disabled through
+--
+-- .option pic
+--
+-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dDirectives.html#RISC_002dV_002dDirectives
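+--
+-- With PIC enabled, the assembler expands `la` through the GOT instead
+-- (sketch):
+--
+-- 1: auipc rd1, %got_pcrel_hi(label)
+-- ld rd1, %pcrel_lo(1b)(rd1)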
+--
+-- CmmGlobal @PicBaseReg@s are generated in @GHC.CmmToAsm.PIC@ by
+-- @cmmMakePicReference@. This is in turn called from @cmmMakeDynamicReference@,
+-- also in @GHC.CmmToAsm.PIC@, from where it is also exported. There are two
+-- call sites for this: one is in this module, to produce the @target@ in
+-- @genCCall@; the other is in @GHC.CmmToAsm@, in @cmmExprNative@.
+--
+-- Conceptually, we do not want any special PicBaseReg to be used on RV64. If
+-- we need to distinguish between ways of loading a symbol, we must express
+-- that in how we load it, not through a dedicated register.
+--
+
+getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
+-- OPTIMIZATION WARNING: CmmExpr rewrites
+-- 1. Rewrite: Reg + (-n) => Reg - n
+-- TODO: this expression shouldn't even be generated to begin with.
+getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)])
+ | i < 0 =
+ getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
+getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)])
+ | i < 0 =
+ getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
+-- Generic case.
+getRegister' config plat expr =
+ case expr of
+ CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)) ->
+ -- See Note [Handling PIC on RV64]
+ pprPanic "getRegister': There's no PIC base register on RISCV" (ppr PicBaseReg)
+ CmmLit lit ->
+ case lit of
+ CmmInt 0 w -> pure $ Fixed (intFormat w) zeroReg nilOL
+ CmmInt i w ->
+ -- narrowU is important: Negative immediates may be
+ -- sign-extended on load!
+ let imm = OpImm . ImmInteger $ narrowU w i
+ in pure (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) imm)))
+ CmmFloat 0 w -> do
+ let op = litToImm' lit
+ pure (Any (floatFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) op)))
+ CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
+ CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halves" (pdoc plat expr)
+ CmmFloat f W32 -> do
+ let word = castFloatToWord32 (fromRational f) :: Word32
+ intReg <- getNewRegNat (intFormat W32)
+ return
+ ( Any
+ (floatFormat W32)
+ ( \dst ->
+ toOL
+ [ annExpr expr
+ $ MOV (OpReg W32 intReg) (OpImm (ImmInteger (fromIntegral word))),
+ MOV (OpReg W32 dst) (OpReg W32 intReg)
+ ]
+ )
+ )
+ CmmFloat f W64 -> do
+ let word = castDoubleToWord64 (fromRational f) :: Word64
+ intReg <- getNewRegNat (intFormat W64)
+ return
+ ( Any
+ (floatFormat W64)
+ ( \dst ->
+ toOL
+ [ annExpr expr
+ $ MOV (OpReg W64 intReg) (OpImm (ImmInteger (fromIntegral word))),
+ MOV (OpReg W64 dst) (OpReg W64 intReg)
+ ]
+ )
+ )
+ CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
+ CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
+ CmmLabel lbl -> do
+ let op = OpImm (ImmCLbl lbl)
+ rep = cmmLitType plat lit
+ format = cmmTypeFormat rep
+ return (Any format (\dst -> unitOL $ annExpr expr (LDR format (OpReg (formatToWidth format) dst) op)))
+ CmmLabelOff lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
+ let op = OpImm (ImmIndex lbl off)
+ rep = cmmLitType plat lit
+ format = cmmTypeFormat rep
+ return (Any format (\dst -> unitOL $ LDR format (OpReg (formatToWidth format) dst) op))
+ CmmLabelOff lbl off -> do
+ let op = litToImm' (CmmLabel lbl)
+ rep = cmmLitType plat lit
+ format = cmmTypeFormat rep
+ width = typeWidth rep
+ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+ return
+ ( Any
+ format
+ ( \dst ->
+ off_code
+ `snocOL` LDR format (OpReg (formatToWidth format) dst) op
+ `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)
+ )
+ )
+ CmmLabelDiffOff {} -> pprPanic "getRegister' (CmmLit:CmmLabelDiffOff): " (pdoc plat expr)
+ CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmBlock): " (pdoc plat expr)
+ CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmHighStackMark): " (pdoc plat expr)
+ CmmLoad mem rep _ -> do
+ let format = cmmTypeFormat rep
+ width = typeWidth rep
+ Amode addr addr_code <- getAmode plat width mem
+ case width of
+ w
+ | w <= W64 ->
+ -- Load without sign-extension. See Note [Signed arithmetic on RISCV64]
+ pure
+ ( Any
+ format
+ ( \dst ->
+ addr_code
+ `snocOL` LDRU format (OpReg width dst) (OpAddr addr)
+ )
+ )
+ _ ->
+ pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
+ CmmStackSlot _ _ ->
+ pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr)
+ CmmReg reg ->
+ return
+ ( Fixed
+ (cmmTypeFormat (cmmRegType reg))
+ (getRegisterReg plat reg)
+ nilOL
+ )
+ CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do
+ getRegister' config plat
+ $ CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where
+ width = typeWidth (cmmRegType reg)
+ CmmRegOff reg off -> do
+ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+ (reg, _format, code) <- getSomeReg $ CmmReg reg
+ return
+ $ Any
+ (intFormat width)
+ ( \dst ->
+ off_code
+ `appOL` code
+ `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r)
+ )
+ where
+ width = typeWidth (cmmRegType reg)
+
+ -- Handle MO_RelaxedRead as a normal CmmLoad, to allow
+ -- non-trivial addressing modes to be used.
+ CmmMachOp (MO_RelaxedRead w) [e] ->
+ getRegister (CmmLoad e (cmmBits w) NaturallyAligned)
+ -- for MachOps, see GHC.Cmm.MachOp
+ -- For CmmMachOp, see GHC.Cmm.Expr
+ CmmMachOp op [e] -> do
+ (reg, _format, code) <- getSomeReg e
+ case op of
+ MO_Not w -> return $ Any (intFormat w) $ \dst ->
+ let w' = opRegWidth w
+ in code
+ `snocOL`
+ -- pseudo instruction `not` is `xori rd, rs, -1`
+ ann (text "not") (XORI (OpReg w' dst) (OpReg w' reg) (OpImm (ImmInt (-1))))
+ `appOL` truncateReg w' w dst -- See Note [Signed arithmetic on RISCV64]
+ MO_S_Neg w -> negate code w reg
+ MO_F_Neg w ->
+ return
+ $ Any
+ (floatFormat w)
+ ( \dst ->
+ code
+ `snocOL` NEG (OpReg w dst) (OpReg w reg)
+ )
+ -- TODO: Can this case happen?
+ MO_SF_Conv from to | from < W32 -> do
+ -- extend to the smallest available representation
+ (reg_x, code_x) <- signExtendReg from W32 reg
+ pure
+ $ Any
+ (floatFormat to)
+ ( \dst ->
+ code
+ `appOL` code_x
+ `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg_x)) -- (Signed ConVerT Float)
+ )
+ MO_SF_Conv from to ->
+ pure
+ $ Any
+ (floatFormat to)
+ ( \dst ->
+ code
+ `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float)
+ )
+ MO_FS_Conv from to
+ | to < W32 ->
+ pure
+ $ Any
+ (intFormat to)
+ ( \dst ->
+ code
+ `snocOL`
+ -- W32 is the smallest width to convert to. Decrease width afterwards.
+ annExpr expr (FCVT FloatToInt (OpReg W32 dst) (OpReg from reg))
+ `appOL` signExtendAdjustPrecission W32 to dst dst -- (float convert (-> zero) signed)
+ )
+ MO_FS_Conv from to ->
+ pure
+ $ Any
+ (intFormat to)
+ ( \dst ->
+ code
+ `snocOL` annExpr expr (FCVT FloatToInt (OpReg to dst) (OpReg from reg))
+ `appOL` truncateReg from to dst -- (float convert (-> zero) signed)
+ )
+ MO_UU_Conv from to
+ | from <= to ->
+ pure
+ $ Any
+ (intFormat to)
+ ( \dst ->
+ code
+ `snocOL` annExpr e (MOV (OpReg to dst) (OpReg from reg))
+ )
+ MO_UU_Conv from to ->
+ pure
+ $ Any
+ (intFormat to)
+ ( \dst ->
+ code
+ `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg))
+ `appOL` truncateReg from to dst
+ )
+ MO_SS_Conv from to -> ss_conv from to reg code
+ MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` annExpr e (FCVT FloatToFloat (OpReg to dst) (OpReg from reg)))
+ -- Conversions
+ -- TODO: Duplication with MO_UU_Conv
+ MO_XX_Conv from to
+ | to < from ->
+ pure
+ $ Any
+ (intFormat to)
+ ( \dst ->
+ code
+ `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg))
+ `appOL` truncateReg from to dst
+ )
+ MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e
+ MO_AlignmentCheck align wordWidth -> do
+ reg <- getRegister' config plat e
+ addAlignmentCheck align wordWidth reg
+ x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
+ where
+ -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits
+ -- See Note [Signed arithmetic on RISCV64].
+ negate code w reg = do
+ let w' = opRegWidth w
+ (reg', code_sx) <- signExtendReg w w' reg
+ return $ Any (intFormat w) $ \dst ->
+ code
+ `appOL` code_sx
+ `snocOL` NEG (OpReg w' dst) (OpReg w' reg')
+ `appOL` truncateReg w' w dst
+
+ ss_conv from to reg code
+ | from < to = do
+ pure $ Any (intFormat to) $ \dst ->
+ code
+ `appOL` signExtend from to reg dst
+ `appOL` truncateReg from to dst
+ | from > to =
+ pure $ Any (intFormat to) $ \dst ->
+ code
+ `appOL` toOL
+ [ ann
+ (text "MO_SS_Conv: narrow register signed" <+> ppr reg <+> ppr from <> text "->" <> ppr to)
+ (SLL (OpReg to dst) (OpReg from reg) (OpImm (ImmInt shift))),
+ -- signed right shift
+ SRA (OpReg to dst) (OpReg to dst) (OpImm (ImmInt shift))
+ ]
+ `appOL` truncateReg from to dst
+ | otherwise =
+ -- No conversion necessary: Just copy.
+ pure $ Any (intFormat from) $ \dst ->
+ code `snocOL` MOV (OpReg from dst) (OpReg from reg)
+ where
+ shift = 64 - (widthInBits from - widthInBits to)
+
+ -- Dyadic machops:
+ --
+ -- The general idea is:
+ -- compute x<i> <- x
+ -- compute x<j> <- y
+ -- OP x<r>, x<i>, x<j>
+ --
+ -- TODO: for now we only implement the 64bit versions and rely on the
+ -- fallthrough to alert us if things go wrong!
+ -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring
+ -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg
+ CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
+ CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
+ -- 1. Compute Reg +/- n directly.
+ -- For Add/Sub we can directly encode a signed 12-bit immediate.
+ CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt n _)]
+ | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ where
+ -- TODO: When the lower 12 bits of n are zero, the upper bits could be loaded with LUI instead.
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
+ r' = getRegisterReg plat reg
+ CmmMachOp (MO_Sub w) [CmmReg reg, CmmLit (CmmInt n _)]
+ | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ where
+ -- TODO: When the lower 12 bits of n are zero, the upper bits could be loaded with LUI instead.
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
+ r' = getRegisterReg plat reg
+ CmmMachOp (MO_U_Quot w) [x, y] | w == W8 || w == W16 -> do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
+ `appOL` code_y
+ `appOL` truncateReg (formatToWidth format_y) w reg_y
+ `snocOL` annExpr expr (DIVU (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+ )
+
+ -- 2. Shifts. x << n, x >> n.
+ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
+ | w == W32,
+ 0 <= n,
+ n < 32 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+ `appOL` truncateReg w w dst
+ )
+ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
+ | w == W64,
+ 0 <= n,
+ n < 64 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+ `appOL` truncateReg w w dst
+ )
+ CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` code_x'
+ `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
+ )
+ CmmMachOp (MO_S_Shr w) [x, y] -> do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` code_x'
+ `appOL` code_y
+ `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y))
+ )
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
+ | w == W8,
+ 0 <= n,
+ n < 8 -> do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+ )
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
+ | w == W16,
+ 0 <= n,
+ n < 16 -> do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+ )
+ CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` code_y
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+ )
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
+ | w == W32,
+ 0 <= n,
+ n < 32 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+ )
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
+ | w == W64,
+ 0 <= n,
+ n < 64 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+ )
+
+ -- 3. Logic &&, ||
+ CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)]
+ | fitsIn12bitImm n ->
+ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ where
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
+ r' = getRegisterReg plat reg
+ CmmMachOp (MO_Or w) [CmmReg reg, CmmLit (CmmInt n _)]
+ | fitsIn12bitImm n ->
+ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORI (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ where
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
+ r' = getRegisterReg plat reg
+
+ -- Generic binary case.
+ CmmMachOp op [x, y] -> do
+ let -- A "plain" operation.
+ bitOp w op = do
+ -- compute x<m> <- x
+ -- compute x<o> <- y
+ -- <OP> x<n>, x<m>, x<o>
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOp: incompatible"
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` code_y
+ `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)
+ )
+
+ -- A (potentially signed) integer operation.
+ -- In the case of 8- and 16-bit signed arithmetic we must first
+ -- sign-extend both arguments to 32-bits.
+ -- See Note [Signed arithmetic on RISCV64].
+ intOp is_signed w op = do
+ -- compute x<m> <- x
+ -- compute x<o> <- y
+ -- <OP> x<n>, x<m>, x<o>
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ massertPpr (isIntFormat format_x && isIntFormat format_y) $ text "intOp: non-int"
+ -- This is the width of the registers on which the operation
+ -- should be performed.
+ let w' = opRegWidth w
+ signExt r
+ | not is_signed = return (r, nilOL)
+ | otherwise = signExtendReg w w' r
+ (reg_x_sx, code_x_sx) <- signExt reg_x
+ (reg_y_sx, code_y_sx) <- signExt reg_y
+ return $ Any (intFormat w) $ \dst ->
+ code_x
+ `appOL` code_y
+ `appOL`
+ -- sign-extend both operands
+ code_x_sx
+ `appOL` code_y_sx
+ `appOL` op (OpReg w' dst) (OpReg w' reg_x_sx) (OpReg w' reg_y_sx)
+ `appOL` truncateReg w' w dst -- truncate back to the operand's original width
+ floatOp w op = do
+ (reg_fx, format_x, code_fx) <- getFloatReg x
+ (reg_fy, format_y, code_fy) <- getFloatReg y
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatOp: non-float"
+ return
+ $ Any
+ (floatFormat w)
+ ( \dst ->
+ code_fx
+ `appOL` code_fy
+ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
+ )
+
+ -- need a special one for conditionals, as they return ints
+ floatCond w op = do
+ (reg_fx, format_x, code_fx) <- getFloatReg x
+ (reg_fy, format_y, code_fy) <- getFloatReg y
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatCond: non-float"
+ return
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_fx
+ `appOL` code_fy
+ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
+ )
+
+ case op of
+ -- Integer operations
+ -- Add/Sub should only be integer operations.
+ MO_Add w -> intOp False w (\d x y -> unitOL $ annExpr expr (ADD d x y))
+ -- TODO: Handle sub-word case
+ MO_Sub w -> intOp False w (\d x y -> unitOL $ annExpr expr (SUB d x y))
+ -- N.B. We needn't sign-extend sub-word size (in)equality comparisons
+ -- since we don't care about ordering.
+ MO_Eq w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ))
+ MO_Ne w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y NE))
+ -- Signed multiply/divide
+ MO_Mul w -> intOp True w (\d x y -> unitOL $ annExpr expr (MUL d x y))
+ MO_S_MulMayOflo w -> do_mul_may_oflo w x y
+ MO_S_Quot w -> intOp True w (\d x y -> unitOL $ annExpr expr (DIV d x y))
+ MO_S_Rem w -> intOp True w (\d x y -> unitOL $ annExpr expr (REM d x y))
+ -- Unsigned multiply/divide
+ MO_U_Quot w -> intOp False w (\d x y -> unitOL $ annExpr expr (DIVU d x y))
+ MO_U_Rem w -> intOp False w (\d x y -> unitOL $ annExpr expr (REMU d x y))
+ -- Signed comparisons
+ MO_S_Ge w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGE))
+ MO_S_Le w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLE))
+ MO_S_Gt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGT))
+ MO_S_Lt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLT))
+ -- Unsigned comparisons
+ MO_U_Ge w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGE))
+ MO_U_Le w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULE))
+ MO_U_Gt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGT))
+ MO_U_Lt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULT))
+ -- Floating point arithmetic
+ MO_F_Add w -> floatOp w (\d x y -> unitOL $ annExpr expr (ADD d x y))
+ MO_F_Sub w -> floatOp w (\d x y -> unitOL $ annExpr expr (SUB d x y))
+ MO_F_Mul w -> floatOp w (\d x y -> unitOL $ annExpr expr (MUL d x y))
+ MO_F_Quot w -> floatOp w (\d x y -> unitOL $ annExpr expr (DIV d x y))
+ -- Floating point comparison
+ MO_F_Eq w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ))
+ MO_F_Ne w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y NE))
+ MO_F_Ge w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGE))
+ MO_F_Le w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FLE)) -- x <= y <=> y >= x
+ MO_F_Gt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGT))
+ MO_F_Lt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FLT)) -- x < y <=> y > x
+
+ -- Bitwise operations
+ MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y))
+ MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y))
+ MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y))
+ MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y))
+ MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
+ MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
+ op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
+
+ -- Generic ternary case.
+ CmmMachOp op [x, y, z] ->
+ case op of
+ -- Floating-point fused multiply-add operations
+ --
+ -- x86 fmadd x * y + z <=> RISCV64 fmadd : d = r1 * r2 + r3
+ -- x86 fmsub x * y - z <=> RISCV64 fmsub : d = r1 * r2 - r3
+ -- x86 fnmadd - x * y + z <=> RISCV64 fnmsub: d = - r1 * r2 + r3
+ -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd: d = - r1 * r2 - r3
+ MO_FMA var w -> case var of
+ FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a)
+ FMSub -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
+ FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a)
+ FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a)
+ _ ->
+ pprPanic "getRegister' (unhandled ternary CmmMachOp): "
+ $ pprMachOp op
+ <+> text "in"
+ <+> pdoc plat expr
+ where
+ float3Op w op = do
+ (reg_fx, format_x, code_fx) <- getFloatReg x
+ (reg_fy, format_y, code_fy) <- getFloatReg y
+ (reg_fz, format_z, code_fz) <- getFloatReg z
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y && isFloatFormat format_z)
+ $ text "float3Op: non-float"
+ pure
+ $ Any (floatFormat w)
+ $ \dst ->
+ code_fx
+ `appOL` code_fy
+ `appOL` code_fz
+ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy) (OpReg w reg_fz)
+ CmmMachOp _op _xs ->
+ pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr)
+ where
+ isNbitEncodeable :: Int -> Integer -> Bool
+ isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
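+ -- For example, isNbitEncodeable 12 accepts exactly the signed 12-bit
+ -- immediate range [-2048 .. 2047] of I- and S-type instructions.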
+ -- N.B. RISC-V has no overflow flags; MUL gives no overflow indication.
+ -- Return 0 when the operation cannot overflow, /= 0 otherwise.
+ do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ do_mul_may_oflo w _x _y | w > W64 = pprPanic "Cannot multiply larger than 64bit" (ppr w)
+ do_mul_may_oflo w@W64 x y = do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ -- TODO: Can't we clobber reg_x and reg_y to save registers?
+ lo <- getNewRegNat II64
+ hi <- getNewRegNat II64
+ -- TODO: Overhaul CSET: 3rd operand isn't needed for SNEZ
+ let nonSense = OpImm (ImmInt 0)
+ pure
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x
+ `appOL` code_y
+ `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y
+ `appOL` toOL
+ [ annExpr expr (MULH (OpReg w hi) (OpReg w reg_x) (OpReg w reg_y)),
+ MUL (OpReg w lo) (OpReg w reg_x) (OpReg w reg_y),
+ SRA (OpReg w lo) (OpReg w lo) (OpImm (ImmInt (widthInBits W64 - 1))),
+ ann
+ (text "Set flag if result of MULH contains more than sign bits.")
+ (XOR (OpReg w hi) (OpReg w hi) (OpReg w lo)),
+ CSET (OpReg w dst) (OpReg w hi) nonSense NE
+ ]
+ )
+ do_mul_may_oflo w x y = do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ let width_x = formatToWidth format_x
+ width_y = formatToWidth format_y
+ if w > width_x && w > width_y
+ then
+ pure
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ -- 8bit * 8bit cannot overflow 16bit
+ -- 16bit * 16bit cannot overflow 32bit
+ -- 32bit * 32bit cannot overflow 64bit
+ unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 0)))
+ )
+ else do
+ let use32BitMul = w <= W32 && width_x <= W32 && width_y <= W32
+ nonSense = OpImm (ImmInt 0)
+ if use32BitMul
+ then do
+ narrowedReg <- getNewRegNat II64
+ pure
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ code_x
+ `appOL` signExtend (formatToWidth format_x) W32 reg_x reg_x
+ `appOL` code_y
+ `appOL` signExtend (formatToWidth format_y) W32 reg_y reg_y
+ `snocOL` annExpr expr (MUL (OpReg W32 dst) (OpReg W32 reg_x) (OpReg W32 reg_y))
+ `appOL` signExtendAdjustPrecission W32 w dst narrowedReg
+ `appOL` toOL
+ [ ann
+ (text "Check if the multiplied value fits in the narrowed register")
+ (SUB (OpReg w dst) (OpReg w dst) (OpReg w narrowedReg)),
+ CSET (OpReg w dst) (OpReg w dst) nonSense NE
+ ]
+ )
+ else
+ pure
+ $ Any
+ (intFormat w)
+ ( \dst ->
+ -- Do not handle this unlikely case; conservatively report that it may overflow.
+ unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 1)))
+ )
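+
+ -- A note on the W64 check above: a signed 64x64-bit multiply overflows
+ -- iff the high word (MULH) differs from the arithmetic right shift of
+ -- the low word (MUL) by 63, i.e. from the low word's sign replicated.
+ -- A minimal reference sketch in plain Haskell (illustrative only, not
+ -- part of the backend; assumes Data.Bits and Data.Int in scope):
+ --
+ -- > mulhAndMul :: Int64 -> Int64 -> (Int64, Int64) -- (MULH, MUL)
+ -- > mulhAndMul x y =
+ -- >   let p = toInteger x * toInteger y
+ -- >    in (fromInteger (p `shiftR` 64), fromInteger p)
+ -- >
+ -- > mulMayOflo :: Int64 -> Int64 -> Bool
+ -- > mulMayOflo x y =
+ -- >   let (hi, lo) = mulhAndMul x y
+ -- >    in hi `xor` (lo `shiftR` 63) /= 0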
+
+-- | Instructions to sign-extend the value in the given register from width @w@
+-- up to width @w'@.
+signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr)
+signExtendReg w _w' r | w == W64 = pure (r, nilOL)
+signExtendReg w w' r = do
+ r' <- getNewRegNat (intFormat w')
+ let instrs = signExtend w w' r r'
+ pure (r', instrs)
+
+-- | Sign-extends to 64 bit, if needed
+--
+-- Source `Reg` @r@ stays untouched, while the conversion happens on destination
+-- `Reg` @r'@.
+signExtend :: Width -> Width -> Reg -> Reg -> OrdList Instr
+signExtend w w' _r _r' | w > w' = pprPanic "This is not a sign extension, but a truncation." $ ppr w <> text "->" <+> ppr w'
+signExtend w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w'
+signExtend w w' r r' | w == W64 && w' == W64 && r == r' = nilOL
+signExtend w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r)
+signExtend w w' r r'
+ | w == W32 && w' == W64 =
+ unitOL
+ $ ann
+ (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w')
+ -- `ADDIW r r 0` is the pseudo-op SEXT.W
+ (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0)))
+signExtend w w' r r' =
+ toOL
+ [ ann
+ (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
+ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
+ -- signed (arithmetic) right shift
+ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
+ ]
+ where
+ shift = 64 - widthInBits w
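+
+-- For example, sign-extending from W16 uses shift = 48:
+--
+-- slli rd, rs, 48
+-- srai rd, rd, 48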
+
+-- | Sign-extends to 64 bit if needed, and reduces the precision to the target `Width` (@w'@)
+--
+-- Source `Reg` @r@ stays untouched, while the conversion happens on destination
+-- `Reg` @r'@.
+signExtendAdjustPrecission :: Width -> Width -> Reg -> Reg -> OrdList Instr
+signExtendAdjustPrecission w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w'
+signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 && r == r' = nilOL
+signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r)
+signExtendAdjustPrecission w w' r r'
+ | w == W32 && w' == W64 =
+ unitOL
+ $ ann
+ (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w')
+ -- `ADDIW r r 0` is the pseudo-op SEXT.W
+ (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0)))
+signExtendAdjustPrecission w w' r r'
+ | w > w' =
+ toOL
+ [ ann
+ (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
+ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
+ -- signed (arithmetic) right shift
+ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
+ ]
+ where
+ shift = 64 - widthInBits w'
+signExtendAdjustPrecission w w' r r' =
+ toOL
+ [ ann
+ (text "sign extend register" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
+ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
+ -- signed (arithmetic) right shift
+ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
+ ]
+ where
+ shift = 64 - widthInBits w
+
+-- | Instructions to truncate the value in the given register from width @w@
+-- to width @w'@.
+--
+-- In other words, it just cuts the width out of the register. N.B.: This
+-- ignores signedness (no sign extension takes place)!
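+--
+-- For example, truncating to W8 uses shift = 56, keeping only the low byte
+-- (zero-extended):
+--
+-- slli rd, rs, 56
+-- srli rd, rd, 56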
+truncateReg :: Width -> Width -> Reg -> OrdList Instr
+truncateReg _w w' _r | w' == W64 = nilOL
+truncateReg _w w' r | w' > W64 = pprPanic "Cannot truncate to width bigger than register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w'
+truncateReg w _w' r | w > W64 = pprPanic "Unexpected register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w
+truncateReg w w' r =
+ toOL
+ [ ann
+ (text "truncate register" <+> ppr r <+> ppr w <> text "->" <> ppr w')
+ (SLL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))),
+ -- SRL is a logical right shift: it zero-fills and ignores signedness!
+ SRL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))
+ ]
+ where
+ shift = 64 - widthInBits w'
+
+-- | Given a 'Register', produce a new 'Register' with an instruction block
+-- which will check the value for alignment. Used for @-falignment-sanitisation@.
+addAlignmentCheck :: Int -> Width -> Register -> NatM Register
+addAlignmentCheck align wordWidth reg = do
+ jumpReg <- getNewRegNat II64
+ cmpReg <- getNewRegNat II64
+ okayLblId <- getBlockIdNat
+
+ pure $ case reg of
+ Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt jumpReg cmpReg okayLblId reg)
+ Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt jumpReg cmpReg okayLblId reg)
+ where
+ check :: Format -> Reg -> Reg -> BlockId -> Reg -> InstrBlock
+ check fmt jumpReg cmpReg okayLblId reg =
+ let width = formatToWidth fmt
+ in assert (not $ isFloatFormat fmt)
+ $ toOL
+ [ ann
+ (text "Alignment check - alignment: " <> int align <> text ", word width: " <> text (show wordWidth))
+ (AND (OpReg width cmpReg) (OpReg width reg) (OpImm $ ImmInt $ align - 1)),
+ BCOND EQ (OpReg width cmpReg) zero (TBlock okayLblId),
+ COMMENT (text "Alignment check failed"),
+ LDR II64 (OpReg W64 jumpReg) (OpImm $ ImmCLbl mkBadAlignmentLabel),
+ B (TReg jumpReg),
+ NEWBLOCK okayLblId
+ ]
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+data Amode = Amode AddrMode InstrBlock
+
+-- | Provide the value of a `CmmExpr` with an `Amode`
+--
+-- N.B. this function should be used to provide operands to load and store
+-- instructions with signed 12bit wide immediates (S & I types). For other
+-- immediate sizes and formats (e.g. B type uses multiples of 2) this function
+-- would need to be adjusted.
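+--
+-- For example, @CmmRegOff reg 16@ becomes an @AddrRegImm@ that a load can
+-- consume directly, e.g. @ld t0, 16(t1)@ (a sketch; registers are
+-- illustrative).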
+getAmode ::
+ Platform ->
+ -- | width of loaded value
+ Width ->
+ CmmExpr ->
+ NatM Amode
+-- TODO: Specialize stuff we can destructure here.
+
+-- LDR/STR: Immediate can be represented with 12bits
+getAmode platform w (CmmRegOff reg off)
+ | w <= W64,
+ fitsIn12bitImm off =
+ return $ Amode (AddrRegImm reg' off') nilOL
+ where
+ reg' = getRegisterReg platform reg
+ off' = ImmInt off
+
+-- For Stores we often see something like this:
+-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2)
+-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n]`
+-- for `n` in range.
+getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
+ | fitsIn12bitImm off =
+ do
+ (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrRegImm reg (ImmInteger off)) code
+getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
+ | fitsIn12bitImm (-off) =
+ do
+ (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
+
+-- Generic case
+getAmode _platform _ expr =
+ do
+ (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrReg reg) code
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignMem_IntCode rep addrE srcE =
+ do
+ (src_reg, _format, code) <- getSomeReg srcE
+ platform <- getPlatform
+ let w = formatToWidth rep
+ Amode addr addr_code <- getAmode platform w addrE
+ return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
+ `consOL` ( code
+ `appOL` addr_code
+ `snocOL` STR rep (OpReg w src_reg) (OpAddr addr)
+ )
+
+assignReg_IntCode _ reg src =
+ do
+ platform <- getPlatform
+ let dst = getRegisterReg platform reg
+ r <- getRegister src
+ return $ case r of
+ Any _ code ->
+ COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src)))
+ `consOL` code dst
+ Fixed format freg fcode ->
+ COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src)))
+ `consOL` ( fcode
+ `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)
+ )
+
+-- Let's treat Floating point stuff
+-- as integer code for now. Opaque.
+assignMem_FltCode = assignMem_IntCode
+
+assignReg_FltCode = assignReg_IntCode
+
+-- -----------------------------------------------------------------------------
+-- Jumps
+-- AArch64 has 26 bits for jump targets, whereas RISC-V only has 20.
+-- Thus, we need to distinguish between far branches (outside of the
+-- current compilation unit) and regular branches.
+-- RISC-V has ±1MiB of displacement, whereas AArch64 has ±128MB.
+-- Thus, for most branches we can get away with encoding the target
+-- directly in the instruction rather than always loading the
+-- address into a register and then using that to jump.
+-- This works under the assumption that our linked build product is less
+-- than ~2*128MB of TEXT, and there are no jumps that span the whole
+-- TEXT segment.
+-- This is something where RISC-V's compressed instructions might come in
+-- handy.
+genJump :: CmmExpr {-the branch target-} -> NatM InstrBlock
+genJump expr = do
+ (target, _format, code) <- getSomeReg expr
+ return (code `appOL` unitOL (annExpr expr (B (TReg target))))
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+-- -----------------------------------------------------------------------------
+-- Conditional branches
+genCondJump ::
+ BlockId ->
+ CmmExpr ->
+ NatM InstrBlock
+genCondJump bid expr = do
+ case expr of
+ -- Optimized == 0 case.
+ CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ code_x `snocOL` annExpr expr (BCOND EQ zero (OpReg w reg_x) (TBlock bid))
+
+ -- Optimized /= 0 case.
+ CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ code_x `snocOL` annExpr expr (BCOND NE zero (OpReg w reg_x) (TBlock bid))
+
+ -- Generic case.
+ CmmMachOp mop [x, y] -> do
+ let ubcond w cmp = do
+ -- compute both sides.
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ let x' = OpReg w reg_x
+ y' = OpReg w reg_y
+ return $ case w of
+ w
+ | w == W8 || w == W16 ->
+ code_x
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
+ `appOL` code_y
+ `appOL` truncateReg (formatToWidth format_y) w reg_y
+ `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid))
+ _ ->
+ code_x
+ `appOL` code_y
+ `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid))
+
+ sbcond w cmp = do
+ -- compute both sides.
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ let x' = OpReg w reg_x
+ y' = OpReg w reg_y
+ return $ case w of
+ w
+ | w `elem` [W8, W16, W32] ->
+ code_x
+ `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x
+ `appOL` code_y
+ `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y
+ `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid)))
+ _ -> code_x `appOL` code_y `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid)))
+
+ fbcond w cmp = do
+ -- ensure we get float regs
+ (reg_fx, _format_fx, code_fx) <- getFloatReg x
+ (reg_fy, _format_fy, code_fy) <- getFloatReg y
+ condOpReg <- OpReg W64 <$> getNewRegNat II64
+ oneReg <- getNewRegNat II64
+ return $ code_fx
+ `appOL` code_fy
+ `snocOL` annExpr expr (CSET condOpReg (OpReg w reg_fx) (OpReg w reg_fy) cmp)
+ `snocOL` MOV (OpReg W64 oneReg) (OpImm (ImmInt 1))
+ `snocOL` BCOND EQ condOpReg (OpReg w oneReg) (TBlock bid)
+
+ case mop of
+ MO_F_Eq w -> fbcond w EQ
+ MO_F_Ne w -> fbcond w NE
+ MO_F_Gt w -> fbcond w FGT
+ MO_F_Ge w -> fbcond w FGE
+ MO_F_Lt w -> fbcond w FLT
+ MO_F_Le w -> fbcond w FLE
+ MO_Eq w -> sbcond w EQ
+ MO_Ne w -> sbcond w NE
+ MO_S_Gt w -> sbcond w SGT
+ MO_S_Ge w -> sbcond w SGE
+ MO_S_Lt w -> sbcond w SLT
+ MO_S_Le w -> sbcond w SLE
+ MO_U_Gt w -> ubcond w UGT
+ MO_U_Ge w -> ubcond w UGE
+ MO_U_Lt w -> ubcond w ULT
+ MO_U_Le w -> ubcond w ULE
+ _ -> pprPanic "RV64.genCondJump:case mop: " (text $ show expr)
+ _ -> pprPanic "RV64.genCondJump: " (text $ show expr)
+
+-- | Generate conditional branching instructions
+--
+-- This is basically an "if with else" statement.
+genCondBranch ::
+ -- | the true branch target
+ BlockId ->
+ -- | the false branch target
+ BlockId ->
+ -- | the condition on which to branch
+ CmmExpr ->
+ -- | Instructions
+ NatM InstrBlock
+genCondBranch true false expr =
+ appOL
+ <$> genCondJump true expr
+ <*> genBranch false
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- | Generate a call to a C function.
+--
+-- - Integer values are passed in GP registers a0-a7.
+-- - Floating point values are passed in FP registers fa0-fa7.
+-- - If there are no free floating point registers, the FP values are passed in GP registers.
+-- - If all GP registers are taken, the values are spilled as whole words (!) onto the stack.
+-- - For integers/words, the return value is in a0.
+-- - The return value is in fa0 if the return type is a floating point value.
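+--
+-- For example, a call to @int f(int, double, int)@ would pass the first
+-- argument in @a0@, the second in @fa0@, the third in @a1@, and return its
+-- result in @a0@ (a sketch of the convention described above).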
+genCCall ::
+ ForeignTarget -> -- function to call
+ [CmmFormal] -> -- where to put the result
+ [CmmActual] -> -- arguments (of mixed type)
+ NatM InstrBlock
+-- TODO: Specialize where we can.
+-- Generic impl
+genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
+ -- we want to pass arg_regs into allArgRegs
+ -- The target :: ForeignTarget is a foreign procedure with an
+ -- address expr and a calling convention.
+ (call_target_reg, call_target_code) <-
+ -- Compute the address of the call target into a register. This
+ -- addressing enables us to jump through the whole address space
+ -- without further ado. PC-relative addressing would involve
+ -- instructions to do similar, though.
+ do
+ (reg, _format, reg_code) <- getSomeReg expr
+ pure (reg, reg_code)
+ -- compute the code and register logic for all arg_regs.
+ -- this will give us the format information to match on.
+ arg_regs' <- mapM getSomeReg arg_regs
+
+ -- Now this is stupid. Our Cmm expressions don't carry the proper sizes:
+ -- in Cmm we might incorrectly get W64 for an int that is W32 in STG.
+ -- This then breaks the packing of stack arguments if we need to pack
+ -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type
+ -- in Cmm proper. Option two, which we choose here, is to use extended
+ -- Hint information to contain the size information and use that when
+ -- packing arguments spilled onto the stack.
+ let (_res_hints, arg_hints) = foreignTargetHints target
+ arg_regs'' = zipWith (\(r, f, c) h -> (r, f, h, c)) arg_regs' arg_hints
+
+ (stackSpaceWords, passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
+
+ readResultsCode <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
+
+ let moveStackDown 0 =
+ toOL
+ [ PUSH_STACK_FRAME,
+ DELTA (-16)
+ ]
+ moveStackDown i | odd i = moveStackDown (i + 1)
+ moveStackDown i =
+ toOL
+ [ PUSH_STACK_FRAME,
+ SUB (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))),
+ DELTA (-8 * i - 16)
+ ]
+ moveStackUp 0 =
+ toOL
+ [ POP_STACK_FRAME,
+ DELTA 0
+ ]
+ moveStackUp i | odd i = moveStackUp (i + 1)
+ moveStackUp i =
+ toOL
+ [ ADD (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))),
+ POP_STACK_FRAME,
+ DELTA 0
+ ]
+
+ let code =
+ call_target_code -- compute the label (possibly into a register)
+ `appOL` moveStackDown stackSpaceWords
+ `appOL` passArgumentsCode -- put the arguments into x0, ...
+ `snocOL` BL call_target_reg passRegs -- branch and link (C calls aren't tail calls, but return)
+ `appOL` readResultsCode -- parse the results into registers
+ `appOL` moveStackUp stackSpaceWords
+ return code
+ where
+ -- Implementation of the RISC-V ABI calling convention.
+ -- https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/948463cd5dbebea7c1869e20146b17a2cc8fda2f/riscv-cc.adoc#integer-calling-convention
+ passArguments :: [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
+ -- Base case: no more arguments to pass (left)
+ passArguments _ _ [] stackSpaceWords accumRegs accumCode = return (stackSpaceWords, accumRegs, accumCode)
+ -- Still have GP regs, and we want to pass a GP argument.
+ passArguments (gpReg : gpRegs) fpRegs ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode | isIntFormat format = do
+ -- RISCV64 Integer Calling Convention: "When passed in registers or on the
+ -- stack, integer scalars narrower than XLEN bits are widened according to
+ -- the sign of their type up to 32 bits, then sign-extended to XLEN bits."
+ let w = formatToWidth format
+ assignArg =
+ if hint == SignedHint
+ then
+ COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r)
+ `consOL` signExtend w W64 r gpReg
+ else
+ toOL
+ [ COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r),
+ MOV (OpReg w gpReg) (OpReg w r)
+ ]
+ accumCode' =
+ accumCode
+ `appOL` code_r
+ `appOL` assignArg
+ passArguments gpRegs fpRegs args stackSpaceWords (gpReg : accumRegs) accumCode'
+
+ -- Still have FP regs, and we want to pass an FP argument.
+ passArguments gpRegs (fpReg : fpRegs) ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do
+ let w = formatToWidth format
+ mov = MOV (OpReg w fpReg) (OpReg w r)
+ accumCode' =
+ accumCode
+ `appOL` code_r
+ `snocOL` ann (text "Pass fp argument: " <> ppr r) mov
+ passArguments gpRegs fpRegs args stackSpaceWords (fpReg : accumRegs) accumCode'
+
+ -- No more regs left to pass. Must pass on stack.
+ passArguments [] [] ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode = do
+ let w = formatToWidth format
+ spOffset = 8 * stackSpaceWords
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffset)))
+ stackCode =
+ if hint == SignedHint
+ then
+ code_r
+ `appOL` signExtend w W64 r tmpReg
+ `snocOL` ann (text "Pass signed argument (size " <> ppr w <> text ") on the stack: " <> ppr tmpReg) str
+ else
+ code_r
+ `snocOL` ann (text "Pass unsigned argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
+ passArguments [] [] args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode)
+
+ -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then.
+ passArguments [] fpRegs ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isIntFormat format = do
+ let w = formatToWidth format
+ spOffset = 8 * stackSpaceWords
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffset)))
+ stackCode =
+ code_r
+ `snocOL` ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
+ passArguments [] fpRegs args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode)
+
+ -- Still have gpRegs left, but want to pass an FP argument. Must be passed in a gpReg then.
+ passArguments (gpReg : gpRegs) [] ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do
+ let w = formatToWidth format
+ mov = MOV (OpReg w gpReg) (OpReg w r)
+ accumCode' =
+ accumCode
+ `appOL` code_r
+ `snocOL` ann (text "Pass fp argument in gpReg: " <> ppr r) mov
+ passArguments gpRegs [] args stackSpaceWords (gpReg : accumRegs) accumCode'
+ passArguments _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
+
+ readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg] -> InstrBlock -> NatM InstrBlock
+ readResults _ _ [] _ accumCode = return accumCode
+ readResults [] _ _ _ _ = do
+ platform <- getPlatform
+ pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target)
+ readResults _ [] _ _ _ = do
+ platform <- getPlatform
+ pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target)
+ readResults (gpReg : gpRegs) (fpReg : fpRegs) (dst : dsts) accumRegs accumCode = do
+ -- gp/fp reg -> dst
+ platform <- getPlatform
+ let rep = cmmRegType (CmmLocal dst)
+ format = cmmTypeFormat rep
+ w = cmmRegWidth (CmmLocal dst)
+ r_dst = getRegisterReg platform (CmmLocal dst)
+ if isFloatFormat format
+ then readResults (gpReg : gpRegs) fpRegs dsts (fpReg : accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg))
+ else
+ readResults gpRegs (fpReg : fpRegs) dsts (gpReg : accumRegs)
+ $ accumCode
+ `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg)
+ `appOL`
+ -- truncate, otherwise an unexpectedly big value might be used in subsequent calculations
+ truncateReg W64 w r_dst
+genCCall (PrimTarget mop) dest_regs arg_regs = do
+ case mop of
+ MO_F32_Fabs
+ | [arg_reg] <- arg_regs,
+ [dest_reg] <- dest_regs ->
+ unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+ MO_F64_Fabs
+ | [arg_reg] <- arg_regs,
+ [dest_reg] <- dest_regs ->
+ unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+ -- 64 bit float ops
+ MO_F64_Pwr -> mkCCall "pow"
+ MO_F64_Sin -> mkCCall "sin"
+ MO_F64_Cos -> mkCCall "cos"
+ MO_F64_Tan -> mkCCall "tan"
+ MO_F64_Sinh -> mkCCall "sinh"
+ MO_F64_Cosh -> mkCCall "cosh"
+ MO_F64_Tanh -> mkCCall "tanh"
+ MO_F64_Asin -> mkCCall "asin"
+ MO_F64_Acos -> mkCCall "acos"
+ MO_F64_Atan -> mkCCall "atan"
+ MO_F64_Asinh -> mkCCall "asinh"
+ MO_F64_Acosh -> mkCCall "acosh"
+ MO_F64_Atanh -> mkCCall "atanh"
+ MO_F64_Log -> mkCCall "log"
+ MO_F64_Log1P -> mkCCall "log1p"
+ MO_F64_Exp -> mkCCall "exp"
+ MO_F64_ExpM1 -> mkCCall "expm1"
+ MO_F64_Fabs -> mkCCall "fabs"
+ MO_F64_Sqrt -> mkCCall "sqrt"
+ -- 32 bit float ops
+ MO_F32_Pwr -> mkCCall "powf"
+ MO_F32_Sin -> mkCCall "sinf"
+ MO_F32_Cos -> mkCCall "cosf"
+ MO_F32_Tan -> mkCCall "tanf"
+ MO_F32_Sinh -> mkCCall "sinhf"
+ MO_F32_Cosh -> mkCCall "coshf"
+ MO_F32_Tanh -> mkCCall "tanhf"
+ MO_F32_Asin -> mkCCall "asinf"
+ MO_F32_Acos -> mkCCall "acosf"
+ MO_F32_Atan -> mkCCall "atanf"
+ MO_F32_Asinh -> mkCCall "asinhf"
+ MO_F32_Acosh -> mkCCall "acoshf"
+ MO_F32_Atanh -> mkCCall "atanhf"
+ MO_F32_Log -> mkCCall "logf"
+ MO_F32_Log1P -> mkCCall "log1pf"
+ MO_F32_Exp -> mkCCall "expf"
+ MO_F32_ExpM1 -> mkCCall "expm1f"
+ MO_F32_Fabs -> mkCCall "fabsf"
+ MO_F32_Sqrt -> mkCCall "sqrtf"
+ -- 64-bit primops
+ MO_I64_ToI -> mkCCall "hs_int64ToInt"
+ MO_I64_FromI -> mkCCall "hs_intToInt64"
+ MO_W64_ToW -> mkCCall "hs_word64ToWord"
+ MO_W64_FromW -> mkCCall "hs_wordToWord64"
+ MO_x64_Neg -> mkCCall "hs_neg64"
+ MO_x64_Add -> mkCCall "hs_add64"
+ MO_x64_Sub -> mkCCall "hs_sub64"
+ MO_x64_Mul -> mkCCall "hs_mul64"
+ MO_I64_Quot -> mkCCall "hs_quotInt64"
+ MO_I64_Rem -> mkCCall "hs_remInt64"
+ MO_W64_Quot -> mkCCall "hs_quotWord64"
+ MO_W64_Rem -> mkCCall "hs_remWord64"
+ MO_x64_And -> mkCCall "hs_and64"
+ MO_x64_Or -> mkCCall "hs_or64"
+ MO_x64_Xor -> mkCCall "hs_xor64"
+ MO_x64_Not -> mkCCall "hs_not64"
+ MO_x64_Shl -> mkCCall "hs_uncheckedShiftL64"
+ MO_I64_Shr -> mkCCall "hs_uncheckedIShiftRA64"
+ MO_W64_Shr -> mkCCall "hs_uncheckedShiftRL64"
+ MO_x64_Eq -> mkCCall "hs_eq64"
+ MO_x64_Ne -> mkCCall "hs_ne64"
+ MO_I64_Ge -> mkCCall "hs_geInt64"
+ MO_I64_Gt -> mkCCall "hs_gtInt64"
+ MO_I64_Le -> mkCCall "hs_leInt64"
+ MO_I64_Lt -> mkCCall "hs_ltInt64"
+ MO_W64_Ge -> mkCCall "hs_geWord64"
+ MO_W64_Gt -> mkCCall "hs_gtWord64"
+ MO_W64_Le -> mkCCall "hs_leWord64"
+ MO_W64_Lt -> mkCCall "hs_ltWord64"
+ -- Conversion
+ MO_UF_Conv w -> mkCCall (word2FloatLabel w)
+ -- Optional MachOps
+ -- These are enabled/disabled by backend flags: GHC.StgToCmm.Config
+ MO_S_Mul2 _w -> unsupported mop
+ MO_S_QuotRem _w -> unsupported mop
+ MO_U_QuotRem _w -> unsupported mop
+ MO_U_QuotRem2 _w -> unsupported mop
+ MO_Add2 _w -> unsupported mop
+ MO_AddWordC _w -> unsupported mop
+ MO_SubWordC _w -> unsupported mop
+ MO_AddIntC _w -> unsupported mop
+ MO_SubIntC _w -> unsupported mop
+ MO_U_Mul2 _w -> unsupported mop
+ -- Memory Ordering
+ -- The related C functions are:
+ -- #include <stdatomic.h>
+ -- atomic_thread_fence(memory_order_acquire);
+ -- atomic_thread_fence(memory_order_release);
+ -- atomic_thread_fence(memory_order_seq_cst);
+ MO_AcquireFence -> pure (unitOL (FENCE FenceRead FenceReadWrite))
+ MO_ReleaseFence -> pure (unitOL (FENCE FenceReadWrite FenceWrite))
+ MO_SeqCstFence -> pure (unitOL (FENCE FenceReadWrite FenceReadWrite))
+ MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers)
+ -- Prefetch
+ MO_Prefetch_Data _n -> pure nilOL -- Prefetch hint.
+
+ -- Memory copy/set/move/cmp, with alignment for optimization
+ MO_Memcpy _align -> mkCCall "memcpy"
+ MO_Memset _align -> mkCCall "memset"
+ MO_Memmove _align -> mkCCall "memmove"
+ MO_Memcmp _align -> mkCCall "memcmp"
+ MO_SuspendThread -> mkCCall "suspendThread"
+ MO_ResumeThread -> mkCCall "resumeThread"
+ MO_PopCnt w -> mkCCall (popCntLabel w)
+ MO_Pdep w -> mkCCall (pdepLabel w)
+ MO_Pext w -> mkCCall (pextLabel w)
+ MO_Clz w -> mkCCall (clzLabel w)
+ MO_Ctz w -> mkCCall (ctzLabel w)
+ MO_BSwap w -> mkCCall (bSwapLabel w)
+ MO_BRev w -> mkCCall (bRevLabel w)
+ -- Atomic read-modify-write.
+ mo@(MO_AtomicRead w ord)
+ | [p_reg] <- arg_regs,
+ [dst_reg] <- dest_regs -> do
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
+ platform <- getPlatform
+ -- Analog to the related MachOps (above)
+ -- The related C functions are:
+ -- #include <stdatomic.h>
+ -- __atomic_load_n(&a, __ATOMIC_ACQUIRE);
+ -- __atomic_load_n(&a, __ATOMIC_SEQ_CST);
+ let instrs = case ord of
+ MemOrderRelaxed -> unitOL $ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p))
+ MemOrderAcquire ->
+ toOL
+ [ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
+ FENCE FenceRead FenceReadWrite
+ ]
+ MemOrderSeqCst ->
+ toOL
+ [ ann moDescr (FENCE FenceReadWrite FenceReadWrite),
+ LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
+ FENCE FenceRead FenceReadWrite
+ ]
+ MemOrderRelease -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
+ dst = getRegisterReg platform (CmmLocal dst_reg)
+ moDescr = (text . show) mo
+ code = code_p `appOL` instrs
+ return code
+ | otherwise -> panic "mal-formed AtomicRead"
+ mo@(MO_AtomicWrite w ord)
+ | [p_reg, val_reg] <- arg_regs -> do
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
+ (val, fmt_val, code_val) <- getSomeReg val_reg
+ -- Analog to the related MachOps (above)
+ -- The related C functions are:
+ -- #include <stdatomic.h>
+ -- __atomic_store_n(&a, 23, __ATOMIC_SEQ_CST);
+ -- __atomic_store_n(&a, 23, __ATOMIC_RELEASE);
+ let instrs = case ord of
+ MemOrderRelaxed -> unitOL $ ann moDescr (STR fmt_val (OpReg w val) (OpAddr $ AddrReg p))
+ MemOrderSeqCst ->
+ toOL
+ [ ann moDescr (FENCE FenceReadWrite FenceWrite),
+ STR fmt_val (OpReg w val) (OpAddr $ AddrReg p),
+ FENCE FenceReadWrite FenceReadWrite
+ ]
+ MemOrderRelease ->
+ toOL
+ [ ann moDescr (FENCE FenceReadWrite FenceWrite),
+ STR fmt_val (OpReg w val) (OpAddr $ AddrReg p)
+ ]
+ MemOrderAcquire -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
+ moDescr = (text . show) mo
+ code =
+ code_p
+ `appOL` code_val
+ `appOL` instrs
+ pure code
+ | otherwise -> panic "mal-formed AtomicWrite"
+ MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
+ MO_Cmpxchg w -> mkCCall (cmpxchgLabel w)
+ -- Should be an AtomicRMW variant eventually.
+ -- Sequentially consistent.
+ -- TODO: this should be implemented properly!
+ MO_Xchg w -> mkCCall (xchgLabel w)
+ where
+ unsupported :: (Show a) => a -> b
+ unsupported mop =
+ panic
+ ( "outOfLineCmmOp: "
+ ++ show mop
+ ++ " not supported here"
+ )
+ mkCCall :: FastString -> NatM InstrBlock
+ mkCCall name = do
+ config <- getConfig
+ target <-
+ cmmMakeDynamicReference config CallReference
+ $ mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction
+ let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
+ genCCall (ForeignTarget target cconv) dest_regs arg_regs
+
+ unaryFloatOp w op arg_reg dest_reg = do
+ platform <- getPlatform
+ (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg
+ let dst = getRegisterReg platform (CmmLocal dest_reg)
+ let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
+ pure code
+
+{- Note [RISCV64 far jumps]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+RISCV64 conditional jump instructions can only encode an offset of +/-4KiB
+(12 bits), which is usually enough but can be exceeded in edge cases. In these
+cases we will replace:
+
+ b.cond <cond> foo
+
+with the sequence:
+
+ b.cond <cond> <lbl_true>
+ b <lbl_false>
+ <lbl_true>:
+ la reg foo
+ b reg
+ <lbl_false>:
+
+and
+
+ b foo
+
+with the sequence:
+
+ la reg foo
+ b reg
+
+Compared to AArch64, the target label is loaded into a register, because
+unconditional jump instructions can only address +/-1MiB. The LA
+pseudo-instruction will be replaced by up to two real instructions, ensuring
+correct addressing.
+
+One could surely find more efficient replacements, taking PC-relative addressing
+into account. This could be a future improvement. (As far branches are pretty
+rare, one might question and measure the value of such an improvement.)
+
+RISCV has many pseudo-instructions which emit more than one real instruction.
+Thus, we count the real instructions after the assembler has seen them.
+
+We make some simplifications in the name of performance which can result in
+overestimating jump <-> label offsets:
+
+\* To avoid having to recalculate the label offsets once we have replaced a jump,
+ we simply assume all label jumps will be expanded to a three-instruction far
+ jump sequence.
+\* For labels associated with an info table we assume the info table is 64 bytes
+ large. Most info tables are smaller than that, but it means we don't have to
+ distinguish between multiple types of info tables.
+
+In terms of implementation, we walk the instruction stream at least once,
+calculating label offsets. If we determine during this that the function's body
+is big enough to potentially contain out-of-range jumps, we walk the
+instructions a second time, replacing out-of-range jumps with the sequence of
+instructions described above.
+
+-}
+
+-- | A conditional jump to a far target
+--
+-- By loading the far target into a register for the jump, we can address the
+-- whole memory range.
+genCondFarJump :: (MonadUnique m) => Cond -> Operand -> Operand -> BlockId -> m InstrBlock
+genCondFarJump cond op1 op2 far_target = do
+ skip_lbl_id <- newBlockId
+ jmp_lbl_id <- newBlockId
+
+ -- TODO: We can improve this by inverting the condition
+ -- but it's not quite trivial since we don't know if we
+ -- need to consider float orderings.
+ -- So we take the hit of the additional jump in the false
+ -- case for now.
+ return
+ $ toOL
+ [ ann (text "Conditional far jump to: " <> ppr far_target)
+ $ BCOND cond op1 op2 (TBlock jmp_lbl_id),
+ B (TBlock skip_lbl_id),
+ NEWBLOCK jmp_lbl_id,
+ LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
+ B (TReg tmpReg),
+ NEWBLOCK skip_lbl_id
+ ]
+
+-- | An unconditional jump to a far target
+--
+-- By loading the far target into a register for the jump, we can address the
+-- whole memory range.
+genFarJump :: (MonadUnique m) => BlockId -> m InstrBlock
+genFarJump far_target =
+ return
+ $ toOL
+ [ ann (text "Unconditional far jump to: " <> ppr far_target)
+ $ LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
+ B (TReg tmpReg)
+ ]
+
+-- See Note [RISCV64 far jumps]
+data BlockInRange = InRange | NotInRange BlockId
+
+-- See Note [RISCV64 far jumps]
+makeFarBranches ::
+ Platform ->
+ LabelMap RawCmmStatics ->
+ [NatBasicBlock Instr] ->
+ UniqSM [NatBasicBlock Instr]
+makeFarBranches {- only used when debugging -} _platform statics basic_blocks = do
+ -- All offsets/positions are counted in multiples of 4 bytes (the size of RISCV64 instructions).
+ -- That is, an offset of 1 represents a 4-byte/one-instruction offset.
+ let (func_size, lblMap) = foldl' calc_lbl_positions (0, mapEmpty) basic_blocks
+ if func_size < max_jump_dist
+ then pure basic_blocks
+ else do
+ (_, blocks) <- mapAccumLM (replace_blk lblMap) 0 basic_blocks
+ pure $ concat blocks
+ where
+ -- pprTrace "lblMap" (ppr lblMap) $ basic_blocks
+
+ -- 2^11 - 1: a 12-bit immediate with one bit reserved for the sign
+ max_jump_dist = 2 ^ (11 :: Int) - 1 :: Int
+ -- Currently all inline info tables fit into 64 bytes.
+ max_info_size = 16 :: Int
+ long_bc_jump_size = 5 :: Int
+ long_b_jump_size = 2 :: Int
+
+ -- Replace out of range conditional jumps with unconditional jumps.
+ replace_blk :: LabelMap Int -> Int -> GenBasicBlock Instr -> UniqSM (Int, [GenBasicBlock Instr])
+ replace_blk !m !pos (BasicBlock lbl instrs) = do
+ -- Account for a potential info table before the label.
+ let !block_pos = pos + infoTblSize_maybe lbl
+ (!pos', instrs') <- mapAccumLM (replace_jump m) block_pos instrs
+ let instrs'' = concat instrs'
+ -- We might have introduced new labels, so split the instructions into basic blocks again if necessary.
+ let (top, split_blocks, no_data) = foldr mkBlocks ([], [], []) instrs''
+ -- There should be no data in the instruction stream at this point
+ massert (null no_data)
+
+ let final_blocks = BasicBlock lbl top : split_blocks
+ pure (pos', final_blocks)
+
+ replace_jump :: LabelMap Int -> Int -> Instr -> UniqSM (Int, [Instr])
+ replace_jump !m !pos instr = do
+ case instr of
+ ANN ann instr -> do
+ (idx, instr' : instrs') <- replace_jump m pos instr
+ pure (idx, ANN ann instr' : instrs')
+ BCOND cond op1 op2 t ->
+ case target_in_range m t pos of
+ InRange -> pure (pos + instr_size instr, [instr])
+ NotInRange far_target -> do
+ jmp_code <- genCondFarJump cond op1 op2 far_target
+ pure (pos + instr_size instr, fromOL jmp_code)
+ B t ->
+ case target_in_range m t pos of
+ InRange -> pure (pos + instr_size instr, [instr])
+ NotInRange far_target -> do
+ jmp_code <- genFarJump far_target
+ pure (pos + instr_size instr, fromOL jmp_code)
+ _ -> pure (pos + instr_size instr, [instr])
+
+ target_in_range :: LabelMap Int -> Target -> Int -> BlockInRange
+ target_in_range m target src =
+ case target of
+ (TReg {}) -> InRange
+ (TBlock bid) -> block_in_range m src bid
+
+ block_in_range :: LabelMap Int -> Int -> BlockId -> BlockInRange
+ block_in_range m src_pos dest_lbl =
+ case mapLookup dest_lbl m of
+ Nothing ->
+ pprTrace "not in range" (ppr dest_lbl)
+ $ NotInRange dest_lbl
+ Just dest_pos ->
+ if abs (dest_pos - src_pos) < max_jump_dist
+ then InRange
+ else NotInRange dest_lbl
+
+ calc_lbl_positions :: (Int, LabelMap Int) -> GenBasicBlock Instr -> (Int, LabelMap Int)
+ calc_lbl_positions (pos, m) (BasicBlock lbl instrs) =
+ let !pos' = pos + infoTblSize_maybe lbl
+ in foldl' instr_pos (pos', mapInsert lbl pos' m) instrs
+
+ instr_pos :: (Int, LabelMap Int) -> Instr -> (Int, LabelMap Int)
+ instr_pos (pos, m) instr = (pos + instr_size instr, m)
+
+ infoTblSize_maybe bid =
+ case mapLookup bid statics of
+ Nothing -> 0 :: Int
+ Just _info_static -> max_info_size
+
+ instr_size :: Instr -> Int
+ instr_size i = case i of
+ COMMENT {} -> 0
+ MULTILINE_COMMENT {} -> 0
+ ANN _ instr -> instr_size instr
+ LOCATION {} -> 0
+ DELTA {} -> 0
+ -- At this point there should be no NEWBLOCK in the instruction stream
+ NEWBLOCK {} -> panic "mkFarBranched - Unexpected"
+ LDATA {} -> panic "mkFarBranched - Unexpected"
+ PUSH_STACK_FRAME -> 4
+ POP_STACK_FRAME -> 4
+ ADD {} -> 1
+ MUL {} -> 1
+ MULH {} -> 1
+ NEG {} -> 1
+ DIV {} -> 1
+ REM {} -> 1
+ REMU {} -> 1
+ SUB {} -> 1
+ DIVU {} -> 1
+ AND {} -> 1
+ OR {} -> 1
+ SRA {} -> 1
+ XOR {} -> 1
+ SLL {} -> 1
+ SRL {} -> 1
+ MOV {} -> 2
+ ORI {} -> 1
+ XORI {} -> 1
+ CSET {} -> 2
+ STR {} -> 1
+ LDR {} -> 3
+ LDRU {} -> 1
+ FENCE {} -> 1
+ FCVT {} -> 1
+ FABS {} -> 1
+ FMA {} -> 1
+ -- estimate the substituted size for jumps to labels;
+ -- jumps to registers have size 1
+ BCOND {} -> long_bc_jump_size
+ B (TBlock _) -> long_b_jump_size
+ B (TReg _) -> 1
+ BL _ _ -> 1
+ J_TBL {} -> 1
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Cond.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Cond.hs
@@ -0,0 +1,42 @@
+module GHC.CmmToAsm.RV64.Cond
+ ( Cond (..),
+ )
+where
+
+import GHC.Prelude hiding (EQ)
+
+-- | Condition codes.
+--
+-- Used in conditional branches and bit setters. According to the available
+-- instruction set, some conditions are encoded as their negated opposites; i.e.
+-- these are logical conditions that don't necessarily map 1:1 to hardware/ISA.
+data Cond
+ = -- | int and float
+ EQ
+ | -- | int and float
+ NE
+ | -- | signed less than
+ SLT
+ | -- | signed less than or equal
+ SLE
+ | -- | signed greater than or equal
+ SGE
+ | -- | signed greater than
+ SGT
+ | -- | unsigned less than
+ ULT
+ | -- | unsigned less than or equal
+ ULE
+ | -- | unsigned greater than or equal
+ UGE
+ | -- | unsigned greater than
+ UGT
+ | -- | floating point instruction @flt@
+ FLT
+ | -- | floating point instruction @fle@
+ FLE
+ | -- | floating point instruction @fge@
+ FGE
+ | -- | floating point instruction @fgt@
+ FGT
+ deriving (Eq, Show)
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Instr.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Instr.hs
@@ -0,0 +1,823 @@
+-- All instructions will be rendered eventually. Thus, there's no benefit in
+-- being lazy in data types.
+{-# LANGUAGE StrictData #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module GHC.CmmToAsm.RV64.Instr where
+
+import Data.Maybe
+import GHC.Cmm
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Label
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Format
+import GHC.CmmToAsm.Instr (RegUsage (..))
+import GHC.CmmToAsm.RV64.Cond
+import GHC.CmmToAsm.RV64.Regs
+import GHC.CmmToAsm.Types
+import GHC.CmmToAsm.Utils
+import GHC.Data.FastString (LexicalFastString)
+import GHC.Platform
+import GHC.Platform.Reg
+import GHC.Platform.Regs
+import GHC.Prelude
+import GHC.Stack
+import GHC.Types.Unique.Supply
+import GHC.Utils.Outputable
+import GHC.Utils.Panic
+
+-- | Stack frame header size in bytes.
+--
+-- The stack frame header is made of the values that are always saved
+-- (regardless of the context). It consists of the saved return address and a
+-- pointer to the previous frame. Thus, its size is two stack frame slots, which
+-- equals two addresses/words (2 * 8 bytes).
+stackFrameHeaderSize :: Int
+stackFrameHeaderSize = 2 * spillSlotSize
+
+-- | All registers are 8 byte wide.
+spillSlotSize :: Int
+spillSlotSize = 8
+
+-- | The number of bytes that the stack pointer should be aligned to.
+stackAlign :: Int
+stackAlign = 16
+
+-- | The number of spill slots available without allocating more.
+maxSpillSlots :: NCGConfig -> Int
+maxSpillSlots config =
+ ( (ncgSpillPreallocSize config - stackFrameHeaderSize)
+ `div` spillSlotSize
+ )
+ - 1
+
+-- | Convert a spill slot number to a *byte* offset.
+spillSlotToOffset :: Int -> Int
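+--
+-- For example, slot 2 lives at byte offset @16 + 8 * 2 = 32@, just past the
+-- frame header.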
+spillSlotToOffset slot =
+ stackFrameHeaderSize + spillSlotSize * slot
+
+instance Outputable RegUsage where
+ ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')'
+
+-- | Get the registers that are being used by this instruction.
+-- regUsage doesn't need to do any trickery for jumps and such.
+-- Just state precisely the regs read and written by that insn.
+-- The consequences of control flow transfers, as far as register
+-- allocation goes, are taken care of by the register allocator.
+--
+-- RegUsage = RU [<read regs>] [<write regs>]
+regUsageOfInstr :: Platform -> Instr -> RegUsage
+regUsageOfInstr platform instr = case instr of
+ ANN _ i -> regUsageOfInstr platform i
+ COMMENT {} -> usage ([], [])
+ MULTILINE_COMMENT {} -> usage ([], [])
+ PUSH_STACK_FRAME -> usage ([], [])
+ POP_STACK_FRAME -> usage ([], [])
+ LOCATION {} -> usage ([], [])
+ DELTA {} -> usage ([], [])
+ ADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ MUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ NEG dst src -> usage (regOp src, regOp dst)
+ MULH dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ DIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ REM dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ REMU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ SUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ DIVU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ SRA dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ SLL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ SRL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ MOV dst src -> usage (regOp src, regOp dst)
+ -- ORI's third operand is always an immediate
+ ORI dst src1 _ -> usage (regOp src1, regOp dst)
+ XORI dst src1 _ -> usage (regOp src1, regOp dst)
+ J_TBL _ _ t -> usage ([t], [])
+ B t -> usage (regTarget t, [])
+ BCOND _ l r t -> usage (regTarget t ++ regOp l ++ regOp r, [])
+ BL t ps -> usage (t : ps, callerSavedRegisters)
+ CSET dst l r _ -> usage (regOp l ++ regOp r, regOp dst)
+ STR _ src dst -> usage (regOp src ++ regOp dst, [])
+ LDR _ dst src -> usage (regOp src, regOp dst)
+ LDRU _ dst src -> usage (regOp src, regOp dst)
+ FENCE _ _ -> usage ([], [])
+ FCVT _variant dst src -> usage (regOp src, regOp dst)
+ FABS dst src -> usage (regOp src, regOp dst)
+ FMA _ dst src1 src2 src3 ->
+ usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
+ _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
+ where
+ -- filtering the usage is necessary, otherwise the register
+ -- allocator will try to allocate pre-defined fixed stg
+ -- registers as well, as they show up.
+ usage :: ([Reg], [Reg]) -> RegUsage
+ usage (srcRegs, dstRegs) =
+ RU
+ (filter (interesting platform) srcRegs)
+ (filter (interesting platform) dstRegs)
+
+ regAddr :: AddrMode -> [Reg]
+ regAddr (AddrRegImm r1 _imm) = [r1]
+ regAddr (AddrReg r1) = [r1]
+
+ regOp :: Operand -> [Reg]
+ regOp (OpReg _w r1) = [r1]
+ regOp (OpAddr a) = regAddr a
+ regOp (OpImm _imm) = []
+
+ regTarget :: Target -> [Reg]
+ regTarget (TBlock _bid) = []
+ regTarget (TReg r1) = [r1]
+
+ -- Is this register interesting for the register allocator?
+ interesting :: Platform -> Reg -> Bool
+ interesting _ (RegVirtual _) = True
+ interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
+
+-- | Caller-saved registers (according to calling convention)
+--
+-- These registers may be clobbered after a jump.
+callerSavedRegisters :: [Reg]
+callerSavedRegisters =
+ [regSingle raRegNo]
+ ++ map regSingle [t0RegNo .. t2RegNo]
+ ++ map regSingle [a0RegNo .. a7RegNo]
+ ++ map regSingle [t3RegNo .. t6RegNo]
+ ++ map regSingle [ft0RegNo .. ft7RegNo]
+ ++ map regSingle [fa0RegNo .. fa7RegNo]
+
+-- | Apply a given mapping to all the register references in this instruction.
+patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+patchRegsOfInstr instr env = case instr of
+ ANN d i -> ANN d (patchRegsOfInstr i env)
+ COMMENT {} -> instr
+ MULTILINE_COMMENT {} -> instr
+ PUSH_STACK_FRAME -> instr
+ POP_STACK_FRAME -> instr
+ LOCATION {} -> instr
+ DELTA {} -> instr
+ ADD o1 o2 o3 -> ADD (patchOp o1) (patchOp o2) (patchOp o3)
+ MUL o1 o2 o3 -> MUL (patchOp o1) (patchOp o2) (patchOp o3)
+ NEG o1 o2 -> NEG (patchOp o1) (patchOp o2)
+ MULH o1 o2 o3 -> MULH (patchOp o1) (patchOp o2) (patchOp o3)
+ DIV o1 o2 o3 -> DIV (patchOp o1) (patchOp o2) (patchOp o3)
+ REM o1 o2 o3 -> REM (patchOp o1) (patchOp o2) (patchOp o3)
+ REMU o1 o2 o3 -> REMU (patchOp o1) (patchOp o2) (patchOp o3)
+ SUB o1 o2 o3 -> SUB (patchOp o1) (patchOp o2) (patchOp o3)
+ DIVU o1 o2 o3 -> DIVU (patchOp o1) (patchOp o2) (patchOp o3)
+ AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
+ OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3)
+ SRA o1 o2 o3 -> SRA (patchOp o1) (patchOp o2) (patchOp o3)
+ XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3)
+ SLL o1 o2 o3 -> SLL (patchOp o1) (patchOp o2) (patchOp o3)
+ SRL o1 o2 o3 -> SRL (patchOp o1) (patchOp o2) (patchOp o3)
+ MOV o1 o2 -> MOV (patchOp o1) (patchOp o2)
+ -- o3 cannot be a register for ORI (always an immediate)
+ ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3)
+ XORI o1 o2 o3 -> XORI (patchOp o1) (patchOp o2) (patchOp o3)
+ J_TBL ids mbLbl t -> J_TBL ids mbLbl (env t)
+ B t -> B (patchTarget t)
+ BL t ps -> BL (patchReg t) ps
+ BCOND c o1 o2 t -> BCOND c (patchOp o1) (patchOp o2) (patchTarget t)
+ CSET o l r c -> CSET (patchOp o) (patchOp l) (patchOp r) c
+ STR f o1 o2 -> STR f (patchOp o1) (patchOp o2)
+ LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2)
+ LDRU f o1 o2 -> LDRU f (patchOp o1) (patchOp o2)
+ FENCE o1 o2 -> FENCE o1 o2
+ FCVT variant o1 o2 -> FCVT variant (patchOp o1) (patchOp o2)
+ FABS o1 o2 -> FABS (patchOp o1) (patchOp o2)
+ FMA s o1 o2 o3 o4 ->
+ FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+ _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+ where
+ patchOp :: Operand -> Operand
+ patchOp (OpReg w r) = OpReg w (env r)
+ patchOp (OpAddr a) = OpAddr (patchAddr a)
+ patchOp opImm = opImm
+
+ patchTarget :: Target -> Target
+ patchTarget (TReg r) = TReg (env r)
+ patchTarget tBlock = tBlock
+
+ patchAddr :: AddrMode -> AddrMode
+ patchAddr (AddrRegImm r1 imm) = AddrRegImm (env r1) imm
+ patchAddr (AddrReg r) = AddrReg (env r)
+
+ patchReg :: Reg -> Reg
+ patchReg = env
+
+-- | Checks whether this instruction is a jump/branch instruction.
+--
+-- One that can change the flow of control in a way that the
+-- register allocator needs to worry about.
+isJumpishInstr :: Instr -> Bool
+isJumpishInstr instr = case instr of
+ ANN _ i -> isJumpishInstr i
+ J_TBL {} -> True
+ B {} -> True
+ BL {} -> True
+ BCOND {} -> True
+ _ -> False
+
+-- | Get the `BlockId`s of the jump destinations (if any)
+jumpDestsOfInstr :: Instr -> [BlockId]
+jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i
+jumpDestsOfInstr (J_TBL ids _mbLbl _r) = catMaybes ids
+jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
+jumpDestsOfInstr (BCOND _ _ _ t) = [id | TBlock id <- [t]]
+jumpDestsOfInstr _ = []
+
+-- | Change the destination of this (potential) jump instruction.
+--
+-- Used in the linear allocator when adding fixup blocks for join
+-- points.
+patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+patchJumpInstr instr patchF =
+ case instr of
+ ANN d i -> ANN d (patchJumpInstr i patchF)
+ J_TBL ids mbLbl r -> J_TBL (map (fmap patchF) ids) mbLbl r
+ B (TBlock bid) -> B (TBlock (patchF bid))
+ BCOND c o1 o2 (TBlock bid) -> BCOND c o1 o2 (TBlock (patchF bid))
+ _ -> panic $ "patchJumpInstr: " ++ instrCon instr
+
+-- -----------------------------------------------------------------------------
+-- Note [RISCV64 Spills and Reloads]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading
+-- registers. The load and store instructions of RISCV64 address with a signed
+-- 12-bit immediate + a register; the machine stack pointer (sp/x2) in this case.
+--
+-- The @RESERVED_C_STACK_BYTES@ is 16k, so we can't always address into it in a
+-- single load/store instruction. There are offsets to sp (not to be confused
+-- with STG's SP!) that require an extra register to compute.
+--
+-- Using sp to compute the offset would violate assumptions about the stack pointer
+-- pointing to the top of the stack during signal handling. As we can't force
+-- every signal to use its own stack, we have to ensure that the stack pointer
+-- always points to the top of the stack, and we can't use it for computation.
+--
+-- So, we reserve one register (TMP) for this purpose (and other, unrelated
+-- intermediate operations.) See Note [The made-up RISCV64 TMP (IP) register]
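+--
+-- For an offset outside the 12-bit immediate range, a spill thus becomes a
+-- three-instruction sequence (a sketch; @t6@ is TMP, @x22@ an arbitrary
+-- register to spill):
+--
+-- @
+--     li  t6, 4000      -- move the offset into TMP
+--     add t6, t6, sp    -- TMP <- sp + offset
+--     sd  x22, 0(t6)    -- store through TMP
+-- @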
+
+-- | Generate instructions to spill a register into a spill slot.
+mkSpillInstr ::
+ (HasCallStack) =>
+ NCGConfig ->
+ -- | register to spill
+ Reg ->
+ -- | current stack delta
+ Int ->
+ -- | spill slot to use
+ Int ->
+ [Instr]
+mkSpillInstr _config reg delta slot =
+ case off - delta of
+ imm | fitsIn12bitImm imm -> [mkStrSpImm imm]
+ imm ->
+ [ movImmToTmp imm,
+ addSpToTmp,
+ mkStrTmp
+ ]
+ where
+ fmt = case reg of
+ RegReal (RealRegSingle n) | n < d0RegNo -> II64
+ _ -> FF64
+ mkStrSpImm imm =
+ ANN (text "Spill@" <> int (off - delta))
+ $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+ movImmToTmp imm =
+ ANN (text "Spill: TMP <- " <> int imm)
+ $ MOV tmp (OpImm (ImmInt imm))
+ addSpToTmp =
+ ANN (text "Spill: TMP <- SP + TMP ")
+ $ ADD tmp tmp sp
+ mkStrTmp =
+ ANN (text "Spill@" <> int (off - delta))
+ $ STR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+
+ off = spillSlotToOffset slot
+
+-- | Generate instructions to load a register from a spill slot.
+mkLoadInstr ::
+ NCGConfig ->
+ -- | register to load
+ Reg ->
+ -- | current stack delta
+ Int ->
+ -- | spill slot to use
+ Int ->
+ [Instr]
+mkLoadInstr _config reg delta slot =
+ case off - delta of
+ imm | fitsIn12bitImm imm -> [mkLdrSpImm imm]
+ imm ->
+ [ movImmToTmp imm,
+ addSpToTmp,
+ mkLdrTmp
+ ]
+ where
+ fmt = case reg of
+ RegReal (RealRegSingle n) | n < d0RegNo -> II64
+ _ -> FF64
+ mkLdrSpImm imm =
+ ANN (text "Reload@" <> int (off - delta))
+ $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+ movImmToTmp imm =
+ ANN (text "Reload: TMP <- " <> int imm)
+ $ MOV tmp (OpImm (ImmInt imm))
+ addSpToTmp =
+ ANN (text "Reload: TMP <- SP + TMP ")
+ $ ADD tmp tmp sp
+ mkLdrTmp =
+ ANN (text "Reload@" <> int (off - delta))
+ $ LDR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+
+ off = spillSlotToOffset slot
+
+-- | See if this instruction is telling us the current C stack delta
+takeDeltaInstr :: Instr -> Maybe Int
+takeDeltaInstr (ANN _ i) = takeDeltaInstr i
+takeDeltaInstr (DELTA i) = Just i
+takeDeltaInstr _ = Nothing
+
+-- | Not real instructions. Just meta data
+isMetaInstr :: Instr -> Bool
+isMetaInstr instr =
+ case instr of
+ ANN _ i -> isMetaInstr i
+ COMMENT {} -> True
+ MULTILINE_COMMENT {} -> True
+ LOCATION {} -> True
+ LDATA {} -> True
+ NEWBLOCK {} -> True
+ DELTA {} -> True
+ PUSH_STACK_FRAME -> True
+ POP_STACK_FRAME -> True
+ _ -> False
+
+-- | Copy the value in a register to another one.
+--
+-- Must work for all register classes.
+mkRegRegMoveInstr :: Reg -> Reg -> Instr
+mkRegRegMoveInstr src dst = ANN desc instr
+ where
+ desc = text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst
+ instr = MOV (operandFromReg dst) (operandFromReg src)
+
+-- | Take the source and destination from this (potential) reg -> reg move instruction
+--
+-- We have to be a bit careful here: A `MOV` can also mean an implicit
+-- conversion. This case is filtered out.
+takeRegRegMoveInstr :: Instr -> Maybe (Reg, Reg)
+takeRegRegMoveInstr (MOV (OpReg width dst) (OpReg width' src))
+ | width == width' && (isFloatReg dst == isFloatReg src) = pure (src, dst)
+takeRegRegMoveInstr _ = Nothing
+
+-- | Make an unconditional jump instruction.
+mkJumpInstr :: BlockId -> [Instr]
+mkJumpInstr = pure . B . TBlock
+
+-- | Decrement @sp@ to allocate stack space.
+--
+-- The stack grows downwards, so we decrement the stack pointer by @n@ (bytes).
+-- This is dual to `mkStackDeallocInstr`. @sp@ is the RISCV stack pointer, not
+-- to be confused with the STG stack pointer.
+mkStackAllocInstr :: Platform -> Int -> [Instr]
+mkStackAllocInstr _platform = moveSp . negate
+
+-- | Increment SP to deallocate stack space.
+--
+-- The stack grows downwards, so we increment the stack pointer by @n@ (bytes).
+-- This is dual to `mkStackAllocInstr`. @sp@ is the RISCV stack pointer, not to
+-- be confused with the STG stack pointer.
+mkStackDeallocInstr :: Platform -> Int -> [Instr]
+mkStackDeallocInstr _platform = moveSp
+
+moveSp :: Int -> [Instr]
+moveSp n
+ | n == 0 = []
+ | n /= 0 && fitsIn12bitImm n = pure . ANN desc $ ADD sp sp (OpImm (ImmInt n))
+ | otherwise =
+ -- This ends up in three effective instructions. We could get away with
+ -- two for intMax12bit < n < 3 * intMax12bit by recursing once. However,
+ -- this way is likely less surprising.
+ [ ANN desc (MOV tmp (OpImm (ImmInt n))),
+ ADD sp sp tmp
+ ]
+ where
+ desc = text "Move SP:" <+> int n
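+
+-- For example, @moveSp (-4096)@ exceeds the ±2048 range of a 12-bit
+-- immediate, so it becomes (a sketch):
+--
+-- @
+--     li  t6, -4096
+--     add sp, sp, t6
+-- @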
+
+--
+-- See Note [extra spill slots] in X86/Instr.hs
+--
+allocMoreStack ::
+ Platform ->
+ Int ->
+ NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr ->
+ UniqSM (NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr, [(BlockId, BlockId)])
+allocMoreStack _ _ top@(CmmData _ _) = return (top, [])
+allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
+ let entries = entryBlocks proc
+
+ uniqs <- getUniquesM
+
+ let delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
+ where
+ x = slots * spillSlotSize -- sp delta
+ alloc = mkStackAllocInstr platform delta
+ dealloc = mkStackDeallocInstr platform delta
+
+ retargetList = zip entries (map mkBlockId uniqs)
+
+ new_blockmap :: LabelMap BlockId
+ new_blockmap = mapFromList retargetList
+
+ insert_stack_insn (BasicBlock id insns)
+ | Just new_blockid <- mapLookup id new_blockmap =
+ [ BasicBlock id $ alloc ++ [B (TBlock new_blockid)],
+ BasicBlock new_blockid block'
+ ]
+ | otherwise =
+ [BasicBlock id block']
+ where
+ block' = foldr insert_dealloc [] insns
+
+ insert_dealloc insn r = case insn of
+ J_TBL {} -> dealloc ++ (insn : r)
+ ANN _ e -> insert_dealloc e r
+ _other
+ | jumpDestsOfInstr insn /= [] ->
+ patchJumpInstr insn retarget : r
+ _other -> insn : r
+ where
+ retarget b = fromMaybe b (mapLookup b new_blockmap)
+
+ new_code = concatMap insert_stack_insn code
+ return (CmmProc info lbl live (ListGraph new_code), retargetList)
+
+data Instr
+ = -- | Comment pseudo-op
+ COMMENT SDoc
+ | -- | Multi-line comment pseudo-op
+ MULTILINE_COMMENT SDoc
+ | -- | Annotated instruction. Should print <instr> # <doc>
+ ANN SDoc Instr
+ | -- | Location pseudo-op @.loc@ (file, line, col, name)
+ LOCATION Int Int Int LexicalFastString
+ | -- | Static data spat out during code generation.
+ LDATA Section RawCmmStatics
+ | -- | Start a new basic block.
+ --
+ -- Useful during codegen, removed later. Preceding instruction should be a
+ -- jump, as per the invariants for a BasicBlock (see Cmm).
+ NEWBLOCK BlockId
+ | -- | Specify current stack offset for benefit of subsequent passes
+ DELTA Int
+ | -- | Push a minimal stack frame consisting of the return address (RA) and the frame pointer (FP).
+ PUSH_STACK_FRAME
+ | -- | Pop the minimal stack frame of prior `PUSH_STACK_FRAME`.
+ POP_STACK_FRAME
+ | -- | Arithmetic addition (both integer and floating point)
+ --
+ -- @rd = rs1 + rs2@
+ ADD Operand Operand Operand
+ | -- | Arithmetic subtraction (both integer and floating point)
+ --
+ -- @rd = rs1 - rs2@
+ SUB Operand Operand Operand
+ | -- | Logical AND (integer only)
+ --
+ -- @rd = rs1 & rs2@
+ AND Operand Operand Operand
+ | -- | Logical OR (integer only)
+ --
+ -- @rd = rs1 | rs2@
+ OR Operand Operand Operand
+ | -- | Logical left shift (zero-extended, integer only)
+ --
+ -- @rd = rs1 << rs2@
+ SLL Operand Operand Operand
+ | -- | Logical right shift (zero-extended, integer only)
+ --
+ -- @rd = rs1 >> rs2@
+ SRL Operand Operand Operand
+ | -- | Arithmetic right shift (sign-extended, integer only)
+ --
+ -- @rd = rs1 >> rs2@
+ SRA Operand Operand Operand
+ | -- | Store to memory (both integer and floating point)
+ STR Format Operand Operand
+ | -- | Load from memory (sign-extended, integer and floating point)
+ LDR Format Operand Operand
+ | -- | Load from memory (unsigned, integer and floating point)
+ LDRU Format Operand Operand
+ | -- | Arithmetic multiplication (both integer and floating point)
+ --
+ -- @rd = rn × rm@
+ MUL Operand Operand Operand
+ | -- | Negation (both integer and floating point)
+ --
+ -- @rd = -op2@
+ NEG Operand Operand
+ | -- | Division (both integer and floating point)
+ --
+ -- @rd = rn ÷ rm@
+ DIV Operand Operand Operand
+ | -- | Remainder (integer only, signed)
+ --
+ -- @rd = rn % rm@
+ REM Operand Operand Operand
+ | -- | Remainder (integer only, unsigned)
+ --
+ -- @rd = |rn % rm|@
+ REMU Operand Operand Operand
+ | -- | High part of a multiplication that doesn't fit into 64 bits (integer only)
+ --
+ -- E.g. for a multiplication with 64-bit width: @rd = (rs1 * rs2) >> 64@.
+ MULH Operand Operand Operand
+ | -- | Unsigned division (integer only)
+ --
+ -- @rd = |rn ÷ rm|@
+ DIVU Operand Operand Operand
+ | -- | XOR (integer only)
+ --
+ -- @rd = rn ⊕ op2@
+ XOR Operand Operand Operand
+ | -- | OR with immediate (integer only)
+ --
+ -- @rd = rn | op2@
+ ORI Operand Operand Operand
+ | -- | XOR with immediate (integer only)
+ --
+ -- @rd = rn ⊕ op2@
+ XORI Operand Operand Operand
+ | -- | Move to register (integer and floating point)
+ --
+ -- @rd = rn@ or @rd = #imm@
+ MOV Operand Operand
+ | -- | Pseudo-op for conditional setting of a register.
+ --
+ -- @if(o2 cond o3) op <- 1 else op <- 0@
+ CSET Operand Operand Operand Cond
+ | -- | A jump instruction with data for switch/jump tables
+ J_TBL [Maybe BlockId] (Maybe CLabel) Reg
+ | -- | Unconditional jump (no linking)
+ B Target
+ | -- | Unconditional jump, links return address (sets @ra@/@x1@)
+ BL Reg [Reg]
+ | -- | branch with condition (integer only)
+ BCOND Cond Operand Operand Target
+ | -- | Fence instruction
+ --
+ -- Memory barrier.
+ FENCE FenceType FenceType
+ | -- | Floating point conversion
+ FCVT FcvtVariant Operand Operand
+ | -- | Floating point ABSolute value
+ FABS Operand Operand
+ | -- | Floating-point fused multiply-add instructions
+ --
+ -- - fmadd : d = r1 * r2 + r3
+ -- - fmsub : d = r1 * r2 - r3
+ -- - fnmsub: d = - r1 * r2 + r3
+ -- - fnmadd: d = - r1 * r2 - r3
+ FMA FMASign Operand Operand Operand Operand
+
+-- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
+data FenceType = FenceRead | FenceWrite | FenceReadWrite
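+
+-- For example, 'MO_AcquireFence' uses @FENCE FenceRead FenceReadWrite@,
+-- rendered as @fence r, rw@; 'MO_ReleaseFence' uses
+-- @FENCE FenceReadWrite FenceWrite@, i.e. @fence rw, w@.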
+
+-- | Variant of a floating point conversion instruction
+data FcvtVariant = FloatToFloat | IntToFloat | FloatToInt
+
+instrCon :: Instr -> String
+instrCon i =
+ case i of
+ COMMENT {} -> "COMMENT"
+ MULTILINE_COMMENT {} -> "COMMENT"
+ ANN {} -> "ANN"
+ LOCATION {} -> "LOCATION"
+ LDATA {} -> "LDATA"
+ NEWBLOCK {} -> "NEWBLOCK"
+ DELTA {} -> "DELTA"
+ PUSH_STACK_FRAME {} -> "PUSH_STACK_FRAME"
+ POP_STACK_FRAME {} -> "POP_STACK_FRAME"
+ ADD {} -> "ADD"
+ OR {} -> "OR"
+ MUL {} -> "MUL"
+ NEG {} -> "NEG"
+ DIV {} -> "DIV"
+ REM {} -> "REM"
+ REMU {} -> "REMU"
+ MULH {} -> "MULH"
+ SUB {} -> "SUB"
+ DIVU {} -> "DIVU"
+ AND {} -> "AND"
+ SRA {} -> "SRA"
+ XOR {} -> "XOR"
+ SLL {} -> "SLL"
+ SRL {} -> "SRL"
+ MOV {} -> "MOV"
+ ORI {} -> "ORI"
+ XORI {} -> "ORI"
+ STR {} -> "STR"
+ LDR {} -> "LDR"
+ LDRU {} -> "LDRU"
+ CSET {} -> "CSET"
+ J_TBL {} -> "J_TBL"
+ B {} -> "B"
+ BL {} -> "BL"
+ BCOND {} -> "BCOND"
+ FENCE {} -> "FENCE"
+ FCVT {} -> "FCVT"
+ FABS {} -> "FABS"
+ FMA variant _ _ _ _ ->
+ case variant of
+ FMAdd -> "FMADD"
+ FMSub -> "FMSUB"
+ FNMAdd -> "FNMADD"
+ FNMSub -> "FNMSUB"
+
+data Target
+ = TBlock BlockId
+ | TReg Reg
+
+data Operand
+ = -- | register
+ OpReg Width Reg
+ | -- | immediate value
+ OpImm Imm
+ | -- | memory reference
+ OpAddr AddrMode
+ deriving (Eq, Show)
+
+operandFromReg :: Reg -> Operand
+operandFromReg = OpReg W64
+
+operandFromRegNo :: RegNo -> Operand
+operandFromRegNo = operandFromReg . regSingle
+
+zero, ra, sp, gp, tp, fp, tmp :: Operand
+zero = operandFromReg zeroReg
+ra = operandFromReg raReg
+sp = operandFromReg spMachReg
+gp = operandFromRegNo 3
+tp = operandFromRegNo 4
+fp = operandFromRegNo 8
+tmp = operandFromReg tmpReg
+
+x0, x1, x2, x3, x4, x5, x6, x7 :: Operand
+x8, x9, x10, x11, x12, x13, x14, x15 :: Operand
+x16, x17, x18, x19, x20, x21, x22, x23 :: Operand
+x24, x25, x26, x27, x28, x29, x30, x31 :: Operand
+x0 = operandFromRegNo x0RegNo
+x1 = operandFromRegNo 1
+x2 = operandFromRegNo 2
+x3 = operandFromRegNo 3
+x4 = operandFromRegNo 4
+x5 = operandFromRegNo x5RegNo
+x6 = operandFromRegNo 6
+x7 = operandFromRegNo x7RegNo
+
+x8 = operandFromRegNo 8
+
+x9 = operandFromRegNo 9
+
+x10 = operandFromRegNo x10RegNo
+
+x11 = operandFromRegNo 11
+
+x12 = operandFromRegNo 12
+
+x13 = operandFromRegNo 13
+
+x14 = operandFromRegNo 14
+
+x15 = operandFromRegNo 15
+
+x16 = operandFromRegNo 16
+
+x17 = operandFromRegNo x17RegNo
+
+x18 = operandFromRegNo 18
+
+x19 = operandFromRegNo 19
+
+x20 = operandFromRegNo 20
+
+x21 = operandFromRegNo 21
+
+x22 = operandFromRegNo 22
+
+x23 = operandFromRegNo 23
+
+x24 = operandFromRegNo 24
+
+x25 = operandFromRegNo 25
+
+x26 = operandFromRegNo 26
+
+x27 = operandFromRegNo 27
+
+x28 = operandFromRegNo x28RegNo
+
+x29 = operandFromRegNo 29
+
+x30 = operandFromRegNo 30
+
+x31 = operandFromRegNo x31RegNo
+
+d0, d1, d2, d3, d4, d5, d6, d7 :: Operand
+d8, d9, d10, d11, d12, d13, d14, d15 :: Operand
+d16, d17, d18, d19, d20, d21, d22, d23 :: Operand
+d24, d25, d26, d27, d28, d29, d30, d31 :: Operand
+d0 = operandFromRegNo d0RegNo
+d1 = operandFromRegNo 33
+d2 = operandFromRegNo 34
+d3 = operandFromRegNo 35
+d4 = operandFromRegNo 36
+d5 = operandFromRegNo 37
+d6 = operandFromRegNo 38
+d7 = operandFromRegNo d7RegNo
+
+d8 = operandFromRegNo 40
+d9 = operandFromRegNo 41
+d10 = operandFromRegNo d10RegNo
+d11 = operandFromRegNo 43
+d12 = operandFromRegNo 44
+d13 = operandFromRegNo 45
+d14 = operandFromRegNo 46
+d15 = operandFromRegNo 47
+d16 = operandFromRegNo 48
+d17 = operandFromRegNo d17RegNo
+d18 = operandFromRegNo 50
+d19 = operandFromRegNo 51
+d20 = operandFromRegNo 52
+d21 = operandFromRegNo 53
+d22 = operandFromRegNo 54
+d23 = operandFromRegNo 55
+d24 = operandFromRegNo 56
+d25 = operandFromRegNo 57
+d26 = operandFromRegNo 58
+d27 = operandFromRegNo 59
+d28 = operandFromRegNo 60
+d29 = operandFromRegNo 61
+d30 = operandFromRegNo 62
+d31 = operandFromRegNo d31RegNo
+
+fitsIn12bitImm :: (Num a, Ord a) => a -> Bool
+fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit
+
+intMin12bit :: (Num a) => a
+intMin12bit = -2048
+
+intMax12bit :: (Num a) => a
+intMax12bit = 2047
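+
+-- For example, @fitsIn12bitImm 2047@ is 'True' while @fitsIn12bitImm 2048@ is
+-- 'False': RISC-V I-type immediates are sign-extended 12-bit values, covering
+-- [-2^11 .. 2^11 - 1].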
+
+fitsIn32bits :: (Num a, Ord a, Bits a) => a -> Bool
+fitsIn32bits i = (-1 `shiftL` 31) <= i && i <= (1 `shiftL` 31 - 1)
+
+isNbitEncodeable :: Int -> Integer -> Bool
+isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
+
+isEncodeableInWidth :: Width -> Integer -> Bool
+isEncodeableInWidth = isNbitEncodeable . widthInBits
+
+isIntOp :: Operand -> Bool
+isIntOp = not . isFloatOp
+
+isFloatOp :: Operand -> Bool
+isFloatOp (OpReg _ reg) | isFloatReg reg = True
+isFloatOp _ = False
+
+isFloatReg :: Reg -> Bool
+isFloatReg (RegReal (RealRegSingle i)) | i > 31 = True
+isFloatReg (RegVirtual (VirtualRegF _)) = True
+isFloatReg (RegVirtual (VirtualRegD _)) = True
+isFloatReg _ = False
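+
+-- Note that real registers are numbered 0-31 for the general-purpose bank and
+-- 32-63 for the floating-point bank, so any real register above 31 is a float
+-- register.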
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Ppr.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Ppr.hs
@@ -0,0 +1,715 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module GHC.CmmToAsm.RV64.Ppr (pprNatCmmDecl, pprInstr) where
+
+import GHC.Cmm hiding (topInfoTable)
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Label
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Format
+import GHC.CmmToAsm.Ppr
+import GHC.CmmToAsm.RV64.Cond
+import GHC.CmmToAsm.RV64.Instr
+import GHC.CmmToAsm.RV64.Regs
+import GHC.CmmToAsm.Types
+import GHC.CmmToAsm.Utils
+import GHC.Platform
+import GHC.Platform.Reg
+import GHC.Prelude hiding (EQ)
+import GHC.Types.Basic (Alignment, alignmentBytes, mkAlignment)
+import GHC.Types.Unique (getUnique, pprUniqueAlways)
+import GHC.Utils.Outputable
+import GHC.Utils.Panic
+
+pprNatCmmDecl :: forall doc. (IsDoc doc) => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc
+pprNatCmmDecl config (CmmData section dats) =
+ pprSectionAlign config section $$ pprDatas config dats
+pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
+ let platform = ncgPlatform config
+
+ pprProcAlignment :: doc
+ pprProcAlignment = maybe empty (pprAlign . mkAlignment) (ncgProcAlignment config)
+ in pprProcAlignment
+ $$ case topInfoTable proc of
+ Nothing ->
+ -- special case for code without info table:
+ pprSectionAlign config (Section Text lbl)
+ $$
+ -- do not
+ -- pprProcAlignment config $$
+ pprLabel platform lbl
+ $$ vcat (map (pprBasicBlock config top_info) blocks) -- blocks guaranteed not null, so label needed
+ $$ ppWhen
+ (ncgDwarfEnabled config)
+ (line (pprBlockEndLabel platform lbl) $$ line (pprProcEndLabel platform lbl))
+ $$ pprSizeDecl platform lbl
+ Just (CmmStaticsRaw info_lbl _) ->
+ pprSectionAlign config (Section Text info_lbl)
+ $$
+ -- pprProcAlignment config $$
+ ( if platformHasSubsectionsViaSymbols platform
+ then line (pprAsmLabel platform (mkDeadStripPreventer info_lbl) <> char ':')
+ else empty
+ )
+ $$ vcat (map (pprBasicBlock config top_info) blocks)
+ $$ ppWhen (ncgDwarfEnabled config) (line (pprProcEndLabel platform info_lbl))
+ $$
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+ ( if platformHasSubsectionsViaSymbols platform
+ then -- See Note [Subsections Via Symbols]
+
+ line
+ $ text "\t.long "
+ <+> pprAsmLabel platform info_lbl
+ <+> char '-'
+ <+> pprAsmLabel platform (mkDeadStripPreventer info_lbl)
+ else empty
+ )
+ $$ pprSizeDecl platform info_lbl
+{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> SDoc #-}
+{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> HDoc #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
+
+pprLabel :: (IsDoc doc) => Platform -> CLabel -> doc
+pprLabel platform lbl =
+ pprGloblDecl platform lbl
+ $$ pprTypeDecl platform lbl
+ $$ line (pprAsmLabel platform lbl <> char ':')
+
+pprAlign :: (IsDoc doc) => Alignment -> doc
+pprAlign alignment =
+ -- "The .align directive for RISC-V is an alias to .p2align, which aligns to a
+ -- power of two, so .align 2 means align to 4 bytes. Because the definition of
+ -- the .align directive varies by architecture, it is recommended to use the
+ -- unambiguous .p2align or .balign directives instead."
+ -- (https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/riscv-asm.md#-align)
+ line $ text "\t.balign " <> int (alignmentBytes alignment)
+
+-- | Print appropriate alignment for the given section type.
+--
+-- Currently, this always aligns to a full machine word (8 bytes). A future
+-- improvement could be to do this per section type (though it's probably not
+-- a big gain).
+pprAlignForSection :: (IsDoc doc) => SectionType -> doc
+pprAlignForSection _seg = pprAlign . mkAlignment $ 8
+
+-- | Print section header and appropriate alignment for that section.
+--
+-- This will e.g. emit a header like:
+--
+-- .section .text
+-- .balign 8
+pprSectionAlign :: (IsDoc doc) => NCGConfig -> Section -> doc
+pprSectionAlign _config (Section (OtherSection _) _) =
+ panic "RV64.Ppr.pprSectionAlign: unknown section"
+pprSectionAlign config sec@(Section seg _) =
+ line (pprSectionHeader config sec)
+ $$ pprAlignForSection seg
+
+pprProcEndLabel ::
+ (IsLine doc) =>
+ Platform ->
+ -- | Procedure name
+ CLabel ->
+ doc
+pprProcEndLabel platform lbl =
+ pprAsmLabel platform (mkAsmTempProcEndLabel lbl) <> colon
+
+pprBlockEndLabel ::
+ (IsLine doc) =>
+ Platform ->
+ -- | Block name
+ CLabel ->
+ doc
+pprBlockEndLabel platform lbl =
+ pprAsmLabel platform (mkAsmTempEndLabel lbl) <> colon
+
+-- | Output the ELF .size directive (if needed.)
+pprSizeDecl :: (IsDoc doc) => Platform -> CLabel -> doc
+pprSizeDecl platform lbl
+ | osElfTarget (platformOS platform) =
+ line $ text "\t.size" <+> asmLbl <> text ", .-" <> asmLbl
+ where
+ asmLbl = pprAsmLabel platform lbl
+pprSizeDecl _ _ = empty
+
+pprBasicBlock ::
+ (IsDoc doc) =>
+ NCGConfig ->
+ LabelMap RawCmmStatics ->
+ NatBasicBlock Instr ->
+ doc
+pprBasicBlock config info_env (BasicBlock blockid instrs) =
+ maybe_infotable
+ $ pprLabel platform asmLbl
+ $$ vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} optInstrs))
+ $$ ppWhen
+ (ncgDwarfEnabled config)
+ ( -- Emit both end labels since this may end up being a standalone
+ -- top-level block
+ line
+ ( pprBlockEndLabel platform asmLbl
+ <> pprProcEndLabel platform asmLbl
+ )
+ )
+ where
+ -- TODO: Check if we can filter more instructions here.
+ -- TODO: Shouldn't this be a more general check on a higher level?
+ -- Filter out identity moves. E.g. mov x18, x18 will be dropped.
+ optInstrs = filter f instrs
+ where
+ f (MOV o1 o2) | o1 == o2 = False
+ f _ = True
+
+ asmLbl = blockLbl blockid
+ platform = ncgPlatform config
+ maybe_infotable c = case mapLookup blockid info_env of
+ Nothing -> c
+ Just (CmmStaticsRaw info_lbl info) ->
+ -- pprAlignForSection platform Text $$
+ infoTableLoc
+ $$ vcat (map (pprData config) info)
+ $$ pprLabel platform info_lbl
+ $$ c
+ $$ ppWhen
+ (ncgDwarfEnabled config)
+ (line (pprBlockEndLabel platform info_lbl))
+ -- Make sure the info table has the right .loc for the block
+ -- coming right after it. See Note [Info Offset]
+ infoTableLoc = case instrs of
+ (l@LOCATION {} : _) -> pprInstr platform l
+ _other -> empty
+
+pprDatas :: (IsDoc doc) => NCGConfig -> RawCmmStatics -> doc
+-- See Note [emit-time elimination of static indirections] in "GHC.Cmm.CLabel".
+pprDatas config (CmmStaticsRaw alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
+ | lbl == mkIndStaticInfoLabel,
+ let labelInd (CmmLabelOff l _) = Just l
+ labelInd (CmmLabel l) = Just l
+ labelInd _ = Nothing,
+ Just ind' <- labelInd ind,
+ alias `mayRedirectTo` ind' =
+ pprGloblDecl (ncgPlatform config) alias
+ $$ line (text ".equiv" <+> pprAsmLabel (ncgPlatform config) alias <> comma <> pprAsmLabel (ncgPlatform config) ind')
+pprDatas config (CmmStaticsRaw lbl dats) =
+ vcat (pprLabel platform lbl : map (pprData config) dats)
+ where
+ platform = ncgPlatform config
+
+pprData :: (IsDoc doc) => NCGConfig -> CmmStatic -> doc
+pprData _config (CmmString str) = line (pprString str)
+pprData _config (CmmFileEmbed path _) = line (pprFileEmbed path)
+-- TODO: AFAIK there is no Darwin for RISCV, so we may consider simplifying this.
+pprData config (CmmUninitialised bytes) =
+ line
+ $ let platform = ncgPlatform config
+ in if platformOS platform == OSDarwin
+ then text ".space " <> int bytes
+ else text ".skip " <> int bytes
+pprData config (CmmStaticLit lit) = pprDataItem config lit
+
+pprGloblDecl :: (IsDoc doc) => Platform -> CLabel -> doc
+pprGloblDecl platform lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = line (text "\t.globl " <> pprAsmLabel platform lbl)
+
+-- Note [Always use objects for info tables]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- See discussion in X86.Ppr for why this is necessary. Essentially we need to
+-- ensure that we never pass function symbols when we might want to lookup the
+-- info table. If we did, we could end up with procedure linking tables
+-- (PLT)s, and thus the lookup wouldn't point to the function, but into the
+-- jump table.
+--
+-- Fun fact: The LLVMMangler exists to patch this issue on the LLVM side as
+-- well.
+pprLabelType' :: (IsLine doc) => Platform -> CLabel -> doc
+pprLabelType' platform lbl =
+ if isCFunctionLabel lbl || functionOkInfoTable
+ then text "@function"
+ else text "@object"
+ where
+ functionOkInfoTable =
+ platformTablesNextToCode platform
+ && isInfoTableLabel lbl
+ && not (isCmmInfoTableLabel lbl)
+ && not (isConInfoTableLabel lbl)
+
+-- this is called pprTypeAndSizeDecl in PPC.Ppr
+pprTypeDecl :: (IsDoc doc) => Platform -> CLabel -> doc
+pprTypeDecl platform lbl =
+ if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl
+ then line (text ".type " <> pprAsmLabel platform lbl <> text ", " <> pprLabelType' platform lbl)
+ else empty
+
+pprDataItem :: (IsDoc doc) => NCGConfig -> CmmLit -> doc
+pprDataItem config lit =
+ lines_ (ppr_item (cmmTypeFormat $ cmmLitType platform lit) lit)
+ where
+ platform = ncgPlatform config
+
+ imm = litToImm lit
+
+ ppr_item II8 _ = [text "\t.byte\t" <> pprDataImm platform imm]
+ ppr_item II16 _ = [text "\t.short\t" <> pprDataImm platform imm]
+ ppr_item II32 _ = [text "\t.long\t" <> pprDataImm platform imm]
+ ppr_item II64 _ = [text "\t.quad\t" <> pprDataImm platform imm]
+ ppr_item FF32 (CmmFloat r _) =
+ let bs = floatToBytes (fromRational r)
+ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs
+ ppr_item FF64 (CmmFloat r _) =
+ let bs = doubleToBytes (fromRational r)
+ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs
+ ppr_item _ _ = pprPanic "pprDataItem:ppr_item" (text $ show lit)
+
+-- | Pretty print an immediate value in the @data@ section
+--
+-- This does not include any checks. We rely on the Assembler to check for
+-- errors. Use `pprOpImm` for immediates in instructions (operands.)
+pprDataImm :: (IsLine doc) => Platform -> Imm -> doc
+pprDataImm _ (ImmInt i) = int i
+pprDataImm _ (ImmInteger i) = integer i
+pprDataImm p (ImmCLbl l) = pprAsmLabel p l
+pprDataImm p (ImmIndex l i) = pprAsmLabel p l <> char '+' <> int i
+pprDataImm _ (ImmLit s) = ftext s
+pprDataImm _ (ImmFloat f) = float (fromRational f)
+pprDataImm _ (ImmDouble d) = double (fromRational d)
+pprDataImm p (ImmConstantSum a b) = pprDataImm p a <> char '+' <> pprDataImm p b
+pprDataImm p (ImmConstantDiff a b) =
+ pprDataImm p a
+ <> char '-'
+ <> lparen
+ <> pprDataImm p b
+ <> rparen
+
+-- | Comment @c@ with @# c@
+asmComment :: SDoc -> SDoc
+asmComment c = text "#" <+> c
+
+-- | Comment @c@ with @// c@
+asmDoubleslashComment :: SDoc -> SDoc
+asmDoubleslashComment c = text "//" <+> c
+
+-- | Comment @c@ with @/* c */@ (multiline comment)
+asmMultilineComment :: SDoc -> SDoc
+asmMultilineComment c = text "/*" $+$ c $+$ text "*/"
+
+-- | Pretty print an immediate operand of an instruction
+--
+-- The kinds of immediates we can use here are pretty limited: RISCV doesn't
+-- support index expressions (as e.g. Aarch64 does.) Floating points need to
+-- fit in range. As we don't need them, we forbid them to save us from future
+-- trouble.
+pprOpImm :: (IsLine doc) => Platform -> Imm -> doc
+pprOpImm platform im = case im of
+ ImmInt i -> int i
+ ImmInteger i -> integer i
+ ImmCLbl l -> char '=' <> pprAsmLabel platform l
+ _ -> pprPanic "RV64.Ppr.pprOpImm" (text "Unsupported immediate for instruction operands" <> colon <+> (text . show) im)
+
+-- | Negate integer immediate operand
+--
+-- This function is partial and will panic if the operand is not an integer.
+negOp :: Operand -> Operand
+negOp (OpImm (ImmInt i)) = OpImm (ImmInt (negate i))
+negOp (OpImm (ImmInteger i)) = OpImm (ImmInteger (negate i))
+negOp op = pprPanic "RV64.negOp" (text $ show op)
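+
+-- For example, @negOp (OpImm (ImmInt 8))@ yields @OpImm (ImmInt (-8))@. This is
+-- used to emit a @SUB@ with an immediate as @addi rd, rs, -imm@, since RISC-V
+-- has no @subi@ instruction.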
+
+-- | Pretty print an operand
+pprOp :: (IsLine doc) => Platform -> Operand -> doc
+pprOp plat op = case op of
+ OpReg w r -> pprReg w r
+ OpImm im -> pprOpImm plat im
+ OpAddr (AddrRegImm r1 im) -> pprOpImm plat im <> char '(' <> pprReg W64 r1 <> char ')'
+ OpAddr (AddrReg r1) -> text "0(" <> pprReg W64 r1 <> char ')'
+
+-- | Pretty print register with calling convention name
+--
+-- This representation makes it easier to reason about the emitted assembly
+-- code.
+pprReg :: forall doc. (IsLine doc) => Width -> Reg -> doc
+pprReg w r = case r of
+ RegReal (RealRegSingle i) -> ppr_reg_no i
+ -- virtual regs should not show up, but this is helpful for debugging.
+ RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
+ _ -> pprPanic "RiscV64.pprReg" (text (show r) <+> ppr w)
+ where
+ ppr_reg_no :: Int -> doc
+ -- General Purpose Registers
+ ppr_reg_no 0 = text "zero"
+ ppr_reg_no 1 = text "ra"
+ ppr_reg_no 2 = text "sp"
+ ppr_reg_no 3 = text "gp"
+ ppr_reg_no 4 = text "tp"
+ ppr_reg_no 5 = text "t0"
+ ppr_reg_no 6 = text "t1"
+ ppr_reg_no 7 = text "t2"
+ ppr_reg_no 8 = text "s0"
+ ppr_reg_no 9 = text "s1"
+ ppr_reg_no 10 = text "a0"
+ ppr_reg_no 11 = text "a1"
+ ppr_reg_no 12 = text "a2"
+ ppr_reg_no 13 = text "a3"
+ ppr_reg_no 14 = text "a4"
+ ppr_reg_no 15 = text "a5"
+ ppr_reg_no 16 = text "a6"
+ ppr_reg_no 17 = text "a7"
+ ppr_reg_no 18 = text "s2"
+ ppr_reg_no 19 = text "s3"
+ ppr_reg_no 20 = text "s4"
+ ppr_reg_no 21 = text "s5"
+ ppr_reg_no 22 = text "s6"
+ ppr_reg_no 23 = text "s7"
+ ppr_reg_no 24 = text "s8"
+ ppr_reg_no 25 = text "s9"
+ ppr_reg_no 26 = text "s10"
+ ppr_reg_no 27 = text "s11"
+ ppr_reg_no 28 = text "t3"
+ ppr_reg_no 29 = text "t4"
+ ppr_reg_no 30 = text "t5"
+ ppr_reg_no 31 = text "t6"
+ -- Floating Point Registers
+ ppr_reg_no 32 = text "ft0"
+ ppr_reg_no 33 = text "ft1"
+ ppr_reg_no 34 = text "ft2"
+ ppr_reg_no 35 = text "ft3"
+ ppr_reg_no 36 = text "ft4"
+ ppr_reg_no 37 = text "ft5"
+ ppr_reg_no 38 = text "ft6"
+ ppr_reg_no 39 = text "ft7"
+ ppr_reg_no 40 = text "fs0"
+ ppr_reg_no 41 = text "fs1"
+ ppr_reg_no 42 = text "fa0"
+ ppr_reg_no 43 = text "fa1"
+ ppr_reg_no 44 = text "fa2"
+ ppr_reg_no 45 = text "fa3"
+ ppr_reg_no 46 = text "fa4"
+ ppr_reg_no 47 = text "fa5"
+ ppr_reg_no 48 = text "fa6"
+ ppr_reg_no 49 = text "fa7"
+ ppr_reg_no 50 = text "fs2"
+ ppr_reg_no 51 = text "fs3"
+ ppr_reg_no 52 = text "fs4"
+ ppr_reg_no 53 = text "fs5"
+ ppr_reg_no 54 = text "fs6"
+ ppr_reg_no 55 = text "fs7"
+ ppr_reg_no 56 = text "fs8"
+ ppr_reg_no 57 = text "fs9"
+ ppr_reg_no 58 = text "fs10"
+ ppr_reg_no 59 = text "fs11"
+ ppr_reg_no 60 = text "ft8"
+ ppr_reg_no 61 = text "ft9"
+ ppr_reg_no 62 = text "ft10"
+ ppr_reg_no 63 = text "ft11"
+ ppr_reg_no i
+ | i < 0 = pprPanic "Unexpected register number (min is 0)" (ppr w <+> int i)
+ | i > 63 = pprPanic "Unexpected register number (max is 63)" (ppr w <+> int i)
+ -- no support for widths > W64.
+ | otherwise = pprPanic "Unsupported width in register (max is 64)" (ppr w <+> int i)
+
+-- | Single precision `Operand` (floating-point)
+isSingleOp :: Operand -> Bool
+isSingleOp (OpReg W32 _) = True
+isSingleOp _ = False
+
+-- | Double precision `Operand` (floating-point)
+isDoubleOp :: Operand -> Bool
+isDoubleOp (OpReg W64 _) = True
+isDoubleOp _ = False
+
+-- | `Operand` is an immediate value
+isImmOp :: Operand -> Bool
+isImmOp (OpImm _) = True
+isImmOp _ = False
+
+-- | `Operand` is an immediate @0@ value
+isImmZero :: Operand -> Bool
+isImmZero (OpImm (ImmFloat 0)) = True
+isImmZero (OpImm (ImmDouble 0)) = True
+isImmZero (OpImm (ImmInt 0)) = True
+isImmZero _ = False
+
+-- | `Target` represents a label
+isLabel :: Target -> Bool
+isLabel (TBlock _) = True
+isLabel _ = False
+
+-- | Get the pretty-printed label from a `Target`
+--
+-- This function is partial and will panic if the `Target` is not a label.
+getLabel :: (IsLine doc) => Platform -> Target -> doc
+getLabel platform (TBlock bid) = pprBlockId platform bid
+ where
+ pprBlockId :: (IsLine doc) => Platform -> BlockId -> doc
+ pprBlockId platform bid = pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
+getLabel _platform _other = panic "Cannot turn this into a label"
+
+-- | Pretty-print an `Instr`
+--
+-- This function is partial and will panic if the `Instr` is not supported. This
+-- can happen due to invalid operands or unexpected meta instructions.
+pprInstr :: (IsDoc doc) => Platform -> Instr -> doc
+pprInstr platform instr = case instr of
+ -- see Note [dualLine and dualDoc] in GHC.Utils.Outputable
+ COMMENT s -> dualDoc (asmComment s) empty
+ MULTILINE_COMMENT s -> dualDoc (asmMultilineComment s) empty
+ ANN d i -> dualDoc (pprInstr platform i <+> asmDoubleslashComment d) (pprInstr platform i)
+ LOCATION file line' col _name ->
+ line (text "\t.loc" <+> int file <+> int line' <+> int col)
+ DELTA d -> dualDoc (asmComment $ text "\tdelta = " <> int d) empty
+ NEWBLOCK _ -> panic "PprInstr: NEWBLOCK"
+ LDATA _ _ -> panic "pprInstr: LDATA"
+ PUSH_STACK_FRAME ->
+ lines_
+ [ text "\taddi sp, sp, -16",
+ text "\tsd x1, 8(sp)", -- store RA
+ text "\tsd x8, 0(sp)", -- store FP/s0
+ text "\taddi x8, sp, 16"
+ ]
+ POP_STACK_FRAME ->
+ lines_
+ [ text "\tld x8, 0(sp)", -- restore FP/s0
+ text "\tld x1, 8(sp)", -- restore RA
+ text "\taddi sp, sp, 16"
+ ]
+ ADD o1 o2 o3
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfadd." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
+ -- This case is used for sign extension: SEXT.W op
+ | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3 -> op3 (text "\taddiw") o1 o2 o3
+ | otherwise -> op3 (text "\tadd") o1 o2 o3
+ MUL o1 o2 o3
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfmul." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
+ | otherwise -> op3 (text "\tmul") o1 o2 o3
+ MULH o1 o2 o3 -> op3 (text "\tmulh") o1 o2 o3
+ NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfneg.s") o1 o2
+ NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfneg.d") o1 o2
+ NEG o1 o2 -> op2 (text "\tneg") o1 o2
+ DIV o1 o2 o3
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 ->
+ -- TODO: This must (likely) be refined regarding width
+ op3 (text "\tfdiv." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
+ DIV o1 o2 o3 -> op3 (text "\tdiv") o1 o2 o3
+ REM o1 o2 o3
+ | isFloatOp o1 || isFloatOp o2 || isFloatOp o3 ->
+ panic "pprInstr - REM not implemented for floats (yet)"
+ REM o1 o2 o3 -> op3 (text "\trem") o1 o2 o3
+ REMU o1 o2 o3 -> op3 (text "\tremu") o1 o2 o3
+ SUB o1 o2 o3
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfsub." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
+ | isImmOp o3 -> op3 (text "\taddi") o1 o2 (negOp o3)
+ | otherwise -> op3 (text "\tsub") o1 o2 o3
+ DIVU o1 o2 o3 -> op3 (text "\tdivu") o1 o2 o3
+ AND o1 o2 o3
+ | isImmOp o3 -> op3 (text "\tandi") o1 o2 o3
+ | otherwise -> op3 (text "\tand") o1 o2 o3
+ OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3
+ SRA o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3
+ SRA o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3
+ XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3
+ SLL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3
+ SRL o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3
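+ -- Note: Integer constant moves follow the standard @li@ expansion strategy:
+ -- immediates that fit in 12 bits become a single @addi@ from @zero@; 32-bit
+ -- immediates become @lui@ plus @addiw@ on the @%hi@/@%lo@ parts; anything
+ -- larger is delegated to the assembler's @li@ pseudo-instruction. For example
+ -- (illustrative), moving the constant @0x12345@ (74565) into @t0@ emits
+ -- @lui t0, %hi(74565)@ followed by @addiw t0, t0, %lo(74565)@.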
+ MOV o1 o2
+ | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs
+ | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs
+ | isFloatOp o1 && isImmZero o2 && isDoubleOp o1 -> op2 (text "\tfcvt.d.w") o1 zero
+ | isFloatOp o1 && isImmZero o2 && isSingleOp o1 -> op2 (text "\tfcvt.s.w") o1 zero
+ | isFloatOp o1 && not (isFloatOp o2) && isSingleOp o1 -> op2 (text "\tfmv.w.x") o1 o2
+ | isFloatOp o1 && not (isFloatOp o2) && isDoubleOp o1 -> op2 (text "\tfmv.d.x") o1 o2
+ | not (isFloatOp o1) && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.x.w") o1 o2
+ | not (isFloatOp o1) && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.x.d") o1 o2
+ | (OpImm (ImmInteger i)) <- o2,
+ fitsIn12bitImm i ->
+ lines_ [text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2]
+ | (OpImm (ImmInt i)) <- o2,
+ fitsIn12bitImm i ->
+ lines_ [text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2]
+ | (OpImm (ImmInteger i)) <- o2,
+ fitsIn32bits i ->
+ lines_
+ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")",
+ text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")"
+ ]
+ | (OpImm (ImmInt i)) <- o2,
+ fitsIn32bits i ->
+ lines_
+ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")",
+ text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")"
+ ]
+ | isImmOp o2 ->
+ -- Surrender! Let the assembler figure out the right expressions with pseudo-op LI.
+ lines_ [text "\tli" <+> pprOp platform o1 <> comma <+> pprOp platform o2]
+ | otherwise -> op3 (text "\taddi") o1 o2 (OpImm (ImmInt 0))
+ ORI o1 o2 o3 -> op3 (text "\tori") o1 o2 o3
+ XORI o1 o2 o3 -> op3 (text "\txori") o1 o2 o3
+ J_TBL _ _ r -> pprInstr platform (B (TReg r))
+ B l | isLabel l -> line $ text "\tjal" <+> pprOp platform x0 <> comma <+> getLabel platform l
+ B (TReg r) -> line $ text "\tjalr" <+> pprOp platform x0 <> comma <+> pprReg W64 r <> comma <+> text "0"
+ BL r _ -> line $ text "\tjalr" <+> text "x1" <> comma <+> pprReg W64 r <> comma <+> text "0"
+ BCOND c l r t
+ | isLabel t ->
+ line $ text "\t" <> pprBcond c <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
+ BCOND _ _ _ (TReg _) -> panic "RV64.ppr: No conditional branching to registers!"
+ CSET o l r c -> case c of
+ EQ
+ | isIntOp l && isIntOp r ->
+ lines_
+ [ subFor l r,
+ text "\tseqz" <+> pprOp platform o <> comma <+> pprOp platform o
+ ]
+ EQ | isFloatOp l && isFloatOp r -> line $ binOp ("\tfeq." ++ floatOpPrecision platform l r)
+ NE
+ | isIntOp l && isIntOp r ->
+ lines_
+ [ subFor l r,
+ text "\tsnez" <+> pprOp platform o <> comma <+> pprOp platform o
+ ]
+ NE
+ | isFloatOp l && isFloatOp r ->
+ lines_
+ [ binOp ("\tfeq." ++ floatOpPrecision platform l r),
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
+ ]
+ SLT -> lines_ [sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r]
+ SLE ->
+ lines_
+ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l,
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
+ ]
+ SGE ->
+ lines_
+ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r,
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
+ ]
+ SGT -> lines_ [sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l]
+ ULT -> lines_ [sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r]
+ ULE ->
+ lines_
+ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l,
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
+ ]
+ UGE ->
+ lines_
+ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r,
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
+ ]
+ UGT -> lines_ [sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l]
+ FLT | isFloatOp l && isFloatOp r -> line $ binOp ("\tflt." ++ floatOpPrecision platform l r)
+ FLE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfle." ++ floatOpPrecision platform l r)
+ FGT | isFloatOp l && isFloatOp r -> line $ binOp ("\tfgt." ++ floatOpPrecision platform l r)
+ FGE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfge." ++ floatOpPrecision platform l r)
+ x -> pprPanic "RV64.ppr: unhandled CSET conditional" (text (show x) <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l)
+ where
+ subFor l r
+ | (OpImm _) <- r = text "\taddi" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform (negOp r)
+ | (OpImm _) <- l = panic "RV64.ppr: Cannot SUB IMM _"
+ | otherwise = text "\tsub" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r
+ sltFor l r
+ | (OpImm _) <- r = text "\tslti"
+ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLT IMM _"
+ | otherwise = text "\tslt"
+ sltuFor l r
+ | (OpImm _) <- r = text "\tsltui"
+ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLTU IMM _"
+ | otherwise = text "\tsltu"
+ binOp :: (IsLine doc) => String -> doc
+ binOp op = text op <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r
+ STR II8 o1 o2 -> op2 (text "\tsb") o1 o2
+ STR II16 o1 o2 -> op2 (text "\tsh") o1 o2
+ STR II32 o1 o2 -> op2 (text "\tsw") o1 o2
+ STR II64 o1 o2 -> op2 (text "\tsd") o1 o2
+ STR FF32 o1 o2 -> op2 (text "\tfsw") o1 o2
+ STR FF64 o1 o2 -> op2 (text "\tfsd") o1 o2
+ LDR _f o1 (OpImm (ImmIndex lbl off)) ->
+ lines_
+ [ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl,
+ text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off
+ ]
+ LDR _f o1 (OpImm (ImmCLbl lbl)) ->
+ line $ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl
+ LDR II8 o1 o2 -> op2 (text "\tlb") o1 o2
+ LDR II16 o1 o2 -> op2 (text "\tlh") o1 o2
+ LDR II32 o1 o2 -> op2 (text "\tlw") o1 o2
+ LDR II64 o1 o2 -> op2 (text "\tld") o1 o2
+ LDR FF32 o1 o2 -> op2 (text "\tflw") o1 o2
+ LDR FF64 o1 o2 -> op2 (text "\tfld") o1 o2
+ LDRU II8 o1 o2 -> op2 (text "\tlbu") o1 o2
+ LDRU II16 o1 o2 -> op2 (text "\tlhu") o1 o2
+ LDRU II32 o1 o2 -> op2 (text "\tlwu") o1 o2
+ -- double words (64bit) cannot be sign extended by definition
+ LDRU II64 o1 o2 -> op2 (text "\tld") o1 o2
+ LDRU FF32 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tflw") o1 o2
+ LDRU FF32 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tflw") o1 o2
+ LDRU FF64 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tfld") o1 o2
+ LDRU FF64 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tfld") o1 o2
+ LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
+ FENCE r w -> line $ text "\tfence" <+> pprFenceType r <> char ',' <+> pprFenceType w
+ FCVT FloatToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2
+ FCVT FloatToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.s") o1 o2
+ FCVT FloatToFloat o1 o2 ->
+ pprPanic "RV64.pprInstr - impossible float to float conversion"
+ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
+ FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.s.w") o1 o2
+ FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.l") o1 o2
+ FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.w") o1 o2
+ FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.d.l") o1 o2
+ FCVT IntToFloat o1 o2 ->
+ pprPanic "RV64.pprInstr - impossible integer to float conversion"
+ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
+ FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.w.s") o1 o2
+ FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.w.d") o1 o2
+ FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.l.s") o1 o2
+ FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.l.d") o1 o2
+ FCVT FloatToInt o1 o2 ->
+ pprPanic "RV64.pprInstr - impossible float to integer conversion"
+ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
+ FABS o1 o2 | isSingleOp o2 -> op2 (text "\tfabs.s") o1 o2
+ FABS o1 o2 | isDoubleOp o2 -> op2 (text "\tfabs.d") o1 o2
+ FMA variant d r1 r2 r3 ->
+ let fma = case variant of
+ FMAdd -> text "\tfmadd" <> dot <> floatPrecision d
+ FMSub -> text "\tfmsub" <> dot <> floatPrecision d
+ FNMAdd -> text "\tfnmadd" <> dot <> floatPrecision d
+ FNMSub -> text "\tfnmsub" <> dot <> floatPrecision d
+ in op4 fma d r1 r2 r3
+ instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
+ where
+ op2 op o1 o2 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2
+ op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
+ op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
+ pprFenceType FenceRead = text "r"
+ pprFenceType FenceWrite = text "w"
+ pprFenceType FenceReadWrite = text "rw"
+ floatPrecision o
+ | isSingleOp o = text "s"
+ | isDoubleOp o = text "d"
+ | otherwise = pprPanic "Impossible floating point precision: " (pprOp platform o)
+
+floatOpPrecision :: Platform -> Operand -> Operand -> String
+floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isSingleOp l && isSingleOp r = "s" -- single precision
+floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isDoubleOp l && isDoubleOp r = "d" -- double precision
+floatOpPrecision p l r = pprPanic "Cannot determine floating point precision" (text "op1" <+> pprOp p l <+> text "op2" <+> pprOp p r)
+
+-- | Pretty print a conditional branch
+--
+-- This function is partial and will panic if the conditional is not supported;
+-- i.e. if it's floating point related.
+pprBcond :: (IsLine doc) => Cond -> doc
+pprBcond c = text "b" <> pprCond c
+ where
+ pprCond :: (IsLine doc) => Cond -> doc
+ pprCond c = case c of
+ EQ -> text "eq"
+ NE -> text "ne"
+ SLT -> text "lt"
+ SLE -> text "le"
+ SGE -> text "ge"
+ SGT -> text "gt"
+ ULT -> text "ltu"
+ ULE -> text "leu"
+ UGE -> text "geu"
+ UGT -> text "gtu"
+ -- BCOND cannot handle floating point comparisons / registers
+ _ -> panic $ "RV64.ppr: unhandled BCOND conditional: " ++ show c
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/RegInfo.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/RegInfo.hs
@@ -0,0 +1,41 @@
+-- | Minimum viable implementation of jump short-cutting: No short-cutting.
+--
+-- The functions here simply implement the no-short-cutting case. Implementing
+-- the real behaviour would be a worthwhile optimization in the future.
+module GHC.CmmToAsm.RV64.RegInfo
+ ( getJumpDestBlockId,
+ canShortcut,
+ shortcutStatics,
+ shortcutJump,
+ JumpDest (..),
+ )
+where
+
+import GHC.Cmm
+import GHC.Cmm.BlockId
+import GHC.CmmToAsm.RV64.Instr
+import GHC.Prelude
+import GHC.Utils.Outputable
+
+newtype JumpDest = DestBlockId BlockId
+
+instance Outputable JumpDest where
+ ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
+
+-- | Extract BlockId
+--
+-- Never `Nothing` for the RISCV64 NCG.
+getJumpDestBlockId :: JumpDest -> Maybe BlockId
+getJumpDestBlockId (DestBlockId bid) = Just bid
+
+-- No `Instr`s can be shortcut (for now)
+canShortcut :: Instr -> Maybe JumpDest
+canShortcut _ = Nothing
+
+-- Identity of the provided `RawCmmStatics`
+shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics
+shortcutStatics _ other_static = other_static
+
+-- Identity of the provided `Instr`
+shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
+shortcutJump _ other = other
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Regs.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Regs.hs
@@ -0,0 +1,230 @@
+module GHC.CmmToAsm.RV64.Regs where
+
+import GHC.Cmm
+import GHC.Cmm.CLabel (CLabel)
+import GHC.CmmToAsm.Format
+import GHC.Data.FastString
+import GHC.Platform
+import GHC.Platform.Reg
+import GHC.Platform.Reg.Class
+import GHC.Platform.Regs
+import GHC.Prelude
+import GHC.Types.Unique
+import GHC.Utils.Outputable
+import GHC.Utils.Panic
+
+-- * Registers
+
+-- | First integer register number. @zero@ register.
+x0RegNo :: RegNo
+x0RegNo = 0
+
+-- | return address register
+x1RegNo, raRegNo :: RegNo
+x1RegNo = 1
+raRegNo = x1RegNo
+
+x5RegNo, t0RegNo :: RegNo
+x5RegNo = 5
+t0RegNo = x5RegNo
+
+x7RegNo, t2RegNo :: RegNo
+x7RegNo = 7
+t2RegNo = x7RegNo
+
+x28RegNo, t3RegNo :: RegNo
+x28RegNo = 28
+t3RegNo = x28RegNo
+
+-- | Last integer register number. Used as TMP (IP) register.
+x31RegNo, t6RegNo, tmpRegNo :: RegNo
+x31RegNo = 31
+t6RegNo = x31RegNo
+tmpRegNo = x31RegNo
+
+-- | First floating point register.
+d0RegNo, ft0RegNo :: RegNo
+d0RegNo = 32
+ft0RegNo = d0RegNo
+
+d7RegNo, ft7RegNo :: RegNo
+d7RegNo = 39
+ft7RegNo = d7RegNo
+
+-- | Last floating point register.
+d31RegNo :: RegNo
+d31RegNo = 63
+
+a0RegNo, x10RegNo :: RegNo
+x10RegNo = 10
+a0RegNo = x10RegNo
+
+a7RegNo, x17RegNo :: RegNo
+x17RegNo = 17
+a7RegNo = x17RegNo
+
+fa0RegNo, d10RegNo :: RegNo
+d10RegNo = 42
+fa0RegNo = d10RegNo
+
+fa7RegNo, d17RegNo :: RegNo
+d17RegNo = 49
+fa7RegNo = d17RegNo
+
+-- Note [The made-up RISCV64 TMP (IP) register]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- RISCV64 has no inter-procedural register in its ABI. However, we need one to
+-- make register spills/loads to/from high number slots. I.e. slot numbers that
+-- do not fit in a 12bit integer which is used as immediate in the arithmetic
+-- operations. Thus, we're marking one additional register (x31) as permanently
+-- non-free and call it TMP.
+--
+-- TMP can be used as temporary register in all operations. Just be aware that
+-- it may be clobbered as soon as you lose direct control over it (i.e. using
+-- TMP by-passes the register allocation/spilling mechanisms.) It should be fine
+-- to use it as temporary register in a MachOp translation as long as you don't
+-- rely on its value beyond this limited scope.
+--
+-- X31 is a caller-saved register. I.e. there are no guarantees about what the
+-- callee does with it. That's exactly what we want here.
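+--
+-- For example (illustrative), a spill to a slot whose offset does not fit into
+-- a 12-bit immediate could be emitted as @li t6, <offset>@; @add t6, sp, t6@;
+-- @sd <reg>, 0(t6)@, with t6 (x31) serving as the scratch address register.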
+
+zeroReg, raReg, spMachReg, tmpReg :: Reg
+zeroReg = regSingle x0RegNo
+raReg = regSingle 1
+
+-- | Not to be confused with the `CmmReg` `spReg`
+spMachReg = regSingle 2
+
+tmpReg = regSingle tmpRegNo
+
+-- | All machine register numbers.
+allMachRegNos :: [RegNo]
+allMachRegNos = intRegs ++ fpRegs
+ where
+ intRegs = [x0RegNo .. x31RegNo]
+ fpRegs = [d0RegNo .. d31RegNo]
+
+-- | Registers available to the register allocator.
+--
+-- These are all registers minus those with a fixed role in RISCV ABI (zero, lr,
+-- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6,
+-- D1..D6.)
+allocatableRegs :: Platform -> [RealReg]
+allocatableRegs platform =
+ let isFree = freeReg platform
+ in map RealRegSingle $ filter isFree allMachRegNos
+
+-- | Integer argument registers according to the calling convention
+allGpArgRegs :: [Reg]
+allGpArgRegs = map regSingle [a0RegNo .. a7RegNo]
+
+-- | Floating point argument registers according to the calling convention
+allFpArgRegs :: [Reg]
+allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo]
+
+-- * Addressing modes
+
+-- | Addressing modes
+data AddrMode
+ = -- | A register plus some immediate integer, e.g. @8(sp)@ or @-16(sp)@. The
+ -- offset needs to fit into 12bits.
+ AddrRegImm Reg Imm
+ | -- | A register
+ AddrReg Reg
+ deriving (Eq, Show)
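+
+-- For example (illustrative), @AddrRegImm spMachReg (ImmInt 8)@ is rendered as
+-- @8(sp)@ by the pretty-printer.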
+
+-- * Immediates
+
+data Imm
+ = ImmInt Int
+ | ImmInteger Integer -- Sigh.
+ | ImmCLbl CLabel -- AbstractC Label (with baggage)
+ | ImmLit FastString
+ | ImmIndex CLabel Int
+ | ImmFloat Rational
+ | ImmDouble Rational
+ | ImmConstantSum Imm Imm
+ | ImmConstantDiff Imm Imm
+ deriving (Eq, Show)
+
+-- | Map `CmmLit` to `Imm`
+--
+-- N.B. this is a partial function, because not all `CmmLit`s have an immediate
+-- representation.
+litToImm :: CmmLit -> Imm
+litToImm (CmmInt i w) = ImmInteger (narrowS w i)
+-- narrow to the width: a CmmInt might be out of
+-- range, but we assume that ImmInteger only contains
+-- in-range values. A signed value should be fine here.
+litToImm (CmmFloat f W32) = ImmFloat f
+litToImm (CmmFloat f W64) = ImmDouble f
+litToImm (CmmLabel l) = ImmCLbl l
+litToImm (CmmLabelOff l off) = ImmIndex l off
+litToImm (CmmLabelDiffOff l1 l2 off _) =
+ ImmConstantSum
+ (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
+ (ImmInt off)
+litToImm l = panic $ "RV64.Regs.litToImm: no match for " ++ show l
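+
+-- For example, @litToImm (CmmInt 300 W8)@ yields @ImmInteger 44@: @narrowS@
+-- keeps only the low 8 bits (300 mod 256 == 44) and interprets them as a
+-- signed value.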
+
+-- == To satisfy GHC.CmmToAsm.Reg.Target =======================================
+
+-- squeeze functions for the graph allocator -----------------------------------
+
+-- | regSqueeze_class reg
+-- Calculate the maximum number of register colors that could be
+-- denied to a node of this class due to having this reg
+-- as a neighbour.
+{-# INLINE virtualRegSqueeze #-}
+virtualRegSqueeze :: RegClass -> VirtualReg -> Int
+virtualRegSqueeze cls vr =
+ case cls of
+ RcInteger ->
+ case vr of
+ VirtualRegI {} -> 1
+ VirtualRegHi {} -> 1
+ _other -> 0
+ RcDouble ->
+ case vr of
+ VirtualRegD {} -> 1
+ VirtualRegF {} -> 0
+ _other -> 0
+ _other -> 0
+
+{-# INLINE realRegSqueeze #-}
+realRegSqueeze :: RegClass -> RealReg -> Int
+realRegSqueeze cls rr =
+ case cls of
+ RcInteger ->
+ case rr of
+ RealRegSingle regNo
+ | regNo < d0RegNo -> 1
+ | otherwise -> 0
+ RcDouble ->
+ case rr of
+ RealRegSingle regNo
+ | regNo < d0RegNo -> 0
+ | otherwise -> 1
+ _other -> 0
+
+mkVirtualReg :: Unique -> Format -> VirtualReg
+mkVirtualReg u format
+ | not (isFloatFormat format) = VirtualRegI u
+ | otherwise =
+ case format of
+ FF32 -> VirtualRegD u
+ FF64 -> VirtualRegD u
+ _ -> panic "RV64.mkVirtualReg"
+
+{-# INLINE classOfRealReg #-}
+classOfRealReg :: RealReg -> RegClass
+classOfRealReg (RealRegSingle i)
+ | i < d0RegNo = RcInteger
+ | otherwise = RcDouble
+
+regDotColor :: RealReg -> SDoc
+regDotColor reg =
+ case classOfRealReg reg of
+ RcInteger -> text "blue"
+ RcFloat -> text "red"
+ RcDouble -> text "green"
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
@@ -119,7 +119,7 @@ trivColorable platform virtualRegSqueeze
ArchMipseb -> panic "trivColorable ArchMipseb"
ArchMipsel -> panic "trivColorable ArchMipsel"
ArchS390X -> panic "trivColorable ArchS390X"
- ArchRISCV64 -> panic "trivColorable ArchRISCV64"
+ ArchRISCV64 -> 14
ArchLoongArch64->panic "trivColorable ArchLoongArch64"
ArchJavaScript-> panic "trivColorable ArchJavaScript"
ArchWasm32 -> panic "trivColorable ArchWasm32"
@@ -154,7 +154,7 @@ trivColorable platform virtualRegSqueeze
ArchMipseb -> panic "trivColorable ArchMipseb"
ArchMipsel -> panic "trivColorable ArchMipsel"
ArchS390X -> panic "trivColorable ArchS390X"
- ArchRISCV64 -> panic "trivColorable ArchRISCV64"
+ ArchRISCV64 -> 0
ArchLoongArch64->panic "trivColorable ArchLoongArch64"
ArchJavaScript-> panic "trivColorable ArchJavaScript"
ArchWasm32 -> panic "trivColorable ArchWasm32"
@@ -188,7 +188,7 @@ trivColorable platform virtualRegSqueeze
ArchMipseb -> panic "trivColorable ArchMipseb"
ArchMipsel -> panic "trivColorable ArchMipsel"
ArchS390X -> panic "trivColorable ArchS390X"
- ArchRISCV64 -> panic "trivColorable ArchRISCV64"
+ ArchRISCV64 -> 20
ArchLoongArch64->panic "trivColorable ArchLoongArch64"
ArchJavaScript-> panic "trivColorable ArchJavaScript"
ArchWasm32 -> panic "trivColorable ArchWasm32"
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Linear.hs
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear.hs
@@ -112,6 +112,7 @@ import qualified GHC.CmmToAsm.Reg.Linear
import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64
+import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64
import GHC.CmmToAsm.Reg.Target
import GHC.CmmToAsm.Reg.Liveness
import GHC.CmmToAsm.Reg.Utils
@@ -221,7 +222,7 @@ linearRegAlloc config entry_ids block_li
ArchAlpha -> panic "linearRegAlloc ArchAlpha"
ArchMipseb -> panic "linearRegAlloc ArchMipseb"
ArchMipsel -> panic "linearRegAlloc ArchMipsel"
- ArchRISCV64 -> panic "linearRegAlloc ArchRISCV64"
+ ArchRISCV64 -> go (frInitFreeRegs platform :: RV64.FreeRegs)
ArchLoongArch64-> panic "linearRegAlloc ArchLoongArch64"
ArchJavaScript -> panic "linearRegAlloc ArchJavaScript"
ArchWasm32 -> panic "linearRegAlloc ArchWasm32"
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
@@ -29,10 +29,12 @@ import qualified GHC.CmmToAsm.Reg.Linear
import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64
+import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64
import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr
import qualified GHC.CmmToAsm.X86.Instr as X86.Instr
import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr
+import qualified GHC.CmmToAsm.RV64.Instr as RV64.Instr
class Show freeRegs => FR freeRegs where
frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs
@@ -64,6 +66,12 @@ instance FR AArch64.FreeRegs where
frInitFreeRegs = AArch64.initFreeRegs
frReleaseReg = \_ -> AArch64.releaseReg
+instance FR RV64.FreeRegs where
+ frAllocateReg = const RV64.allocateReg
+ frGetFreeRegs = const RV64.getFreeRegs
+ frInitFreeRegs = RV64.initFreeRegs
+ frReleaseReg = const RV64.releaseReg
+
maxSpillSlots :: NCGConfig -> Int
maxSpillSlots config = case platformArch (ncgPlatform config) of
ArchX86 -> X86.Instr.maxSpillSlots config
@@ -76,7 +84,7 @@ maxSpillSlots config = case platformArch
ArchAlpha -> panic "maxSpillSlots ArchAlpha"
ArchMipseb -> panic "maxSpillSlots ArchMipseb"
ArchMipsel -> panic "maxSpillSlots ArchMipsel"
- ArchRISCV64 -> panic "maxSpillSlots ArchRISCV64"
+ ArchRISCV64 -> RV64.Instr.maxSpillSlots config
ArchLoongArch64->panic "maxSpillSlots ArchLoongArch64"
ArchJavaScript-> panic "maxSpillSlots ArchJavaScript"
ArchWasm32 -> panic "maxSpillSlots ArchWasm32"
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
@@ -0,0 +1,96 @@
+-- | Functions to implement the @FR@ (as in "free regs") type class.
+--
+-- For LLVM GHC calling convention (used registers), see
+-- https://github.com/llvm/llvm-project/blob/6ab900f8746e7d8e24afafb5886a40801f6799f4/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L13638-L13685
+module GHC.CmmToAsm.Reg.Linear.RV64
+ ( allocateReg,
+ getFreeRegs,
+ initFreeRegs,
+ releaseReg,
+ FreeRegs (..),
+ )
+where
+
+import Data.Word
+import GHC.CmmToAsm.RV64.Regs
+import GHC.Platform
+import GHC.Platform.Reg
+import GHC.Platform.Reg.Class
+import GHC.Prelude
+import GHC.Stack
+import GHC.Utils.Outputable
+import GHC.Utils.Panic
+
+-- | Bitmaps to indicate which registers are free (currently unused)
+--
+-- The bit index represents the `RegNo`; for floating point registers it is
+-- offset by 32. The register is free when the bit is set.
+data FreeRegs
+ = FreeRegs
+ -- | integer/general purpose registers (`RcInteger`)
+ !Word32
+ -- | floating point registers (`RcDouble`)
+ !Word32
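+ --
+ -- For example, floating-point register 42 (@fa0@) corresponds to bit
+ -- @42 - 32 = 10@ of the second word.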
+
+instance Show FreeRegs where
+ show (FreeRegs g f) = "FreeRegs 0b" ++ showBits g ++ " 0b" ++ showBits f
+
+-- | Show bits as a `String` of @1@s and @0@s
+showBits :: Word32 -> String
+showBits w = map (\i -> if testBit w i then '1' else '0') [0 .. 31]
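+
+-- For example, @showBits 5@ yields @"101"@ followed by 29 @'0'@ characters:
+-- the leftmost character is bit 0, i.e. the least significant bit comes first.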
+
+instance Outputable FreeRegs where
+ ppr (FreeRegs g f) =
+ text " "
+ <+> foldr (\i x -> pad_int i <+> x) (text "") [0 .. 31]
+ $$ text "GPR"
+ <+> foldr (\i x -> show_bit g i <+> x) (text "") [0 .. 31]
+ $$ text "FPR"
+ <+> foldr (\i x -> show_bit f i <+> x) (text "") [0 .. 31]
+ where
+ pad_int i | i < 10 = char ' ' <> int i
+ pad_int i = int i
+ -- remember bit = 1 means it's available.
+ show_bit bits bit | testBit bits bit = text " "
+ show_bit _ _ = text " x"
+
+-- | Set bits of all allocatable registers to 1
+initFreeRegs :: Platform -> FreeRegs
+initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
+ where
+ noFreeRegs :: FreeRegs
+ noFreeRegs = FreeRegs 0 0
+
+-- | Get all free `RealReg`s (i.e. those where the corresponding bit is 1)
+getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
+getFreeRegs cls (FreeRegs g f)
+ | RcFloat <- cls = [] -- For now we only support double and integer registers; floats would need to be promoted.
+ | RcDouble <- cls = go 32 f allocatableDoubleRegs
+ | RcInteger <- cls = go 0 g allocatableIntRegs
+ where
+ go _ _ [] = []
+ go off x (i : is)
+ | testBit x i = RealRegSingle (off + i) : (go off x $! is)
+ | otherwise = go off x $! is
+ -- The lists of allocatable registers are manually crafted: Register
+ -- allocation is pretty hot code. We don't want to iterate and map like
+ -- `initFreeRegs` all the time! (The register mappings aren't supposed to
+ -- change often.)
+ allocatableIntRegs = [5 .. 7] ++ [10 .. 17] ++ [28 .. 30]
+ allocatableDoubleRegs = [0 .. 7] ++ [10 .. 17] ++ [28 .. 31]
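+ -- In ABI names, the allocatable integer registers are t0-t2, a0-a7 and
+ -- t3-t5 (t6/x31 is reserved as TMP); the allocatable double registers are
+ -- ft0-ft7, fa0-fa7 and ft8-ft11.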
+
+-- | Set corresponding register bit to 0
+allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
+allocateReg (RealRegSingle r) (FreeRegs g f)
+ | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32))
+ | r < 32 && testBit g r = FreeRegs (clearBit g r) f
+ | r > 31 = panic $ "Linear.RV64.allocReg: double allocation of float reg d" ++ show (r - 32) ++ "; " ++ showBits f
+ | otherwise = pprPanic "Linear.RV64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g)
+
+-- | Set corresponding register bit to 1
+releaseReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
+releaseReg (RealRegSingle r) (FreeRegs g f)
+ | r > 31 && testBit f (r - 32) = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg d" <> int (r - 32))
+ | r < 32 && testBit g r = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg x" <> int r)
+ | r > 31 = FreeRegs g (setBit f (r - 32))
+ | otherwise = FreeRegs (setBit g r) f
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Target.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Target.hs
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Target.hs
@@ -34,7 +34,7 @@ import qualified GHC.CmmToAsm.X86.Regs
import qualified GHC.CmmToAsm.X86.RegInfo as X86
import qualified GHC.CmmToAsm.PPC.Regs as PPC
import qualified GHC.CmmToAsm.AArch64.Regs as AArch64
-
+import qualified GHC.CmmToAsm.RV64.Regs as RV64
targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int
targetVirtualRegSqueeze platform
@@ -49,7 +49,7 @@ targetVirtualRegSqueeze platform
ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha"
ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb"
ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel"
- ArchRISCV64 -> panic "targetVirtualRegSqueeze ArchRISCV64"
+ ArchRISCV64 -> RV64.virtualRegSqueeze
ArchLoongArch64->panic "targetVirtualRegSqueeze ArchLoongArch64"
ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript"
ArchWasm32 -> panic "targetVirtualRegSqueeze ArchWasm32"
@@ -69,7 +69,7 @@ targetRealRegSqueeze platform
ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha"
ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb"
ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel"
- ArchRISCV64 -> panic "targetRealRegSqueeze ArchRISCV64"
+ ArchRISCV64 -> RV64.realRegSqueeze
ArchLoongArch64->panic "targetRealRegSqueeze ArchLoongArch64"
ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript"
ArchWasm32 -> panic "targetRealRegSqueeze ArchWasm32"
@@ -88,7 +88,7 @@ targetClassOfRealReg platform
ArchAlpha -> panic "targetClassOfRealReg ArchAlpha"
ArchMipseb -> panic "targetClassOfRealReg ArchMipseb"
ArchMipsel -> panic "targetClassOfRealReg ArchMipsel"
- ArchRISCV64 -> panic "targetClassOfRealReg ArchRISCV64"
+ ArchRISCV64 -> RV64.classOfRealReg
ArchLoongArch64->panic "targetClassOfRealReg ArchLoongArch64"
ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript"
ArchWasm32 -> panic "targetClassOfRealReg ArchWasm32"
@@ -107,7 +107,7 @@ targetMkVirtualReg platform
ArchAlpha -> panic "targetMkVirtualReg ArchAlpha"
ArchMipseb -> panic "targetMkVirtualReg ArchMipseb"
ArchMipsel -> panic "targetMkVirtualReg ArchMipsel"
- ArchRISCV64 -> panic "targetMkVirtualReg ArchRISCV64"
+ ArchRISCV64 -> RV64.mkVirtualReg
ArchLoongArch64->panic "targetMkVirtualReg ArchLoongArch64"
ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript"
ArchWasm32 -> panic "targetMkVirtualReg ArchWasm32"
@@ -126,7 +126,7 @@ targetRegDotColor platform
ArchAlpha -> panic "targetRegDotColor ArchAlpha"
ArchMipseb -> panic "targetRegDotColor ArchMipseb"
ArchMipsel -> panic "targetRegDotColor ArchMipsel"
- ArchRISCV64 -> panic "targetRegDotColor ArchRISCV64"
+ ArchRISCV64 -> RV64.regDotColor
ArchLoongArch64->panic "targetRegDotColor ArchLoongArch64"
ArchJavaScript-> panic "targetRegDotColor ArchJavaScript"
ArchWasm32 -> panic "targetRegDotColor ArchWasm32"
Index: ghc-9.10.1/compiler/GHC/Driver/Backend.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/Driver/Backend.hs
+++ ghc-9.10.1/compiler/GHC/Driver/Backend.hs
@@ -213,6 +213,7 @@ platformNcgSupported platform = if
ArchPPC_64 {} -> True
ArchAArch64 -> True
ArchWasm32 -> True
+ ArchRISCV64 -> True
_ -> False
-- | Is the platform supported by the JS backend?
Index: ghc-9.10.1/compiler/GHC/Driver/DynFlags.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/Driver/DynFlags.hs
+++ ghc-9.10.1/compiler/GHC/Driver/DynFlags.hs
@@ -1325,6 +1325,7 @@ default_PIC platform =
(OSDarwin, ArchAArch64) -> [Opt_PIC]
(OSLinux, ArchAArch64) -> [Opt_PIC, Opt_ExternalDynamicRefs]
(OSLinux, ArchARM {}) -> [Opt_PIC, Opt_ExternalDynamicRefs]
+ (OSLinux, ArchRISCV64 {}) -> [Opt_PIC, Opt_ExternalDynamicRefs]
(OSOpenBSD, ArchX86_64) -> [Opt_PIC] -- Due to PIE support in
-- OpenBSD since 5.3 release
-- (1 May 2013) we need to
Index: ghc-9.10.1/compiler/GHC/Platform.hs
===================================================================
--- ghc-9.10.1.orig/compiler/GHC/Platform.hs
+++ ghc-9.10.1/compiler/GHC/Platform.hs
@@ -250,7 +250,6 @@ platformHasRTSLinker p = case archOS_arc
ArchPPC_64 ELF_V1 -> False -- powerpc64
ArchPPC_64 ELF_V2 -> False -- powerpc64le
ArchS390X -> False
- ArchRISCV64 -> False
ArchLoongArch64 -> False
ArchJavaScript -> False
ArchWasm32 -> False
Index: ghc-9.10.1/compiler/ghc.cabal.in
===================================================================
--- ghc-9.10.1.orig/compiler/ghc.cabal.in
+++ ghc-9.10.1/compiler/ghc.cabal.in
@@ -290,6 +290,7 @@ Library
GHC.CmmToAsm.Reg.Linear.FreeRegs
GHC.CmmToAsm.Reg.Linear.JoinToTargets
GHC.CmmToAsm.Reg.Linear.PPC
+ GHC.CmmToAsm.Reg.Linear.RV64
GHC.CmmToAsm.Reg.Linear.StackMap
GHC.CmmToAsm.Reg.Linear.State
GHC.CmmToAsm.Reg.Linear.Stats
@@ -298,6 +299,13 @@ Library
GHC.CmmToAsm.Reg.Liveness
GHC.CmmToAsm.Reg.Target
GHC.CmmToAsm.Reg.Utils
+ GHC.CmmToAsm.RV64
+ GHC.CmmToAsm.RV64.CodeGen
+ GHC.CmmToAsm.RV64.Cond
+ GHC.CmmToAsm.RV64.Instr
+ GHC.CmmToAsm.RV64.Ppr
+ GHC.CmmToAsm.RV64.RegInfo
+ GHC.CmmToAsm.RV64.Regs
GHC.CmmToAsm.Types
GHC.CmmToAsm.Utils
GHC.CmmToAsm.X86
Index: ghc-9.10.1/hadrian/bindist/config.mk.in
===================================================================
--- ghc-9.10.1.orig/hadrian/bindist/config.mk.in
+++ ghc-9.10.1/hadrian/bindist/config.mk.in
@@ -152,7 +152,7 @@ GhcWithSMP := $(strip $(if $(filter YESN
# Whether to include GHCi in the compiler. Depends on whether the RTS linker
# has support for this OS/ARCH combination.
OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu)))
-ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64)))
+ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64 riscv64)))
ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES"
GhcWithInterpreter=YES
Index: ghc-9.10.1/hadrian/src/Settings/Builders/RunTest.hs
===================================================================
--- ghc-9.10.1.orig/hadrian/src/Settings/Builders/RunTest.hs
+++ ghc-9.10.1/hadrian/src/Settings/Builders/RunTest.hs
@@ -118,7 +118,7 @@ inTreeCompilerArgs stg = do
os <- queryHostTarget queryOS
arch <- queryTargetTarget queryArch
- let codegen_arches = ["x86_64", "i386", "powerpc", "powerpc64", "powerpc64le", "aarch64", "wasm32"]
+ let codegen_arches = ["x86_64", "i386", "powerpc", "powerpc64", "powerpc64le", "aarch64", "wasm32", "riscv64"]
let withNativeCodeGen
| unregisterised = False
| arch `elem` codegen_arches = True
@@ -139,7 +139,7 @@ inTreeCompilerArgs stg = do
-- For this information, we need to query ghc --info, however, that would
-- require building ghc, which we don't want to do here. Therefore, the
-- logic from `platformHasRTSLinker` is duplicated here.
- let rtsLinker = not $ arch `elem` ["powerpc", "powerpc64", "powerpc64le", "s390x", "riscv64", "loongarch64", "javascript", "wasm32"]
+ let rtsLinker = not $ arch `elem` ["powerpc", "powerpc64", "powerpc64le", "s390x", "loongarch64", "javascript", "wasm32"]
return TestCompilerArgs{..}
Index: ghc-9.10.1/rts/LinkerInternals.h
===================================================================
--- ghc-9.10.1.orig/rts/LinkerInternals.h
+++ ghc-9.10.1/rts/LinkerInternals.h
@@ -208,7 +208,7 @@ typedef struct _Segment {
int n_sections;
} Segment;
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
#define NEED_SYMBOL_EXTRAS 1
#endif
@@ -220,8 +220,9 @@ typedef struct _Segment {
#define NEED_M32 1
#endif
-/* Jump Islands are sniplets of machine code required for relative
- * address relocations on the PowerPC, x86_64 and ARM.
+/* Jump Islands are snippets of machine code required for relative address
+ * relocations on the PowerPC, x86_64 and ARM. On RISCV64 we use SymbolExtras
+ * like a GOT for locals, where each SymbolExtra represents one entry.
*/
typedef struct {
#if defined(powerpc_HOST_ARCH)
@@ -237,6 +238,8 @@ typedef struct {
uint8_t jumpIsland[8];
#elif defined(arm_HOST_ARCH)
uint8_t jumpIsland[16];
+#elif defined(riscv64_HOST_ARCH)
+ uint64_t addr;
#endif
} SymbolExtra;
Index: ghc-9.10.1/rts/RtsSymbols.c
===================================================================
--- ghc-9.10.1.orig/rts/RtsSymbols.c
+++ ghc-9.10.1/rts/RtsSymbols.c
@@ -980,6 +980,17 @@ extern char **environ;
#define RTS_LIBGCC_SYMBOLS
#endif
+#if defined(riscv64_HOST_ARCH)
+// See https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html as
+// reference for the following built-ins. __clzdi2 and __ctzdi2 probably back
+// the __builtin_clz/__builtin_ctz calls in libraries/ghc-prim/cbits/ctz.c.
+#define RTS_ARCH_LIBGCC_SYMBOLS \
+ SymI_NeedsProto(__clzdi2) \
+ SymI_NeedsProto(__ctzdi2)
+#else
+#define RTS_ARCH_LIBGCC_SYMBOLS
+#endif
+
// Symbols defined by libgcc/compiler-rt for AArch64's outline atomics.
#if defined(HAVE_ARM_OUTLINE_ATOMICS)
#include "ARMOutlineAtomicsSymbols.h"
@@ -1032,6 +1043,7 @@ RTS_DARWIN_ONLY_SYMBOLS
RTS_OPENBSD_ONLY_SYMBOLS
RTS_LIBC_SYMBOLS
RTS_LIBGCC_SYMBOLS
+RTS_ARCH_LIBGCC_SYMBOLS
RTS_FINI_ARRAY_SYMBOLS
RTS_LIBFFI_SYMBOLS
RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
@@ -1074,6 +1086,7 @@ RtsSymbolVal rtsSyms[] = {
RTS_DARWIN_ONLY_SYMBOLS
RTS_OPENBSD_ONLY_SYMBOLS
RTS_LIBGCC_SYMBOLS
+ RTS_ARCH_LIBGCC_SYMBOLS
RTS_FINI_ARRAY_SYMBOLS
RTS_LIBFFI_SYMBOLS
RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
Index: ghc-9.10.1/rts/adjustor/LibffiAdjustor.c
===================================================================
--- ghc-9.10.1.orig/rts/adjustor/LibffiAdjustor.c
+++ ghc-9.10.1/rts/adjustor/LibffiAdjustor.c
@@ -12,6 +12,7 @@
#include "Adjustor.h"
#include "rts/ghc_ffi.h"
+#include <stdint.h>
#include <string.h>
// Note that ffi_alloc_prep_closure is a non-standard libffi closure
@@ -187,5 +188,21 @@ createAdjustor (int cconv,
barf("createAdjustor: failed to allocate memory");
}
- return (void*)code;
+#if defined(riscv64_HOST_ARCH)
+ // Synchronize the memory and instruction cache to prevent illegal
+ // instruction exceptions.
+
+ // We expect two instructions for address loading, one for the jump.
+ int instrCount = 3;
+ // On Linux the parameters of __builtin___clear_cache are currently unused.
+  // We pass them anyway for future compatibility. (I.e. the parameters could
+  // not be validated during development.)
+ // TODO: Check the upper boundary e.g. with a debugger.
+ __builtin___clear_cache((void *)code,
+ (void *)((uint64_t *) code + instrCount));
+ // Memory barrier to ensure nothing circumvents the fence.i / cache flush.
+ SEQ_CST_FENCE();
+#endif
+
+ return (void *)code;
}
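Note that (uint64_t *) code + instrCount advances in 8-byte strides, so the
flushed range spans 24 bytes for three 4-byte instructions; over-flushing is
harmless, but the bound is loose. A byte-exact variant would compute the end
pointer in instruction-sized units, as in this hypothetical sketch
(flush_adjustor_code is a made-up name):

#include <stdint.h>

/* RISC-V base instructions are 4 bytes, so advance the end pointer in
 * uint32_t units to flush exactly instrCount instructions. */
static void flush_adjustor_code(void *code, int instrCount) {
    uint32_t *begin = (uint32_t *)code;
    uint32_t *end   = begin + instrCount;  /* instrCount * 4 bytes */
    __builtin___clear_cache((void *)begin, (void *)end);
}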
Index: ghc-9.10.1/rts/linker/Elf.c
===================================================================
--- ghc-9.10.1.orig/rts/linker/Elf.c
+++ ghc-9.10.1/rts/linker/Elf.c
@@ -103,7 +103,8 @@
#include "elf_got.h"
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
+# define NEED_GOT
# define NEED_PLT
# include "elf_plt.h"
# include "elf_reloc.h"
@@ -430,10 +431,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break;
#endif
#if defined(EM_RISCV)
- case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" ));
- errorBelch("%s: RTS linker not implemented on riscv",
- oc->fileName);
- return 0;
+ case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); break;
#endif
#if defined(EM_LOONGARCH)
case EM_LOONGARCH: IF_DEBUG(linker,debugBelch( "loongarch64" ));
@@ -1130,9 +1128,10 @@ end:
return result;
}
-// the aarch64 linker uses relocacteObjectCodeAarch64,
-// see elf_reloc_aarch64.{h,c}
-#if !defined(aarch64_HOST_ARCH)
+// the aarch64 and riscv64 linkers use relocateObjectCodeAarch64() and
+// relocateObjectCodeRISCV64() (respectively), see elf_reloc_aarch64.{h,c} and
+// elf_reloc_riscv64.{h,c}
+#if !defined(aarch64_HOST_ARCH) && !defined(riscv64_HOST_ARCH)
/* Do ELF relocations which lack an explicit addend. All x86-linux
and arm-linux relocations appear to be of this form. */
@@ -1359,7 +1358,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc,
/* try to locate an existing stub for this target */
if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) {
/* didn't find any. Need to create one */
- if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) {
+ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 0)) {
errorBelch("Unable to create veneer for ARM_CALL\n");
return 0;
}
@@ -1451,7 +1450,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc,
/* try to locate an existing stub for this target */
if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) {
/* didn't find any. Need to create one */
- if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) {
+ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 1)) {
errorBelch("Unable to create veneer for ARM_THM_CALL\n");
return 0;
}
@@ -1991,7 +1990,7 @@ ocResolve_ELF ( ObjectCode* oc )
(void) shnum;
(void) shdr;
-#if defined(aarch64_HOST_ARCH)
+#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
/* use new relocation design */
if(relocateObjectCode( oc ))
return 0;
@@ -2014,6 +2013,9 @@ ocResolve_ELF ( ObjectCode* oc )
#if defined(powerpc_HOST_ARCH)
ocFlushInstructionCache( oc );
+#elif defined(riscv64_HOST_ARCH)
+  /* New-style pseudo-polymorphic (per-architecture) call */
+ flushInstructionCache( oc );
#endif
return ocMprotect_Elf(oc);
Index: ghc-9.10.1/rts/linker/ElfTypes.h
===================================================================
--- ghc-9.10.1.orig/rts/linker/ElfTypes.h
+++ ghc-9.10.1/rts/linker/ElfTypes.h
@@ -150,6 +150,7 @@ typedef
struct _Stub {
void * addr;
void * target;
+ void* got_addr;
/* flags can hold architecture specific information they are used during
* lookup of stubs as well. Thus two stubs for the same target with
* different flags are considered unequal.
Index: ghc-9.10.1/rts/linker/SymbolExtras.c
===================================================================
--- ghc-9.10.1.orig/rts/linker/SymbolExtras.c
+++ ghc-9.10.1/rts/linker/SymbolExtras.c
@@ -153,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc)
}
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
unsigned long symbolNumber,
unsigned long target )
@@ -189,9 +189,12 @@ SymbolExtra* makeSymbolExtra( ObjectCode
extra->addr = target;
memcpy(extra->jumpIsland, jmp, 8);
#endif /* x86_64_HOST_ARCH */
-
+#if defined(riscv64_HOST_ARCH)
+ // Fake GOT entry (used like GOT, but located in symbol extras)
+ extra->addr = target;
+#endif
return extra;
}
-#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */
+#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH || riscv64_HOST_ARCH */
#endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */
#endif // NEED_SYMBOL_EXTRAS
Index: ghc-9.10.1/rts/linker/SymbolExtras.h
===================================================================
--- ghc-9.10.1.orig/rts/linker/SymbolExtras.h
+++ ghc-9.10.1/rts/linker/SymbolExtras.h
@@ -16,7 +16,7 @@ SymbolExtra* makeArmSymbolExtra( ObjectC
unsigned long target,
bool fromThumb,
bool toThumb );
-#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
unsigned long symbolNumber,
unsigned long target );
Index: ghc-9.10.1/rts/linker/elf_plt.c
===================================================================
--- ghc-9.10.1.orig/rts/linker/elf_plt.c
+++ ghc-9.10.1/rts/linker/elf_plt.c
@@ -5,7 +5,7 @@
#include <stdint.h>
#include <stdlib.h>
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
#if defined(OBJFORMAT_ELF)
#define STRINGIFY(x) #x
@@ -49,11 +49,13 @@ findStub(Section * section,
bool
makeStub(Section * section,
void* * addr,
+ void* got_addr,
uint8_t flags) {
Stub * s = calloc(1, sizeof(Stub));
ASSERT(s != NULL);
s->target = *addr;
+ s->got_addr = got_addr;
s->flags = flags;
s->next = NULL;
s->addr = (uint8_t *)section->info->stub_offset + 8
Index: ghc-9.10.1/rts/linker/elf_plt.h
===================================================================
--- ghc-9.10.1.orig/rts/linker/elf_plt.h
+++ ghc-9.10.1/rts/linker/elf_plt.h
@@ -4,8 +4,9 @@
#include "elf_plt_arm.h"
#include "elf_plt_aarch64.h"
+#include "elf_plt_riscv64.h"
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
#if defined(OBJFORMAT_ELF)
@@ -21,6 +22,8 @@
#define __suffix__ Arm
#elif defined(__mips__)
#define __suffix__ Mips
+#elif defined(__riscv)
+#define __suffix__ RISCV64
#else
#error "unknown architecture"
#endif
@@ -34,10 +37,10 @@ unsigned numberOfStubsForSection( Objec
#define STUB_SIZE ADD_SUFFIX(stubSize)
bool findStub(Section * section, void* * addr, uint8_t flags);
-bool makeStub(Section * section, void* * addr, uint8_t flags);
+bool makeStub(Section * section, void* * addr, void* got_addr, uint8_t flags);
void freeStubs(Section * section);
#endif // OBJECTFORMAT_ELF
-#endif // arm/aarch64_HOST_ARCH
+#endif // arm/aarch64_HOST_ARCH/riscv64_HOST_ARCH
Index: ghc-9.10.1/rts/linker/elf_plt_riscv64.c
===================================================================
--- /dev/null
+++ ghc-9.10.1/rts/linker/elf_plt_riscv64.c
@@ -0,0 +1,90 @@
+#include "Rts.h"
+#include "elf_compat.h"
+#include "elf_plt_riscv64.h"
+#include "rts/Messages.h"
+#include "linker/ElfTypes.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(riscv64_HOST_ARCH)
+
+#if defined(OBJFORMAT_ELF)
+
+const size_t instSizeRISCV64 = 4;
+const size_t stubSizeRISCV64 = 3 * instSizeRISCV64;
+
+bool needStubForRelRISCV64(Elf_Rel *rel) {
+ switch (ELF64_R_TYPE(rel->r_info)) {
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool needStubForRelaRISCV64(Elf_Rela *rela) {
+ switch (ELF64_R_TYPE(rela->r_info)) {
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// After the global offset table (GOT) has been set up, we can use these three
+// instructions to jump to the target address / function:
+//
+// 1. AUIPC ip, %pcrel_hi(addr)
+// 2. LD ip, %pcrel_lo(addr)(ip)
+// 3. JALR x0, ip, 0
+//
+// We could use the absolute address of the target (because we know it), but
+// that would require loading a 64-bit constant which is a nightmare to do in
+// riscv64 assembly. (See
+// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/5ffe5b5aeedb37b1c1c0c3d94641267d9ad4795a/riscv-elf.adoc#procedure-linkage-table)
+//
+// So far, PC-relative addressing seems to be good enough. If it ever turns out
+// not to be, one could encode absolute addressing here (perhaps only for
+// out-of-range cases.)
+bool makeStubRISCV64(Stub *s) {
+ uint32_t *P = (uint32_t *)s->addr;
+ int32_t addr = (uint64_t)s->got_addr - (uint64_t)P;
+
+ uint64_t hi = (addr + 0x800) >> 12;
+ uint64_t lo = addr - (hi << 12);
+
+ IF_DEBUG(
+ linker,
+ debugBelch(
+ "makeStubRISCV64: P = %p, got_addr = %p, target = %p, addr = 0x%x "
+ ", hi = 0x%lx, lo = 0x%lx\n",
+ P, s->got_addr, s->target, addr, hi, lo));
+
+ // AUIPC ip, %pcrel_hi(addr)
+ uint32_t auipcInst = 0b0010111; // opcode
+ auipcInst |= 0x1f << 7; // rd = ip (x31)
+ auipcInst |= hi << 12; // imm[31:12]
+
+ // LD ip, %pcrel_lo(addr)(ip)
+ uint32_t ldInst = 0b0000011; // opcode
+ ldInst |= 0x1f << 7; // rd = ip (x31)
+ ldInst |= 0x1f << 15; // rs = ip (x31)
+ ldInst |= 0b11 << 12; // funct3 = 0x3 (LD)
+ ldInst |= lo << 20; // imm[11:0]
+
+  // JALR x0, ip, 0
+ uint32_t jalrInst = 0b1100111; // opcode
+ jalrInst |= 0x1f << 15; // rs = ip (x31)
+
+ P[0] = auipcInst;
+ P[1] = ldInst;
+ P[2] = jalrInst;
+
+ return EXIT_SUCCESS;
+}
+
+#endif
+#endif
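The (addr + 0x800) >> 12 computation in makeStubRISCV64 rounds the high part
so that the remaining low part always fits the signed 12-bit I-type immediate,
and the AUIPC/LD pair reconstructs the original offset exactly. A small
self-check of that invariant (a sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>

static void check_hi_lo_split(int32_t addr) {
    int64_t hi = ((int64_t)addr + 0x800) >> 12;
    int64_t lo = (int64_t)addr - (hi << 12);
    assert(-2048 <= lo && lo <= 2047);        /* fits the I-type immediate */
    assert((hi << 12) + lo == (int64_t)addr); /* lossless reconstruction */
}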
Index: ghc-9.10.1/rts/linker/elf_plt_riscv64.h
===================================================================
--- /dev/null
+++ ghc-9.10.1/rts/linker/elf_plt_riscv64.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+extern const size_t stubSizeRISCV64;
+bool needStubForRelRISCV64(Elf_Rel * rel);
+bool needStubForRelaRISCV64(Elf_Rela * rel);
+bool makeStubRISCV64(Stub * s);
+
+#endif
Index: ghc-9.10.1/rts/linker/elf_reloc.c
===================================================================
--- ghc-9.10.1.orig/rts/linker/elf_reloc.c
+++ ghc-9.10.1/rts/linker/elf_reloc.c
@@ -4,13 +4,18 @@
#if defined(OBJFORMAT_ELF)
-/* we currently only use this abstraction for elf/aarch64 */
-#if defined(aarch64_HOST_ARCH)
+/* we currently only use this abstraction for elf/aarch64 and elf/riscv64 */
+#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
bool
relocateObjectCode(ObjectCode * oc) {
return ADD_SUFFIX(relocateObjectCode)(oc);
}
+
+
+void flushInstructionCache(ObjectCode * oc) {
+    ADD_SUFFIX(flushInstructionCache)(oc);
+}
#endif
#endif
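relocateObjectCode and flushInstructionCache dispatch to the per-architecture
implementation purely by token pasting: __suffix__ is defined per host
architecture and ADD_SUFFIX appends it to the function name. A minimal sketch
of the mechanism, with PASTE/EVAL as assumed stand-ins for the real
definitions in elf_plt.h:

#define PASTE(x, y) x##y
#define EVAL(x, y) PASTE(x, y)
#define __suffix__ RISCV64               /* set per host architecture */
#define ADD_SUFFIX(name) EVAL(name, __suffix__)

void flushInstructionCacheRISCV64(void); /* per-arch implementation */

static void demo_dispatch(void) {
    /* Expands to flushInstructionCacheRISCV64() */
    ADD_SUFFIX(flushInstructionCache)();
}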
Index: ghc-9.10.1/rts/linker/elf_reloc.h
===================================================================
--- ghc-9.10.1.orig/rts/linker/elf_reloc.h
+++ ghc-9.10.1/rts/linker/elf_reloc.h
@@ -5,9 +5,10 @@
#if defined(OBJFORMAT_ELF)
#include "elf_reloc_aarch64.h"
+#include "elf_reloc_riscv64.h"
bool
relocateObjectCode(ObjectCode * oc);
-
+void flushInstructionCache(ObjectCode *oc);
#endif /* OBJETFORMAT_ELF */
Index: ghc-9.10.1/rts/linker/elf_reloc_aarch64.c
===================================================================
--- ghc-9.10.1.orig/rts/linker/elf_reloc_aarch64.c
+++ ghc-9.10.1/rts/linker/elf_reloc_aarch64.c
@@ -240,7 +240,7 @@ computeAddend(Section * section, Elf_Rel
/* check if we already have that stub */
if(findStub(section, (void**)&S, 0)) {
/* did not find it. Crete a new stub. */
- if(makeStub(section, (void**)&S, 0)) {
+ if(makeStub(section, (void**)&S, NULL, 0)) {
abort(/* could not find or make stub */);
}
}
@@ -339,5 +339,10 @@ relocateObjectCodeAarch64(ObjectCode * o
return EXIT_SUCCESS;
}
+void flushInstructionCacheAarch64(ObjectCode * oc STG_UNUSED) {
+ // Looks like we don't need this on Aarch64.
+ /* no-op */
+}
+
#endif /* OBJECTFORMAT_ELF */
#endif /* aarch64_HOST_ARCH */
Index: ghc-9.10.1/rts/linker/elf_reloc_aarch64.h
===================================================================
--- ghc-9.10.1.orig/rts/linker/elf_reloc_aarch64.h
+++ ghc-9.10.1/rts/linker/elf_reloc_aarch64.h
@@ -7,4 +7,5 @@
bool
relocateObjectCodeAarch64(ObjectCode * oc);
+void flushInstructionCacheAarch64(ObjectCode *oc);
#endif /* OBJETFORMAT_ELF */
Index: ghc-9.10.1/rts/linker/elf_reloc_riscv64.c
===================================================================
--- /dev/null
+++ ghc-9.10.1/rts/linker/elf_reloc_riscv64.c
@@ -0,0 +1,693 @@
+#include "elf_reloc_riscv64.h"
+#include "LinkerInternals.h"
+#include "Rts.h"
+#include "Stg.h"
+#include "SymbolExtras.h"
+#include "linker/ElfTypes.h"
+#include "elf_plt.h"
+#include "elf_util.h"
+#include "rts/Messages.h"
+#include "util.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(riscv64_HOST_ARCH)
+
+#if defined(OBJFORMAT_ELF)
+
+typedef uint64_t addr_t;
+
+/* regular instructions are 32bit */
+typedef uint32_t inst_t;
+
+/* compressed instructions are 16bit */
+typedef uint16_t cinst_t;
+
+// TODO: These functions could be static. They are not yet, because we might
+// need their debugging symbols.
+char *relocationTypeToString(Elf64_Xword type);
+int32_t decodeAddendRISCV64(Section *section, Elf_Rel *rel);
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend);
+void write8le(uint8_t *p, uint8_t v);
+uint8_t read8le(const uint8_t *P);
+void write16le(cinst_t *p, uint16_t v);
+uint16_t read16le(const cinst_t *P);
+uint32_t read32le(const inst_t *P);
+void write32le(inst_t *p, uint32_t v);
+uint64_t read64le(const uint64_t *P);
+void write64le(uint64_t *p, uint64_t v);
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end);
+void setCJType(cinst_t *loc, uint32_t val);
+void setCBType(cinst_t *loc, uint32_t val);
+void setBType(inst_t *loc, uint32_t val);
+void setSType(inst_t *loc, uint32_t val);
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
+ int64_t addend, ObjectCode *oc);
+void setJType(inst_t *loc, uint32_t val);
+void setIType(inst_t *loc, int32_t val);
+void checkInt(inst_t *loc, int32_t v, int n);
+void setUType(inst_t *loc, int32_t val);
+
+
+char *relocationTypeToString(Elf64_Xword type) {
+ switch (ELF64_R_TYPE(type)) {
+ case R_RISCV_NONE:
+ return "R_RISCV_NONE";
+ case R_RISCV_32:
+ return "R_RISCV_32";
+ case R_RISCV_64:
+ return "R_RISCV_64";
+ case R_RISCV_RELATIVE:
+ return "R_RISCV_RELATIVE";
+ case R_RISCV_COPY:
+ return "R_RISCV_COPY";
+ case R_RISCV_JUMP_SLOT:
+ return "R_RISCV_JUMP_SLOT";
+ case R_RISCV_TLS_DTPMOD32:
+ return "R_RISCV_TLS_DTPMOD32";
+ case R_RISCV_TLS_DTPMOD64:
+ return "R_RISCV_TLS_DTPMOD64";
+ case R_RISCV_TLS_DTPREL32:
+ return "R_RISCV_TLS_DTPREL32";
+ case R_RISCV_TLS_DTPREL64:
+ return "R_RISCV_TLS_DTPREL64";
+ case R_RISCV_TLS_TPREL32:
+ return "R_RISCV_TLS_TPREL32";
+ case R_RISCV_TLS_TPREL64:
+ return "R_RISCV_TLS_TPREL64";
+ case R_RISCV_BRANCH:
+ return "R_RISCV_BRANCH";
+ case R_RISCV_JAL:
+ return "R_RISCV_JAL";
+ case R_RISCV_CALL:
+ return "R_RISCV_CALL";
+ case R_RISCV_CALL_PLT:
+ return "R_RISCV_CALL_PLT";
+ case R_RISCV_GOT_HI20:
+ return "R_RISCV_GOT_HI20";
+ case R_RISCV_PCREL_HI20:
+ return "R_RISCV_PCREL_HI20";
+ case R_RISCV_LO12_I:
+ return "R_RISCV_LO12_I";
+ case R_RISCV_PCREL_LO12_I:
+ return "R_RISCV_PCREL_LO12_I";
+ case R_RISCV_HI20:
+ return "R_RISCV_HI20";
+ case R_RISCV_LO12_S:
+ return "R_RISCV_LO12_S";
+ case R_RISCV_PCREL_LO12_S:
+ return "R_RISCV_PCREL_LO12_S";
+ case R_RISCV_RELAX:
+ return "R_RISCV_RELAX";
+ case R_RISCV_RVC_BRANCH:
+ return "R_RISCV_RVC_BRANCH";
+ case R_RISCV_RVC_JUMP:
+ return "R_RISCV_RVC_JUMP";
+ default:
+ return "Unknown relocation type";
+ }
+}
+
+STG_NORETURN
+int32_t decodeAddendRISCV64(Section *section STG_UNUSED,
+ Elf_Rel *rel STG_UNUSED) {
+ barf("decodeAddendRISCV64: Relocations with explicit addend are not supported."
+ " Please open a ticket; providing the causing code/binary.");
+}
+
+// Make sure that V can be represented as an N bit signed integer.
+void checkInt(inst_t *loc, int32_t v, int n) {
+ if (!isInt(n, v)) {
+ barf("Relocation at 0x%x is out of range. value: 0x%x (%d), "
+ "sign-extended value: 0x%x (%d), max bits 0x%x (%d)\n",
+ *loc, v, v, signExtend32(v, n), signExtend32(v, n), n, n);
+ }
+}
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write8le(uint8_t *p, uint8_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint8_t read8le(const uint8_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write16le(cinst_t *p, uint16_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint16_t read16le(const cinst_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint32_t read32le(const inst_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write32le(inst_t *p, uint32_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint64_t read64le(const uint64_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write64le(uint64_t *p, uint64_t v) { *p = v; }
+
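+// Extract the inclusive bit range [begin:end] from v, where begin is the
+// high bit index and end the low bit index (mirroring the ISA manual's
+// imm[hi:lo] notation).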
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
+ return (v & ((1ULL << (begin + 1)) - 1)) >> end;
+}
+
+// Set immediate val in the instruction at *loc. In U-type instructions the
+// upper 20 bits carry the upper 20 bits of the immediate.
+void setUType(inst_t *loc, int32_t val) {
+ const unsigned bits = 32;
+ uint32_t hi = val + 0x800;
+ checkInt(loc, signExtend32(hi, bits) >> 12, 20);
+ IF_DEBUG(linker, debugBelch("setUType: hi 0x%x val 0x%x\n", hi, val));
+
+ uint32_t imm = hi & 0xFFFFF000;
+ write32le(loc, (read32le(loc) & 0xFFF) | imm);
+}
+
+// Set immediate val in the instruction at *loc. In I-type instructions the
+// upper 12 bits carry the lower 12 bits of the immediate.
+void setIType(inst_t *loc, int32_t val) {
+ uint64_t hi = (val + 0x800) >> 12;
+ uint64_t lo = val - (hi << 12);
+
+ IF_DEBUG(linker, debugBelch("setIType: hi 0x%lx lo 0x%lx\n", hi, lo));
+ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc 0x%x val 0x%x\n", loc,
+ *loc, val));
+
+ uint32_t imm = lo & 0xfff;
+ uint32_t instr = (read32le(loc) & 0xfffff) | (imm << 20);
+
+ IF_DEBUG(linker, debugBelch("setIType: insn 0x%x\n", instr));
+ write32le(loc, instr);
+ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc' 0x%x val 0x%x\n", loc,
+ *loc, val));
+}
+
+// Set immediate val in the instruction at *loc. In S-type instructions the
+// lower 12 bits of the immediate are at bits 7 to 11 ([0:4]) and 25 to 31
+// ([5:11]).
+void setSType(inst_t *loc, uint32_t val) {
+ uint64_t hi = (val + 0x800) >> 12;
+ uint64_t lo = val - (hi << 12);
+
+ uint32_t imm = lo;
+ uint32_t instr = (read32le(loc) & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) |
+ (extractBits(imm, 4, 0) << 7);
+
+ write32le(loc, instr);
+}
+
+// Set immediate val in the instruction at *loc. In J-type instructions the
+// immediate has 20 bits which are pretty scattered:
+// instr bit -> imm bit
+// 31 -> 20
+// [30:21] -> [10:1]
+// 20 -> 11
+// [19:12] -> [19:12]
+//
+// N.B. bit 0 of the immediate is missing!
+void setJType(inst_t *loc, uint32_t val) {
+ checkInt(loc, val, 21);
+
+ uint32_t insn = read32le(loc) & 0xFFF;
+ uint32_t imm20 = extractBits(val, 20, 20) << 31;
+ uint32_t imm10_1 = extractBits(val, 10, 1) << 21;
+ uint32_t imm11 = extractBits(val, 11, 11) << 20;
+ uint32_t imm19_12 = extractBits(val, 19, 12) << 12;
+ insn |= imm20 | imm10_1 | imm11 | imm19_12;
+
+ write32le(loc, insn);
+}
+
+// Set immediate val in the instruction at *loc. In B-type instructions the
+// immediate has 12 bits which are pretty scattered:
+// instr bit -> imm bit
+// 31 -> 12
+// [30:25] -> [10:5]
+// [11:8] -> [4:1]
+// 7 -> 11
+//
+// N.B. bit 0 of the immediate is missing!
+void setBType(inst_t *loc, uint32_t val) {
+ checkInt(loc, val, 13);
+
+ uint32_t insn = read32le(loc) & 0x1FFF07F;
+ uint32_t imm12 = extractBits(val, 12, 12) << 31;
+ uint32_t imm10_5 = extractBits(val, 10, 5) << 25;
+ uint32_t imm4_1 = extractBits(val, 4, 1) << 8;
+ uint32_t imm11 = extractBits(val, 11, 11) << 7;
+ insn |= imm12 | imm10_5 | imm4_1 | imm11;
+
+ write32le(loc, insn);
+}
+
+
+// Set immediate val in the instruction at *loc. CB-type instructions have a
+// length of 16 bits (a half-word, compared to the usual 32-bit/word
+// instructions.) The immediate has 8 bits which are pretty scattered:
+// instr bit -> imm bit
+// 12 -> 8
+// [11:10] -> [4:3]
+// [6:5] -> [7:6]
+// [4:3] -> [2:1]
+// 2 -> 5
+//
+// N.B. bit 0 of the immediate is missing!
+void setCBType(cinst_t *loc, uint32_t val) {
+ checkInt((inst_t *)loc, val, 9);
+ uint16_t insn = read16le(loc) & 0xE383;
+ uint16_t imm8 = extractBits(val, 8, 8) << 12;
+ uint16_t imm4_3 = extractBits(val, 4, 3) << 10;
+ uint16_t imm7_6 = extractBits(val, 7, 6) << 5;
+ uint16_t imm2_1 = extractBits(val, 2, 1) << 3;
+ uint16_t imm5 = extractBits(val, 5, 5) << 2;
+ insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5;
+
+ write16le(loc, insn);
+}
+
+// Set immediate val in the instruction at *loc. CJ-type instructions have a
+// length of 16 bits (a half-word, compared to the usual 32-bit/word
+// instructions.) The immediate has 11 bits which are pretty scattered:
+// instr bit -> imm bit
+// 12 -> 11
+// 11 -> 4
+// [10:9] ->[9:8]
+// 8 -> 10
+// 7 -> 6
+// 6 -> 7
+// [5:3] -> [3:1]
+// 2 -> 5
+//
+// N.B. bit 0 of the immediate is missing!
+void setCJType(cinst_t *loc, uint32_t val) {
+ checkInt((inst_t *)loc, val, 12);
+ uint16_t insn = read16le(loc) & 0xE003;
+ uint16_t imm11 = extractBits(val, 11, 11) << 12;
+ uint16_t imm4 = extractBits(val, 4, 4) << 11;
+ uint16_t imm9_8 = extractBits(val, 9, 8) << 9;
+ uint16_t imm10 = extractBits(val, 10, 10) << 8;
+ uint16_t imm6 = extractBits(val, 6, 6) << 7;
+ uint16_t imm7 = extractBits(val, 7, 7) << 6;
+ uint16_t imm3_1 = extractBits(val, 3, 1) << 3;
+ uint16_t imm5 = extractBits(val, 5, 5) << 2;
+ insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5;
+
+ write16le(loc, insn);
+}
+
+// Encode the addend into the instruction according to the relocation type.
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend) {
+ // instruction to rewrite (P: Position of the relocation)
+ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
+ IF_DEBUG(linker,
+ debugBelch(
+ "Relocation type %s 0x%lx (%lu) symbol 0x%lx addend 0x%x (%u / "
+ "%d) P 0x%lx\n",
+ relocationTypeToString(rel->r_info), ELF64_R_TYPE(rel->r_info),
+ ELF64_R_TYPE(rel->r_info), ELF64_R_SYM(rel->r_info), addend,
+ addend, addend, P));
+ switch (ELF64_R_TYPE(rel->r_info)) {
+ case R_RISCV_32_PCREL:
+ case R_RISCV_32:
+ write32le((inst_t *)P, addend);
+ break;
+ case R_RISCV_64:
+ write64le((uint64_t *)P, addend);
+ break;
+ case R_RISCV_GOT_HI20:
+ case R_RISCV_PCREL_HI20:
+ case R_RISCV_HI20: {
+ setUType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_LO12_I: {
+ setIType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_RVC_JUMP: {
+ setCJType((cinst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_RVC_BRANCH: {
+ setCBType((cinst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_BRANCH: {
+ setBType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT: {
+    // We could relax more (in some cases), but right now the most important
+    // thing is to make it work.
+ setUType((inst_t *)P, addend);
+ setIType(((inst_t *)P) + 1, addend);
+ break;
+ }
+ case R_RISCV_JAL: {
+ setJType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_ADD8:
+ write8le((uint8_t *)P, read8le((uint8_t *)P) + addend);
+ break;
+ case R_RISCV_ADD16:
+ write16le((cinst_t *)P, read16le((cinst_t *)P) + addend);
+ break;
+ case R_RISCV_ADD32:
+ write32le((inst_t *)P, read32le((inst_t *)P) + addend);
+ break;
+ case R_RISCV_ADD64:
+ write64le((uint64_t *)P, read64le((uint64_t *)P) + addend);
+ break;
+ case R_RISCV_SUB6: {
+ uint8_t keep = *((uint8_t *)P) & 0xc0;
+ uint8_t imm = (((*(uint8_t *)P) & 0x3f) - addend) & 0x3f;
+
+ write8le((uint8_t *)P, keep | imm);
+ break;
+ }
+ case R_RISCV_SUB8:
+ write8le((uint8_t *)P, read8le((uint8_t *)P) - addend);
+ break;
+ case R_RISCV_SUB16:
+ write16le((cinst_t *)P, read16le((cinst_t *)P) - addend);
+ break;
+ case R_RISCV_SUB32:
+ write32le((inst_t *)P, read32le((inst_t *)P) - addend);
+ break;
+ case R_RISCV_SUB64:
+ write64le((uint64_t *)P, read64le((uint64_t *)P) - addend);
+ break;
+ case R_RISCV_SET6: {
+ uint8_t keep = *((uint8_t *)P) & 0xc0;
+ uint8_t imm = (addend & 0x3f) & 0x3f;
+
+ write8le((uint8_t *)P, keep | imm);
+ break;
+ }
+ case R_RISCV_SET8:
+ write8le((uint8_t *)P, addend);
+ break;
+ case R_RISCV_SET16:
+ write16le((cinst_t *)P, addend);
+ break;
+ case R_RISCV_SET32:
+ write32le((inst_t *)P, addend);
+ break;
+ case R_RISCV_PCREL_LO12_S:
+ case R_RISCV_TPREL_LO12_S:
+ case R_RISCV_LO12_S: {
+ setSType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_RELAX:
+ case R_RISCV_ALIGN:
+    // Relaxations (rewriting instructions to more efficient ones) could be
+    // implemented in the future. As the code is already aligned and we do not
+    // change instruction sizes, we should get away with not aligning (though,
+    // strictly speaking, that is cheating.) To align or change the instruction
+    // count, we would need machinery to squeeze or extend memory in place.
+ break;
+ default:
+ barf("Missing relocation 0x%lx\n", ELF64_R_TYPE(rel->r_info));
+ }
+ return EXIT_SUCCESS;
+}
+
+/**
+ * Compute the *new* addend for a relocation, given a pre-existing addend.
+ * @param section The section the relocation is in.
+ * @param rel The Relocation struct.
+ * @param symbol The target symbol.
+ * @param addend The existing addend. Either explicit or implicit.
+ * @return The new computed addend.
+ */
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
+ int64_t addend, ObjectCode *oc) {
+ Section * section = &oc->sections[relaTab->targetSectionIndex];
+
+ // instruction to rewrite (P: Position of the relocation)
+ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
+
+ CHECK(0x0 != P);
+ CHECK((uint64_t)section->start <= P);
+ CHECK(P <= (uint64_t)section->start + section->size);
+ // S: Value of the symbol in the symbol table
+ addr_t S = (addr_t)symbol->addr;
+ /* GOT slot for the symbol (G + GOT) */
+ addr_t GOT_S = (addr_t)symbol->got_addr;
+
+ // A: Addend field in the relocation entry associated with the symbol
+ int64_t A = addend;
+
+ IF_DEBUG(linker, debugBelch("%s: P 0x%lx S 0x%lx %s GOT_S 0x%lx A 0x%lx relNo %u\n",
+ relocationTypeToString(rel->r_info), P, S,
+ symbol->name, GOT_S, A, relNo));
+ switch (ELF64_R_TYPE(rel->r_info)) {
+ case R_RISCV_32:
+ return S + A;
+ case R_RISCV_64:
+ return S + A;
+ case R_RISCV_HI20:
+ return S + A;
+ case R_RISCV_JUMP_SLOT:
+ return S;
+ case R_RISCV_JAL:
+ return S + A - P;
+ case R_RISCV_PCREL_HI20:
+ return S + A - P;
+ case R_RISCV_LO12_I:
+ return S + A;
+ // Quoting LLVM docs: For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}),
+  // the symbol actually points to the corresponding R_RISCV_PCREL_HI20
+ // relocation, and the target VA is calculated using PCREL_HI20's symbol.
+ case R_RISCV_PCREL_LO12_S:
+ FALLTHROUGH;
+ case R_RISCV_PCREL_LO12_I: {
+    // Look up the related HI20 relocation and use its value. I'm still
+    // confused why relocations aren't self-contained, but this is how LLVM
+    // does it. And calculating the lower 12 bits without any relationship to
+    // the GOT entry's address would make no sense either.
+ for (int64_t i = relNo; i >= 0 ; i--) {
+ Elf_Rela *rel_prime = &relaTab->relocations[i];
+
+ addr_t P_prime =
+ (addr_t)((uint8_t *)section->start + rel_prime->r_offset);
+
+ if (P_prime != S) {
+ // S points to the P of the corresponding *_HI20 relocation.
+ continue;
+ }
+
+ ElfSymbol *symbol_prime =
+ findSymbol(oc, relaTab->sectionHeader->sh_link,
+ ELF64_R_SYM((Elf64_Xword)rel_prime->r_info));
+
+ CHECK(0x0 != symbol_prime);
+
+ /* take explicit addend */
+ int64_t addend_prime = rel_prime->r_addend;
+
+ uint64_t type_prime = ELF64_R_TYPE(rel_prime->r_info);
+
+ if (type_prime == R_RISCV_PCREL_HI20 ||
+ type_prime == R_RISCV_GOT_HI20 ||
+ type_prime == R_RISCV_TLS_GD_HI20 ||
+ type_prime == R_RISCV_TLS_GOT_HI20) {
+ IF_DEBUG(linker,
+ debugBelch(
+ "Found matching relocation: %s (P: 0x%lx, S: 0x%lx, "
+ "sym-name: %s) -> %s (P: 0x%lx, S: %p, sym-name: %s, relNo: %ld)",
+ relocationTypeToString(rel->r_info), P, S, symbol->name,
+ relocationTypeToString(rel_prime->r_info), P_prime,
+ symbol_prime->addr, symbol_prime->name, i));
+ int32_t result = computeAddend(relaTab, i, (Elf_Rel *)rel_prime,
+ symbol_prime, addend_prime, oc);
+ IF_DEBUG(linker, debugBelch("Result of computeAddend: 0x%x (%d)\n",
+ result, result));
+ return result;
+ }
+ }
+ debugBelch("Missing HI relocation for %s: P 0x%lx S 0x%lx %s\n",
+ relocationTypeToString(rel->r_info), P, S, symbol->name);
+ abort();
+ }
+
+ case R_RISCV_RVC_JUMP:
+ return S + A - P;
+ case R_RISCV_RVC_BRANCH:
+ return S + A - P;
+ case R_RISCV_BRANCH:
+ return S + A - P;
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT: {
+ addr_t GOT_Target;
+ if (GOT_S != 0) {
+ // 1. Public symbol with GOT entry.
+ GOT_Target = GOT_S;
+ } else {
+ // 2. Fake GOT entry with symbol extra entry.
+ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
+ addr_t* FAKE_GOT_S = &symbolExtra->addr;
+ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT w/ SymbolExtra = %p , "
+ "entry = %p\n",
+ symbolExtra, FAKE_GOT_S));
+ GOT_Target = (addr_t) FAKE_GOT_S;
+ }
+
+ if (findStub(section, (void **)&S, 0)) {
+      /* did not find it. Create a new stub. */
+ if (makeStub(section, (void **)&S, (void *)GOT_Target, 0)) {
+ abort(/* could not find or make stub */);
+ }
+ }
+ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT: S = 0x%lx A = 0x%lx P = "
+ "0x%lx (S + A) - P = 0x%lx \n",
+ S, A, P, (S + A) - P));
+ return (S + A) - P;
+ }
+ case R_RISCV_ADD8:
+ FALLTHROUGH;
+ case R_RISCV_ADD16:
+ FALLTHROUGH;
+ case R_RISCV_ADD32:
+ FALLTHROUGH;
+ case R_RISCV_ADD64:
+    return S + A; // Added to the value at P during encoding (V + S + A)
+ case R_RISCV_SUB6:
+ FALLTHROUGH;
+ case R_RISCV_SUB8:
+ FALLTHROUGH;
+ case R_RISCV_SUB16:
+ FALLTHROUGH;
+ case R_RISCV_SUB32:
+ FALLTHROUGH;
+ case R_RISCV_SUB64:
+    return S + A; // Subtracted from the value at P during encoding (V - (S + A))
+ case R_RISCV_SET6:
+ FALLTHROUGH;
+ case R_RISCV_SET8:
+ FALLTHROUGH;
+ case R_RISCV_SET16:
+ FALLTHROUGH;
+ case R_RISCV_SET32:
+ return S + A;
+ case R_RISCV_RELAX:
+ // This "relocation" has no addend.
+ FALLTHROUGH;
+ case R_RISCV_ALIGN:
+ // I guess we don't need to implement this relaxation. Otherwise, this
+ // should return the number of blank bytes to insert via NOPs.
+ return 0;
+ case R_RISCV_32_PCREL:
+ return S + A - P;
+ case R_RISCV_GOT_HI20: {
+ // TODO: Allocating extra memory for every symbol just to play this trick
+ // seems to be a bit obscene. (GOT relocations hitting local symbols
+ // happens, but not very often.) It would be better to allocate only what we
+ // really need.
+
+ // There are two cases here: 1. The symbol is public and has an entry in the
+ // GOT. 2. It's local and has no corresponding GOT entry. The first case is
+ // easy: We simply calculate the addend with the GOT address. In the second
+ // case we create a symbol extra entry and pretend it's the GOT.
+ if (GOT_S != 0) {
+ // 1. Public symbol with GOT entry.
+ return GOT_S + A - P;
+ } else {
+ // 2. Fake GOT entry with symbol extra entry.
+ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
+ addr_t* FAKE_GOT_S = &symbolExtra->addr;
+ addr_t res = (addr_t) FAKE_GOT_S + A - P;
+ IF_DEBUG(linker, debugBelch("R_RISCV_GOT_HI20 w/ SymbolExtra = %p , "
+                                  "entry = %p , reloc-addend = 0x%lx ",
+ symbolExtra, FAKE_GOT_S, res));
+ return res;
+ }
+ }
+ default:
+ barf("Unimplemented relocation: 0x%lx\n (%lu)",
+ ELF64_R_TYPE(rel->r_info), ELF64_R_TYPE(rel->r_info));
+ }
+ barf("This should never happen!");
+}
+
+// Iterate over all relocations and perform them.
+bool relocateObjectCodeRISCV64(ObjectCode *oc) {
+ for (ElfRelocationTable *relTab = oc->info->relTable; relTab != NULL;
+ relTab = relTab->next) {
+ /* only relocate interesting sections */
+ if (SECTIONKIND_OTHER == oc->sections[relTab->targetSectionIndex].kind)
+ continue;
+
+ Section *targetSection = &oc->sections[relTab->targetSectionIndex];
+
+ for (unsigned i = 0; i < relTab->n_relocations; i++) {
+ Elf_Rel *rel = &relTab->relocations[i];
+
+ ElfSymbol *symbol = findSymbol(oc, relTab->sectionHeader->sh_link,
+ ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+ CHECK(0x0 != symbol);
+
+      // This always barfs, because we don't support Rel relocations (implicit
+      // addend) yet. Do we need this case at all? Leaving it in to spot the
+      // potential bug when it appears.
+ /* decode implicit addend */
+ int64_t addend = decodeAddendRISCV64(targetSection, rel);
+
+ addend = computeAddend((ElfRelocationATable*) relTab, i, rel, symbol, addend, oc);
+ encodeAddendRISCV64(targetSection, rel, addend);
+ }
+ }
+ for (ElfRelocationATable *relaTab = oc->info->relaTable; relaTab != NULL;
+ relaTab = relaTab->next) {
+ /* only relocate interesting sections */
+ if (SECTIONKIND_OTHER == oc->sections[relaTab->targetSectionIndex].kind)
+ continue;
+
+ Section *targetSection = &oc->sections[relaTab->targetSectionIndex];
+
+ for (unsigned i = 0; i < relaTab->n_relocations; i++) {
+
+ Elf_Rela *rel = &relaTab->relocations[i];
+
+ ElfSymbol *symbol = findSymbol(oc, relaTab->sectionHeader->sh_link,
+ ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+ CHECK(0x0 != symbol);
+
+ /* take explicit addend */
+ int64_t addend = rel->r_addend;
+
+ addend = computeAddend(relaTab, i, (Elf_Rel *)rel, symbol, addend, oc);
+ encodeAddendRISCV64(targetSection, (Elf_Rel *)rel, addend);
+ }
+ }
+ return EXIT_SUCCESS;
+}
+
+void flushInstructionCacheRISCV64(ObjectCode *oc) {
+ // Synchronize the memory and instruction cache to prevent illegal instruction
+ // exceptions. On Linux the parameters of __builtin___clear_cache are
+  // currently unused. We pass them anyway for future compatibility. (I.e. the
+  // parameters could not be validated during development.)
+
+ /* The main object code */
+ void *codeBegin = oc->image + oc->misalignment;
+  __builtin___clear_cache(codeBegin, (void*) ((uint8_t*) codeBegin + oc->fileSize));
+
+ /* Jump Islands */
+ __builtin___clear_cache((void *)oc->symbol_extras,
+ (void *)(oc->symbol_extras + oc->n_symbol_extras));
+
+ // Memory barrier to ensure nothing circumvents the fence.i / cache flushes.
+ SEQ_CST_FENCE();
+}
+
+#endif /* OBJECTFORMAT_ELF */
+#endif /* riscv64_HOST_ARCH */
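The scattered immediate layouts above are invertible, which makes them easy
to cross-check. As an example, a decoder for the J-type layout written against
the same bit positions that setJType uses (a sketch for verification, not part
of the patch):

#include <stdint.h>

/* Invert the J-type bit scattering performed by setJType. */
static int32_t decodeJTypeImm(uint32_t insn) {
    uint32_t imm20    = (insn >> 31) & 0x1;
    uint32_t imm10_1  = (insn >> 21) & 0x3ff;
    uint32_t imm11    = (insn >> 20) & 0x1;
    uint32_t imm19_12 = (insn >> 12) & 0xff;
    uint32_t imm = (imm20 << 20) | (imm19_12 << 12) | (imm11 << 11) |
                   (imm10_1 << 1); /* bit 0 of the immediate is always 0 */
    return (int32_t)(imm << 11) >> 11; /* sign-extend from bit 20 */
}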
Index: ghc-9.10.1/rts/linker/elf_reloc_riscv64.h
===================================================================
--- /dev/null
+++ ghc-9.10.1/rts/linker/elf_reloc_riscv64.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+bool
+relocateObjectCodeRISCV64(ObjectCode * oc);
+
+void flushInstructionCacheRISCV64(ObjectCode *oc);
+#endif /* OBJFORMAT_ELF */
Index: ghc-9.10.1/rts/rts.cabal
===================================================================
--- ghc-9.10.1.orig/rts/rts.cabal
+++ ghc-9.10.1/rts/rts.cabal
@@ -468,9 +468,11 @@ library
linker/elf_got.c
linker/elf_plt.c
linker/elf_plt_aarch64.c
+ linker/elf_plt_riscv64.c
linker/elf_plt_arm.c
linker/elf_reloc.c
linker/elf_reloc_aarch64.c
+ linker/elf_reloc_riscv64.c
linker/elf_tlsgd.c
linker/elf_util.c
sm/BlockAlloc.c
Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.hs
===================================================================
--- /dev/null
+++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.hs
@@ -0,0 +1,123 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+-- | This test ensures that sub-word signed and unsigned parameters are passed
+-- to C functions correctly, i.e. it asserts the calling convention.
+--
+-- The number of parameters is currently shaped for the RISCV64 calling
+-- convention. You may need to add more parameters to the C functions in case
+-- your architecture reserves more registers for parameters.
+module Main where
+
+import Data.Word
+import GHC.Exts
+import GHC.Int
+
+foreign import ccall "fun8"
+ fun8 ::
+ Int8# -> -- a0
+ Word8# -> -- a1
+ Int8# -> -- a2
+ Int8# -> -- a3
+ Int8# -> -- a4
+ Int8# -> -- a5
+ Int8# -> -- a6
+ Int8# -> -- a7
+ Word8# -> -- s0
+ Int8# -> -- s1
+ Int64# -- result
+
+foreign import ccall "fun16"
+ fun16 ::
+ Int16# -> -- a0
+ Word16# -> -- a1
+ Int16# -> -- a2
+ Int16# -> -- a3
+ Int16# -> -- a4
+ Int16# -> -- a5
+ Int16# -> -- a6
+ Int16# -> -- a7
+ Word16# -> -- s0
+ Int16# -> -- s1
+ Int64# -- result
+
+foreign import ccall "fun32"
+ fun32 ::
+ Int32# -> -- a0
+ Word32# -> -- a1
+ Int32# -> -- a2
+ Int32# -> -- a3
+ Int32# -> -- a4
+ Int32# -> -- a5
+ Int32# -> -- a6
+ Int32# -> -- a7
+ Word32# -> -- s0
+ Int32# -> -- s1
+ Int64# -- result
+
+foreign import ccall "funFloat"
+ funFloat ::
+ Float# -> -- a0
+ Float# -> -- a1
+ Float# -> -- a2
+ Float# -> -- a3
+ Float# -> -- a4
+ Float# -> -- a5
+ Float# -> -- a6
+ Float# -> -- a7
+ Float# -> -- s0
+ Float# -> -- s1
+ Float# -- result
+
+foreign import ccall "funDouble"
+ funDouble ::
+ Double# -> -- a0
+ Double# -> -- a1
+ Double# -> -- a2
+ Double# -> -- a3
+ Double# -> -- a4
+ Double# -> -- a5
+ Double# -> -- a6
+ Double# -> -- a7
+ Double# -> -- s0
+ Double# -> -- s1
+ Double# -- result
+
+main :: IO ()
+main =
+  -- N.B. the values here aren't chosen by accident: -1 means all bits set in
+  -- two's complement, which is the same as the maximum word value.
+ let i8 :: Int8# = intToInt8# (-1#)
+ w8 :: Word8# = wordToWord8# (255##)
+ res8 :: Int64# = fun8 i8 w8 i8 i8 i8 i8 i8 i8 w8 i8
+ expected_res8 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word8) + 8 * (-1)
+ i16 :: Int16# = intToInt16# (-1#)
+ w16 :: Word16# = wordToWord16# (65535##)
+ res16 :: Int64# = fun16 i16 w16 i16 i16 i16 i16 i16 i16 w16 i16
+ expected_res16 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word16) + 8 * (-1)
+ i32 :: Int32# = intToInt32# (-1#)
+ w32 :: Word32# = wordToWord32# (4294967295##)
+ res32 :: Int64# = fun32 i32 w32 i32 i32 i32 i32 i32 i32 w32 i32
+ expected_res32 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word32) + 8 * (-1)
+ resFloat :: Float = F# (funFloat 1.0# 1.1# 1.2# 1.3# 1.4# 1.5# 1.6# 1.7# 1.8# 1.9#)
+ resDouble :: Double = D# (funDouble 1.0## 1.1## 1.2## 1.3## 1.4## 1.5## 1.6## 1.7## 1.8## 1.9##)
+ in do
+ print $ "fun8 result:" ++ show (I64# res8)
+ assertEqual expected_res8 (I64# res8)
+ print $ "fun16 result:" ++ show (I64# res16)
+ assertEqual expected_res16 (I64# res16)
+ print $ "fun32 result:" ++ show (I64# res32)
+ assertEqual expected_res32 (I64# res32)
+ print $ "funFloat result:" ++ show resFloat
+ assertEqual (14.5 :: Float) resFloat
+ print $ "funDouble result:" ++ show resDouble
+ assertEqual (14.5 :: Double) resDouble
+
+assertEqual :: (Eq a, Show a) => a -> a -> IO ()
+assertEqual a b =
+ if a == b
+ then pure ()
+ else error $ show a ++ " =/= " ++ show b
Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.stdout
===================================================================
--- /dev/null
+++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.stdout
@@ -0,0 +1,60 @@
+"fun8 result:502"
+"fun16 result:131062"
+"fun32 result:8589934582"
+"funFloat result:14.5"
+"funDouble result:14.5"
+fun32:
+a0: 0xffffffff -1
+a1: 0xffffffff 4294967295
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xffffffff 4294967295
+fun16:
+a0: 0xffffffff -1
+a1: 0xffff 65535
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xffff 65535
+fun8:
+a0: 0xffffffff -1
+a1: 0xff 255
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xff 255
+funFloat:
+a0: 1.000000
+a1: 1.100000
+a2: 1.200000
+a3: 1.300000
+a4: 1.400000
+a5: 1.500000
+a6: 1.600000
+a7: 1.700000
+s0: 1.800000
+s1: 1.900000
+funDouble:
+a0: 1.000000
+a1: 1.100000
+a2: 1.200000
+a3: 1.300000
+a4: 1.400000
+a5: 1.500000
+a6: 1.600000
+a7: 1.700000
+s0: 1.800000
+s1: 1.900000
Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv_c.c
===================================================================
--- /dev/null
+++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv_c.c
@@ -0,0 +1,91 @@
+#include "stdint.h"
+#include "stdio.h"
+
+int64_t fun8(int8_t a0, uint8_t a1, int8_t a2, int8_t a3, int8_t a4, int8_t a5,
+ int8_t a6, int8_t a7, int8_t s0, uint8_t s1) {
+ printf("fun8:\n");
+ printf("a0: %#x %hhd\n", a0, a0);
+ printf("a1: %#x %hhu\n", a1, a1);
+ printf("a2: %#x %hhd\n", a2, a2);
+ printf("a3: %#x %hhd\n", a3, a3);
+ printf("a4: %#x %hhd\n", a4, a4);
+ printf("a5: %#x %hhd\n", a5, a5);
+ printf("a6: %#x %hhd\n", a6, a6);
+ printf("a7: %#x %hhd\n", a7, a7);
+ printf("s0: %#x %hhd\n", s0, s0);
+ printf("s1: %#x %hhu\n", s1, s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+int64_t fun16(int16_t a0, uint16_t a1, int16_t a2, int16_t a3, int16_t a4,
+ int16_t a5, int16_t a6, int16_t a7, int16_t s0, uint16_t s1) {
+ printf("fun16:\n");
+ printf("a0: %#x %hd\n", a0, a0);
+ printf("a1: %#x %hu\n", a1, a1);
+ printf("a2: %#x %hd\n", a2, a2);
+ printf("a3: %#x %hd\n", a3, a3);
+ printf("a4: %#x %hd\n", a4, a4);
+ printf("a5: %#x %hd\n", a5, a5);
+ printf("a6: %#x %hd\n", a6, a6);
+ printf("a7: %#x %hd\n", a7, a7);
+ printf("s0: %#x %hd\n", s0, s0);
+ printf("s1: %#x %hu\n", s1, s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+int64_t fun32(int32_t a0, uint32_t a1, int32_t a2, int32_t a3, int32_t a4,
+ int32_t a5, int32_t a6, int32_t a7, int32_t s0, uint32_t s1) {
+ printf("fun32:\n");
+ printf("a0: %#x %d\n", a0, a0);
+ printf("a1: %#x %u\n", a1, a1);
+ printf("a2: %#x %d\n", a2, a2);
+ printf("a3: %#x %d\n", a3, a3);
+ printf("a4: %#x %d\n", a4, a4);
+ printf("a5: %#x %d\n", a5, a5);
+ printf("a6: %#x %d\n", a6, a6);
+ printf("a7: %#x %d\n", a7, a7);
+ printf("s0: %#x %d\n", s0, s0);
+ printf("s1: %#x %u\n", s1, s1);
+
+  // Ensure the addition happens in long int (not just int) precision.
+  // Otherwise, the result is truncated during the operation.
+  int64_t force_int64_precision = 0;
+  return force_int64_precision + a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 +
+         s1;
+}
+
+float funFloat(float a0, float a1, float a2, float a3, float a4, float a5,
+ float a6, float a7, float s0, float s1) {
+ printf("funFloat:\n");
+ printf("a0: %f\n", a0);
+ printf("a1: %f\n", a1);
+ printf("a2: %f\n", a2);
+ printf("a3: %f\n", a3);
+ printf("a4: %f\n", a4);
+ printf("a5: %f\n", a5);
+ printf("a6: %f\n", a6);
+ printf("a7: %f\n", a7);
+ printf("s0: %f\n", s0);
+ printf("s1: %f\n", s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+double funDouble(double a0, double a1, double a2, double a3, double a4, double a5,
+ double a6, double a7, double s0, double s1) {
+ printf("funDouble:\n");
+ printf("a0: %f\n", a0);
+ printf("a1: %f\n", a1);
+ printf("a2: %f\n", a2);
+ printf("a3: %f\n", a3);
+ printf("a4: %f\n", a4);
+ printf("a5: %f\n", a5);
+ printf("a6: %f\n", a6);
+ printf("a7: %f\n", a7);
+ printf("s0: %f\n", s0);
+ printf("s1: %f\n", s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
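The 0xffffffff lines in the expected output above are no accident: variadic
calls apply the default argument promotions, so a signed sub-word -1 reaches
printf sign-extended to int, while an unsigned sub-word value is zero-extended.
A small standalone illustration (not part of the test):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    int8_t  i8 = -1;
    uint8_t u8 = 255;
    printf("%#x\n", i8); /* 0xffffffff: sign-extended to int by promotion */
    printf("%#x\n", u8); /* 0xff: zero-extended, value preserved */
    return 0;
}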