OBS-URL: https://build.opensuse.org/package/show/devel:languages:haskell/ghc?expand=0&rev=407
6218 lines
234 KiB
Diff
6218 lines
234 KiB
Diff
Index: ghc-9.10.1/CODEOWNERS
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/CODEOWNERS
|
||
+++ ghc-9.10.1/CODEOWNERS
|
||
@@ -40,6 +40,7 @@
|
||
/compiler/GHC/HsToCore/Foreign/Wasm.hs @TerrorJack
|
||
/compiler/GHC/Tc/Deriv/ @RyanGlScott
|
||
/compiler/GHC/CmmToAsm/ @simonmar @bgamari @AndreasK
|
||
+/compiler/GHC/CmmToAsm/RV64/ @supersven @angerman
|
||
/compiler/GHC/CmmToAsm/Wasm/ @TerrorJack
|
||
/compiler/GHC/CmmToLlvm/ @angerman
|
||
/compiler/GHC/StgToCmm/ @simonmar @osa1
|
||
Index: ghc-9.10.1/compiler/CodeGen.Platform.h
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/CodeGen.Platform.h
|
||
+++ ghc-9.10.1/compiler/CodeGen.Platform.h
|
||
@@ -1,7 +1,8 @@
|
||
|
||
import GHC.Cmm.Expr
|
||
#if !(defined(MACHREGS_i386) || defined(MACHREGS_x86_64) \
|
||
- || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64))
|
||
+ || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64) \
|
||
+ || defined(MACHREGS_riscv64))
|
||
import GHC.Utils.Panic.Plain
|
||
#endif
|
||
import GHC.Platform.Reg
|
||
@@ -1041,6 +1042,105 @@ freeReg 18 = False
|
||
|
||
# if defined(REG_Base)
|
||
freeReg REG_Base = False
|
||
+# endif
|
||
+# if defined(REG_Sp)
|
||
+freeReg REG_Sp = False
|
||
+# endif
|
||
+# if defined(REG_SpLim)
|
||
+freeReg REG_SpLim = False
|
||
+# endif
|
||
+# if defined(REG_Hp)
|
||
+freeReg REG_Hp = False
|
||
+# endif
|
||
+# if defined(REG_HpLim)
|
||
+freeReg REG_HpLim = False
|
||
+# endif
|
||
+
|
||
+# if defined(REG_R1)
|
||
+freeReg REG_R1 = False
|
||
+# endif
|
||
+# if defined(REG_R2)
|
||
+freeReg REG_R2 = False
|
||
+# endif
|
||
+# if defined(REG_R3)
|
||
+freeReg REG_R3 = False
|
||
+# endif
|
||
+# if defined(REG_R4)
|
||
+freeReg REG_R4 = False
|
||
+# endif
|
||
+# if defined(REG_R5)
|
||
+freeReg REG_R5 = False
|
||
+# endif
|
||
+# if defined(REG_R6)
|
||
+freeReg REG_R6 = False
|
||
+# endif
|
||
+# if defined(REG_R7)
|
||
+freeReg REG_R7 = False
|
||
+# endif
|
||
+# if defined(REG_R8)
|
||
+freeReg REG_R8 = False
|
||
+# endif
|
||
+
|
||
+# if defined(REG_F1)
|
||
+freeReg REG_F1 = False
|
||
+# endif
|
||
+# if defined(REG_F2)
|
||
+freeReg REG_F2 = False
|
||
+# endif
|
||
+# if defined(REG_F3)
|
||
+freeReg REG_F3 = False
|
||
+# endif
|
||
+# if defined(REG_F4)
|
||
+freeReg REG_F4 = False
|
||
+# endif
|
||
+# if defined(REG_F5)
|
||
+freeReg REG_F5 = False
|
||
+# endif
|
||
+# if defined(REG_F6)
|
||
+freeReg REG_F6 = False
|
||
+# endif
|
||
+
|
||
+# if defined(REG_D1)
|
||
+freeReg REG_D1 = False
|
||
+# endif
|
||
+# if defined(REG_D2)
|
||
+freeReg REG_D2 = False
|
||
+# endif
|
||
+# if defined(REG_D3)
|
||
+freeReg REG_D3 = False
|
||
+# endif
|
||
+# if defined(REG_D4)
|
||
+freeReg REG_D4 = False
|
||
+# endif
|
||
+# if defined(REG_D5)
|
||
+freeReg REG_D5 = False
|
||
+# endif
|
||
+# if defined(REG_D6)
|
||
+freeReg REG_D6 = False
|
||
+# endif
|
||
+
|
||
+freeReg _ = True
|
||
+
|
||
+#elif defined(MACHREGS_riscv64)
|
||
+
|
||
+-- zero reg
|
||
+freeReg 0 = False
|
||
+-- link register
|
||
+freeReg 1 = False
|
||
+-- stack pointer
|
||
+freeReg 2 = False
|
||
+-- global pointer
|
||
+freeReg 3 = False
|
||
+-- thread pointer
|
||
+freeReg 4 = False
|
||
+-- frame pointer
|
||
+freeReg 8 = False
|
||
+-- made-up inter-procedural (ip) register
|
||
+-- See Note [The made-up RISCV64 TMP (IP) register]
|
||
+freeReg 31 = False
|
||
+
|
||
+# if defined(REG_Base)
|
||
+freeReg REG_Base = False
|
||
# endif
|
||
# if defined(REG_Sp)
|
||
freeReg REG_Sp = False
|
||
Index: ghc-9.10.1/compiler/GHC/Cmm/CLabel.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/Cmm/CLabel.hs
|
||
+++ ghc-9.10.1/compiler/GHC/Cmm/CLabel.hs
|
||
@@ -1720,6 +1720,8 @@ pprDynamicLinkerAsmLabel !platform dllIn
|
||
| platformArch platform == ArchAArch64
|
||
= ppLbl
|
||
|
||
+ | platformArch platform == ArchRISCV64
|
||
+ = ppLbl
|
||
|
||
| platformArch platform == ArchX86_64
|
||
= case dllInfo of
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm.hs
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm.hs
|
||
@@ -67,6 +67,7 @@ import qualified GHC.CmmToAsm.X86 as X
|
||
import qualified GHC.CmmToAsm.PPC as PPC
|
||
import qualified GHC.CmmToAsm.AArch64 as AArch64
|
||
import qualified GHC.CmmToAsm.Wasm as Wasm32
|
||
+import qualified GHC.CmmToAsm.RV64 as RV64
|
||
|
||
import GHC.CmmToAsm.Reg.Liveness
|
||
import qualified GHC.CmmToAsm.Reg.Linear as Linear
|
||
@@ -148,7 +149,7 @@ nativeCodeGen logger ts config modLoc h
|
||
ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
|
||
ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
|
||
ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
|
||
- ArchRISCV64 -> panic "nativeCodeGen: No NCG for RISCV64"
|
||
+ ArchRISCV64 -> nCG' (RV64.ncgRV64 config)
|
||
ArchLoongArch64->panic "nativeCodeGen: No NCG for LoongArch64"
|
||
ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
|
||
ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
|
||
@@ -240,6 +240,7 @@ dwarfRegNo p r = case platformArch p of
|
||
| r == xmm15 -> 32
|
||
ArchPPC_64 _ -> fromIntegral $ toRegNo r
|
||
ArchAArch64 -> fromIntegral $ toRegNo r
|
||
+ ArchRISCV64 -> fromIntegral $ toRegNo r
|
||
_other -> error "dwarfRegNo: Unsupported platform or unknown register!"
|
||
|
||
-- | Virtual register number to use for return address.
|
||
@@ -252,5 +253,6 @@ dwarfReturnRegNo p
|
||
ArchX86 -> 8 -- eip
|
||
ArchX86_64 -> 16 -- rip
|
||
ArchPPC_64 ELF_V2 -> 65 -- lr (link register)
|
||
- ArchAArch64-> 30
|
||
+ ArchAArch64 -> 30
|
||
+ ArchRISCV64 -> 1 -- ra (return address)
|
||
_other -> error "dwarfReturnRegNo: Unsupported platform!"
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/PIC.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/PIC.hs
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/PIC.hs
|
||
@@ -132,6 +132,11 @@ cmmMakeDynamicReference config reference
|
||
addImport symbolPtr
|
||
return $ cmmMakePicReference config symbolPtr
|
||
|
||
+ AccessViaSymbolPtr | ArchRISCV64 <- platformArch platform -> do
|
||
+ let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
|
||
+ addImport symbolPtr
|
||
+ return $ cmmMakePicReference config symbolPtr
|
||
+
|
||
AccessViaSymbolPtr -> do
|
||
let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
|
||
addImport symbolPtr
|
||
@@ -164,6 +169,10 @@ cmmMakePicReference config lbl
|
||
| ArchAArch64 <- platformArch platform
|
||
= CmmLit $ CmmLabel lbl
|
||
|
||
+ -- as on AArch64, there's no pic base register.
|
||
+ | ArchRISCV64 <- platformArch platform
|
||
+ = CmmLit $ CmmLabel lbl
|
||
+
|
||
| OSAIX <- platformOS platform
|
||
= CmmMachOp (MO_Add W32)
|
||
[ CmmReg (CmmGlobal $ GlobalRegUse PicBaseReg (bWord platform))
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64.hs
|
||
@@ -0,0 +1,57 @@
|
||
+{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||
+
|
||
+-- | Native code generator for RiscV64 architectures
|
||
+module GHC.CmmToAsm.RV64 (ncgRV64) where
|
||
+
|
||
+import GHC.CmmToAsm.Config
|
||
+import GHC.CmmToAsm.Instr
|
||
+import GHC.CmmToAsm.Monad
|
||
+import GHC.CmmToAsm.RV64.CodeGen qualified as RV64
|
||
+import GHC.CmmToAsm.RV64.Instr qualified as RV64
|
||
+import GHC.CmmToAsm.RV64.Ppr qualified as RV64
|
||
+import GHC.CmmToAsm.RV64.RegInfo qualified as RV64
|
||
+import GHC.CmmToAsm.RV64.Regs qualified as RV64
|
||
+import GHC.CmmToAsm.Types
|
||
+import GHC.Prelude
|
||
+import GHC.Utils.Outputable (ftext)
|
||
+
|
||
+ncgRV64 :: NCGConfig -> NcgImpl RawCmmStatics RV64.Instr RV64.JumpDest
|
||
+ncgRV64 config =
|
||
+ NcgImpl
|
||
+ { ncgConfig = config,
|
||
+ cmmTopCodeGen = RV64.cmmTopCodeGen,
|
||
+ generateJumpTableForInstr = RV64.generateJumpTableForInstr config,
|
||
+ getJumpDestBlockId = RV64.getJumpDestBlockId,
|
||
+ canShortcut = RV64.canShortcut,
|
||
+ shortcutStatics = RV64.shortcutStatics,
|
||
+ shortcutJump = RV64.shortcutJump,
|
||
+ pprNatCmmDeclS = RV64.pprNatCmmDecl config,
|
||
+ pprNatCmmDeclH = RV64.pprNatCmmDecl config,
|
||
+ maxSpillSlots = RV64.maxSpillSlots config,
|
||
+ allocatableRegs = RV64.allocatableRegs platform,
|
||
+ ncgAllocMoreStack = RV64.allocMoreStack platform,
|
||
+ ncgMakeFarBranches = RV64.makeFarBranches,
|
||
+ extractUnwindPoints = const [],
|
||
+ invertCondBranches = \_ _ -> id
|
||
+ }
|
||
+ where
|
||
+ platform = ncgPlatform config
|
||
+
|
||
+-- | `Instruction` instance for RV64
|
||
+instance Instruction RV64.Instr where
|
||
+ regUsageOfInstr = RV64.regUsageOfInstr
|
||
+ patchRegsOfInstr = RV64.patchRegsOfInstr
|
||
+ isJumpishInstr = RV64.isJumpishInstr
|
||
+ jumpDestsOfInstr = RV64.jumpDestsOfInstr
|
||
+ patchJumpInstr = RV64.patchJumpInstr
|
||
+ mkSpillInstr = RV64.mkSpillInstr
|
||
+ mkLoadInstr = RV64.mkLoadInstr
|
||
+ takeDeltaInstr = RV64.takeDeltaInstr
|
||
+ isMetaInstr = RV64.isMetaInstr
|
||
+ mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
|
||
+ takeRegRegMoveInstr = RV64.takeRegRegMoveInstr
|
||
+ mkJumpInstr = RV64.mkJumpInstr
|
||
+ mkStackAllocInstr = RV64.mkStackAllocInstr
|
||
+ mkStackDeallocInstr = RV64.mkStackDeallocInstr
|
||
+ mkComment = pure . RV64.COMMENT . ftext
|
||
+ pprInstr = RV64.pprInstr
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/CodeGen.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/CodeGen.hs
|
||
@@ -0,0 +1,2207 @@
|
||
+{-# LANGUAGE BangPatterns #-}
|
||
+{-# LANGUAGE BinaryLiterals #-}
|
||
+{-# LANGUAGE GADTs #-}
|
||
+{-# LANGUAGE OverloadedStrings #-}
|
||
+
|
||
+module GHC.CmmToAsm.RV64.CodeGen
|
||
+ ( cmmTopCodeGen,
|
||
+ generateJumpTableForInstr,
|
||
+ makeFarBranches,
|
||
+ )
|
||
+where
|
||
+
|
||
+import Control.Monad
|
||
+import Data.Maybe
|
||
+import Data.Word
|
||
+import GHC.Cmm
|
||
+import GHC.Cmm.BlockId
|
||
+import GHC.Cmm.CLabel
|
||
+import GHC.Cmm.Dataflow.Block
|
||
+import GHC.Cmm.Dataflow.Graph
|
||
+import GHC.Cmm.Dataflow.Label
|
||
+import GHC.Cmm.DebugBlock
|
||
+import GHC.Cmm.Switch
|
||
+import GHC.Cmm.Utils
|
||
+import GHC.CmmToAsm.CPrim
|
||
+import GHC.CmmToAsm.Config
|
||
+import GHC.CmmToAsm.Format
|
||
+import GHC.CmmToAsm.Monad
|
||
+ ( NatM,
|
||
+ getBlockIdNat,
|
||
+ getConfig,
|
||
+ getDebugBlock,
|
||
+ getFileId,
|
||
+ getNewLabelNat,
|
||
+ getNewRegNat,
|
||
+ getPicBaseMaybeNat,
|
||
+ getPlatform,
|
||
+ )
|
||
+import GHC.CmmToAsm.PIC
|
||
+import GHC.CmmToAsm.RV64.Cond
|
||
+import GHC.CmmToAsm.RV64.Instr
|
||
+import GHC.CmmToAsm.RV64.Regs
|
||
+import GHC.CmmToAsm.Types
|
||
+import GHC.Data.FastString
|
||
+import GHC.Data.OrdList
|
||
+import GHC.Float
|
||
+import GHC.Platform
|
||
+import GHC.Platform.Reg
|
||
+import GHC.Platform.Regs
|
||
+import GHC.Prelude hiding (EQ)
|
||
+import GHC.Types.Basic
|
||
+import GHC.Types.ForeignCall
|
||
+import GHC.Types.SrcLoc (srcSpanFile, srcSpanStartCol, srcSpanStartLine)
|
||
+import GHC.Types.Tickish (GenTickish (..))
|
||
+import GHC.Types.Unique.Supply
|
||
+import GHC.Utils.Constants (debugIsOn)
|
||
+import GHC.Utils.Misc
|
||
+import GHC.Utils.Monad
|
||
+import GHC.Utils.Outputable
|
||
+import GHC.Utils.Panic
|
||
+
|
||
+-- For an overview of an NCG's structure, see Note [General layout of an NCG]
|
||
+
|
||
+cmmTopCodeGen ::
|
||
+ RawCmmDecl ->
|
||
+ NatM [NatCmmDecl RawCmmStatics Instr]
|
||
+-- Thus we'll have to deal with either CmmProc ...
|
||
+cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do
|
||
+ picBaseMb <- getPicBaseMaybeNat
|
||
+ when (isJust picBaseMb) $ panic "RV64.cmmTopCodeGen: Unexpected PIC base register (RISCV ISA does not define one)"
|
||
+
|
||
+ let blocks = toBlockListEntryFirst graph
|
||
+ (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen blocks
|
||
+
|
||
+ let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
|
||
+ tops = proc : concat statics
|
||
+
|
||
+ pure tops
|
||
+
|
||
+-- ... or CmmData.
|
||
+cmmTopCodeGen (CmmData sec dat) = pure [CmmData sec dat] -- no translation, we just use CmmStatic
|
||
+
|
||
+basicBlockCodeGen ::
|
||
+ Block CmmNode C C ->
|
||
+ NatM
|
||
+ ( [NatBasicBlock Instr],
|
||
+ [NatCmmDecl RawCmmStatics Instr]
|
||
+ )
|
||
+basicBlockCodeGen block = do
|
||
+ config <- getConfig
|
||
+ let (_, nodes, tail) = blockSplit block
|
||
+ id = entryLabel block
|
||
+ stmts = blockToList nodes
|
||
+
|
||
+ header_comment_instr
|
||
+ | debugIsOn =
|
||
+ unitOL
|
||
+ $ MULTILINE_COMMENT
|
||
+ ( text "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
|
||
+ $+$ withPprStyle defaultDumpStyle (pdoc (ncgPlatform config) block)
|
||
+ )
|
||
+ | otherwise = nilOL
|
||
+
|
||
+ -- Generate location directive `.loc` (DWARF debug location info)
|
||
+ loc_instrs <- genLocInstrs
|
||
+
|
||
+ -- Generate other instructions
|
||
+ mid_instrs <- stmtsToInstrs stmts
|
||
+ (!tail_instrs) <- stmtToInstrs tail
|
||
+
|
||
+ let instrs = header_comment_instr `appOL` loc_instrs `appOL` mid_instrs `appOL` tail_instrs
|
||
+
|
||
+ -- TODO: Then x86 backend runs @verifyBasicBlock@ here. How important it is to
|
||
+ -- have a valid CFG is an open question: This and the AArch64 and PPC NCGs
|
||
+ -- work fine without it.
|
||
+
|
||
+ -- Code generation may introduce new basic block boundaries, which are
|
||
+ -- indicated by the NEWBLOCK instruction. We must split up the instruction
|
||
+ -- stream into basic blocks again. Also, we extract LDATAs here too.
|
||
+ (top, other_blocks, statics) = foldrOL mkBlocks ([], [], []) instrs
|
||
+
|
||
+ return (BasicBlock id top : other_blocks, statics)
|
||
+ where
|
||
+ genLocInstrs :: NatM (OrdList Instr)
|
||
+ genLocInstrs = do
|
||
+ dbg <- getDebugBlock (entryLabel block)
|
||
+ case dblSourceTick =<< dbg of
|
||
+ Just (SourceNote span name) ->
|
||
+ do
|
||
+ fileId <- getFileId (srcSpanFile span)
|
||
+ let line = srcSpanStartLine span; col = srcSpanStartCol span
|
||
+ pure $ unitOL $ LOCATION fileId line col name
|
||
+ _ -> pure nilOL
|
||
+
|
||
+mkBlocks ::
|
||
+ Instr ->
|
||
+ ([Instr], [GenBasicBlock Instr], [GenCmmDecl RawCmmStatics h g]) ->
|
||
+ ([Instr], [GenBasicBlock Instr], [GenCmmDecl RawCmmStatics h g])
|
||
+mkBlocks (NEWBLOCK id) (instrs, blocks, statics) =
|
||
+ ([], BasicBlock id instrs : blocks, statics)
|
||
+mkBlocks (LDATA sec dat) (instrs, blocks, statics) =
|
||
+ (instrs, blocks, CmmData sec dat : statics)
|
||
+mkBlocks instr (instrs, blocks, statics) =
|
||
+ (instr : instrs, blocks, statics)
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+
|
||
+-- | Utilities
|
||
+
|
||
+-- | Annotate an `Instr` with a `SDoc` comment
|
||
+ann :: SDoc -> Instr -> Instr
|
||
+ann doc instr {- debugIsOn -} = ANN doc instr
|
||
+{-# INLINE ann #-}
|
||
+
|
||
+-- Using pprExpr will hide the AST, @ANN@ will end up in the assembly with
|
||
+-- -dppr-debug. The idea is that we can trivially see how a cmm expression
|
||
+-- ended up producing the assembly we see. By having the verbatim AST printed
|
||
+-- we can simply check the patterns that were matched to arrive at the assembly
|
||
+-- we generated.
|
||
+--
|
||
+-- pprExpr will hide a lot of noise of the underlying data structure and print
|
||
+-- the expression into something that can be easily read by a human. However
|
||
+-- going back to the exact CmmExpr representation can be laborious and adds
|
||
+-- indirections to find the matches that lead to the assembly.
|
||
+--
|
||
+-- An improvement could be to have
|
||
+--
|
||
+-- (pprExpr genericPlatform e) <> parens (text. show e)
|
||
+--
|
||
+-- to have the best of both worlds.
|
||
+--
|
||
+-- Note: debugIsOn is too restrictive, it only works for debug compilers.
|
||
+-- However, we do not only want to inspect this for debug compilers. Ideally
|
||
+-- we'd have a check for -dppr-debug here already, such that we don't even
|
||
+-- generate the ANN expressions. However, as they are lazy, they shouldn't be
|
||
+-- forced until we actually force them, and without -dppr-debug they should
|
||
+-- never end up being forced.
|
||
+annExpr :: CmmExpr -> Instr -> Instr
|
||
+annExpr e {- debugIsOn -} = ANN (text . show $ e)
|
||
+-- annExpr e instr {- debugIsOn -} = ANN (pprExpr genericPlatform e) instr
|
||
+-- annExpr _ instr = instr
|
||
+{-# INLINE annExpr #-}
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Generating a table-branch
|
||
+
|
||
+-- Note [RISCV64 Jump Tables]
|
||
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+--
|
||
+-- Jump tables are implemented by generating a table of relative addresses,
|
||
+-- where each entry is the relative offset to the target block from the first
|
||
+-- entry / table label (`generateJumpTableForInstr`). Using the jump table means
|
||
+-- loading the entry's value and jumping to the calculated absolute address
|
||
+-- (`genSwitch`).
|
||
+--
|
||
+-- For example, this Cmm switch
|
||
+--
|
||
+-- switch [1 .. 10] _s2wn::I64 {
|
||
+-- case 1 : goto c347;
|
||
+-- case 2 : goto c348;
|
||
+-- case 3 : goto c349;
|
||
+-- case 4 : goto c34a;
|
||
+-- case 5 : goto c34b;
|
||
+-- case 6 : goto c34c;
|
||
+-- case 7 : goto c34d;
|
||
+-- case 8 : goto c34e;
|
||
+-- case 9 : goto c34f;
|
||
+-- case 10 : goto c34g;
|
||
+-- } // CmmSwitch
|
||
+--
|
||
+-- leads to this jump table in Assembly
|
||
+--
|
||
+-- .section .rodata
|
||
+-- .balign 8
|
||
+-- .Ln34G:
|
||
+-- .quad 0
|
||
+-- .quad .Lc347-(.Ln34G)+0
|
||
+-- .quad .Lc348-(.Ln34G)+0
|
||
+-- .quad .Lc349-(.Ln34G)+0
|
||
+-- .quad .Lc34a-(.Ln34G)+0
|
||
+-- .quad .Lc34b-(.Ln34G)+0
|
||
+-- .quad .Lc34c-(.Ln34G)+0
|
||
+-- .quad .Lc34d-(.Ln34G)+0
|
||
+-- .quad .Lc34e-(.Ln34G)+0
|
||
+-- .quad .Lc34f-(.Ln34G)+0
|
||
+-- .quad .Lc34g-(.Ln34G)+0
|
||
+--
|
||
+-- and this indexing code where the jump should be done (register t0 contains
|
||
+-- the index)
|
||
+--
|
||
+-- addi t0, t0, 0 // silly move (ignore it)
|
||
+-- la t1, .Ln34G // load the table's address
|
||
+-- sll t0, t0, 3 // index * 8 -> offset in bytes
|
||
+-- add t0, t0, t1 // address of the table's entry
|
||
+-- ld t0, 0(t0) // load entry
|
||
+-- add t0, t0, t1 // relative to absolute address
|
||
+-- jalr zero, t0, 0 // jump to the block
|
||
+--
|
||
+-- In object code (disassembled) the table looks like
|
||
+--
|
||
+-- 0000000000000000 <.Ln34G>:
|
||
+-- ...
|
||
+-- 8: R_RISCV_ADD64 .Lc347
|
||
+-- 8: R_RISCV_SUB64 .Ln34G
|
||
+-- 10: R_RISCV_ADD64 .Lc348
|
||
+-- 10: R_RISCV_SUB64 .Ln34G
|
||
+-- 18: R_RISCV_ADD64 .Lc349
|
||
+-- 18: R_RISCV_SUB64 .Ln34G
|
||
+-- 20: R_RISCV_ADD64 .Lc34a
|
||
+-- 20: R_RISCV_SUB64 .Ln34G
|
||
+-- 28: R_RISCV_ADD64 .Lc34b
|
||
+-- 28: R_RISCV_SUB64 .Ln34G
|
||
+-- 30: R_RISCV_ADD64 .Lc34c
|
||
+-- 30: R_RISCV_SUB64 .Ln34G
|
||
+-- 38: R_RISCV_ADD64 .Lc34d
|
||
+-- 38: R_RISCV_SUB64 .Ln34G
|
||
+-- 40: R_RISCV_ADD64 .Lc34e
|
||
+-- 40: R_RISCV_SUB64 .Ln34G
|
||
+-- 48: R_RISCV_ADD64 .Lc34f
|
||
+-- 48: R_RISCV_SUB64 .Ln34G
|
||
+-- 50: R_RISCV_ADD64 .Lc34g
|
||
+-- 50: R_RISCV_SUB64 .Ln34G
|
||
+--
|
||
+-- I.e. the relative offset calculations are done by the linker via relocations.
|
||
+-- This seems to be PIC compatible; at least `scanelf` (pax-utils) does not
|
||
+-- complain.
|
||
+
|
||
+
|
||
+-- | Generate jump to jump table target
|
||
+--
|
||
+-- The index into the jump table is calulated by evaluating @expr@. The
|
||
+-- corresponding table entry contains the relative address to jump to (relative
|
||
+-- to the jump table's first entry / the table's own label).
|
||
+genSwitch :: NCGConfig -> CmmExpr -> SwitchTargets -> NatM InstrBlock
|
||
+genSwitch config expr targets = do
|
||
+ (reg, fmt1, e_code) <- getSomeReg indexExpr
|
||
+ let fmt = II64
|
||
+ targetReg <- getNewRegNat fmt
|
||
+ lbl <- getNewLabelNat
|
||
+ dynRef <- cmmMakeDynamicReference config DataReference lbl
|
||
+ (tableReg, fmt2, t_code) <- getSomeReg dynRef
|
||
+ let code =
|
||
+ toOL
|
||
+ [ COMMENT (text "indexExpr" <+> (text . show) indexExpr),
|
||
+ COMMENT (text "dynRef" <+> (text . show) dynRef)
|
||
+ ]
|
||
+ `appOL` e_code
|
||
+ `appOL` t_code
|
||
+ `appOL` toOL
|
||
+ [ COMMENT (ftext "Jump table for switch"),
|
||
+ -- index to offset into the table (relative to tableReg)
|
||
+ annExpr expr (SLL (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt1) reg) (OpImm (ImmInt 3))),
|
||
+ -- calculate table entry address
|
||
+ ADD (OpReg W64 targetReg) (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt2) tableReg),
|
||
+ -- load table entry (relative offset from tableReg (first entry) to target label)
|
||
+ LDRU II64 (OpReg W64 targetReg) (OpAddr (AddrRegImm targetReg (ImmInt 0))),
|
||
+ -- calculate absolute address of the target label
|
||
+ ADD (OpReg W64 targetReg) (OpReg W64 targetReg) (OpReg W64 tableReg),
|
||
+ -- prepare jump to target label
|
||
+ J_TBL ids (Just lbl) targetReg
|
||
+ ]
|
||
+ return code
|
||
+ where
|
||
+ -- See Note [Sub-word subtlety during jump-table indexing] in
|
||
+ -- GHC.CmmToAsm.X86.CodeGen for why we must first offset, then widen.
|
||
+ indexExpr0 = cmmOffset platform expr offset
|
||
+ -- We widen to a native-width register to sanitize the high bits
|
||
+ indexExpr =
|
||
+ CmmMachOp
|
||
+ (MO_UU_Conv expr_w (platformWordWidth platform))
|
||
+ [indexExpr0]
|
||
+ expr_w = cmmExprWidth platform expr
|
||
+ (offset, ids) = switchTargetsToTable targets
|
||
+ platform = ncgPlatform config
|
||
+
|
||
+-- | Generate jump table data (if required)
|
||
+--
|
||
+-- The idea is to emit one table entry per case. The entry is the relative
|
||
+-- address of the block to jump to (relative to the table's first entry /
|
||
+-- table's own label.) The calculation itself is done by the linker.
|
||
+generateJumpTableForInstr ::
|
||
+ NCGConfig ->
|
||
+ Instr ->
|
||
+ Maybe (NatCmmDecl RawCmmStatics Instr)
|
||
+generateJumpTableForInstr config (J_TBL ids (Just lbl) _) =
|
||
+ let jumpTable =
|
||
+ map jumpTableEntryRel ids
|
||
+ where
|
||
+ jumpTableEntryRel Nothing =
|
||
+ CmmStaticLit (CmmInt 0 (ncgWordWidth config))
|
||
+ jumpTableEntryRel (Just blockid) =
|
||
+ CmmStaticLit
|
||
+ ( CmmLabelDiffOff
|
||
+ blockLabel
|
||
+ lbl
|
||
+ 0
|
||
+ (ncgWordWidth config)
|
||
+ )
|
||
+ where
|
||
+ blockLabel = blockLbl blockid
|
||
+ in Just (CmmData (Section ReadOnlyData lbl) (CmmStaticsRaw lbl jumpTable))
|
||
+generateJumpTableForInstr _ _ = Nothing
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Top-level of the instruction selector
|
||
+
|
||
+stmtsToInstrs ::
|
||
+ -- | Cmm Statements
|
||
+ [CmmNode O O] ->
|
||
+ -- | Resulting instruction
|
||
+ NatM InstrBlock
|
||
+stmtsToInstrs stmts = concatOL <$> mapM stmtToInstrs stmts
|
||
+
|
||
+stmtToInstrs ::
|
||
+ CmmNode e x ->
|
||
+ -- | Resulting instructions
|
||
+ NatM InstrBlock
|
||
+stmtToInstrs stmt = do
|
||
+ config <- getConfig
|
||
+ platform <- getPlatform
|
||
+ case stmt of
|
||
+ CmmUnsafeForeignCall target result_regs args ->
|
||
+ genCCall target result_regs args
|
||
+ CmmComment s -> pure (unitOL (COMMENT (ftext s)))
|
||
+ CmmTick {} -> pure nilOL
|
||
+ CmmAssign reg src
|
||
+ | isFloatType ty -> assignReg_FltCode format reg src
|
||
+ | otherwise -> assignReg_IntCode format reg src
|
||
+ where
|
||
+ ty = cmmRegType reg
|
||
+ format = cmmTypeFormat ty
|
||
+ CmmStore addr src _alignment
|
||
+ | isFloatType ty -> assignMem_FltCode format addr src
|
||
+ | otherwise -> assignMem_IntCode format addr src
|
||
+ where
|
||
+ ty = cmmExprType platform src
|
||
+ format = cmmTypeFormat ty
|
||
+ CmmBranch id -> genBranch id
|
||
+ -- We try to arrange blocks such that the likely branch is the fallthrough
|
||
+ -- in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
|
||
+ CmmCondBranch arg true false _prediction ->
|
||
+ genCondBranch true false arg
|
||
+ CmmSwitch arg ids -> genSwitch config arg ids
|
||
+ CmmCall {cml_target = arg} -> genJump arg
|
||
+ CmmUnwind _regs -> pure nilOL
|
||
+ -- Intentionally not have a default case here: If anybody adds a
|
||
+ -- constructor, the compiler should force them to think about this here.
|
||
+ CmmForeignCall {} -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt)
|
||
+ CmmEntry {} -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt)
|
||
+
|
||
+--------------------------------------------------------------------------------
|
||
+
|
||
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
|
||
+--
|
||
+-- They are really trees of insns to facilitate fast appending, where a
|
||
+-- left-to-right traversal yields the insns in the correct order.
|
||
+type InstrBlock =
|
||
+ OrdList Instr
|
||
+
|
||
+-- | Register's passed up the tree.
|
||
+--
|
||
+-- If the stix code forces the register to live in a pre-decided machine
|
||
+-- register, it comes out as @Fixed@; otherwise, it comes out as @Any@, and the
|
||
+-- parent can decide which register to put it in.
|
||
+data Register
|
||
+ = Fixed Format Reg InstrBlock
|
||
+ | Any Format (Reg -> InstrBlock)
|
||
+
|
||
+-- | Sometimes we need to change the Format of a register. Primarily during
|
||
+-- conversion.
|
||
+swizzleRegisterRep :: Format -> Register -> Register
|
||
+swizzleRegisterRep format' (Fixed _format reg code) = Fixed format' reg code
|
||
+swizzleRegisterRep format' (Any _format codefn) = Any format' codefn
|
||
+
|
||
+-- | Grab a `Reg` for a `CmmReg`
|
||
+--
|
||
+-- `LocalReg`s are assigned virtual registers (`RegVirtual`), `GlobalReg`s are
|
||
+-- assigned real registers (`RegReal`). It is an error if a `GlobalReg` is not a
|
||
+-- STG register.
|
||
+getRegisterReg :: Platform -> CmmReg -> Reg
|
||
+getRegisterReg _ (CmmLocal (LocalReg u pk)) =
|
||
+ RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
|
||
+getRegisterReg platform (CmmGlobal mid) =
|
||
+ case globalRegMaybe platform (globalRegUseGlobalReg mid) of
|
||
+ Just reg -> RegReal reg
|
||
+ Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- General things for putting together code sequences
|
||
+
|
||
+-- | Compute an expression into any register
|
||
+getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock)
|
||
+getSomeReg expr = do
|
||
+ r <- getRegister expr
|
||
+ case r of
|
||
+ Any rep code -> do
|
||
+ newReg <- getNewRegNat rep
|
||
+ return (newReg, rep, code newReg)
|
||
+ Fixed rep reg code ->
|
||
+ return (reg, rep, code)
|
||
+
|
||
+-- | Compute an expression into any floating-point register
|
||
+--
|
||
+-- If the initial expression is not a floating-point expression, finally move
|
||
+-- the result into a floating-point register.
|
||
+getFloatReg :: (HasCallStack) => CmmExpr -> NatM (Reg, Format, InstrBlock)
|
||
+getFloatReg expr = do
|
||
+ r <- getRegister expr
|
||
+ case r of
|
||
+ Any rep code | isFloatFormat rep -> do
|
||
+ newReg <- getNewRegNat rep
|
||
+ return (newReg, rep, code newReg)
|
||
+ Any II32 code -> do
|
||
+ newReg <- getNewRegNat FF32
|
||
+ return (newReg, FF32, code newReg)
|
||
+ Any II64 code -> do
|
||
+ newReg <- getNewRegNat FF64
|
||
+ return (newReg, FF64, code newReg)
|
||
+ Any _w _code -> do
|
||
+ config <- getConfig
|
||
+ pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr)
|
||
+ -- can't do much for fixed.
|
||
+ Fixed rep reg code ->
|
||
+ return (reg, rep, code)
|
||
+
|
||
+-- | Map `CmmLit` to `OpImm`
|
||
+--
|
||
+-- N.B. this is a partial function, because not all `CmmLit`s have an immediate
|
||
+-- representation.
|
||
+litToImm' :: CmmLit -> Operand
|
||
+litToImm' = OpImm . litToImm
|
||
+
|
||
+-- | Compute a `CmmExpr` into a `Register`
|
||
+getRegister :: CmmExpr -> NatM Register
|
||
+getRegister e = do
|
||
+ config <- getConfig
|
||
+ getRegister' config (ncgPlatform config) e
|
||
+
|
||
+-- | The register width to be used for an operation on the given width
|
||
+-- operand.
|
||
+opRegWidth :: Width -> Width
|
||
+opRegWidth W64 = W64
|
||
+opRegWidth W32 = W32
|
||
+opRegWidth W16 = W32
|
||
+opRegWidth W8 = W32
|
||
+opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
|
||
+
|
||
+-- Note [Signed arithmetic on RISCV64]
|
||
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+-- Handling signed arithmetic on sub-word-size values on RISCV64 is a bit
|
||
+-- tricky as Cmm's type system does not capture signedness. While 32-bit values
|
||
+-- are fairly easy to handle due to RISCV64's 32-bit instruction variants
|
||
+-- (denoted by use of %wN registers), 16- and 8-bit values require quite some
|
||
+-- care.
|
||
+--
|
||
+-- We handle 16-and 8-bit values by using the 32-bit operations and
|
||
+-- sign-/zero-extending operands and truncate results as necessary. For
|
||
+-- simplicity we maintain the invariant that a register containing a
|
||
+-- sub-word-size value always contains the zero-extended form of that value
|
||
+-- in between operations.
|
||
+--
|
||
+-- For instance, consider the program,
|
||
+--
|
||
+-- test(bits64 buffer)
|
||
+-- bits8 a = bits8[buffer];
|
||
+-- bits8 b = %mul(a, 42);
|
||
+-- bits8 c = %not(b);
|
||
+-- bits8 d = %shrl(c, 4::bits8);
|
||
+-- return (d);
|
||
+-- }
|
||
+--
|
||
+-- This program begins by loading `a` from memory, for which we use a
|
||
+-- zero-extended byte-size load. We next sign-extend `a` to 32-bits, and use a
|
||
+-- 32-bit multiplication to compute `b`, and truncate the result back down to
|
||
+-- 8-bits.
|
||
+--
|
||
+-- Next we compute `c`: The `%not` requires no extension of its operands, but
|
||
+-- we must still truncate the result back down to 8-bits. Finally the `%shrl`
|
||
+-- requires no extension and no truncate since we can assume that
|
||
+-- `c` is zero-extended.
|
||
+--
|
||
+-- The "RISC-V Sign Extension Optimizations" LLVM tech talk presentation by
|
||
+-- Craig Topper covers possible future improvements
|
||
+-- (https://llvm.org/devmtg/2022-11/slides/TechTalk21-RISC-VSignExtensionOptimizations.pdf)
|
||
+--
|
||
+--
|
||
+-- Note [Handling PIC on RV64]
|
||
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+-- RV64 does not have a special PIC register, the general approach is to simply
|
||
+-- do PC-relative addressing or go through the GOT. There is assembly support
|
||
+-- for both.
|
||
+--
|
||
+-- rv64 assembly has a `la` (load address) pseudo-instruction, that allows
|
||
+-- loading a label's address into a register. The instruction is desugared into
|
||
+-- different addressing modes, e.g. PC-relative addressing:
|
||
+--
|
||
+-- 1: lui rd1, %pcrel_hi(label)
|
||
+-- addi rd1, %pcrel_lo(1b)
|
||
+--
|
||
+-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dModifiers.html,
|
||
+-- PIC can be enabled/disabled through
|
||
+--
|
||
+-- .option pic
|
||
+--
|
||
+-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dDirectives.html#RISC_002dV_002dDirectives
|
||
+--
|
||
+-- CmmGlobal @PicBaseReg@'s are generated in @GHC.CmmToAsm.PIC@ in the
|
||
+-- @cmmMakePicReference@. This is in turn called from @cmmMakeDynamicReference@
|
||
+-- also in @Cmm.CmmToAsm.PIC@ from where it is also exported. There are two
|
||
+-- callsites for this. One is in this module to produce the @target@ in @genCCall@
|
||
+-- the other is in @GHC.CmmToAsm@ in @cmmExprNative@.
|
||
+--
|
||
+-- Conceptually we do not want any special PicBaseReg to be used on RV64. If
|
||
+-- we want to distinguish between symbol loading, we need to address this through
|
||
+-- the way we load it, not through a register.
|
||
+--
|
||
+
|
||
+getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
|
||
+-- OPTIMIZATION WARNING: CmmExpr rewrites
|
||
+-- 1. Rewrite: Reg + (-n) => Reg - n
|
||
+-- TODO: this expression shouldn't even be generated to begin with.
|
||
+getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)])
|
||
+ | i < 0 =
|
||
+ getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
|
||
+getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)])
|
||
+ | i < 0 =
|
||
+ getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
|
||
+-- Generic case.
|
||
+getRegister' config plat expr =
|
||
+ case expr of
|
||
+ CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)) ->
|
||
+ -- See Note [Handling PIC on RV64]
|
||
+ pprPanic "getRegister': There's no PIC base register on RISCV" (ppr PicBaseReg)
|
||
+ CmmLit lit ->
|
||
+ case lit of
|
||
+ CmmInt 0 w -> pure $ Fixed (intFormat w) zeroReg nilOL
|
||
+ CmmInt i w ->
|
||
+ -- narrowU is important: Negative immediates may be
|
||
+ -- sign-extended on load!
|
||
+ let imm = OpImm . ImmInteger $ narrowU w i
|
||
+ in pure (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) imm)))
|
||
+ CmmFloat 0 w -> do
|
||
+ let op = litToImm' lit
|
||
+ pure (Any (floatFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) op)))
|
||
+ CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
|
||
+ CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
|
||
+ CmmFloat f W32 -> do
|
||
+ let word = castFloatToWord32 (fromRational f) :: Word32
|
||
+ intReg <- getNewRegNat (intFormat W32)
|
||
+ return
|
||
+ ( Any
|
||
+ (floatFormat W32)
|
||
+ ( \dst ->
|
||
+ toOL
|
||
+ [ annExpr expr
|
||
+ $ MOV (OpReg W32 intReg) (OpImm (ImmInteger (fromIntegral word))),
|
||
+ MOV (OpReg W32 dst) (OpReg W32 intReg)
|
||
+ ]
|
||
+ )
|
||
+ )
|
||
+ CmmFloat f W64 -> do
|
||
+ let word = castDoubleToWord64 (fromRational f) :: Word64
|
||
+ intReg <- getNewRegNat (intFormat W64)
|
||
+ return
|
||
+ ( Any
|
||
+ (floatFormat W64)
|
||
+ ( \dst ->
|
||
+ toOL
|
||
+ [ annExpr expr
|
||
+ $ MOV (OpReg W64 intReg) (OpImm (ImmInteger (fromIntegral word))),
|
||
+ MOV (OpReg W64 dst) (OpReg W64 intReg)
|
||
+ ]
|
||
+ )
|
||
+ )
|
||
+ CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
|
||
+ CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
|
||
+ CmmLabel lbl -> do
|
||
+ let op = OpImm (ImmCLbl lbl)
|
||
+ rep = cmmLitType plat lit
|
||
+ format = cmmTypeFormat rep
|
||
+ return (Any format (\dst -> unitOL $ annExpr expr (LDR format (OpReg (formatToWidth format) dst) op)))
|
||
+ CmmLabelOff lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
|
||
+ let op = OpImm (ImmIndex lbl off)
|
||
+ rep = cmmLitType plat lit
|
||
+ format = cmmTypeFormat rep
|
||
+ return (Any format (\dst -> unitOL $ LDR format (OpReg (formatToWidth format) dst) op))
|
||
+ CmmLabelOff lbl off -> do
|
||
+ let op = litToImm' (CmmLabel lbl)
|
||
+ rep = cmmLitType plat lit
|
||
+ format = cmmTypeFormat rep
|
||
+ width = typeWidth rep
|
||
+ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
|
||
+ return
|
||
+ ( Any
|
||
+ format
|
||
+ ( \dst ->
|
||
+ off_code
|
||
+ `snocOL` LDR format (OpReg (formatToWidth format) dst) op
|
||
+ `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)
|
||
+ )
|
||
+ )
|
||
+ CmmLabelDiffOff {} -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
|
||
+ CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
|
||
+ CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
|
||
+ CmmLoad mem rep _ -> do
|
||
+ let format = cmmTypeFormat rep
|
||
+ width = typeWidth rep
|
||
+ Amode addr addr_code <- getAmode plat width mem
|
||
+ case width of
|
||
+ w
|
||
+ | w <= W64 ->
|
||
+ -- Load without sign-extension. See Note [Signed arithmetic on RISCV64]
|
||
+ pure
|
||
+ ( Any
|
||
+ format
|
||
+ ( \dst ->
|
||
+ addr_code
|
||
+ `snocOL` LDRU format (OpReg width dst) (OpAddr addr)
|
||
+ )
|
||
+ )
|
||
+ _ ->
|
||
+ pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
|
||
+ CmmStackSlot _ _ ->
|
||
+ pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr)
|
||
+ CmmReg reg ->
|
||
+ return
|
||
+ ( Fixed
|
||
+ (cmmTypeFormat (cmmRegType reg))
|
||
+ (getRegisterReg plat reg)
|
||
+ nilOL
|
||
+ )
|
||
+ CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do
|
||
+ getRegister' config plat
|
||
+ $ CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
|
||
+ where
|
||
+ width = typeWidth (cmmRegType reg)
|
||
+ CmmRegOff reg off -> do
|
||
+ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
|
||
+ (reg, _format, code) <- getSomeReg $ CmmReg reg
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat width)
|
||
+ ( \dst ->
|
||
+ off_code
|
||
+ `appOL` code
|
||
+ `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r)
|
||
+ )
|
||
+ where
|
||
+ width = typeWidth (cmmRegType reg)
|
||
+
|
||
+ -- Handle MO_RelaxedRead as a normal CmmLoad, to allow
|
||
+ -- non-trivial addressing modes to be used.
|
||
+ CmmMachOp (MO_RelaxedRead w) [e] ->
|
||
+ getRegister (CmmLoad e (cmmBits w) NaturallyAligned)
|
||
+ -- for MachOps, see GHC.Cmm.MachOp
|
||
+ -- For CmmMachOp, see GHC.Cmm.Expr
|
||
+ CmmMachOp op [e] -> do
|
||
+ (reg, _format, code) <- getSomeReg e
|
||
+ case op of
|
||
+ MO_Not w -> return $ Any (intFormat w) $ \dst ->
|
||
+ let w' = opRegWidth w
|
||
+ in code
|
||
+ `snocOL`
|
||
+ -- pseudo instruction `not` is `xori rd, rs, -1`
|
||
+ ann (text "not") (XORI (OpReg w' dst) (OpReg w' reg) (OpImm (ImmInt (-1))))
|
||
+ `appOL` truncateReg w' w dst -- See Note [Signed arithmetic on RISCV64]
|
||
+ MO_S_Neg w -> negate code w reg
|
||
+ MO_F_Neg w ->
|
||
+ return
|
||
+ $ Any
|
||
+ (floatFormat w)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `snocOL` NEG (OpReg w dst) (OpReg w reg)
|
||
+ )
|
||
+ -- TODO: Can this case happen?
|
||
+ MO_SF_Conv from to | from < W32 -> do
|
||
+ -- extend to the smallest available representation
|
||
+ (reg_x, code_x) <- signExtendReg from W32 reg
|
||
+ pure
|
||
+ $ Any
|
||
+ (floatFormat to)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `appOL` code_x
|
||
+ `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg_x)) -- (Signed ConVerT Float)
|
||
+ )
|
||
+ MO_SF_Conv from to ->
|
||
+ pure
|
||
+ $ Any
|
||
+ (floatFormat to)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float)
|
||
+ )
|
||
+ MO_FS_Conv from to
|
||
+ | to < W32 ->
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat to)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `snocOL`
|
||
+ -- W32 is the smallest width to convert to. Decrease width afterwards.
|
||
+ annExpr expr (FCVT FloatToInt (OpReg W32 dst) (OpReg from reg))
|
||
+ `appOL` signExtendAdjustPrecission W32 to dst dst -- (float convert (-> zero) signed)
|
||
+ )
|
||
+ MO_FS_Conv from to ->
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat to)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `snocOL` annExpr expr (FCVT FloatToInt (OpReg to dst) (OpReg from reg))
|
||
+ `appOL` truncateReg from to dst -- (float convert (-> zero) signed)
|
||
+ )
|
||
+ MO_UU_Conv from to
|
||
+ | from <= to ->
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat to)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `snocOL` annExpr e (MOV (OpReg to dst) (OpReg from reg))
|
||
+ )
|
||
+ MO_UU_Conv from to ->
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat to)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg))
|
||
+ `appOL` truncateReg from to dst
|
||
+ )
|
||
+ MO_SS_Conv from to -> ss_conv from to reg code
|
||
+ MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` annExpr e (FCVT FloatToFloat (OpReg to dst) (OpReg from reg)))
|
||
+ -- Conversions
|
||
+ -- TODO: Duplication with MO_UU_Conv
|
||
+ MO_XX_Conv from to
|
||
+ | to < from ->
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat to)
|
||
+ ( \dst ->
|
||
+ code
|
||
+ `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg))
|
||
+ `appOL` truncateReg from to dst
|
||
+ )
|
||
+ MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e
|
||
+ MO_AlignmentCheck align wordWidth -> do
|
||
+ reg <- getRegister' config plat e
|
||
+ addAlignmentCheck align wordWidth reg
|
||
+ x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
|
||
+ where
|
||
+ -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits
|
||
+ -- See Note [Signed arithmetic on RISCV64].
|
||
+ negate code w reg = do
|
||
+ let w' = opRegWidth w
|
||
+ (reg', code_sx) <- signExtendReg w w' reg
|
||
+ return $ Any (intFormat w) $ \dst ->
|
||
+ code
|
||
+ `appOL` code_sx
|
||
+ `snocOL` NEG (OpReg w' dst) (OpReg w' reg')
|
||
+ `appOL` truncateReg w' w dst
|
||
+
|
||
+ ss_conv from to reg code
|
||
+ | from < to = do
|
||
+ pure $ Any (intFormat to) $ \dst ->
|
||
+ code
|
||
+ `appOL` signExtend from to reg dst
|
||
+ `appOL` truncateReg from to dst
|
||
+ | from > to =
|
||
+ pure $ Any (intFormat to) $ \dst ->
|
||
+ code
|
||
+ `appOL` toOL
|
||
+ [ ann
|
||
+ (text "MO_SS_Conv: narrow register signed" <+> ppr reg <+> ppr from <> text "->" <> ppr to)
|
||
+ (SLL (OpReg to dst) (OpReg from reg) (OpImm (ImmInt shift))),
|
||
+ -- signed right shift
|
||
+ SRA (OpReg to dst) (OpReg to dst) (OpImm (ImmInt shift))
|
||
+ ]
|
||
+ `appOL` truncateReg from to dst
|
||
+ | otherwise =
|
||
+ -- No conversion necessary: Just copy.
|
||
+ pure $ Any (intFormat from) $ \dst ->
|
||
+ code `snocOL` MOV (OpReg from dst) (OpReg from reg)
|
||
+ where
|
||
+ shift = 64 - (widthInBits from - widthInBits to)
|
||
+
|
||
+ -- Dyadic machops:
|
||
+ --
|
||
+ -- The general idea is:
|
||
+ -- compute x<i> <- x
|
||
+ -- compute x<j> <- y
|
||
+ -- OP x<r>, x<i>, x<j>
|
||
+ --
|
||
+ -- TODO: for now we'll only implement the 64bit versions. And rely on the
|
||
+ -- fallthrough to alert us if things go wrong!
|
||
+ -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring
|
||
+ -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg
|
||
+ CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
|
||
+ CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
|
||
+ -- 1. Compute Reg +/- n directly.
|
||
+ -- For Add/Sub we can directly encode 12bits, or 12bits lsl #12.
|
||
+ CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt n _)]
|
||
+ | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
|
||
+ where
|
||
+ -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
|
||
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
|
||
+ r' = getRegisterReg plat reg
|
||
+ CmmMachOp (MO_Sub w) [CmmReg reg, CmmLit (CmmInt n _)]
|
||
+ | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
|
||
+ where
|
||
+ -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
|
||
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
|
||
+ r' = getRegisterReg plat reg
|
||
+ CmmMachOp (MO_U_Quot w) [x, y] | w == W8 || w == W16 -> do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, format_y, code_y) <- getSomeReg y
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
|
||
+ `appOL` code_y
|
||
+ `appOL` truncateReg (formatToWidth format_y) w reg_y
|
||
+ `snocOL` annExpr expr (DIVU (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
|
||
+ )
|
||
+
|
||
+ -- 2. Shifts. x << n, x >> n.
|
||
+ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
|
||
+ | w == W32,
|
||
+ 0 <= n,
|
||
+ n < 32 -> do
|
||
+ (reg_x, _format_x, code_x) <- getSomeReg x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
||
+ `appOL` truncateReg w w dst
|
||
+ )
|
||
+ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
|
||
+ | w == W64,
|
||
+ 0 <= n,
|
||
+ n < 64 -> do
|
||
+ (reg_x, _format_x, code_x) <- getSomeReg x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
||
+ `appOL` truncateReg w w dst
|
||
+ )
|
||
+ CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` code_x'
|
||
+ `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
|
||
+ )
|
||
+ CmmMachOp (MO_S_Shr w) [x, y] -> do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, _format_y, code_y) <- getSomeReg y
|
||
+ (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` code_x'
|
||
+ `appOL` code_y
|
||
+ `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y))
|
||
+ )
|
||
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
||
+ | w == W8,
|
||
+ 0 <= n,
|
||
+ n < 8 -> do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
|
||
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
||
+ )
|
||
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
||
+ | w == W16,
|
||
+ 0 <= n,
|
||
+ n < 16 -> do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
|
||
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
||
+ )
|
||
+ CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, _format_y, code_y) <- getSomeReg y
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` code_y
|
||
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
|
||
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
|
||
+ )
|
||
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
||
+ | w == W32,
|
||
+ 0 <= n,
|
||
+ n < 32 -> do
|
||
+ (reg_x, _format_x, code_x) <- getSomeReg x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
||
+ )
|
||
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
||
+ | w == W64,
|
||
+ 0 <= n,
|
||
+ n < 64 -> do
|
||
+ (reg_x, _format_x, code_x) <- getSomeReg x
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
||
+ )
|
||
+
|
||
+ -- 3. Logic &&, ||
|
||
+ CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)]
|
||
+ | fitsIn12bitImm n ->
|
||
+ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
|
||
+ where
|
||
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
|
||
+ r' = getRegisterReg plat reg
|
||
+ CmmMachOp (MO_Or w) [CmmReg reg, CmmLit (CmmInt n _)]
|
||
+ | fitsIn12bitImm n ->
|
||
+ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORI (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
|
||
+ where
|
||
+ w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
|
||
+ r' = getRegisterReg plat reg
|
||
+
|
||
+ -- Generic binary case.
|
||
+ CmmMachOp op [x, y] -> do
|
||
+ let -- A "plain" operation.
|
||
+ bitOp w op = do
|
||
+ -- compute x<m> <- x
|
||
+ -- compute x<o> <- y
|
||
+ -- <OP> x<n>, x<m>, x<o>
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, format_y, code_y) <- getSomeReg y
|
||
+ massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOp: incompatible"
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` code_y
|
||
+ `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)
|
||
+ )
|
||
+
|
||
+ -- A (potentially signed) integer operation.
|
||
+ -- In the case of 8- and 16-bit signed arithmetic we must first
|
||
+ -- sign-extend both arguments to 32-bits.
|
||
+ -- See Note [Signed arithmetic on RISCV64].
|
||
+ intOp is_signed w op = do
|
||
+ -- compute x<m> <- x
|
||
+ -- compute x<o> <- y
|
||
+ -- <OP> x<n>, x<m>, x<o>
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, format_y, code_y) <- getSomeReg y
|
||
+ massertPpr (isIntFormat format_x && isIntFormat format_y) $ text "intOp: non-int"
|
||
+ -- This is the width of the registers on which the operation
|
||
+ -- should be performed.
|
||
+ let w' = opRegWidth w
|
||
+ signExt r
|
||
+ | not is_signed = return (r, nilOL)
|
||
+ | otherwise = signExtendReg w w' r
|
||
+ (reg_x_sx, code_x_sx) <- signExt reg_x
|
||
+ (reg_y_sx, code_y_sx) <- signExt reg_y
|
||
+ return $ Any (intFormat w) $ \dst ->
|
||
+ code_x
|
||
+ `appOL` code_y
|
||
+ `appOL`
|
||
+ -- sign-extend both operands
|
||
+ code_x_sx
|
||
+ `appOL` code_y_sx
|
||
+ `appOL` op (OpReg w' dst) (OpReg w' reg_x_sx) (OpReg w' reg_y_sx)
|
||
+ `appOL` truncateReg w' w dst -- truncate back to the operand's original width
|
||
+ floatOp w op = do
|
||
+ (reg_fx, format_x, code_fx) <- getFloatReg x
|
||
+ (reg_fy, format_y, code_fy) <- getFloatReg y
|
||
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatOp: non-float"
|
||
+ return
|
||
+ $ Any
|
||
+ (floatFormat w)
|
||
+ ( \dst ->
|
||
+ code_fx
|
||
+ `appOL` code_fy
|
||
+ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
|
||
+ )
|
||
+
|
||
+ -- need a special one for conditionals, as they return ints
|
||
+ floatCond w op = do
|
||
+ (reg_fx, format_x, code_fx) <- getFloatReg x
|
||
+ (reg_fy, format_y, code_fy) <- getFloatReg y
|
||
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatCond: non-float"
|
||
+ return
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_fx
|
||
+ `appOL` code_fy
|
||
+ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
|
||
+ )
|
||
+
|
||
+ case op of
|
||
+ -- Integer operations
|
||
+ -- Add/Sub should only be Integer Options.
|
||
+ MO_Add w -> intOp False w (\d x y -> unitOL $ annExpr expr (ADD d x y))
|
||
+ -- TODO: Handle sub-word case
|
||
+ MO_Sub w -> intOp False w (\d x y -> unitOL $ annExpr expr (SUB d x y))
|
||
+ -- N.B. We needn't sign-extend sub-word size (in)equality comparisons
|
||
+ -- since we don't care about ordering.
|
||
+ MO_Eq w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ))
|
||
+ MO_Ne w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y NE))
|
||
+ -- Signed multiply/divide
|
||
+ MO_Mul w -> intOp True w (\d x y -> unitOL $ annExpr expr (MUL d x y))
|
||
+ MO_S_MulMayOflo w -> do_mul_may_oflo w x y
|
||
+ MO_S_Quot w -> intOp True w (\d x y -> unitOL $ annExpr expr (DIV d x y))
|
||
+ MO_S_Rem w -> intOp True w (\d x y -> unitOL $ annExpr expr (REM d x y))
|
||
+ -- Unsigned multiply/divide
|
||
+ MO_U_Quot w -> intOp False w (\d x y -> unitOL $ annExpr expr (DIVU d x y))
|
||
+ MO_U_Rem w -> intOp False w (\d x y -> unitOL $ annExpr expr (REMU d x y))
|
||
+ -- Signed comparisons
|
||
+ MO_S_Ge w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGE))
|
||
+ MO_S_Le w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLE))
|
||
+ MO_S_Gt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGT))
|
||
+ MO_S_Lt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLT))
|
||
+ -- Unsigned comparisons
|
||
+ MO_U_Ge w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGE))
|
||
+ MO_U_Le w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULE))
|
||
+ MO_U_Gt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGT))
|
||
+ MO_U_Lt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULT))
|
||
+ -- Floating point arithmetic
|
||
+ MO_F_Add w -> floatOp w (\d x y -> unitOL $ annExpr expr (ADD d x y))
|
||
+ MO_F_Sub w -> floatOp w (\d x y -> unitOL $ annExpr expr (SUB d x y))
|
||
+ MO_F_Mul w -> floatOp w (\d x y -> unitOL $ annExpr expr (MUL d x y))
|
||
+ MO_F_Quot w -> floatOp w (\d x y -> unitOL $ annExpr expr (DIV d x y))
|
||
+ -- Floating point comparison
|
||
+ MO_F_Eq w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ))
|
||
+ MO_F_Ne w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y NE))
|
||
+ MO_F_Ge w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGE))
|
||
+ MO_F_Le w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FLE)) -- x <= y <=> y > x
|
||
+ MO_F_Gt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGT))
|
||
+ MO_F_Lt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FLT)) -- x < y <=> y >= x
|
||
+
|
||
+ -- Bitwise operations
|
||
+ MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y))
|
||
+ MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y))
|
||
+ MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y))
|
||
+ MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y))
|
||
+ MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
|
||
+ MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
|
||
+ op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
|
||
+
|
||
+ -- Generic ternary case.
|
||
+ CmmMachOp op [x, y, z] ->
|
||
+ case op of
|
||
+ -- Floating-point fused multiply-add operations
|
||
+ --
|
||
+ -- x86 fmadd x * y + z <=> RISCV64 fmadd : d = r1 * r2 + r3
|
||
+ -- x86 fmsub x * y - z <=> RISCV64 fnmsub: d = r1 * r2 - r3
|
||
+ -- x86 fnmadd - x * y + z <=> RISCV64 fmsub : d = - r1 * r2 + r3
|
||
+ -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd: d = - r1 * r2 - r3
|
||
+ MO_FMA var w -> case var of
|
||
+ FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a)
|
||
+ FMSub -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
|
||
+ FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a)
|
||
+ FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a)
|
||
+ _ ->
|
||
+ pprPanic "getRegister' (unhandled ternary CmmMachOp): "
|
||
+ $ pprMachOp op
|
||
+ <+> text "in"
|
||
+ <+> pdoc plat expr
|
||
+ where
|
||
+ float3Op w op = do
|
||
+ (reg_fx, format_x, code_fx) <- getFloatReg x
|
||
+ (reg_fy, format_y, code_fy) <- getFloatReg y
|
||
+ (reg_fz, format_z, code_fz) <- getFloatReg z
|
||
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y && isFloatFormat format_z)
|
||
+ $ text "float3Op: non-float"
|
||
+ pure
|
||
+ $ Any (floatFormat w)
|
||
+ $ \dst ->
|
||
+ code_fx
|
||
+ `appOL` code_fy
|
||
+ `appOL` code_fz
|
||
+ `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy) (OpReg w reg_fz)
|
||
+ CmmMachOp _op _xs ->
|
||
+ pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr)
|
||
+ where
|
||
+ isNbitEncodeable :: Int -> Integer -> Bool
|
||
+ isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
|
||
+ -- N.B. MUL does not set the overflow flag.
|
||
+ -- Return 0 when the operation cannot overflow, /= 0 otherwise
|
||
+ do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
|
||
+ do_mul_may_oflo w _x _y | w > W64 = pprPanic "Cannot multiply larger than 64bit" (ppr w)
|
||
+ do_mul_may_oflo w@W64 x y = do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, format_y, code_y) <- getSomeReg y
|
||
+ -- TODO: Can't we clobber reg_x and reg_y to save registers?
|
||
+ lo <- getNewRegNat II64
|
||
+ hi <- getNewRegNat II64
|
||
+ -- TODO: Overhaul CSET: 3rd operand isn't needed for SNEZ
|
||
+ let nonSense = OpImm (ImmInt 0)
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x
|
||
+ `appOL` code_y
|
||
+ `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y
|
||
+ `appOL` toOL
|
||
+ [ annExpr expr (MULH (OpReg w hi) (OpReg w reg_x) (OpReg w reg_y)),
|
||
+ MUL (OpReg w lo) (OpReg w reg_x) (OpReg w reg_y),
|
||
+ SRA (OpReg w lo) (OpReg w lo) (OpImm (ImmInt (widthInBits W64 - 1))),
|
||
+ ann
|
||
+ (text "Set flag if result of MULH contains more than sign bits.")
|
||
+ (XOR (OpReg w hi) (OpReg w hi) (OpReg w lo)),
|
||
+ CSET (OpReg w dst) (OpReg w hi) nonSense NE
|
||
+ ]
|
||
+ )
|
||
+ do_mul_may_oflo w x y = do
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, format_y, code_y) <- getSomeReg y
|
||
+ let width_x = formatToWidth format_x
|
||
+ width_y = formatToWidth format_y
|
||
+ if w > width_x && w > width_y
|
||
+ then
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ -- 8bit * 8bit cannot overflow 16bit
|
||
+ -- 16bit * 16bit cannot overflow 32bit
|
||
+ -- 32bit * 32bit cannot overflow 64bit
|
||
+ unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 0)))
|
||
+ )
|
||
+ else do
|
||
+ let use32BitMul = w <= W32 && width_x <= W32 && width_y <= W32
|
||
+ nonSense = OpImm (ImmInt 0)
|
||
+ if use32BitMul
|
||
+ then do
|
||
+ narrowedReg <- getNewRegNat II64
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ code_x
|
||
+ `appOL` signExtend (formatToWidth format_x) W32 reg_x reg_x
|
||
+ `appOL` code_y
|
||
+ `appOL` signExtend (formatToWidth format_y) W32 reg_y reg_y
|
||
+ `snocOL` annExpr expr (MUL (OpReg W32 dst) (OpReg W32 reg_x) (OpReg W32 reg_y))
|
||
+ `appOL` signExtendAdjustPrecission W32 w dst narrowedReg
|
||
+ `appOL` toOL
|
||
+ [ ann
|
||
+ (text "Check if the multiplied value fits in the narrowed register")
|
||
+ (SUB (OpReg w dst) (OpReg w dst) (OpReg w narrowedReg)),
|
||
+ CSET (OpReg w dst) (OpReg w dst) nonSense NE
|
||
+ ]
|
||
+ )
|
||
+ else
|
||
+ pure
|
||
+ $ Any
|
||
+ (intFormat w)
|
||
+ ( \dst ->
|
||
+ -- Do not handle this unlikely case. Just tell that it may overflow.
|
||
+ unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 1)))
|
||
+ )
|
||
+
|
||
+-- | Instructions to sign-extend the value in the given register from width @w@
|
||
+-- up to width @w'@.
|
||
+signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr)
|
||
+signExtendReg w _w' r | w == W64 = pure (r, nilOL)
|
||
+signExtendReg w w' r = do
|
||
+ r' <- getNewRegNat (intFormat w')
|
||
+ let instrs = signExtend w w' r r'
|
||
+ pure (r', instrs)
|
||
+
|
||
+-- | Sign extends to 64bit, if needed
|
||
+--
|
||
+-- Source `Reg` @r@ stays untouched, while the conversion happens on destination
|
||
+-- `Reg` @r'@.
|
||
+signExtend :: Width -> Width -> Reg -> Reg -> OrdList Instr
|
||
+signExtend w w' _r _r' | w > w' = pprPanic "This is not a sign extension, but a truncation." $ ppr w <> text "->" <+> ppr w'
|
||
+signExtend w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w'
|
||
+signExtend w w' r r' | w == W64 && w' == W64 && r == r' = nilOL
|
||
+signExtend w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r)
|
||
+signExtend w w' r r'
|
||
+ | w == W32 && w' == W64 =
|
||
+ unitOL
|
||
+ $ ann
|
||
+ (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w')
|
||
+ -- `ADDIW r r 0` is the pseudo-op SEXT.W
|
||
+ (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0)))
|
||
+signExtend w w' r r' =
|
||
+ toOL
|
||
+ [ ann
|
||
+ (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
|
||
+ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
|
||
+ -- signed (arithmetic) right shift
|
||
+ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
|
||
+ ]
|
||
+ where
|
||
+ shift = 64 - widthInBits w
|
||
+
|
||
+-- | Sign extends to 64bit, if needed and reduces the precission to the target `Width` (@w'@)
|
||
+--
|
||
+-- Source `Reg` @r@ stays untouched, while the conversion happens on destination
|
||
+-- `Reg` @r'@.
|
||
+signExtendAdjustPrecission :: Width -> Width -> Reg -> Reg -> OrdList Instr
|
||
+signExtendAdjustPrecission w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w'
|
||
+signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 && r == r' = nilOL
|
||
+signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r)
|
||
+signExtendAdjustPrecission w w' r r'
|
||
+ | w == W32 && w' == W64 =
|
||
+ unitOL
|
||
+ $ ann
|
||
+ (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w')
|
||
+ -- `ADDIW r r 0` is the pseudo-op SEXT.W
|
||
+ (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0)))
|
||
+signExtendAdjustPrecission w w' r r'
|
||
+ | w > w' =
|
||
+ toOL
|
||
+ [ ann
|
||
+ (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
|
||
+ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
|
||
+ -- signed (arithmetic) right shift
|
||
+ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
|
||
+ ]
|
||
+ where
|
||
+ shift = 64 - widthInBits w'
|
||
+signExtendAdjustPrecission w w' r r' =
|
||
+ toOL
|
||
+ [ ann
|
||
+ (text "sign extend register" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
|
||
+ (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
|
||
+ -- signed (arithmetic) right shift
|
||
+ SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
|
||
+ ]
|
||
+ where
|
||
+ shift = 64 - widthInBits w
|
||
+
|
||
+-- | Instructions to truncate the value in the given register from width @w@
|
||
+-- to width @w'@.
|
||
+--
|
||
+-- In other words, it just cuts the width out of the register. N.B.: This
|
||
+-- ignores signedness (no sign extension takes place)!
|
||
+truncateReg :: Width -> Width -> Reg -> OrdList Instr
|
||
+truncateReg _w w' _r | w' == W64 = nilOL
|
||
+truncateReg _w w' r | w' > W64 = pprPanic "Cannot truncate to width bigger than register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w'
|
||
+truncateReg w _w' r | w > W64 = pprPanic "Unexpected register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w
|
||
+truncateReg w w' r =
|
||
+ toOL
|
||
+ [ ann
|
||
+ (text "truncate register" <+> ppr r <+> ppr w <> text "->" <> ppr w')
|
||
+ (SLL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))),
|
||
+ -- SHL ignores signedness!
|
||
+ SRL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))
|
||
+ ]
|
||
+ where
|
||
+ shift = 64 - widthInBits w'
|
||
+
|
||
+-- | Given a 'Register', produce a new 'Register' with an instruction block
|
||
+-- which will check the value for alignment. Used for @-falignment-sanitisation@.
|
||
+addAlignmentCheck :: Int -> Width -> Register -> NatM Register
|
||
+addAlignmentCheck align wordWidth reg = do
|
||
+ jumpReg <- getNewRegNat II64
|
||
+ cmpReg <- getNewRegNat II64
|
||
+ okayLblId <- getBlockIdNat
|
||
+
|
||
+ pure $ case reg of
|
||
+ Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt jumpReg cmpReg okayLblId reg)
|
||
+ Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt jumpReg cmpReg okayLblId reg)
|
||
+ where
|
||
+ check :: Format -> Reg -> Reg -> BlockId -> Reg -> InstrBlock
|
||
+ check fmt jumpReg cmpReg okayLblId reg =
|
||
+ let width = formatToWidth fmt
|
||
+ in assert (not $ isFloatFormat fmt)
|
||
+ $ toOL
|
||
+ [ ann
|
||
+ (text "Alignment check - alignment: " <> int align <> text ", word width: " <> text (show wordWidth))
|
||
+ (AND (OpReg width cmpReg) (OpReg width reg) (OpImm $ ImmInt $ align - 1)),
|
||
+ BCOND EQ (OpReg width cmpReg) zero (TBlock okayLblId),
|
||
+ COMMENT (text "Alignment check failed"),
|
||
+ LDR II64 (OpReg W64 jumpReg) (OpImm $ ImmCLbl mkBadAlignmentLabel),
|
||
+ B (TReg jumpReg),
|
||
+ NEWBLOCK okayLblId
|
||
+ ]
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- The 'Amode' type: Memory addressing modes passed up the tree.
|
||
+data Amode = Amode AddrMode InstrBlock
|
||
+
|
||
+-- | Provide the value of a `CmmExpr` with an `Amode`
|
||
+--
|
||
+-- N.B. this function should be used to provide operands to load and store
|
||
+-- instructions with signed 12bit wide immediates (S & I types). For other
|
||
+-- immediate sizes and formats (e.g. B type uses multiples of 2) this function
|
||
+-- would need to be adjusted.
|
||
+getAmode ::
|
||
+ Platform ->
|
||
+ -- | width of loaded value
|
||
+ Width ->
|
||
+ CmmExpr ->
|
||
+ NatM Amode
|
||
+-- TODO: Specialize stuff we can destructure here.
|
||
+
|
||
+-- LDR/STR: Immediate can be represented with 12bits
|
||
+getAmode platform w (CmmRegOff reg off)
|
||
+ | w <= W64,
|
||
+ fitsIn12bitImm off =
|
||
+ return $ Amode (AddrRegImm reg' off') nilOL
|
||
+ where
|
||
+ reg' = getRegisterReg platform reg
|
||
+ off' = ImmInt off
|
||
+
|
||
+-- For Stores we often see something like this:
|
||
+-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2)
|
||
+-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n ]
|
||
+-- for `n` in range.
|
||
+getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
|
||
+ | fitsIn12bitImm off =
|
||
+ do
|
||
+ (reg, _format, code) <- getSomeReg expr
|
||
+ return $ Amode (AddrRegImm reg (ImmInteger off)) code
|
||
+getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
|
||
+ | fitsIn12bitImm (-off) =
|
||
+ do
|
||
+ (reg, _format, code) <- getSomeReg expr
|
||
+ return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
|
||
+
|
||
+-- Generic case
|
||
+getAmode _platform _ expr =
|
||
+ do
|
||
+ (reg, _format, code) <- getSomeReg expr
|
||
+ return $ Amode (AddrReg reg) code
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Generating assignments
|
||
+
|
||
+-- Assignments are really at the heart of the whole code generation
|
||
+-- business. Almost all top-level nodes of any real importance are
|
||
+-- assignments, which correspond to loads, stores, or register
|
||
+-- transfers. If we're really lucky, some of the register transfers
|
||
+-- will go away, because we can use the destination register to
|
||
+-- complete the code generation for the right hand side. This only
|
||
+-- fails when the right hand side is forced into a fixed register
|
||
+-- (e.g. the result of a call).
|
||
+
|
||
+assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
|
||
+assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
|
||
+assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
|
||
+assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
|
||
+assignMem_IntCode rep addrE srcE =
|
||
+ do
|
||
+ (src_reg, _format, code) <- getSomeReg srcE
|
||
+ platform <- getPlatform
|
||
+ let w = formatToWidth rep
|
||
+ Amode addr addr_code <- getAmode platform w addrE
|
||
+ return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
|
||
+ `consOL` ( code
|
||
+ `appOL` addr_code
|
||
+ `snocOL` STR rep (OpReg w src_reg) (OpAddr addr)
|
||
+ )
|
||
+
|
||
+assignReg_IntCode _ reg src =
|
||
+ do
|
||
+ platform <- getPlatform
|
||
+ let dst = getRegisterReg platform reg
|
||
+ r <- getRegister src
|
||
+ return $ case r of
|
||
+ Any _ code ->
|
||
+ COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src)))
|
||
+ `consOL` code dst
|
||
+ Fixed format freg fcode ->
|
||
+ COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src)))
|
||
+ `consOL` ( fcode
|
||
+ `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)
|
||
+ )
|
||
+
|
||
+-- Let's treat Floating point stuff
|
||
+-- as integer code for now. Opaque.
|
||
+assignMem_FltCode = assignMem_IntCode
|
||
+
|
||
+assignReg_FltCode = assignReg_IntCode
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Jumps
|
||
+-- AArch64 has 26bits for targets, whereas RiscV only has 20.
|
||
+-- Thus we need to distinguish between far (outside of the)
|
||
+-- current compilation unit. And regular branches.
|
||
+-- RiscV has ±2MB of displacement, whereas AArch64 has ±128MB.
|
||
+-- Thus for most branches we can get away with encoding it
|
||
+-- directly in the instruction rather than always loading the
|
||
+-- address into a register and then using that to jump.
|
||
+-- Under the assumption that our linked build product is less than
|
||
+-- ~2*128MB of TEXT, and there are no jump that span the whole
|
||
+-- TEXT segment.
|
||
+-- Something where riscv's compressed instruction might come in
|
||
+-- handy.
|
||
+genJump :: CmmExpr {-the branch target-} -> NatM InstrBlock
|
||
+genJump expr = do
|
||
+ (target, _format, code) <- getSomeReg expr
|
||
+ return (code `appOL` unitOL (annExpr expr (B (TReg target))))
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Unconditional branches
|
||
+genBranch :: BlockId -> NatM InstrBlock
|
||
+genBranch = return . toOL . mkJumpInstr
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Conditional branches
|
||
+genCondJump ::
|
||
+ BlockId ->
|
||
+ CmmExpr ->
|
||
+ NatM InstrBlock
|
||
+genCondJump bid expr = do
|
||
+ case expr of
|
||
+ -- Optimized == 0 case.
|
||
+ CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
|
||
+ (reg_x, _format_x, code_x) <- getSomeReg x
|
||
+ return $ code_x `snocOL` annExpr expr (BCOND EQ zero (OpReg w reg_x) (TBlock bid))
|
||
+
|
||
+ -- Optimized /= 0 case.
|
||
+ CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
|
||
+ (reg_x, _format_x, code_x) <- getSomeReg x
|
||
+ return $ code_x `snocOL` annExpr expr (BCOND NE zero (OpReg w reg_x) (TBlock bid))
|
||
+
|
||
+ -- Generic case.
|
||
+ CmmMachOp mop [x, y] -> do
|
||
+ let ubcond w cmp = do
|
||
+ -- compute both sides.
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, format_y, code_y) <- getSomeReg y
|
||
+ let x' = OpReg w reg_x
|
||
+ y' = OpReg w reg_y
|
||
+ return $ case w of
|
||
+ w
|
||
+ | w == W8 || w == W16 ->
|
||
+ code_x
|
||
+ `appOL` truncateReg (formatToWidth format_x) w reg_x
|
||
+ `appOL` code_y
|
||
+ `appOL` truncateReg (formatToWidth format_y) w reg_y
|
||
+ `appOL` code_y
|
||
+ `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid))
|
||
+ _ ->
|
||
+ code_x
|
||
+ `appOL` code_y
|
||
+ `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid))
|
||
+
|
||
+ sbcond w cmp = do
|
||
+ -- compute both sides.
|
||
+ (reg_x, format_x, code_x) <- getSomeReg x
|
||
+ (reg_y, format_y, code_y) <- getSomeReg y
|
||
+ let x' = OpReg w reg_x
|
||
+ y' = OpReg w reg_y
|
||
+ return $ case w of
|
||
+ w
|
||
+ | w `elem` [W8, W16, W32] ->
|
||
+ code_x
|
||
+ `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x
|
||
+ `appOL` code_y
|
||
+ `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y
|
||
+ `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid)))
|
||
+ _ -> code_x `appOL` code_y `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid)))
|
||
+
|
||
+ fbcond w cmp = do
|
||
+ -- ensure we get float regs
|
||
+ (reg_fx, _format_fx, code_fx) <- getFloatReg x
|
||
+ (reg_fy, _format_fy, code_fy) <- getFloatReg y
|
||
+ condOpReg <- OpReg W64 <$> getNewRegNat II64
|
||
+ oneReg <- getNewRegNat II64
|
||
+ return $ code_fx
|
||
+ `appOL` code_fy
|
||
+ `snocOL` annExpr expr (CSET condOpReg (OpReg w reg_fx) (OpReg w reg_fy) cmp)
|
||
+ `snocOL` MOV (OpReg W64 oneReg) (OpImm (ImmInt 1))
|
||
+ `snocOL` BCOND EQ condOpReg (OpReg w oneReg) (TBlock bid)
|
||
+
|
||
+ case mop of
|
||
+ MO_F_Eq w -> fbcond w EQ
|
||
+ MO_F_Ne w -> fbcond w NE
|
||
+ MO_F_Gt w -> fbcond w FGT
|
||
+ MO_F_Ge w -> fbcond w FGE
|
||
+ MO_F_Lt w -> fbcond w FLT
|
||
+ MO_F_Le w -> fbcond w FLE
|
||
+ MO_Eq w -> sbcond w EQ
|
||
+ MO_Ne w -> sbcond w NE
|
||
+ MO_S_Gt w -> sbcond w SGT
|
||
+ MO_S_Ge w -> sbcond w SGE
|
||
+ MO_S_Lt w -> sbcond w SLT
|
||
+ MO_S_Le w -> sbcond w SLE
|
||
+ MO_U_Gt w -> ubcond w UGT
|
||
+ MO_U_Ge w -> ubcond w UGE
|
||
+ MO_U_Lt w -> ubcond w ULT
|
||
+ MO_U_Le w -> ubcond w ULE
|
||
+ _ -> pprPanic "RV64.genCondJump:case mop: " (text $ show expr)
|
||
+ _ -> pprPanic "RV64.genCondJump: " (text $ show expr)
|
||
+
|
||
+-- | Generate conditional branching instructions
|
||
+--
|
||
+-- This is basically an "if with else" statement.
|
||
+genCondBranch ::
|
||
+ -- | the true branch target
|
||
+ BlockId ->
|
||
+ -- | the false branch target
|
||
+ BlockId ->
|
||
+ -- | the condition on which to branch
|
||
+ CmmExpr ->
|
||
+ -- | Instructions
|
||
+ NatM InstrBlock
|
||
+genCondBranch true false expr =
|
||
+ appOL
|
||
+ <$> genCondJump true expr
|
||
+ <*> genBranch false
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Generating C calls
|
||
+
|
||
+-- | Generate a call to a C function.
|
||
+--
|
||
+-- - Integer values are passed in GP registers a0-a7.
|
||
+-- - Floating point values are passed in FP registers fa0-fa7.
|
||
+-- - If there are no free floating point registers, the FP values are passed in GP registers.
|
||
+-- - If all GP registers are taken, the values are spilled as whole words (!) onto the stack.
|
||
+-- - For integers/words, the return value is in a0.
|
||
+-- - The return value is in fa0 if the return type is a floating point value.
|
||
+genCCall ::
|
||
+ ForeignTarget -> -- function to call
|
||
+ [CmmFormal] -> -- where to put the result
|
||
+ [CmmActual] -> -- arguments (of mixed type)
|
||
+ NatM InstrBlock
|
||
+-- TODO: Specialize where we can.
|
||
+-- Generic impl
|
||
+genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
|
||
+ -- we want to pass arg_regs into allArgRegs
|
||
+ -- The target :: ForeignTarget call can either
|
||
+ -- be a foreign procedure with an address expr
|
||
+ -- and a calling convention.
|
||
+ (call_target_reg, call_target_code) <-
|
||
+ -- Compute the address of the call target into a register. This
|
||
+ -- addressing enables us to jump through the whole address space
|
||
+ -- without further ado. PC-relative addressing would involve
|
||
+ -- instructions to do similar, though.
|
||
+ do
|
||
+ (reg, _format, reg_code) <- getSomeReg expr
|
||
+ pure (reg, reg_code)
|
||
+ -- compute the code and register logic for all arg_regs.
|
||
+ -- this will give us the format information to match on.
|
||
+ arg_regs' <- mapM getSomeReg arg_regs
|
||
+
|
||
+ -- Now this is stupid. Our Cmm expressions doesn't carry the proper sizes
|
||
+ -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in
|
||
+ -- STG; this then breaks packing of stack arguments, if we need to pack
|
||
+ -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type
|
||
+ -- in Cmm proper. Option two, which we choose here is to use extended Hint
|
||
+ -- information to contain the size information and use that when packing
|
||
+ -- arguments, spilled onto the stack.
|
||
+ let (_res_hints, arg_hints) = foreignTargetHints target
|
||
+ arg_regs'' = zipWith (\(r, f, c) h -> (r, f, h, c)) arg_regs' arg_hints
|
||
+
|
||
+ (stackSpaceWords, passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
|
||
+
|
||
+ readResultsCode <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
|
||
+
|
||
+ let moveStackDown 0 =
|
||
+ toOL
|
||
+ [ PUSH_STACK_FRAME,
|
||
+ DELTA (-16)
|
||
+ ]
|
||
+ moveStackDown i | odd i = moveStackDown (i + 1)
|
||
+ moveStackDown i =
|
||
+ toOL
|
||
+ [ PUSH_STACK_FRAME,
|
||
+ SUB (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))),
|
||
+ DELTA (-8 * i - 16)
|
||
+ ]
|
||
+ moveStackUp 0 =
|
||
+ toOL
|
||
+ [ POP_STACK_FRAME,
|
||
+ DELTA 0
|
||
+ ]
|
||
+ moveStackUp i | odd i = moveStackUp (i + 1)
|
||
+ moveStackUp i =
|
||
+ toOL
|
||
+ [ ADD (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))),
|
||
+ POP_STACK_FRAME,
|
||
+ DELTA 0
|
||
+ ]
|
||
+
|
||
+ let code =
|
||
+ call_target_code -- compute the label (possibly into a register)
|
||
+ `appOL` moveStackDown stackSpaceWords
|
||
+ `appOL` passArgumentsCode -- put the arguments into x0, ...
|
||
+ `snocOL` BL call_target_reg passRegs -- branch and link (C calls aren't tail calls, but return)
|
||
+ `appOL` readResultsCode -- parse the results into registers
|
||
+ `appOL` moveStackUp stackSpaceWords
|
||
+ return code
|
||
+ where
|
||
+ -- Implementiation of the RISCV ABI calling convention.
|
||
+ -- https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/948463cd5dbebea7c1869e20146b17a2cc8fda2f/riscv-cc.adoc#integer-calling-convention
|
||
+ passArguments :: [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
|
||
+ -- Base case: no more arguments to pass (left)
|
||
+ passArguments _ _ [] stackSpaceWords accumRegs accumCode = return (stackSpaceWords, accumRegs, accumCode)
|
||
+ -- Still have GP regs, and we want to pass an GP argument.
|
||
+ passArguments (gpReg : gpRegs) fpRegs ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode | isIntFormat format = do
|
||
+ -- RISCV64 Integer Calling Convention: "When passed in registers or on the
|
||
+ -- stack, integer scalars narrower than XLEN bits are widened according to
|
||
+ -- the sign of their type up to 32 bits, then sign-extended to XLEN bits."
|
||
+ let w = formatToWidth format
|
||
+ assignArg =
|
||
+ if hint == SignedHint
|
||
+ then
|
||
+ COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r)
|
||
+ `consOL` signExtend w W64 r gpReg
|
||
+ else
|
||
+ toOL
|
||
+ [ COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r),
|
||
+ MOV (OpReg w gpReg) (OpReg w r)
|
||
+ ]
|
||
+ accumCode' =
|
||
+ accumCode
|
||
+ `appOL` code_r
|
||
+ `appOL` assignArg
|
||
+ passArguments gpRegs fpRegs args stackSpaceWords (gpReg : accumRegs) accumCode'
|
||
+
|
||
+ -- Still have FP regs, and we want to pass an FP argument.
|
||
+ passArguments gpRegs (fpReg : fpRegs) ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do
|
||
+ let w = formatToWidth format
|
||
+ mov = MOV (OpReg w fpReg) (OpReg w r)
|
||
+ accumCode' =
|
||
+ accumCode
|
||
+ `appOL` code_r
|
||
+ `snocOL` ann (text "Pass fp argument: " <> ppr r) mov
|
||
+ passArguments gpRegs fpRegs args stackSpaceWords (fpReg : accumRegs) accumCode'
|
||
+
|
||
+ -- No mor regs left to pass. Must pass on stack.
|
||
+ passArguments [] [] ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode = do
|
||
+ let w = formatToWidth format
|
||
+ spOffet = 8 * stackSpaceWords
|
||
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
|
||
+ stackCode =
|
||
+ if hint == SignedHint
|
||
+ then
|
||
+ code_r
|
||
+ `appOL` signExtend w W64 r tmpReg
|
||
+ `snocOL` ann (text "Pass signed argument (size " <> ppr w <> text ") on the stack: " <> ppr tmpReg) str
|
||
+ else
|
||
+ code_r
|
||
+ `snocOL` ann (text "Pass unsigned argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
|
||
+ passArguments [] [] args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode)
|
||
+
|
||
+ -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then.
|
||
+ passArguments [] fpRegs ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isIntFormat format = do
|
||
+ let w = formatToWidth format
|
||
+ spOffet = 8 * stackSpaceWords
|
||
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
|
||
+ stackCode =
|
||
+ code_r
|
||
+ `snocOL` ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
|
||
+ passArguments [] fpRegs args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode)
|
||
+
|
||
+ -- Still have gpRegs left, but want to pass a FP argument. Must be passed in gpReg then.
|
||
+ passArguments (gpReg : gpRegs) [] ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do
|
||
+ let w = formatToWidth format
|
||
+ mov = MOV (OpReg w gpReg) (OpReg w r)
|
||
+ accumCode' =
|
||
+ accumCode
|
||
+ `appOL` code_r
|
||
+ `snocOL` ann (text "Pass fp argument in gpReg: " <> ppr r) mov
|
||
+ passArguments gpRegs [] args stackSpaceWords (gpReg : accumRegs) accumCode'
|
||
+ passArguments _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
|
||
+
|
||
+ readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg] -> InstrBlock -> NatM InstrBlock
|
||
+ readResults _ _ [] _ accumCode = return accumCode
|
||
+ readResults [] _ _ _ _ = do
|
||
+ platform <- getPlatform
|
||
+ pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target)
|
||
+ readResults _ [] _ _ _ = do
|
||
+ platform <- getPlatform
|
||
+ pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target)
|
||
+ readResults (gpReg : gpRegs) (fpReg : fpRegs) (dst : dsts) accumRegs accumCode = do
|
||
+ -- gp/fp reg -> dst
|
||
+ platform <- getPlatform
|
||
+ let rep = cmmRegType (CmmLocal dst)
|
||
+ format = cmmTypeFormat rep
|
||
+ w = cmmRegWidth (CmmLocal dst)
|
||
+ r_dst = getRegisterReg platform (CmmLocal dst)
|
||
+ if isFloatFormat format
|
||
+ then readResults (gpReg : gpRegs) fpRegs dsts (fpReg : accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg))
|
||
+ else
|
||
+ readResults gpRegs (fpReg : fpRegs) dsts (gpReg : accumRegs)
|
||
+ $ accumCode
|
||
+ `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg)
|
||
+ `appOL`
|
||
+ -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
|
||
+ truncateReg W64 w r_dst
|
||
+genCCall (PrimTarget mop) dest_regs arg_regs = do
|
||
+ case mop of
|
||
+ MO_F32_Fabs
|
||
+ | [arg_reg] <- arg_regs,
|
||
+ [dest_reg] <- dest_regs ->
|
||
+ unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
|
||
+ MO_F64_Fabs
|
||
+ | [arg_reg] <- arg_regs,
|
||
+ [dest_reg] <- dest_regs ->
|
||
+ unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
|
||
+ -- 64 bit float ops
|
||
+ MO_F64_Pwr -> mkCCall "pow"
|
||
+ MO_F64_Sin -> mkCCall "sin"
|
||
+ MO_F64_Cos -> mkCCall "cos"
|
||
+ MO_F64_Tan -> mkCCall "tan"
|
||
+ MO_F64_Sinh -> mkCCall "sinh"
|
||
+ MO_F64_Cosh -> mkCCall "cosh"
|
||
+ MO_F64_Tanh -> mkCCall "tanh"
|
||
+ MO_F64_Asin -> mkCCall "asin"
|
||
+ MO_F64_Acos -> mkCCall "acos"
|
||
+ MO_F64_Atan -> mkCCall "atan"
|
||
+ MO_F64_Asinh -> mkCCall "asinh"
|
||
+ MO_F64_Acosh -> mkCCall "acosh"
|
||
+ MO_F64_Atanh -> mkCCall "atanh"
|
||
+ MO_F64_Log -> mkCCall "log"
|
||
+ MO_F64_Log1P -> mkCCall "log1p"
|
||
+ MO_F64_Exp -> mkCCall "exp"
|
||
+ MO_F64_ExpM1 -> mkCCall "expm1"
|
||
+ MO_F64_Fabs -> mkCCall "fabs"
|
||
+ MO_F64_Sqrt -> mkCCall "sqrt"
|
||
+ -- 32 bit float ops
|
||
+ MO_F32_Pwr -> mkCCall "powf"
|
||
+ MO_F32_Sin -> mkCCall "sinf"
|
||
+ MO_F32_Cos -> mkCCall "cosf"
|
||
+ MO_F32_Tan -> mkCCall "tanf"
|
||
+ MO_F32_Sinh -> mkCCall "sinhf"
|
||
+ MO_F32_Cosh -> mkCCall "coshf"
|
||
+ MO_F32_Tanh -> mkCCall "tanhf"
|
||
+ MO_F32_Asin -> mkCCall "asinf"
|
||
+ MO_F32_Acos -> mkCCall "acosf"
|
||
+ MO_F32_Atan -> mkCCall "atanf"
|
||
+ MO_F32_Asinh -> mkCCall "asinhf"
|
||
+ MO_F32_Acosh -> mkCCall "acoshf"
|
||
+ MO_F32_Atanh -> mkCCall "atanhf"
|
||
+ MO_F32_Log -> mkCCall "logf"
|
||
+ MO_F32_Log1P -> mkCCall "log1pf"
|
||
+ MO_F32_Exp -> mkCCall "expf"
|
||
+ MO_F32_ExpM1 -> mkCCall "expm1f"
|
||
+ MO_F32_Fabs -> mkCCall "fabsf"
|
||
+ MO_F32_Sqrt -> mkCCall "sqrtf"
|
||
+ -- 64-bit primops
|
||
+ MO_I64_ToI -> mkCCall "hs_int64ToInt"
|
||
+ MO_I64_FromI -> mkCCall "hs_intToInt64"
|
||
+ MO_W64_ToW -> mkCCall "hs_word64ToWord"
|
||
+ MO_W64_FromW -> mkCCall "hs_wordToWord64"
|
||
+ MO_x64_Neg -> mkCCall "hs_neg64"
|
||
+ MO_x64_Add -> mkCCall "hs_add64"
|
||
+ MO_x64_Sub -> mkCCall "hs_sub64"
|
||
+ MO_x64_Mul -> mkCCall "hs_mul64"
|
||
+ MO_I64_Quot -> mkCCall "hs_quotInt64"
|
||
+ MO_I64_Rem -> mkCCall "hs_remInt64"
|
||
+ MO_W64_Quot -> mkCCall "hs_quotWord64"
|
||
+ MO_W64_Rem -> mkCCall "hs_remWord64"
|
||
+ MO_x64_And -> mkCCall "hs_and64"
|
||
+ MO_x64_Or -> mkCCall "hs_or64"
|
||
+ MO_x64_Xor -> mkCCall "hs_xor64"
|
||
+ MO_x64_Not -> mkCCall "hs_not64"
|
||
+ MO_x64_Shl -> mkCCall "hs_uncheckedShiftL64"
|
||
+ MO_I64_Shr -> mkCCall "hs_uncheckedIShiftRA64"
|
||
+ MO_W64_Shr -> mkCCall "hs_uncheckedShiftRL64"
|
||
+ MO_x64_Eq -> mkCCall "hs_eq64"
|
||
+ MO_x64_Ne -> mkCCall "hs_ne64"
|
||
+ MO_I64_Ge -> mkCCall "hs_geInt64"
|
||
+ MO_I64_Gt -> mkCCall "hs_gtInt64"
|
||
+ MO_I64_Le -> mkCCall "hs_leInt64"
|
||
+ MO_I64_Lt -> mkCCall "hs_ltInt64"
|
||
+ MO_W64_Ge -> mkCCall "hs_geWord64"
|
||
+ MO_W64_Gt -> mkCCall "hs_gtWord64"
|
||
+ MO_W64_Le -> mkCCall "hs_leWord64"
|
||
+ MO_W64_Lt -> mkCCall "hs_ltWord64"
|
||
+ -- Conversion
|
||
+ MO_UF_Conv w -> mkCCall (word2FloatLabel w)
|
||
+ -- Optional MachOps
|
||
+ -- These are enabled/disabled by backend flags: GHC.StgToCmm.Config
|
||
+ MO_S_Mul2 _w -> unsupported mop
|
||
+ MO_S_QuotRem _w -> unsupported mop
|
||
+ MO_U_QuotRem _w -> unsupported mop
|
||
+ MO_U_QuotRem2 _w -> unsupported mop
|
||
+ MO_Add2 _w -> unsupported mop
|
||
+ MO_AddWordC _w -> unsupported mop
|
||
+ MO_SubWordC _w -> unsupported mop
|
||
+ MO_AddIntC _w -> unsupported mop
|
||
+ MO_SubIntC _w -> unsupported mop
|
||
+ MO_U_Mul2 _w -> unsupported mop
|
||
+ -- Memory Ordering
|
||
+ -- The related C functions are:
|
||
+ -- #include <stdatomic.h>
|
||
+ -- atomic_thread_fence(memory_order_acquire);
|
||
+ -- atomic_thread_fence(memory_order_release);
|
||
+ -- atomic_thread_fence(memory_order_seq_cst);
|
||
+ MO_AcquireFence -> pure (unitOL (FENCE FenceRead FenceReadWrite))
|
||
+ MO_ReleaseFence -> pure (unitOL (FENCE FenceReadWrite FenceWrite))
|
||
+ MO_SeqCstFence -> pure (unitOL (FENCE FenceReadWrite FenceReadWrite))
|
||
+ MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers)
|
||
+ -- Prefetch
|
||
+ MO_Prefetch_Data _n -> pure nilOL -- Prefetch hint.
|
||
+
|
||
+ -- Memory copy/set/move/cmp, with alignment for optimization
|
||
+ MO_Memcpy _align -> mkCCall "memcpy"
|
||
+ MO_Memset _align -> mkCCall "memset"
|
||
+ MO_Memmove _align -> mkCCall "memmove"
|
||
+ MO_Memcmp _align -> mkCCall "memcmp"
|
||
+ MO_SuspendThread -> mkCCall "suspendThread"
|
||
+ MO_ResumeThread -> mkCCall "resumeThread"
|
||
+ MO_PopCnt w -> mkCCall (popCntLabel w)
|
||
+ MO_Pdep w -> mkCCall (pdepLabel w)
|
||
+ MO_Pext w -> mkCCall (pextLabel w)
|
||
+ MO_Clz w -> mkCCall (clzLabel w)
|
||
+ MO_Ctz w -> mkCCall (ctzLabel w)
|
||
+ MO_BSwap w -> mkCCall (bSwapLabel w)
|
||
+ MO_BRev w -> mkCCall (bRevLabel w)
|
||
+ -- Atomic read-modify-write.
|
||
+ mo@(MO_AtomicRead w ord)
|
||
+ | [p_reg] <- arg_regs,
|
||
+ [dst_reg] <- dest_regs -> do
|
||
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
|
||
+ platform <- getPlatform
|
||
+ -- Analog to the related MachOps (above)
|
||
+ -- The related C functions are:
|
||
+ -- #include <stdatomic.h>
|
||
+ -- __atomic_load_n(&a, __ATOMIC_ACQUIRE);
|
||
+ -- __atomic_load_n(&a, __ATOMIC_SEQ_CST);
|
||
+ let instrs = case ord of
|
||
+ MemOrderRelaxed -> unitOL $ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p))
|
||
+ MemOrderAcquire ->
|
||
+ toOL
|
||
+ [ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
||
+ FENCE FenceRead FenceReadWrite
|
||
+ ]
|
||
+ MemOrderSeqCst ->
|
||
+ toOL
|
||
+ [ ann moDescr (FENCE FenceReadWrite FenceReadWrite),
|
||
+ LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
|
||
+ FENCE FenceRead FenceReadWrite
|
||
+ ]
|
||
+ MemOrderRelease -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
|
||
+ dst = getRegisterReg platform (CmmLocal dst_reg)
|
||
+ moDescr = (text . show) mo
|
||
+ code = code_p `appOL` instrs
|
||
+ return code
|
||
+ | otherwise -> panic "mal-formed AtomicRead"
|
||
+ mo@(MO_AtomicWrite w ord)
|
||
+ | [p_reg, val_reg] <- arg_regs -> do
|
||
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
|
||
+ (val, fmt_val, code_val) <- getSomeReg val_reg
|
||
+ -- Analog to the related MachOps (above)
|
||
+ -- The related C functions are:
|
||
+ -- #include <stdatomic.h>
|
||
+ -- __atomic_store_n(&a, 23, __ATOMIC_SEQ_CST);
|
||
+ -- __atomic_store_n(&a, 23, __ATOMIC_RELEASE);
|
||
+ let instrs = case ord of
|
||
+ MemOrderRelaxed -> unitOL $ ann moDescr (STR fmt_val (OpReg w val) (OpAddr $ AddrReg p))
|
||
+ MemOrderSeqCst ->
|
||
+ toOL
|
||
+ [ ann moDescr (FENCE FenceReadWrite FenceWrite),
|
||
+ STR fmt_val (OpReg w val) (OpAddr $ AddrReg p),
|
||
+ FENCE FenceReadWrite FenceReadWrite
|
||
+ ]
|
||
+ MemOrderRelease ->
|
||
+ toOL
|
||
+ [ ann moDescr (FENCE FenceReadWrite FenceWrite),
|
||
+ STR fmt_val (OpReg w val) (OpAddr $ AddrReg p)
|
||
+ ]
|
||
+ MemOrderAcquire -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
|
||
+ moDescr = (text . show) mo
|
||
+ code =
|
||
+ code_p
|
||
+ `appOL` code_val
|
||
+ `appOL` instrs
|
||
+ pure code
|
||
+ | otherwise -> panic "mal-formed AtomicWrite"
|
||
+ MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
|
||
+ MO_Cmpxchg w -> mkCCall (cmpxchgLabel w)
|
||
+ -- -- Should be an AtomicRMW variant eventually.
|
||
+ -- -- Sequential consistent.
|
||
+ -- TODO: this should be implemented properly!
|
||
+ MO_Xchg w -> mkCCall (xchgLabel w)
|
||
+ where
|
||
+ unsupported :: (Show a) => a -> b
|
||
+ unsupported mop =
|
||
+ panic
|
||
+ ( "outOfLineCmmOp: "
|
||
+ ++ show mop
|
||
+ ++ " not supported here"
|
||
+ )
|
||
+ mkCCall :: FastString -> NatM InstrBlock
|
||
+ mkCCall name = do
|
||
+ config <- getConfig
|
||
+ target <-
|
||
+ cmmMakeDynamicReference config CallReference
|
||
+ $ mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction
|
||
+ let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
|
||
+ genCCall (ForeignTarget target cconv) dest_regs arg_regs
|
||
+
|
||
+ unaryFloatOp w op arg_reg dest_reg = do
|
||
+ platform <- getPlatform
|
||
+ (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg
|
||
+ let dst = getRegisterReg platform (CmmLocal dest_reg)
|
||
+ let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
|
||
+ pure code
|
||
+
|
||
+{- Note [RISCV64 far jumps]
|
||
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+
|
||
+RISCV64 conditional jump instructions can only encode an offset of +/-4KiB
|
||
+(12bits) which is usually enough but can be exceeded in edge cases. In these
|
||
+cases we will replace:
|
||
+
|
||
+ b.cond <cond> foo
|
||
+
|
||
+with the sequence:
|
||
+
|
||
+ b.cond <cond> <lbl_true>
|
||
+ b <lbl_false>
|
||
+ <lbl_true>:
|
||
+ la reg foo
|
||
+ b reg
|
||
+ <lbl_false>:
|
||
+
|
||
+and
|
||
+
|
||
+ b foo
|
||
+
|
||
+with the sequence:
|
||
+
|
||
+ la reg foo
|
||
+ b reg
|
||
+
|
||
+Compared to AArch64 the target label is loaded to a register, because
|
||
+unconditional jump instructions can only address +/-1MiB. The LA
|
||
+pseudo-instruction will be replaced by up to two real instructions, ensuring
|
||
+correct addressing.
|
||
+
|
||
+One could surely find more efficient replacements, taking PC-relative addressing
|
||
+into account. This could be a future improvement. (As far branches are pretty
|
||
+rare, one might question and measure the value of such improvement.)
|
||
+
|
||
+RISCV has many pseudo-instructions which emit more than one real instructions.
|
||
+Thus, we count the real instructions after the Assembler has seen them.
|
||
+
|
||
+We make some simplifications in the name of performance which can result in
|
||
+overestimating jump <-> label offsets:
|
||
+
|
||
+\* To avoid having to recalculate the label offsets once we replaced a jump we simply
|
||
+ assume all label jumps will be expanded to a three instruction far jump sequence.
|
||
+\* For labels associated with a info table we assume the info table is 64byte large.
|
||
+ Most info tables are smaller than that but it means we don't have to distinguish
|
||
+ between multiple types of info tables.
|
||
+
|
||
+In terms of implementation we walk the instruction stream at least once calculating
|
||
+label offsets, and if we determine during this that the functions body is big enough
|
||
+to potentially contain out of range jumps we walk the instructions a second time, replacing
|
||
+out of range jumps with the sequence of instructions described above.
|
||
+
|
||
+-}
|
||
+
|
||
+-- | A conditional jump to a far target
|
||
+--
|
||
+-- By loading the far target into a register for the jump, we can address the
|
||
+-- whole memory range.
|
||
+genCondFarJump :: (MonadUnique m) => Cond -> Operand -> Operand -> BlockId -> m InstrBlock
|
||
+genCondFarJump cond op1 op2 far_target = do
|
||
+ skip_lbl_id <- newBlockId
|
||
+ jmp_lbl_id <- newBlockId
|
||
+
|
||
+ -- TODO: We can improve this by inverting the condition
|
||
+ -- but it's not quite trivial since we don't know if we
|
||
+ -- need to consider float orderings.
|
||
+ -- So we take the hit of the additional jump in the false
|
||
+ -- case for now.
|
||
+ return
|
||
+ $ toOL
|
||
+ [ ann (text "Conditional far jump to: " <> ppr far_target)
|
||
+ $ BCOND cond op1 op2 (TBlock jmp_lbl_id),
|
||
+ B (TBlock skip_lbl_id),
|
||
+ NEWBLOCK jmp_lbl_id,
|
||
+ LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
|
||
+ B (TReg tmpReg),
|
||
+ NEWBLOCK skip_lbl_id
|
||
+ ]
|
||
+
|
||
+-- | An unconditional jump to a far target
|
||
+--
|
||
+-- By loading the far target into a register for the jump, we can address the
|
||
+-- whole memory range.
|
||
+genFarJump :: (MonadUnique m) => BlockId -> m InstrBlock
|
||
+genFarJump far_target =
|
||
+ return
|
||
+ $ toOL
|
||
+ [ ann (text "Unconditional far jump to: " <> ppr far_target)
|
||
+ $ LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
|
||
+ B (TReg tmpReg)
|
||
+ ]
|
||
+
|
||
+-- See Note [RISCV64 far jumps]
|
||
+data BlockInRange = InRange | NotInRange BlockId
|
||
+
|
||
+-- See Note [RISCV64 far jumps]
|
||
+makeFarBranches ::
|
||
+ Platform ->
|
||
+ LabelMap RawCmmStatics ->
|
||
+ [NatBasicBlock Instr] ->
|
||
+ UniqSM [NatBasicBlock Instr]
|
||
+makeFarBranches {- only used when debugging -} _platform statics basic_blocks = do
|
||
+ -- All offsets/positions are counted in multiples of 4 bytes (the size of RISCV64 instructions)
|
||
+ -- That is an offset of 1 represents a 4-byte/one instruction offset.
|
||
+ let (func_size, lblMap) = foldl' calc_lbl_positions (0, mapEmpty) basic_blocks
|
||
+ if func_size < max_jump_dist
|
||
+ then pure basic_blocks
|
||
+ else do
|
||
+ (_, blocks) <- mapAccumLM (replace_blk lblMap) 0 basic_blocks
|
||
+ pure $ concat blocks
|
||
+ where
|
||
+ -- pprTrace "lblMap" (ppr lblMap) $ basic_blocks
|
||
+
|
||
+ -- 2^11, 12 bit immediate with one bit is reserved for the sign
|
||
+ max_jump_dist = 2 ^ (11 :: Int) - 1 :: Int
|
||
+ -- Currently all inline info tables fit into 64 bytes.
|
||
+ max_info_size = 16 :: Int
|
||
+ long_bc_jump_size = 5 :: Int
|
||
+ long_b_jump_size = 2 :: Int
|
||
+
|
||
+ -- Replace out of range conditional jumps with unconditional jumps.
|
||
+ replace_blk :: LabelMap Int -> Int -> GenBasicBlock Instr -> UniqSM (Int, [GenBasicBlock Instr])
|
||
+ replace_blk !m !pos (BasicBlock lbl instrs) = do
|
||
+ -- Account for a potential info table before the label.
|
||
+ let !block_pos = pos + infoTblSize_maybe lbl
|
||
+ (!pos', instrs') <- mapAccumLM (replace_jump m) block_pos instrs
|
||
+ let instrs'' = concat instrs'
|
||
+ -- We might have introduced new labels, so split the instructions into basic blocks again if necessary.
|
||
+ let (top, split_blocks, no_data) = foldr mkBlocks ([], [], []) instrs''
|
||
+ -- There should be no data in the instruction stream at this point
|
||
+ massert (null no_data)
|
||
+
|
||
+ let final_blocks = BasicBlock lbl top : split_blocks
|
||
+ pure (pos', final_blocks)
|
||
+
|
||
+ replace_jump :: LabelMap Int -> Int -> Instr -> UniqSM (Int, [Instr])
|
||
+ replace_jump !m !pos instr = do
|
||
+ case instr of
|
||
+ ANN ann instr -> do
|
||
+ (idx, instr' : instrs') <- replace_jump m pos instr
|
||
+ pure (idx, ANN ann instr' : instrs')
|
||
+ BCOND cond op1 op2 t ->
|
||
+ case target_in_range m t pos of
|
||
+ InRange -> pure (pos + instr_size instr, [instr])
|
||
+ NotInRange far_target -> do
|
||
+ jmp_code <- genCondFarJump cond op1 op2 far_target
|
||
+ pure (pos + instr_size instr, fromOL jmp_code)
|
||
+ B t ->
|
||
+ case target_in_range m t pos of
|
||
+ InRange -> pure (pos + instr_size instr, [instr])
|
||
+ NotInRange far_target -> do
|
||
+ jmp_code <- genFarJump far_target
|
||
+ pure (pos + instr_size instr, fromOL jmp_code)
|
||
+ _ -> pure (pos + instr_size instr, [instr])
|
||
+
|
||
+ target_in_range :: LabelMap Int -> Target -> Int -> BlockInRange
|
||
+ target_in_range m target src =
|
||
+ case target of
|
||
+ (TReg {}) -> InRange
|
||
+ (TBlock bid) -> block_in_range m src bid
|
||
+
|
||
+ block_in_range :: LabelMap Int -> Int -> BlockId -> BlockInRange
|
||
+ block_in_range m src_pos dest_lbl =
|
||
+ case mapLookup dest_lbl m of
|
||
+ Nothing ->
|
||
+ pprTrace "not in range" (ppr dest_lbl)
|
||
+ $ NotInRange dest_lbl
|
||
+ Just dest_pos ->
|
||
+ if abs (dest_pos - src_pos) < max_jump_dist
|
||
+ then InRange
|
||
+ else NotInRange dest_lbl
|
||
+
|
||
+ calc_lbl_positions :: (Int, LabelMap Int) -> GenBasicBlock Instr -> (Int, LabelMap Int)
|
||
+ calc_lbl_positions (pos, m) (BasicBlock lbl instrs) =
|
||
+ let !pos' = pos + infoTblSize_maybe lbl
|
||
+ in foldl' instr_pos (pos', mapInsert lbl pos' m) instrs
|
||
+
|
||
+ instr_pos :: (Int, LabelMap Int) -> Instr -> (Int, LabelMap Int)
|
||
+ instr_pos (pos, m) instr = (pos + instr_size instr, m)
|
||
+
|
||
+ infoTblSize_maybe bid =
|
||
+ case mapLookup bid statics of
|
||
+ Nothing -> 0 :: Int
|
||
+ Just _info_static -> max_info_size
|
||
+
|
||
+ instr_size :: Instr -> Int
|
||
+ instr_size i = case i of
|
||
+ COMMENT {} -> 0
|
||
+ MULTILINE_COMMENT {} -> 0
|
||
+ ANN _ instr -> instr_size instr
|
||
+ LOCATION {} -> 0
|
||
+ DELTA {} -> 0
|
||
+ -- At this point there should be no NEWBLOCK in the instruction stream (pos, mapInsert bid pos m)
|
||
+ NEWBLOCK {} -> panic "mkFarBranched - Unexpected"
|
||
+ LDATA {} -> panic "mkFarBranched - Unexpected"
|
||
+ PUSH_STACK_FRAME -> 4
|
||
+ POP_STACK_FRAME -> 4
|
||
+ ADD {} -> 1
|
||
+ MUL {} -> 1
|
||
+ MULH {} -> 1
|
||
+ NEG {} -> 1
|
||
+ DIV {} -> 1
|
||
+ REM {} -> 1
|
||
+ REMU {} -> 1
|
||
+ SUB {} -> 1
|
||
+ DIVU {} -> 1
|
||
+ AND {} -> 1
|
||
+ OR {} -> 1
|
||
+ SRA {} -> 1
|
||
+ XOR {} -> 1
|
||
+ SLL {} -> 1
|
||
+ SRL {} -> 1
|
||
+ MOV {} -> 2
|
||
+ ORI {} -> 1
|
||
+ XORI {} -> 1
|
||
+ CSET {} -> 2
|
||
+ STR {} -> 1
|
||
+ LDR {} -> 3
|
||
+ LDRU {} -> 1
|
||
+ FENCE {} -> 1
|
||
+ FCVT {} -> 1
|
||
+ FABS {} -> 1
|
||
+ FMA {} -> 1
|
||
+ -- estimate the substituted size for jumps to labels
|
||
+ -- jumps to registers have size 1
|
||
+ BCOND {} -> long_bc_jump_size
|
||
+ B (TBlock _) -> long_b_jump_size
|
||
+ B (TReg _) -> 1
|
||
+ BL _ _ -> 1
|
||
+ J_TBL {} -> 1
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Cond.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Cond.hs
|
||
@@ -0,0 +1,42 @@
|
||
+module GHC.CmmToAsm.RV64.Cond
|
||
+ ( Cond (..),
|
||
+ )
|
||
+where
|
||
+
|
||
+import GHC.Prelude hiding (EQ)
|
||
+
|
||
+-- | Condition codes.
|
||
+--
|
||
+-- Used in conditional branches and bit setters. According to the available
|
||
+-- instruction set, some conditions are encoded as their negated opposites. I.e.
|
||
+-- these are logical things that don't necessarily map 1:1 to hardware/ISA.
|
||
+data Cond
|
||
+ = -- | int and float
|
||
+ EQ
|
||
+ | -- | int and float
|
||
+ NE
|
||
+ | -- | signed less than
|
||
+ SLT
|
||
+ | -- | signed less than or equal
|
||
+ SLE
|
||
+ | -- | signed greater than or equal
|
||
+ SGE
|
||
+ | -- | signed greater than
|
||
+ SGT
|
||
+ | -- | unsigned less than
|
||
+ ULT
|
||
+ | -- | unsigned less than or equal
|
||
+ ULE
|
||
+ | -- | unsigned greater than or equal
|
||
+ UGE
|
||
+ | -- | unsigned greater than
|
||
+ UGT
|
||
+ | -- | floating point instruction @flt@
|
||
+ FLT
|
||
+ | -- | floating point instruction @fle@
|
||
+ FLE
|
||
+ | -- | floating point instruction @fge@
|
||
+ FGE
|
||
+ | -- | floating point instruction @fgt@
|
||
+ FGT
|
||
+ deriving (Eq, Show)
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Instr.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Instr.hs
|
||
@@ -0,0 +1,823 @@
|
||
+-- All instructions will be rendered eventually. Thus, there's no benefit in
|
||
+-- being lazy in data types.
|
||
+{-# LANGUAGE StrictData #-}
|
||
+{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||
+
|
||
+module GHC.CmmToAsm.RV64.Instr where
|
||
+
|
||
+import Data.Maybe
|
||
+import GHC.Cmm
|
||
+import GHC.Cmm.BlockId
|
||
+import GHC.Cmm.CLabel
|
||
+import GHC.Cmm.Dataflow.Label
|
||
+import GHC.CmmToAsm.Config
|
||
+import GHC.CmmToAsm.Format
|
||
+import GHC.CmmToAsm.Instr (RegUsage (..))
|
||
+import GHC.CmmToAsm.RV64.Cond
|
||
+import GHC.CmmToAsm.RV64.Regs
|
||
+import GHC.CmmToAsm.Types
|
||
+import GHC.CmmToAsm.Utils
|
||
+import GHC.Data.FastString (LexicalFastString)
|
||
+import GHC.Platform
|
||
+import GHC.Platform.Reg
|
||
+import GHC.Platform.Regs
|
||
+import GHC.Prelude
|
||
+import GHC.Stack
|
||
+import GHC.Types.Unique.Supply
|
||
+import GHC.Utils.Outputable
|
||
+import GHC.Utils.Panic
|
||
+
|
||
+-- | Stack frame header size in bytes.
|
||
+--
|
||
+-- The stack frame header is made of the values that are always saved
|
||
+-- (regardless of the context.) It consists of the saved return address and a
|
||
+-- pointer to the previous frame. Thus, its size is two stack frame slots which
|
||
+-- equals two addresses/words (2 * 8 byte).
|
||
+stackFrameHeaderSize :: Int
|
||
+stackFrameHeaderSize = 2 * spillSlotSize
|
||
+
|
||
+-- | All registers are 8 byte wide.
|
||
+spillSlotSize :: Int
|
||
+spillSlotSize = 8
|
||
+
|
||
+-- | The number of bytes that the stack pointer should be aligned to.
|
||
+stackAlign :: Int
|
||
+stackAlign = 16
|
||
+
|
||
+-- | The number of spill slots available without allocating more.
|
||
+maxSpillSlots :: NCGConfig -> Int
|
||
+maxSpillSlots config =
|
||
+ ( (ncgSpillPreallocSize config - stackFrameHeaderSize)
|
||
+ `div` spillSlotSize
|
||
+ )
|
||
+ - 1
|
||
+
|
||
+-- | Convert a spill slot number to a *byte* offset.
|
||
+spillSlotToOffset :: Int -> Int
|
||
+spillSlotToOffset slot =
|
||
+ stackFrameHeaderSize + spillSlotSize * slot
|
||
+
|
||
+instance Outputable RegUsage where
|
||
+ ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')'
|
||
+
|
||
+-- | Get the registers that are being used by this instruction.
|
||
+-- regUsage doesn't need to do any trickery for jumps and such.
|
||
+-- Just state precisely the regs read and written by that insn.
|
||
+-- The consequences of control flow transfers, as far as register
|
||
+-- allocation goes, are taken care of by the register allocator.
|
||
+--
|
||
+-- RegUsage = RU [<read regs>] [<write regs>]
|
||
+regUsageOfInstr :: Platform -> Instr -> RegUsage
|
||
+regUsageOfInstr platform instr = case instr of
|
||
+ ANN _ i -> regUsageOfInstr platform i
|
||
+ COMMENT {} -> usage ([], [])
|
||
+ MULTILINE_COMMENT {} -> usage ([], [])
|
||
+ PUSH_STACK_FRAME -> usage ([], [])
|
||
+ POP_STACK_FRAME -> usage ([], [])
|
||
+ LOCATION {} -> usage ([], [])
|
||
+ DELTA {} -> usage ([], [])
|
||
+ ADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ MUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ NEG dst src -> usage (regOp src, regOp dst)
|
||
+ MULH dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ DIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ REM dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ REMU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ SUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ DIVU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ SRA dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ SLL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ SRL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
||
+ MOV dst src -> usage (regOp src, regOp dst)
|
||
+ -- ORI's third operand is always an immediate
|
||
+ ORI dst src1 _ -> usage (regOp src1, regOp dst)
|
||
+ XORI dst src1 _ -> usage (regOp src1, regOp dst)
|
||
+ J_TBL _ _ t -> usage ([t], [])
|
||
+ B t -> usage (regTarget t, [])
|
||
+ BCOND _ l r t -> usage (regTarget t ++ regOp l ++ regOp r, [])
|
||
+ BL t ps -> usage (t : ps, callerSavedRegisters)
|
||
+ CSET dst l r _ -> usage (regOp l ++ regOp r, regOp dst)
|
||
+ STR _ src dst -> usage (regOp src ++ regOp dst, [])
|
||
+ LDR _ dst src -> usage (regOp src, regOp dst)
|
||
+ LDRU _ dst src -> usage (regOp src, regOp dst)
|
||
+ FENCE _ _ -> usage ([], [])
|
||
+ FCVT _variant dst src -> usage (regOp src, regOp dst)
|
||
+ FABS dst src -> usage (regOp src, regOp dst)
|
||
+ FMA _ dst src1 src2 src3 ->
|
||
+ usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
|
||
+ _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
|
||
+ where
|
||
+ -- filtering the usage is necessary, otherwise the register
|
||
+ -- allocator will try to allocate pre-defined fixed stg
|
||
+ -- registers as well, as they show up.
|
||
+ usage :: ([Reg], [Reg]) -> RegUsage
|
||
+ usage (srcRegs, dstRegs) =
|
||
+ RU
|
||
+ (filter (interesting platform) srcRegs)
|
||
+ (filter (interesting platform) dstRegs)
|
||
+
|
||
+ regAddr :: AddrMode -> [Reg]
|
||
+ regAddr (AddrRegImm r1 _imm) = [r1]
|
||
+ regAddr (AddrReg r1) = [r1]
|
||
+
|
||
+ regOp :: Operand -> [Reg]
|
||
+ regOp (OpReg _w r1) = [r1]
|
||
+ regOp (OpAddr a) = regAddr a
|
||
+ regOp (OpImm _imm) = []
|
||
+
|
||
+ regTarget :: Target -> [Reg]
|
||
+ regTarget (TBlock _bid) = []
|
||
+ regTarget (TReg r1) = [r1]
|
||
+
|
||
+ -- Is this register interesting for the register allocator?
|
||
+ interesting :: Platform -> Reg -> Bool
|
||
+ interesting _ (RegVirtual _) = True
|
||
+ interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
|
||
+
|
||
+-- | Caller-saved registers (according to calling convention)
|
||
+--
|
||
+-- These registers may be clobbered after a jump.
|
||
+callerSavedRegisters :: [Reg]
|
||
+callerSavedRegisters =
|
||
+ [regSingle raRegNo]
|
||
+ ++ map regSingle [t0RegNo .. t2RegNo]
|
||
+ ++ map regSingle [a0RegNo .. a7RegNo]
|
||
+ ++ map regSingle [t3RegNo .. t6RegNo]
|
||
+ ++ map regSingle [ft0RegNo .. ft7RegNo]
|
||
+ ++ map regSingle [fa0RegNo .. fa7RegNo]
|
||
+
|
||
+-- | Apply a given mapping to all the register references in this instruction.
|
||
+patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
|
||
+patchRegsOfInstr instr env = case instr of
|
||
+ ANN d i -> ANN d (patchRegsOfInstr i env)
|
||
+ COMMENT {} -> instr
|
||
+ MULTILINE_COMMENT {} -> instr
|
||
+ PUSH_STACK_FRAME -> instr
|
||
+ POP_STACK_FRAME -> instr
|
||
+ LOCATION {} -> instr
|
||
+ DELTA {} -> instr
|
||
+ ADD o1 o2 o3 -> ADD (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ MUL o1 o2 o3 -> MUL (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ NEG o1 o2 -> NEG (patchOp o1) (patchOp o2)
|
||
+ MULH o1 o2 o3 -> MULH (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ DIV o1 o2 o3 -> DIV (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ REM o1 o2 o3 -> REM (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ REMU o1 o2 o3 -> REMU (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ SUB o1 o2 o3 -> SUB (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ DIVU o1 o2 o3 -> DIVU (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ SRA o1 o2 o3 -> SRA (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ SLL o1 o2 o3 -> SLL (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ SRL o1 o2 o3 -> SRL (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ MOV o1 o2 -> MOV (patchOp o1) (patchOp o2)
|
||
+ -- o3 cannot be a register for ORI (always an immediate)
|
||
+ ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ XORI o1 o2 o3 -> XORI (patchOp o1) (patchOp o2) (patchOp o3)
|
||
+ J_TBL ids mbLbl t -> J_TBL ids mbLbl (env t)
|
||
+ B t -> B (patchTarget t)
|
||
+ BL t ps -> BL (patchReg t) ps
|
||
+ BCOND c o1 o2 t -> BCOND c (patchOp o1) (patchOp o2) (patchTarget t)
|
||
+ CSET o l r c -> CSET (patchOp o) (patchOp l) (patchOp r) c
|
||
+ STR f o1 o2 -> STR f (patchOp o1) (patchOp o2)
|
||
+ LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2)
|
||
+ LDRU f o1 o2 -> LDRU f (patchOp o1) (patchOp o2)
|
||
+ FENCE o1 o2 -> FENCE o1 o2
|
||
+ FCVT variant o1 o2 -> FCVT variant (patchOp o1) (patchOp o2)
|
||
+ FABS o1 o2 -> FABS (patchOp o1) (patchOp o2)
|
||
+ FMA s o1 o2 o3 o4 ->
|
||
+ FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
|
||
+ _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr
|
||
+ where
|
||
+ patchOp :: Operand -> Operand
|
||
+ patchOp (OpReg w r) = OpReg w (env r)
|
||
+ patchOp (OpAddr a) = OpAddr (patchAddr a)
|
||
+ patchOp opImm = opImm
|
||
+
|
||
+ patchTarget :: Target -> Target
|
||
+ patchTarget (TReg r) = TReg (env r)
|
||
+ patchTarget tBlock = tBlock
|
||
+
|
||
+ patchAddr :: AddrMode -> AddrMode
|
||
+ patchAddr (AddrRegImm r1 imm) = AddrRegImm (env r1) imm
|
||
+ patchAddr (AddrReg r) = AddrReg (env r)
|
||
+
|
||
+ patchReg :: Reg -> Reg
|
||
+ patchReg = env
|
||
+
|
||
+-- | Checks whether this instruction is a jump/branch instruction.
|
||
+--
|
||
+-- One that can change the flow of control in a way that the
|
||
+-- register allocator needs to worry about.
|
||
+isJumpishInstr :: Instr -> Bool
|
||
+isJumpishInstr instr = case instr of
|
||
+ ANN _ i -> isJumpishInstr i
|
||
+ J_TBL {} -> True
|
||
+ B {} -> True
|
||
+ BL {} -> True
|
||
+ BCOND {} -> True
|
||
+ _ -> False
|
||
+
|
||
+-- | Get the `BlockId`s of the jump destinations (if any)
|
||
+jumpDestsOfInstr :: Instr -> [BlockId]
|
||
+jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i
|
||
+jumpDestsOfInstr (J_TBL ids _mbLbl _r) = catMaybes ids
|
||
+jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
|
||
+jumpDestsOfInstr (BCOND _ _ _ t) = [id | TBlock id <- [t]]
|
||
+jumpDestsOfInstr _ = []
|
||
+
|
||
+-- | Change the destination of this (potential) jump instruction.
|
||
+--
|
||
+-- Used in the linear allocator when adding fixup blocks for join
|
||
+-- points.
|
||
+patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
|
||
+patchJumpInstr instr patchF =
|
||
+ case instr of
|
||
+ ANN d i -> ANN d (patchJumpInstr i patchF)
|
||
+ J_TBL ids mbLbl r -> J_TBL (map (fmap patchF) ids) mbLbl r
|
||
+ B (TBlock bid) -> B (TBlock (patchF bid))
|
||
+ BCOND c o1 o2 (TBlock bid) -> BCOND c o1 o2 (TBlock (patchF bid))
|
||
+ _ -> panic $ "patchJumpInstr: " ++ instrCon instr
|
||
+
|
||
+-- -----------------------------------------------------------------------------
|
||
+-- Note [RISCV64 Spills and Reloads]
|
||
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+--
|
||
+-- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading
|
||
+-- registers. The load and store instructions of RISCV64 address with a signed
|
||
+-- 12-bit immediate + a register; machine stackpointer (sp/x2) in this case.
|
||
+--
|
||
+-- The @RESERVED_C_STACK_BYTES@ is 16k, so we can't always address into it in a
|
||
+-- single load/store instruction. There are offsets to sp (not to be confused
|
||
+-- with STG's SP!) which need a register to be calculated.
|
||
+--
|
||
+-- Using sp to compute the offset would violate assumptions about the stack pointer
|
||
+-- pointing to the top of the stack during signal handling. As we can't force
|
||
+-- every signal to use its own stack, we have to ensure that the stack pointer
|
||
+-- always points to the top of the stack, and we can't use it for computation.
|
||
+--
|
||
+-- So, we reserve one register (TMP) for this purpose (and other, unrelated
|
||
+-- intermediate operations.) See Note [The made-up RISCV64 TMP (IP) register]
|
||
+
|
||
+-- | Generate instructions to spill a register into a spill slot.
|
||
+mkSpillInstr ::
|
||
+ (HasCallStack) =>
|
||
+ NCGConfig ->
|
||
+ -- | register to spill
|
||
+ Reg ->
|
||
+ -- | current stack delta
|
||
+ Int ->
|
||
+ -- | spill slot to use
|
||
+ Int ->
|
||
+ [Instr]
|
||
+mkSpillInstr _config reg delta slot =
|
||
+ case off - delta of
|
||
+ imm | fitsIn12bitImm imm -> [mkStrSpImm imm]
|
||
+ imm ->
|
||
+ [ movImmToTmp imm,
|
||
+ addSpToTmp,
|
||
+ mkStrTmp
|
||
+ ]
|
||
+ where
|
||
+ fmt = case reg of
|
||
+ RegReal (RealRegSingle n) | n < d0RegNo -> II64
|
||
+ _ -> FF64
|
||
+ mkStrSpImm imm =
|
||
+ ANN (text "Spill@" <> int (off - delta))
|
||
+ $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
|
||
+ movImmToTmp imm =
|
||
+ ANN (text "Spill: TMP <- " <> int imm)
|
||
+ $ MOV tmp (OpImm (ImmInt imm))
|
||
+ addSpToTmp =
|
||
+ ANN (text "Spill: TMP <- SP + TMP ")
|
||
+ $ ADD tmp tmp sp
|
||
+ mkStrTmp =
|
||
+ ANN (text "Spill@" <> int (off - delta))
|
||
+ $ STR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
|
||
+
|
||
+ off = spillSlotToOffset slot
|
||
+
|
||
+-- | Generate instructions to load a register from a spill slot.
|
||
+mkLoadInstr ::
|
||
+ NCGConfig ->
|
||
+ -- | register to load
|
||
+ Reg ->
|
||
+ -- | current stack delta
|
||
+ Int ->
|
||
+ -- | spill slot to use
|
||
+ Int ->
|
||
+ [Instr]
|
||
+mkLoadInstr _config reg delta slot =
|
||
+ case off - delta of
|
||
+ imm | fitsIn12bitImm imm -> [mkLdrSpImm imm]
|
||
+ imm ->
|
||
+ [ movImmToTmp imm,
|
||
+ addSpToTmp,
|
||
+ mkLdrTmp
|
||
+ ]
|
||
+ where
|
||
+ fmt = case reg of
|
||
+ RegReal (RealRegSingle n) | n < d0RegNo -> II64
|
||
+ _ -> FF64
|
||
+ mkLdrSpImm imm =
|
||
+ ANN (text "Reload@" <> int (off - delta))
|
||
+ $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
|
||
+ movImmToTmp imm =
|
||
+ ANN (text "Reload: TMP <- " <> int imm)
|
||
+ $ MOV tmp (OpImm (ImmInt imm))
|
||
+ addSpToTmp =
|
||
+ ANN (text "Reload: TMP <- SP + TMP ")
|
||
+ $ ADD tmp tmp sp
|
||
+ mkLdrTmp =
|
||
+ ANN (text "Reload@" <> int (off - delta))
|
||
+ $ LDR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
|
||
+
|
||
+ off = spillSlotToOffset slot
|
||
+
|
||
+-- | See if this instruction is telling us the current C stack delta
|
||
+takeDeltaInstr :: Instr -> Maybe Int
|
||
+takeDeltaInstr (ANN _ i) = takeDeltaInstr i
|
||
+takeDeltaInstr (DELTA i) = Just i
|
||
+takeDeltaInstr _ = Nothing
|
||
+
|
||
+-- | Not real instructions. Just meta data
|
||
+isMetaInstr :: Instr -> Bool
|
||
+isMetaInstr instr =
|
||
+ case instr of
|
||
+ ANN _ i -> isMetaInstr i
|
||
+ COMMENT {} -> True
|
||
+ MULTILINE_COMMENT {} -> True
|
||
+ LOCATION {} -> True
|
||
+ LDATA {} -> True
|
||
+ NEWBLOCK {} -> True
|
||
+ DELTA {} -> True
|
||
+ PUSH_STACK_FRAME -> True
|
||
+ POP_STACK_FRAME -> True
|
||
+ _ -> False
|
||
+
|
||
+-- | Copy the value in a register to another one.
|
||
+--
|
||
+-- Must work for all register classes.
|
||
+mkRegRegMoveInstr :: Reg -> Reg -> Instr
|
||
+mkRegRegMoveInstr src dst = ANN desc instr
|
||
+ where
|
||
+ desc = text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst
|
||
+ instr = MOV (operandFromReg dst) (operandFromReg src)
|
||
+
|
||
+-- | Take the source and destination from this (potential) reg -> reg move instruction
|
||
+--
|
||
+-- We have to be a bit careful here: A `MOV` can also mean an implicit
|
||
+-- conversion. This case is filtered out.
|
||
+takeRegRegMoveInstr :: Instr -> Maybe (Reg, Reg)
|
||
+takeRegRegMoveInstr (MOV (OpReg width dst) (OpReg width' src))
|
||
+ | width == width' && (isFloatReg dst == isFloatReg src) = pure (src, dst)
|
||
+takeRegRegMoveInstr _ = Nothing
|
||
+
|
||
+-- | Make an unconditional jump instruction.
|
||
+mkJumpInstr :: BlockId -> [Instr]
|
||
+mkJumpInstr = pure . B . TBlock
|
||
+
|
||
+-- | Decrement @sp@ to allocate stack space.
|
||
+--
|
||
+-- The stack grows downwards, so we decrement the stack pointer by @n@ (bytes).
|
||
+-- This is dual to `mkStackDeallocInstr`. @sp@ is the RISCV stack pointer, not
|
||
+-- to be confused with the STG stack pointer.
|
||
+mkStackAllocInstr :: Platform -> Int -> [Instr]
|
||
+mkStackAllocInstr _platform = moveSp . negate
|
||
+
|
||
+-- | Increment SP to deallocate stack space.
|
||
+--
|
||
+-- The stack grows downwards, so we increment the stack pointer by @n@ (bytes).
|
||
+-- This is dual to `mkStackAllocInstr`. @sp@ is the RISCV stack pointer, not to
|
||
+-- be confused with the STG stack pointer.
|
||
+mkStackDeallocInstr :: Platform -> Int -> [Instr]
|
||
+mkStackDeallocInstr _platform = moveSp
|
||
+
|
||
+moveSp :: Int -> [Instr]
|
||
+moveSp n
|
||
+ | n == 0 = []
|
||
+ | n /= 0 && fitsIn12bitImm n = pure . ANN desc $ ADD sp sp (OpImm (ImmInt n))
|
||
+ | otherwise =
|
||
+ -- This ends up in three effective instructions. We could get away with
|
||
+ -- two for intMax12bit < n < 3 * intMax12bit by recursing once. However,
|
||
+ -- this way is likely less surprising.
|
||
+ [ ANN desc (MOV tmp (OpImm (ImmInt n))),
|
||
+ ADD sp sp tmp
|
||
+ ]
|
||
+ where
|
||
+ desc = text "Move SP:" <+> int n
|
||
+
|
||
+--
|
||
+-- See Note [extra spill slots] in X86/Instr.hs
|
||
+--
|
||
+allocMoreStack ::
|
||
+ Platform ->
|
||
+ Int ->
|
||
+ NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr ->
|
||
+ UniqSM (NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr, [(BlockId, BlockId)])
|
||
+allocMoreStack _ _ top@(CmmData _ _) = return (top, [])
|
||
+allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
|
||
+ let entries = entryBlocks proc
|
||
+
|
||
+ uniqs <- getUniquesM
|
||
+
|
||
+ let delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
|
||
+ where
|
||
+ x = slots * spillSlotSize -- sp delta
|
||
+ alloc = mkStackAllocInstr platform delta
|
||
+ dealloc = mkStackDeallocInstr platform delta
|
||
+
|
||
+ retargetList = zip entries (map mkBlockId uniqs)
|
||
+
|
||
+ new_blockmap :: LabelMap BlockId
|
||
+ new_blockmap = mapFromList retargetList
|
||
+
|
||
+ insert_stack_insn (BasicBlock id insns)
|
||
+ | Just new_blockid <- mapLookup id new_blockmap =
|
||
+ [ BasicBlock id $ alloc ++ [B (TBlock new_blockid)],
|
||
+ BasicBlock new_blockid block'
|
||
+ ]
|
||
+ | otherwise =
|
||
+ [BasicBlock id block']
|
||
+ where
|
||
+ block' = foldr insert_dealloc [] insns
|
||
+
|
||
+ insert_dealloc insn r = case insn of
|
||
+ J_TBL {} -> dealloc ++ (insn : r)
|
||
+ ANN _ e -> insert_dealloc e r
|
||
+ _other
|
||
+ | jumpDestsOfInstr insn /= [] ->
|
||
+ patchJumpInstr insn retarget : r
|
||
+ _other -> insn : r
|
||
+ where
|
||
+ retarget b = fromMaybe b (mapLookup b new_blockmap)
|
||
+
|
||
+ new_code = concatMap insert_stack_insn code
|
||
+ return (CmmProc info lbl live (ListGraph new_code), retargetList)
|
||
+
|
||
+data Instr
|
||
+ = -- | Comment pseudo-op
|
||
+ COMMENT SDoc
|
||
+ | -- | Multi-line comment pseudo-op
|
||
+ MULTILINE_COMMENT SDoc
|
||
+ | -- | Annotated instruction. Should print <instr> # <doc>
|
||
+ ANN SDoc Instr
|
||
+ | -- | Location pseudo-op @.loc@ (file, line, col, name)
|
||
+ LOCATION Int Int Int LexicalFastString
|
||
+ | -- | Static data spat out during code generation.
|
||
+ LDATA Section RawCmmStatics
|
||
+ | -- | Start a new basic block.
|
||
+ --
|
||
+ -- Useful during codegen, removed later. Preceding instruction should be a
|
||
+ -- jump, as per the invariants for a BasicBlock (see Cmm).
|
||
+ NEWBLOCK BlockId
|
||
+ | -- | Specify current stack offset for benefit of subsequent passes
|
||
+ DELTA Int
|
||
+ | -- | Push a minimal stack frame consisting of the return address (RA) and the frame pointer (FP).
|
||
+ PUSH_STACK_FRAME
|
||
+ | -- | Pop the minimal stack frame of prior `PUSH_STACK_FRAME`.
|
||
+ POP_STACK_FRAME
|
||
+ | -- | Arithmetic addition (both integer and floating point)
|
||
+ --
|
||
+ -- @rd = rs1 + rs2@
|
||
+ ADD Operand Operand Operand
|
||
+ | -- | Arithmetic subtraction (both integer and floating point)
|
||
+ --
|
||
+ -- @rd = rs1 - rs2@
|
||
+ SUB Operand Operand Operand
|
||
+ | -- | Logical AND (integer only)
|
||
+ --
|
||
+ -- @rd = rs1 & rs2@
|
||
+ AND Operand Operand Operand
|
||
+ | -- | Logical OR (integer only)
|
||
+ --
|
||
+ -- @rd = rs1 | rs2@
|
||
+ OR Operand Operand Operand
|
||
+ | -- | Logical left shift (zero extended, integer only)
|
||
+ --
|
||
+ -- @rd = rs1 << rs2@
|
||
+ SLL Operand Operand Operand
|
||
+ | -- | Logical right shift (zero extended, integer only)
|
||
+ --
|
||
+ -- @rd = rs1 >> rs2@
|
||
+ SRL Operand Operand Operand
|
||
+ | -- | Arithmetic right shift (sign-extended, integer only)
|
||
+ --
|
||
+ -- @rd = rs1 >> rs2@
|
||
+ SRA Operand Operand Operand
|
||
+ | -- | Store to memory (both, integer and floating point)
|
||
+ STR Format Operand Operand
|
||
+ | -- | Load from memory (sign-extended, integer and floating point)
|
||
+ LDR Format Operand Operand
|
||
+ | -- | Load from memory (unsigned, integer and floating point)
|
||
+ LDRU Format Operand Operand
|
||
+ | -- | Arithmetic multiplication (both, integer and floating point)
|
||
+ --
|
||
+ -- @rd = rn × rm@
|
||
+ MUL Operand Operand Operand
|
||
+ | -- | Negation (both, integer and floating point)
|
||
+ --
|
||
+ -- @rd = -op2@
|
||
+ NEG Operand Operand
|
||
+ | -- | Division (both, integer and floating point)
|
||
+ --
|
||
+ -- @rd = rn ÷ rm@
|
||
+ DIV Operand Operand Operand
|
||
+ | -- | Remainder (integer only, signed)
|
||
+ --
|
||
+ -- @rd = rn % rm@
|
||
+ REM Operand Operand Operand --
|
||
+ | -- | Remainder (integer only, unsigned)
|
||
+ --
|
||
+ -- @rd = |rn % rm|@
|
||
+ REMU Operand Operand Operand
|
||
+ | -- | High part of a multiplication that doesn't fit into 64bits (integer only)
|
||
+ --
|
||
+ -- E.g. for a multiplication with 64bits width: @rd = (rs1 * rs2) >> 64@.
|
||
+ MULH Operand Operand Operand
|
||
+ | -- | Unsigned division (integer only)
|
||
+ --
|
||
+ -- @rd = |rn ÷ rm|@
|
||
+ DIVU Operand Operand Operand
|
||
+ | -- | XOR (integer only)
|
||
+ --
|
||
+ -- @rd = rn ⊕ op2@
|
||
+ XOR Operand Operand Operand
|
||
+ | -- | ORI with immediate (integer only)
|
||
+ --
|
||
+ -- @rd = rn | op2@
|
||
+ ORI Operand Operand Operand
|
||
+ | -- | XOR with immediate (integer only)
|
||
+ --
|
||
+ -- @rd = rn ⊕ op2@
|
||
+ XORI Operand Operand Operand
|
||
+ | -- | Move to register (integer and floating point)
|
||
+ --
|
||
+ -- @rd = rn@ or @rd = #imm@
|
||
+ MOV Operand Operand
|
||
+ | -- | Pseudo-op for conditional setting of a register.
|
||
+ --
|
||
+ -- @if(o2 cond o3) op <- 1 else op <- 0@
|
||
+ CSET Operand Operand Operand Cond
|
||
+ | -- | A jump instruction with data for switch/jump tables
|
||
+ J_TBL [Maybe BlockId] (Maybe CLabel) Reg
|
||
+ | -- | Unconditional jump (no linking)
|
||
+ B Target
|
||
+ | -- | Unconditional jump, links return address (sets @ra@/@x1@)
|
||
+ BL Reg [Reg]
|
||
+ | -- | branch with condition (integer only)
|
||
+ BCOND Cond Operand Operand Target
|
||
+ | -- | Fence instruction
|
||
+ --
|
||
+ -- Memory barrier.
|
||
+ FENCE FenceType FenceType
|
||
+ | -- | Floating point conversion
|
||
+ FCVT FcvtVariant Operand Operand
|
||
+ | -- | Floating point ABSolute value
|
||
+ FABS Operand Operand
|
||
+ | -- | Floating-point fused multiply-add instructions
|
||
+ --
|
||
+ -- - fmadd : d = r1 * r2 + r3
|
||
+ -- - fnmsub: d = r1 * r2 - r3
|
||
+ -- - fmsub : d = - r1 * r2 + r3
|
||
+ -- - fnmadd: d = - r1 * r2 - r3
|
||
+ FMA FMASign Operand Operand Operand Operand
|
||
+
|
||
+-- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
|
||
+data FenceType = FenceRead | FenceWrite | FenceReadWrite
|
||
+
|
||
+-- | Variant of a floating point conversion instruction
|
||
+data FcvtVariant = FloatToFloat | IntToFloat | FloatToInt
|
||
+
|
||
+instrCon :: Instr -> String
|
||
+instrCon i =
|
||
+ case i of
|
||
+ COMMENT {} -> "COMMENT"
|
||
+ MULTILINE_COMMENT {} -> "COMMENT"
|
||
+ ANN {} -> "ANN"
|
||
+ LOCATION {} -> "LOCATION"
|
||
+ LDATA {} -> "LDATA"
|
||
+ NEWBLOCK {} -> "NEWBLOCK"
|
||
+ DELTA {} -> "DELTA"
|
||
+ PUSH_STACK_FRAME {} -> "PUSH_STACK_FRAME"
|
||
+ POP_STACK_FRAME {} -> "POP_STACK_FRAME"
|
||
+ ADD {} -> "ADD"
|
||
+ OR {} -> "OR"
|
||
+ MUL {} -> "MUL"
|
||
+ NEG {} -> "NEG"
|
||
+ DIV {} -> "DIV"
|
||
+ REM {} -> "REM"
|
||
+ REMU {} -> "REMU"
|
||
+ MULH {} -> "MULH"
|
||
+ SUB {} -> "SUB"
|
||
+ DIVU {} -> "DIVU"
|
||
+ AND {} -> "AND"
|
||
+ SRA {} -> "SRA"
|
||
+ XOR {} -> "XOR"
|
||
+ SLL {} -> "SLL"
|
||
+ SRL {} -> "SRL"
|
||
+ MOV {} -> "MOV"
|
||
+ ORI {} -> "ORI"
|
||
+ XORI {} -> "ORI"
|
||
+ STR {} -> "STR"
|
||
+ LDR {} -> "LDR"
|
||
+ LDRU {} -> "LDRU"
|
||
+ CSET {} -> "CSET"
|
||
+ J_TBL {} -> "J_TBL"
|
||
+ B {} -> "B"
|
||
+ BL {} -> "BL"
|
||
+ BCOND {} -> "BCOND"
|
||
+ FENCE {} -> "FENCE"
|
||
+ FCVT {} -> "FCVT"
|
||
+ FABS {} -> "FABS"
|
||
+ FMA variant _ _ _ _ ->
|
||
+ case variant of
|
||
+ FMAdd -> "FMADD"
|
||
+ FMSub -> "FMSUB"
|
||
+ FNMAdd -> "FNMADD"
|
||
+ FNMSub -> "FNMSUB"
|
||
+
|
||
+data Target
|
||
+ = TBlock BlockId
|
||
+ | TReg Reg
|
||
+
|
||
+data Operand
|
||
+ = -- | register
|
||
+ OpReg Width Reg
|
||
+ | -- | immediate value
|
||
+ OpImm Imm
|
||
+ | -- | memory reference
|
||
+ OpAddr AddrMode
|
||
+ deriving (Eq, Show)
|
||
+
|
||
+operandFromReg :: Reg -> Operand
|
||
+operandFromReg = OpReg W64
|
||
+
|
||
+operandFromRegNo :: RegNo -> Operand
|
||
+operandFromRegNo = operandFromReg . regSingle
|
||
+
|
||
+zero, ra, sp, gp, tp, fp, tmp :: Operand
|
||
+zero = operandFromReg zeroReg
|
||
+ra = operandFromReg raReg
|
||
+sp = operandFromReg spMachReg
|
||
+gp = operandFromRegNo 3
|
||
+tp = operandFromRegNo 4
|
||
+fp = operandFromRegNo 8
|
||
+tmp = operandFromReg tmpReg
|
||
+
|
||
+x0, x1, x2, x3, x4, x5, x6, x7 :: Operand
|
||
+x8, x9, x10, x11, x12, x13, x14, x15 :: Operand
|
||
+x16, x17, x18, x19, x20, x21, x22, x23 :: Operand
|
||
+x24, x25, x26, x27, x28, x29, x30, x31 :: Operand
|
||
+x0 = operandFromRegNo x0RegNo
|
||
+x1 = operandFromRegNo 1
|
||
+x2 = operandFromRegNo 2
|
||
+x3 = operandFromRegNo 3
|
||
+x4 = operandFromRegNo 4
|
||
+x5 = operandFromRegNo x5RegNo
|
||
+x6 = operandFromRegNo 6
|
||
+x7 = operandFromRegNo x7RegNo
|
||
+
|
||
+x8 = operandFromRegNo 8
|
||
+
|
||
+x9 = operandFromRegNo 9
|
||
+
|
||
+x10 = operandFromRegNo x10RegNo
|
||
+
|
||
+x11 = operandFromRegNo 11
|
||
+
|
||
+x12 = operandFromRegNo 12
|
||
+
|
||
+x13 = operandFromRegNo 13
|
||
+
|
||
+x14 = operandFromRegNo 14
|
||
+
|
||
+x15 = operandFromRegNo 15
|
||
+
|
||
+x16 = operandFromRegNo 16
|
||
+
|
||
+x17 = operandFromRegNo x17RegNo
|
||
+
|
||
+x18 = operandFromRegNo 18
|
||
+
|
||
+x19 = operandFromRegNo 19
|
||
+
|
||
+x20 = operandFromRegNo 20
|
||
+
|
||
+x21 = operandFromRegNo 21
|
||
+
|
||
+x22 = operandFromRegNo 22
|
||
+
|
||
+x23 = operandFromRegNo 23
|
||
+
|
||
+x24 = operandFromRegNo 24
|
||
+
|
||
+x25 = operandFromRegNo 25
|
||
+
|
||
+x26 = operandFromRegNo 26
|
||
+
|
||
+x27 = operandFromRegNo 27
|
||
+
|
||
+x28 = operandFromRegNo x28RegNo
|
||
+
|
||
+x29 = operandFromRegNo 29
|
||
+
|
||
+x30 = operandFromRegNo 30
|
||
+
|
||
+x31 = operandFromRegNo x31RegNo
|
||
+
|
||
+d0, d1, d2, d3, d4, d5, d6, d7 :: Operand
|
||
+d8, d9, d10, d11, d12, d13, d14, d15 :: Operand
|
||
+d16, d17, d18, d19, d20, d21, d22, d23 :: Operand
|
||
+d24, d25, d26, d27, d28, d29, d30, d31 :: Operand
|
||
+d0 = operandFromRegNo d0RegNo
|
||
+d1 = operandFromRegNo 33
|
||
+d2 = operandFromRegNo 34
|
||
+d3 = operandFromRegNo 35
|
||
+d4 = operandFromRegNo 36
|
||
+d5 = operandFromRegNo 37
|
||
+d6 = operandFromRegNo 38
|
||
+d7 = operandFromRegNo d7RegNo
|
||
+
|
||
+d8 = operandFromRegNo 40
|
||
+
|
||
+d9 = operandFromRegNo 41
|
||
+
|
||
+d10 = operandFromRegNo d10RegNo
|
||
+
|
||
+d11 = operandFromRegNo 43
|
||
+
|
||
+d12 = operandFromRegNo 44
|
||
+
|
||
+d13 = operandFromRegNo 45
|
||
+
|
||
+d14 = operandFromRegNo 46
|
||
+
|
||
+d15 = operandFromRegNo 47
|
||
+
|
||
+d16 = operandFromRegNo 48
|
||
+
|
||
+d17 = operandFromRegNo d17RegNo
|
||
+
|
||
+d18 = operandFromRegNo 50
|
||
+
|
||
+d19 = operandFromRegNo 51
|
||
+
|
||
+d20 = operandFromRegNo 52
|
||
+
|
||
+d21 = operandFromRegNo 53
|
||
+
|
||
+d22 = operandFromRegNo 54
|
||
+
|
||
+d23 = operandFromRegNo 55
|
||
+
|
||
+d24 = operandFromRegNo 56
|
||
+
|
||
+d25 = operandFromRegNo 57
|
||
+
|
||
+d26 = operandFromRegNo 58
|
||
+
|
||
+d27 = operandFromRegNo 59
|
||
+
|
||
+d28 = operandFromRegNo 60
|
||
+
|
||
+d29 = operandFromRegNo 61
|
||
+
|
||
+d30 = operandFromRegNo 62
|
||
+
|
||
+d31 = operandFromRegNo d31RegNo
|
||
+
|
||
+fitsIn12bitImm :: (Num a, Ord a) => a -> Bool
|
||
+fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit
|
||
+
|
||
+intMin12bit :: (Num a) => a
|
||
+intMin12bit = -2048
|
||
+
|
||
+intMax12bit :: (Num a) => a
|
||
+intMax12bit = 2047
|
||
+
|
||
+fitsIn32bits :: (Num a, Ord a, Bits a) => a -> Bool
|
||
+fitsIn32bits i = (-1 `shiftL` 31) <= i && i <= (1 `shiftL` 31 - 1)
|
||
+
|
||
+isNbitEncodeable :: Int -> Integer -> Bool
|
||
+isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
|
||
+
|
||
+isEncodeableInWidth :: Width -> Integer -> Bool
|
||
+isEncodeableInWidth = isNbitEncodeable . widthInBits
|
||
+
|
||
+isIntOp :: Operand -> Bool
|
||
+isIntOp = not . isFloatOp
|
||
+
|
||
+isFloatOp :: Operand -> Bool
|
||
+isFloatOp (OpReg _ reg) | isFloatReg reg = True
|
||
+isFloatOp _ = False
|
||
+
|
||
+isFloatReg :: Reg -> Bool
|
||
+isFloatReg (RegReal (RealRegSingle i)) | i > 31 = True
|
||
+isFloatReg (RegVirtual (VirtualRegF _)) = True
|
||
+isFloatReg (RegVirtual (VirtualRegD _)) = True
|
||
+isFloatReg _ = False
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Ppr.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Ppr.hs
|
||
@@ -0,0 +1,715 @@
|
||
+{-# LANGUAGE ScopedTypeVariables #-}
|
||
+
|
||
+module GHC.CmmToAsm.RV64.Ppr (pprNatCmmDecl, pprInstr) where
|
||
+
|
||
+import GHC.Cmm hiding (topInfoTable)
|
||
+import GHC.Cmm.BlockId
|
||
+import GHC.Cmm.CLabel
|
||
+import GHC.Cmm.Dataflow.Label
|
||
+import GHC.CmmToAsm.Config
|
||
+import GHC.CmmToAsm.Format
|
||
+import GHC.CmmToAsm.Ppr
|
||
+import GHC.CmmToAsm.RV64.Cond
|
||
+import GHC.CmmToAsm.RV64.Instr
|
||
+import GHC.CmmToAsm.RV64.Regs
|
||
+import GHC.CmmToAsm.Types
|
||
+import GHC.CmmToAsm.Utils
|
||
+import GHC.Platform
|
||
+import GHC.Platform.Reg
|
||
+import GHC.Prelude hiding (EQ)
|
||
+import GHC.Types.Basic (Alignment, alignmentBytes, mkAlignment)
|
||
+import GHC.Types.Unique (getUnique, pprUniqueAlways)
|
||
+import GHC.Utils.Outputable
|
||
+import GHC.Utils.Panic
|
||
+
|
||
+pprNatCmmDecl :: forall doc. (IsDoc doc) => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc
|
||
+pprNatCmmDecl config (CmmData section dats) =
|
||
+ pprSectionAlign config section $$ pprDatas config dats
|
||
+pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
|
||
+ let platform = ncgPlatform config
|
||
+
|
||
+ pprProcAlignment :: doc
|
||
+ pprProcAlignment = maybe empty (pprAlign . mkAlignment) (ncgProcAlignment config)
|
||
+ in pprProcAlignment
|
||
+ $$ case topInfoTable proc of
|
||
+ Nothing ->
|
||
+ -- special case for code without info table:
|
||
+ pprSectionAlign config (Section Text lbl)
|
||
+ $$
|
||
+ -- do not
|
||
+ -- pprProcAlignment config $$
|
||
+ pprLabel platform lbl
|
||
+ $$ vcat (map (pprBasicBlock config top_info) blocks) -- blocks guaranteed not null, so label needed
|
||
+ $$ ppWhen
|
||
+ (ncgDwarfEnabled config)
|
||
+ (line (pprBlockEndLabel platform lbl) $$ line (pprProcEndLabel platform lbl))
|
||
+ $$ pprSizeDecl platform lbl
|
||
+ Just (CmmStaticsRaw info_lbl _) ->
|
||
+ pprSectionAlign config (Section Text info_lbl)
|
||
+ $$
|
||
+ -- pprProcAlignment config $$
|
||
+ ( if platformHasSubsectionsViaSymbols platform
|
||
+ then line (pprAsmLabel platform (mkDeadStripPreventer info_lbl) <> char ':')
|
||
+ else empty
|
||
+ )
|
||
+ $$ vcat (map (pprBasicBlock config top_info) blocks)
|
||
+ $$ ppWhen (ncgDwarfEnabled config) (line (pprProcEndLabel platform info_lbl))
|
||
+ $$
|
||
+ -- above: Even the first block gets a label, because with branch-chain
|
||
+ -- elimination, it might be the target of a goto.
|
||
+ ( if platformHasSubsectionsViaSymbols platform
|
||
+ then -- See Note [Subsections Via Symbols]
|
||
+
|
||
+ line
|
||
+ $ text "\t.long "
|
||
+ <+> pprAsmLabel platform info_lbl
|
||
+ <+> char '-'
|
||
+ <+> pprAsmLabel platform (mkDeadStripPreventer info_lbl)
|
||
+ else empty
|
||
+ )
|
||
+ $$ pprSizeDecl platform info_lbl
|
||
+{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> SDoc #-}
|
||
+{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> HDoc #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
|
||
+
|
||
+pprLabel :: (IsDoc doc) => Platform -> CLabel -> doc
|
||
+pprLabel platform lbl =
|
||
+ pprGloblDecl platform lbl
|
||
+ $$ pprTypeDecl platform lbl
|
||
+ $$ line (pprAsmLabel platform lbl <> char ':')
|
||
+
|
||
+pprAlign :: (IsDoc doc) => Alignment -> doc
|
||
+pprAlign alignment =
|
||
+ -- "The .align directive for RISC-V is an alias to .p2align, which aligns to a
|
||
+ -- power of two, so .align 2 means align to 4 bytes. Because the definition of
|
||
+ -- the .align directive varies by architecture, it is recommended to use the
|
||
+ -- unambiguous .p2align or .balign directives instead."
|
||
+ -- (https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/riscv-asm.md#-align)
|
||
+ line $ text "\t.balign " <> int (alignmentBytes alignment)
|
||
+
|
||
+-- | Print appropriate alignment for the given section type.
|
||
+--
|
||
+-- Currently, this always aligns to a full machine word (8 byte.) A future
|
||
+-- improvement could be to really do this per section type (though, it's
|
||
+-- probably not a big gain.)
|
||
+pprAlignForSection :: (IsDoc doc) => SectionType -> doc
|
||
+pprAlignForSection _seg = pprAlign . mkAlignment $ 8
|
||
+
|
||
+-- | Print section header and appropriate alignment for that section.
|
||
+--
|
||
+-- This will e.g. emit a header like:
|
||
+--
|
||
+-- .section .text
|
||
+-- .balign 8
|
||
+pprSectionAlign :: (IsDoc doc) => NCGConfig -> Section -> doc
|
||
+pprSectionAlign _config (Section (OtherSection _) _) =
|
||
+ panic "RV64.Ppr.pprSectionAlign: unknown section"
|
||
+pprSectionAlign config sec@(Section seg _) =
|
||
+ line (pprSectionHeader config sec)
|
||
+ $$ pprAlignForSection seg
|
||
+
|
||
+pprProcEndLabel ::
|
||
+ (IsLine doc) =>
|
||
+ Platform ->
|
||
+ -- | Procedure name
|
||
+ CLabel ->
|
||
+ doc
|
||
+pprProcEndLabel platform lbl =
|
||
+ pprAsmLabel platform (mkAsmTempProcEndLabel lbl) <> colon
|
||
+
|
||
+pprBlockEndLabel ::
|
||
+ (IsLine doc) =>
|
||
+ Platform ->
|
||
+ -- | Block name
|
||
+ CLabel ->
|
||
+ doc
|
||
+pprBlockEndLabel platform lbl =
|
||
+ pprAsmLabel platform (mkAsmTempEndLabel lbl) <> colon
|
||
+
|
||
+-- | Output the ELF .size directive (if needed.)
|
||
+pprSizeDecl :: (IsDoc doc) => Platform -> CLabel -> doc
|
||
+pprSizeDecl platform lbl
|
||
+ | osElfTarget (platformOS platform) =
|
||
+ line $ text "\t.size" <+> asmLbl <> text ", .-" <> asmLbl
|
||
+ where
|
||
+ asmLbl = pprAsmLabel platform lbl
|
||
+pprSizeDecl _ _ = empty
|
||
+
|
||
+pprBasicBlock ::
|
||
+ (IsDoc doc) =>
|
||
+ NCGConfig ->
|
||
+ LabelMap RawCmmStatics ->
|
||
+ NatBasicBlock Instr ->
|
||
+ doc
|
||
+pprBasicBlock config info_env (BasicBlock blockid instrs) =
|
||
+ maybe_infotable
|
||
+ $ pprLabel platform asmLbl
|
||
+ $$ vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} optInstrs))
|
||
+ $$ ppWhen
|
||
+ (ncgDwarfEnabled config)
|
||
+ ( -- Emit both end labels since this may end up being a standalone
|
||
+ -- top-level block
|
||
+ line
|
||
+ ( pprBlockEndLabel platform asmLbl
|
||
+ <> pprProcEndLabel platform asmLbl
|
||
+ )
|
||
+ )
|
||
+ where
|
||
+ -- TODO: Check if we can filter more instructions here.
|
||
+ -- TODO: Shouldn't this be a more general check on a higher level?
|
||
+ -- Filter out identity moves. E.g. mov x18, x18 will be dropped.
|
||
+ optInstrs = filter f instrs
|
||
+ where
|
||
+ f (MOV o1 o2) | o1 == o2 = False
|
||
+ f _ = True
|
||
+
|
||
+ asmLbl = blockLbl blockid
|
||
+ platform = ncgPlatform config
|
||
+ maybe_infotable c = case mapLookup blockid info_env of
|
||
+ Nothing -> c
|
||
+ Just (CmmStaticsRaw info_lbl info) ->
|
||
+ -- pprAlignForSection platform Text $$
|
||
+ infoTableLoc
|
||
+ $$ vcat (map (pprData config) info)
|
||
+ $$ pprLabel platform info_lbl
|
||
+ $$ c
|
||
+ $$ ppWhen
|
||
+ (ncgDwarfEnabled config)
|
||
+ (line (pprBlockEndLabel platform info_lbl))
|
||
+ -- Make sure the info table has the right .loc for the block
|
||
+ -- coming right after it. See Note [Info Offset]
|
||
+ infoTableLoc = case instrs of
|
||
+ (l@LOCATION {} : _) -> pprInstr platform l
|
||
+ _other -> empty
|
||
+
|
||
+pprDatas :: (IsDoc doc) => NCGConfig -> RawCmmStatics -> doc
|
||
+-- See Note [emit-time elimination of static indirections] in "GHC.Cmm.CLabel".
|
||
+pprDatas config (CmmStaticsRaw alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
|
||
+ | lbl == mkIndStaticInfoLabel,
|
||
+ let labelInd (CmmLabelOff l _) = Just l
|
||
+ labelInd (CmmLabel l) = Just l
|
||
+ labelInd _ = Nothing,
|
||
+ Just ind' <- labelInd ind,
|
||
+ alias `mayRedirectTo` ind' =
|
||
+ pprGloblDecl (ncgPlatform config) alias
|
||
+ $$ line (text ".equiv" <+> pprAsmLabel (ncgPlatform config) alias <> comma <> pprAsmLabel (ncgPlatform config) ind')
|
||
+pprDatas config (CmmStaticsRaw lbl dats) =
|
||
+ vcat (pprLabel platform lbl : map (pprData config) dats)
|
||
+ where
|
||
+ platform = ncgPlatform config
|
||
+
|
||
+pprData :: (IsDoc doc) => NCGConfig -> CmmStatic -> doc
|
||
+pprData _config (CmmString str) = line (pprString str)
|
||
+pprData _config (CmmFileEmbed path _) = line (pprFileEmbed path)
|
||
+-- TODO: AFAIK there is no Darwin for RISC-V, so we may consider simplifying this.
|
||
+pprData config (CmmUninitialised bytes) =
|
||
+ line
|
||
+ $ let platform = ncgPlatform config
|
||
+ in if platformOS platform == OSDarwin
|
||
+ then text ".space " <> int bytes
|
||
+ else text ".skip " <> int bytes
|
||
+pprData config (CmmStaticLit lit) = pprDataItem config lit
|
||
+
|
||
+pprGloblDecl :: (IsDoc doc) => Platform -> CLabel -> doc
|
||
+pprGloblDecl platform lbl
|
||
+ | not (externallyVisibleCLabel lbl) = empty
|
||
+ | otherwise = line (text "\t.globl " <> pprAsmLabel platform lbl)
|
||
+
|
||
+-- Note [Always use objects for info tables]
|
||
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+-- See discussion in X86.Ppr for why this is necessary. Essentially we need to
|
||
+-- ensure that we never pass function symbols when we might want to lookup the
|
||
+-- info table. If we did, we could end up with procedure linking tables
|
||
+-- (PLT)s, and thus the lookup wouldn't point to the function, but into the
|
||
+-- jump table.
|
||
+--
|
||
+-- Fun fact: The LLVMMangler exists to patch this issue on the LLVM side as
|
||
+-- well.
|
||
+pprLabelType' :: (IsLine doc) => Platform -> CLabel -> doc
|
||
+pprLabelType' platform lbl =
|
||
+ if isCFunctionLabel lbl || functionOkInfoTable
|
||
+ then text "@function"
|
||
+ else text "@object"
|
||
+ where
|
||
+ functionOkInfoTable =
|
||
+ platformTablesNextToCode platform
|
||
+ && isInfoTableLabel lbl
|
||
+ && not (isCmmInfoTableLabel lbl)
|
||
+ && not (isConInfoTableLabel lbl)
|
||
+
|
||
+-- this is called pprTypeAndSizeDecl in PPC.Ppr
|
||
+pprTypeDecl :: (IsDoc doc) => Platform -> CLabel -> doc
|
||
+pprTypeDecl platform lbl =
|
||
+ if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl
|
||
+ then line (text ".type " <> pprAsmLabel platform lbl <> text ", " <> pprLabelType' platform lbl)
|
||
+ else empty
|
||
+
|
||
+pprDataItem :: (IsDoc doc) => NCGConfig -> CmmLit -> doc
|
||
+pprDataItem config lit =
|
||
+ lines_ (ppr_item (cmmTypeFormat $ cmmLitType platform lit) lit)
|
||
+ where
|
||
+ platform = ncgPlatform config
|
||
+
|
||
+ imm = litToImm lit
|
||
+
|
||
+ ppr_item II8 _ = [text "\t.byte\t" <> pprDataImm platform imm]
|
||
+ ppr_item II16 _ = [text "\t.short\t" <> pprDataImm platform imm]
|
||
+ ppr_item II32 _ = [text "\t.long\t" <> pprDataImm platform imm]
|
||
+ ppr_item II64 _ = [text "\t.quad\t" <> pprDataImm platform imm]
|
||
+ ppr_item FF32 (CmmFloat r _) =
|
||
+ let bs = floatToBytes (fromRational r)
|
||
+ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs
|
||
+ ppr_item FF64 (CmmFloat r _) =
|
||
+ let bs = doubleToBytes (fromRational r)
|
||
+ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs
|
||
+ ppr_item _ _ = pprPanic "pprDataItem:ppr_item" (text $ show lit)
|
||
+
|
||
+-- | Pretty print an immediate value in the @data@ section
|
||
+--
|
||
+-- This does not include any checks. We rely on the Assembler to check for
|
||
+-- errors. Use `pprOpImm` for immediates in instructions (operands.)
|
||
+pprDataImm :: (IsLine doc) => Platform -> Imm -> doc
|
||
+pprDataImm _ (ImmInt i) = int i
|
||
+pprDataImm _ (ImmInteger i) = integer i
|
||
+pprDataImm p (ImmCLbl l) = pprAsmLabel p l
|
||
+pprDataImm p (ImmIndex l i) = pprAsmLabel p l <> char '+' <> int i
|
||
+pprDataImm _ (ImmLit s) = ftext s
|
||
+pprDataImm _ (ImmFloat f) = float (fromRational f)
|
||
+pprDataImm _ (ImmDouble d) = double (fromRational d)
|
||
+pprDataImm p (ImmConstantSum a b) = pprDataImm p a <> char '+' <> pprDataImm p b
|
||
+pprDataImm p (ImmConstantDiff a b) =
|
||
+ pprDataImm p a
|
||
+ <> char '-'
|
||
+ <> lparen
|
||
+ <> pprDataImm p b
|
||
+ <> rparen
|
||
+
|
||
+-- | Comment @c@ with @# c@
|
||
+asmComment :: SDoc -> SDoc
|
||
+asmComment c = text "#" <+> c
|
||
+
|
||
+-- | Comment @c@ with @// c@
|
||
+asmDoubleslashComment :: SDoc -> SDoc
|
||
+asmDoubleslashComment c = text "//" <+> c
|
||
+
|
||
+-- | Comment @c@ with @/* c */@ (multiline comment)
|
||
+asmMultilineComment :: SDoc -> SDoc
|
||
+asmMultilineComment c = text "/*" $+$ c $+$ text "*/"
|
||
+
|
||
+-- | Pretty print an immediate operand of an instruction
|
||
+--
|
||
+-- The kinds of immediates we can use here is pretty limited: RISCV doesn't
|
||
+-- support index expressions (as e.g. Aarch64 does.) Floating points need to
|
||
+-- fit in range. As we don't need them, forbid them to save us from future
|
||
+-- troubles.
|
||
+pprOpImm :: (IsLine doc) => Platform -> Imm -> doc
|
||
+pprOpImm platform im = case im of
|
||
+ ImmInt i -> int i
|
||
+ ImmInteger i -> integer i
|
||
+ ImmCLbl l -> char '=' <> pprAsmLabel platform l
|
||
+ _ -> pprPanic "RV64.Ppr.pprOpImm" (text "Unsupported immediate for instruction operands" <> colon <+> (text . show) im)
|
||
+
|
||
+-- | Negate integer immediate operand
|
||
+--
|
||
+-- This function is partial and will panic if the operand is not an integer.
|
||
+negOp :: Operand -> Operand
|
||
+negOp (OpImm (ImmInt i)) = OpImm (ImmInt (negate i))
|
||
+negOp (OpImm (ImmInteger i)) = OpImm (ImmInteger (negate i))
|
||
+negOp op = pprPanic "RV64.negOp" (text $ show op)
|
||
+
|
||
+-- | Pretty print an operand
|
||
+pprOp :: (IsLine doc) => Platform -> Operand -> doc
|
||
+pprOp plat op = case op of
|
||
+ OpReg w r -> pprReg w r
|
||
+ OpImm im -> pprOpImm plat im
|
||
+ OpAddr (AddrRegImm r1 im) -> pprOpImm plat im <> char '(' <> pprReg W64 r1 <> char ')'
|
||
+ OpAddr (AddrReg r1) -> text "0(" <+> pprReg W64 r1 <+> char ')'
|
||
+
|
||
+-- | Pretty print register with calling convention name
|
||
+--
|
||
+-- This representation makes it easier to reason about the emitted assembly
|
||
+-- code.
|
||
+pprReg :: forall doc. (IsLine doc) => Width -> Reg -> doc
|
||
+pprReg w r = case r of
|
||
+ RegReal (RealRegSingle i) -> ppr_reg_no i
|
||
+ -- virtual regs should not show up, but this is helpful for debugging.
|
||
+ RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
|
||
+ RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
|
||
+ RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
|
||
+ _ -> pprPanic "RiscV64.pprReg" (text (show r) <+> ppr w)
|
||
+ where
|
||
+ ppr_reg_no :: Int -> doc
|
||
+ -- General Purpose Registers
|
||
+ ppr_reg_no 0 = text "zero"
|
||
+ ppr_reg_no 1 = text "ra"
|
||
+ ppr_reg_no 2 = text "sp"
|
||
+ ppr_reg_no 3 = text "gp"
|
||
+ ppr_reg_no 4 = text "tp"
|
||
+ ppr_reg_no 5 = text "t0"
|
||
+ ppr_reg_no 6 = text "t1"
|
||
+ ppr_reg_no 7 = text "t2"
|
||
+ ppr_reg_no 8 = text "s0"
|
||
+ ppr_reg_no 9 = text "s1"
|
||
+ ppr_reg_no 10 = text "a0"
|
||
+ ppr_reg_no 11 = text "a1"
|
||
+ ppr_reg_no 12 = text "a2"
|
||
+ ppr_reg_no 13 = text "a3"
|
||
+ ppr_reg_no 14 = text "a4"
|
||
+ ppr_reg_no 15 = text "a5"
|
||
+ ppr_reg_no 16 = text "a6"
|
||
+ ppr_reg_no 17 = text "a7"
|
||
+ ppr_reg_no 18 = text "s2"
|
||
+ ppr_reg_no 19 = text "s3"
|
||
+ ppr_reg_no 20 = text "s4"
|
||
+ ppr_reg_no 21 = text "s5"
|
||
+ ppr_reg_no 22 = text "s6"
|
||
+ ppr_reg_no 23 = text "s7"
|
||
+ ppr_reg_no 24 = text "s8"
|
||
+ ppr_reg_no 25 = text "s9"
|
||
+ ppr_reg_no 26 = text "s10"
|
||
+ ppr_reg_no 27 = text "s11"
|
||
+ ppr_reg_no 28 = text "t3"
|
||
+ ppr_reg_no 29 = text "t4"
|
||
+ ppr_reg_no 30 = text "t5"
|
||
+ ppr_reg_no 31 = text "t6"
|
||
+ -- Floating Point Registers
|
||
+ ppr_reg_no 32 = text "ft0"
|
||
+ ppr_reg_no 33 = text "ft1"
|
||
+ ppr_reg_no 34 = text "ft2"
|
||
+ ppr_reg_no 35 = text "ft3"
|
||
+ ppr_reg_no 36 = text "ft4"
|
||
+ ppr_reg_no 37 = text "ft5"
|
||
+ ppr_reg_no 38 = text "ft6"
|
||
+ ppr_reg_no 39 = text "ft7"
|
||
+ ppr_reg_no 40 = text "fs0"
|
||
+ ppr_reg_no 41 = text "fs1"
|
||
+ ppr_reg_no 42 = text "fa0"
|
||
+ ppr_reg_no 43 = text "fa1"
|
||
+ ppr_reg_no 44 = text "fa2"
|
||
+ ppr_reg_no 45 = text "fa3"
|
||
+ ppr_reg_no 46 = text "fa4"
|
||
+ ppr_reg_no 47 = text "fa5"
|
||
+ ppr_reg_no 48 = text "fa6"
|
||
+ ppr_reg_no 49 = text "fa7"
|
||
+ ppr_reg_no 50 = text "fs2"
|
||
+ ppr_reg_no 51 = text "fs3"
|
||
+ ppr_reg_no 52 = text "fs4"
|
||
+ ppr_reg_no 53 = text "fs5"
|
||
+ ppr_reg_no 54 = text "fs6"
|
||
+ ppr_reg_no 55 = text "fs7"
|
||
+ ppr_reg_no 56 = text "fs8"
|
||
+ ppr_reg_no 57 = text "fs9"
|
||
+ ppr_reg_no 58 = text "fs10"
|
||
+ ppr_reg_no 59 = text "fs11"
|
||
+ ppr_reg_no 60 = text "ft8"
|
||
+ ppr_reg_no 61 = text "ft9"
|
||
+ ppr_reg_no 62 = text "ft10"
|
||
+ ppr_reg_no 63 = text "ft11"
|
||
+ ppr_reg_no i
|
||
+ | i < 0 = pprPanic "Unexpected register number (min is 0)" (ppr w <+> int i)
|
||
+ | i > 63 = pprPanic "Unexpected register number (max is 63)" (ppr w <+> int i)
|
||
+ -- no support for widths > W64.
|
||
+ | otherwise = pprPanic "Unsupported width in register (max is 64)" (ppr w <+> int i)
|
||
+
|
||
+-- | Single precision `Operand` (floating-point)
|
||
+isSingleOp :: Operand -> Bool
|
||
+isSingleOp (OpReg W32 _) = True
|
||
+isSingleOp _ = False
|
||
+
|
||
+-- | Double precision `Operand` (floating-point)
|
||
+isDoubleOp :: Operand -> Bool
|
||
+isDoubleOp (OpReg W64 _) = True
|
||
+isDoubleOp _ = False
|
||
+
|
||
+-- | `Operand` is an immediate value
|
||
+isImmOp :: Operand -> Bool
|
||
+isImmOp (OpImm _) = True
|
||
+isImmOp _ = False
|
||
+
|
||
+-- | `Operand` is an immediate @0@ value
|
||
+isImmZero :: Operand -> Bool
|
||
+isImmZero (OpImm (ImmFloat 0)) = True
|
||
+isImmZero (OpImm (ImmDouble 0)) = True
|
||
+isImmZero (OpImm (ImmInt 0)) = True
|
||
+isImmZero _ = False
|
||
+
|
||
+-- | `Target` represents a label
|
||
+isLabel :: Target -> Bool
|
||
+isLabel (TBlock _) = True
|
||
+isLabel _ = False
|
||
+
|
||
+-- | Get the pretty-printed label from a `Target`
|
||
+--
|
||
+-- This function is partial and will panic if the `Target` is not a label.
|
||
+getLabel :: (IsLine doc) => Platform -> Target -> doc
|
||
+getLabel platform (TBlock bid) = pprBlockId platform bid
|
||
+ where
|
||
+ pprBlockId :: (IsLine doc) => Platform -> BlockId -> doc
|
||
+ pprBlockId platform bid = pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
|
||
+getLabel _platform _other = panic "Cannot turn this into a label"
|
||
+
|
||
+-- | Pretty-print an `Instr`
|
||
+--
|
||
+-- This function is partial and will panic if the `Instr` is not supported. This
|
||
+-- can happen due to invalid operands or unexpected meta instructions.
|
||
+pprInstr :: (IsDoc doc) => Platform -> Instr -> doc
|
||
+pprInstr platform instr = case instr of
|
||
+ -- see Note [dualLine and dualDoc] in GHC.Utils.Outputable
|
||
+ COMMENT s -> dualDoc (asmComment s) empty
|
||
+ MULTILINE_COMMENT s -> dualDoc (asmMultilineComment s) empty
|
||
+ ANN d i -> dualDoc (pprInstr platform i <+> asmDoubleslashComment d) (pprInstr platform i)
|
||
+ LOCATION file line' col _name ->
|
||
+ line (text "\t.loc" <+> int file <+> int line' <+> int col)
|
||
+ DELTA d -> dualDoc (asmComment $ text "\tdelta = " <> int d) empty
|
||
+ NEWBLOCK _ -> panic "PprInstr: NEWBLOCK"
|
||
+ LDATA _ _ -> panic "pprInstr: LDATA"
|
||
+ PUSH_STACK_FRAME ->
|
||
+ lines_
|
||
+ [ text "\taddi sp, sp, -16",
|
||
+ text "\tsd x1, 8(sp)", -- store RA
|
||
+ text "\tsd x8, 0(sp)", -- store FP/s0
|
||
+ text "\taddi x8, sp, 16"
|
||
+ ]
|
||
+ POP_STACK_FRAME ->
|
||
+ lines_
|
||
+ [ text "\tld x8, 0(sp)", -- restore FP/s0
|
||
+ text "\tld x1, 8(sp)", -- restore RA
|
||
+ text "\taddi sp, sp, 16"
|
||
+ ]
|
||
+ ADD o1 o2 o3
|
||
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfadd." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
|
||
+ -- This case is used for sign extension: SEXT.W op
|
||
+ | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3 -> op3 (text "\taddiw") o1 o2 o3
|
||
+ | otherwise -> op3 (text "\tadd") o1 o2 o3
|
||
+ MUL o1 o2 o3
|
||
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfmul." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
|
||
+ | otherwise -> op3 (text "\tmul") o1 o2 o3
|
||
+ MULH o1 o2 o3 -> op3 (text "\tmulh") o1 o2 o3
|
||
+ NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfneg.s") o1 o2
|
||
+ NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfneg.d") o1 o2
|
||
+ NEG o1 o2 -> op2 (text "\tneg") o1 o2
|
||
+ DIV o1 o2 o3
|
||
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 ->
|
||
+ -- TODO: This must (likely) be refined regarding width
|
||
+ op3 (text "\tfdiv." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
|
||
+ DIV o1 o2 o3 -> op3 (text "\tdiv") o1 o2 o3
|
||
+ REM o1 o2 o3
|
||
+ | isFloatOp o1 || isFloatOp o2 || isFloatOp o3 ->
|
||
+ panic "pprInstr - REM not implemented for floats (yet)"
|
||
+ REM o1 o2 o3 -> op3 (text "\trem") o1 o2 o3
|
||
+ REMU o1 o2 o3 -> op3 (text "\tremu") o1 o2 o3
|
||
+ SUB o1 o2 o3
|
||
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfsub." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
|
||
+ | isImmOp o3 -> op3 (text "\taddi") o1 o2 (negOp o3)
|
||
+ | otherwise -> op3 (text "\tsub") o1 o2 o3
|
||
+ DIVU o1 o2 o3 -> op3 (text "\tdivu") o1 o2 o3
|
||
+ AND o1 o2 o3
|
||
+ | isImmOp o3 -> op3 (text "\tandi") o1 o2 o3
|
||
+ | otherwise -> op3 (text "\tand") o1 o2 o3
|
||
+ OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3
|
||
+ SRA o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3
|
||
+ SRA o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3
|
||
+ XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3
|
||
+ SLL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3
|
||
+ SRL o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3
|
||
+ MOV o1 o2
|
||
+ | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs
|
||
+ | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs
|
||
+ | isFloatOp o1 && isImmZero o2 && isDoubleOp o1 -> op2 (text "\tfcvt.d.w") o1 zero
|
||
+ | isFloatOp o1 && isImmZero o2 && isSingleOp o1 -> op2 (text "\tfcvt.s.w") o1 zero
|
||
+ | isFloatOp o1 && not (isFloatOp o2) && isSingleOp o1 -> op2 (text "\tfmv.w.x") o1 o2
|
||
+ | isFloatOp o1 && not (isFloatOp o2) && isDoubleOp o1 -> op2 (text "\tfmv.d.x") o1 o2
|
||
+ | not (isFloatOp o1) && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.x.w") o1 o2
|
||
+ | not (isFloatOp o1) && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.x.d") o1 o2
|
||
+ | (OpImm (ImmInteger i)) <- o2,
|
||
+ fitsIn12bitImm i ->
|
||
+ lines_ [text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2]
|
||
+ | (OpImm (ImmInt i)) <- o2,
|
||
+ fitsIn12bitImm i ->
|
||
+ lines_ [text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2]
|
||
+ | (OpImm (ImmInteger i)) <- o2,
|
||
+ fitsIn32bits i ->
|
||
+ lines_
|
||
+ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")",
|
||
+ text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")"
|
||
+ ]
|
||
+ | (OpImm (ImmInt i)) <- o2,
|
||
+ fitsIn32bits i ->
|
||
+ lines_
|
||
+ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")",
|
||
+ text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")"
|
||
+ ]
|
||
+ | isImmOp o2 ->
|
||
+ -- Surrender! Let the assembler figure out the right expressions with pseudo-op LI.
|
||
+ lines_ [text "\tli" <+> pprOp platform o1 <> comma <+> pprOp platform o2]
|
||
+ | otherwise -> op3 (text "\taddi") o1 o2 (OpImm (ImmInt 0))
|
||
+ ORI o1 o2 o3 -> op3 (text "\tori") o1 o2 o3
|
||
+ XORI o1 o2 o3 -> op3 (text "\txori") o1 o2 o3
|
||
+ J_TBL _ _ r -> pprInstr platform (B (TReg r))
|
||
+ B l | isLabel l -> line $ text "\tjal" <+> pprOp platform x0 <> comma <+> getLabel platform l
|
||
+ B (TReg r) -> line $ text "\tjalr" <+> pprOp platform x0 <> comma <+> pprReg W64 r <> comma <+> text "0"
|
||
+ BL r _ -> line $ text "\tjalr" <+> text "x1" <> comma <+> pprReg W64 r <> comma <+> text "0"
|
||
+ BCOND c l r t
|
||
+ | isLabel t ->
|
||
+ line $ text "\t" <> pprBcond c <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
|
||
+ BCOND _ _ _ (TReg _) -> panic "RV64.ppr: No conditional branching to registers!"
|
||
+ CSET o l r c -> case c of
|
||
+ EQ
|
||
+ | isIntOp l && isIntOp r ->
|
||
+ lines_
|
||
+ [ subFor l r,
|
||
+ text "\tseqz" <+> pprOp platform o <> comma <+> pprOp platform o
|
||
+ ]
|
||
+ EQ | isFloatOp l && isFloatOp r -> line $ binOp ("\tfeq." ++ floatOpPrecision platform l r)
|
||
+ NE
|
||
+ | isIntOp l && isIntOp r ->
|
||
+ lines_
|
||
+ [ subFor l r,
|
||
+ text "\tsnez" <+> pprOp platform o <> comma <+> pprOp platform o
|
||
+ ]
|
||
+ NE
|
||
+ | isFloatOp l && isFloatOp r ->
|
||
+ lines_
|
||
+ [ binOp ("\tfeq." ++ floatOpPrecision platform l r),
|
||
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
|
||
+ ]
|
||
+ SLT -> lines_ [sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r]
|
||
+ SLE ->
|
||
+ lines_
|
||
+ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l,
|
||
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
|
||
+ ]
|
||
+ SGE ->
|
||
+ lines_
|
||
+ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r,
|
||
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
|
||
+ ]
|
||
+ SGT -> lines_ [sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l]
|
||
+ ULT -> lines_ [sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r]
|
||
+ ULE ->
|
||
+ lines_
|
||
+ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l,
|
||
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
|
||
+ ]
|
||
+ UGE ->
|
||
+ lines_
|
||
+ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r,
|
||
+ text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"
|
||
+ ]
|
||
+ UGT -> lines_ [sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l]
|
||
+ FLT | isFloatOp l && isFloatOp r -> line $ binOp ("\tflt." ++ floatOpPrecision platform l r)
|
||
+ FLE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfle." ++ floatOpPrecision platform l r)
|
||
+ FGT | isFloatOp l && isFloatOp r -> line $ binOp ("\tfgt." ++ floatOpPrecision platform l r)
|
||
+ FGE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfge." ++ floatOpPrecision platform l r)
|
||
+ x -> pprPanic "RV64.ppr: unhandled CSET conditional" (text (show x) <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l)
|
||
+ where
|
||
+ subFor l r
|
||
+ | (OpImm _) <- r = text "\taddi" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform (negOp r)
|
||
+ | (OpImm _) <- l = panic "RV64.ppr: Cannot SUB IMM _"
|
||
+ | otherwise = text "\tsub" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r
|
||
+ sltFor l r
|
||
+ | (OpImm _) <- r = text "\tslti"
|
||
+ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLT IMM _"
|
||
+ | otherwise = text "\tslt"
|
||
+ sltuFor l r
|
||
+ | (OpImm _) <- r = text "\tsltui"
|
||
+ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLTU IMM _"
|
||
+ | otherwise = text "\tsltu"
|
||
+ binOp :: (IsLine doc) => String -> doc
|
||
+ binOp op = text op <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r
|
||
+ STR II8 o1 o2 -> op2 (text "\tsb") o1 o2
|
||
+ STR II16 o1 o2 -> op2 (text "\tsh") o1 o2
|
||
+ STR II32 o1 o2 -> op2 (text "\tsw") o1 o2
|
||
+ STR II64 o1 o2 -> op2 (text "\tsd") o1 o2
|
||
+ STR FF32 o1 o2 -> op2 (text "\tfsw") o1 o2
|
||
+ STR FF64 o1 o2 -> op2 (text "\tfsd") o1 o2
|
||
+ LDR _f o1 (OpImm (ImmIndex lbl off)) ->
|
||
+ lines_
|
||
+ [ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl,
|
||
+ text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off
|
||
+ ]
|
||
+ LDR _f o1 (OpImm (ImmCLbl lbl)) ->
|
||
+ line $ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl
|
||
+ LDR II8 o1 o2 -> op2 (text "\tlb") o1 o2
|
||
+ LDR II16 o1 o2 -> op2 (text "\tlh") o1 o2
|
||
+ LDR II32 o1 o2 -> op2 (text "\tlw") o1 o2
|
||
+ LDR II64 o1 o2 -> op2 (text "\tld") o1 o2
|
||
+ LDR FF32 o1 o2 -> op2 (text "\tflw") o1 o2
|
||
+ LDR FF64 o1 o2 -> op2 (text "\tfld") o1 o2
|
||
+ LDRU II8 o1 o2 -> op2 (text "\tlbu") o1 o2
|
||
+ LDRU II16 o1 o2 -> op2 (text "\tlhu") o1 o2
|
||
+ LDRU II32 o1 o2 -> op2 (text "\tlwu") o1 o2
|
||
+ -- double words (64bit) cannot be sign extended by definition
|
||
+ LDRU II64 o1 o2 -> op2 (text "\tld") o1 o2
|
||
+ LDRU FF32 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tflw") o1 o2
|
||
+ LDRU FF32 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tflw") o1 o2
|
||
+ LDRU FF64 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tfld") o1 o2
|
||
+ LDRU FF64 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tfld") o1 o2
|
||
+ LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
|
||
+ FENCE r w -> line $ text "\tfence" <+> pprFenceType r <> char ',' <+> pprFenceType w
|
||
+ FCVT FloatToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2
|
||
+ FCVT FloatToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.s") o1 o2
|
||
+ FCVT FloatToFloat o1 o2 ->
|
||
+ pprPanic "RV64.pprInstr - impossible float to float conversion"
|
||
+ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
|
||
+ FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.s.w") o1 o2
|
||
+ FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.l") o1 o2
|
||
+ FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.w") o1 o2
|
||
+ FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.d.l") o1 o2
|
||
+ FCVT IntToFloat o1 o2 ->
|
||
+ pprPanic "RV64.pprInstr - impossible integer to float conversion"
|
||
+ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
|
||
+ FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.w.s") o1 o2
|
||
+ FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.w.d") o1 o2
|
||
+ FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.l.s") o1 o2
|
||
+ FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.l.d") o1 o2
|
||
+ FCVT FloatToInt o1 o2 ->
|
||
+ pprPanic "RV64.pprInstr - impossible float to integer conversion"
|
||
+ $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
|
||
+ FABS o1 o2 | isSingleOp o2 -> op2 (text "\tfabs.s") o1 o2
|
||
+ FABS o1 o2 | isDoubleOp o2 -> op2 (text "\tfabs.d") o1 o2
|
||
+ FMA variant d r1 r2 r3 ->
|
||
+ let fma = case variant of
|
||
+ FMAdd -> text "\tfmadd" <> dot <> floatPrecission d
|
||
+ FMSub -> text "\tfmsub" <> dot <> floatPrecission d
|
||
+ FNMAdd -> text "\tfnmadd" <> dot <> floatPrecission d
|
||
+ FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
|
||
+ in op4 fma d r1 r2 r3
|
||
+ instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
|
||
+ where
|
||
+ op2 op o1 o2 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2
|
||
+ op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
|
||
+ op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
|
||
+ pprFenceType FenceRead = text "r"
|
||
+ pprFenceType FenceWrite = text "w"
|
||
+ pprFenceType FenceReadWrite = text "rw"
|
||
+ floatPrecission o
|
||
+ | isSingleOp o = text "s"
|
||
+ | isDoubleOp o = text "d"
|
||
+ | otherwise = pprPanic "Impossible floating point precission: " (pprOp platform o)
|
||
+
|
||
+floatOpPrecision :: Platform -> Operand -> Operand -> String
|
||
+floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isSingleOp l && isSingleOp r = "s" -- single precision
|
||
+floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isDoubleOp l && isDoubleOp r = "d" -- double precision
|
||
+floatOpPrecision p l r = pprPanic "Cannot determine floating point precission" (text "op1" <+> pprOp p l <+> text "op2" <+> pprOp p r)
|
||
+
|
||
+-- | Pretty print a conditional branch
|
||
+--
|
||
+-- This function is partial and will panic if the conditional is not supported;
|
||
+-- i.e. if its floating point related.
|
||
+pprBcond :: (IsLine doc) => Cond -> doc
|
||
+pprBcond c = text "b" <> pprCond c
|
||
+ where
|
||
+ pprCond :: (IsLine doc) => Cond -> doc
|
||
+ pprCond c = case c of
|
||
+ EQ -> text "eq"
|
||
+ NE -> text "ne"
|
||
+ SLT -> text "lt"
|
||
+ SLE -> text "le"
|
||
+ SGE -> text "ge"
|
||
+ SGT -> text "gt"
|
||
+ ULT -> text "ltu"
|
||
+ ULE -> text "leu"
|
||
+ UGE -> text "geu"
|
||
+ UGT -> text "gtu"
|
||
+ -- BCOND cannot handle floating point comparisons / registers
|
||
+ _ -> panic $ "RV64.ppr: unhandled BCOND conditional: " ++ show c
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/RegInfo.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/RegInfo.hs
|
||
@@ -0,0 +1,41 @@
|
||
+-- | Minimum viable implementation of jump short-cutting: No short-cutting.
|
||
+--
|
||
+-- The functions here simply implement the no-short-cutting case. Implementing
|
||
+-- the real behaviour would be a great optimization in future.
|
||
+module GHC.CmmToAsm.RV64.RegInfo
|
||
+ ( getJumpDestBlockId,
|
||
+ canShortcut,
|
||
+ shortcutStatics,
|
||
+ shortcutJump,
|
||
+ JumpDest (..),
|
||
+ )
|
||
+where
|
||
+
|
||
+import GHC.Cmm
|
||
+import GHC.Cmm.BlockId
|
||
+import GHC.CmmToAsm.RV64.Instr
|
||
+import GHC.Prelude
|
||
+import GHC.Utils.Outputable
|
||
+
|
||
+newtype JumpDest = DestBlockId BlockId
|
||
+
|
||
+instance Outputable JumpDest where
|
||
+ ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
|
||
+
|
||
+-- | Extract BlockId
|
||
+--
|
||
+-- Never `Nothing` for Riscv64 NCG.
|
||
+getJumpDestBlockId :: JumpDest -> Maybe BlockId
|
||
+getJumpDestBlockId (DestBlockId bid) = Just bid
|
||
+
|
||
+-- No `Instr`s can bet shortcut (for now)
|
||
+canShortcut :: Instr -> Maybe JumpDest
|
||
+canShortcut _ = Nothing
|
||
+
|
||
+-- Identity of the provided `RawCmmStatics`
|
||
+shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics
|
||
+shortcutStatics _ other_static = other_static
|
||
+
|
||
+-- Identity of the provided `Instr`
|
||
+shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
|
||
+shortcutJump _ other = other
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Regs.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/RV64/Regs.hs
|
||
@@ -0,0 +1,230 @@
|
||
+module GHC.CmmToAsm.RV64.Regs where
|
||
+
|
||
+import GHC.Cmm
|
||
+import GHC.Cmm.CLabel (CLabel)
|
||
+import GHC.CmmToAsm.Format
|
||
+import GHC.Data.FastString
|
||
+import GHC.Platform
|
||
+import GHC.Platform.Reg
|
||
+import GHC.Platform.Reg.Class
|
||
+import GHC.Platform.Regs
|
||
+import GHC.Prelude
|
||
+import GHC.Types.Unique
|
||
+import GHC.Utils.Outputable
|
||
+import GHC.Utils.Panic
|
||
+
|
||
+-- * Registers
|
||
+
|
||
+-- | First integer register number. @zero@ register.
|
||
+x0RegNo :: RegNo
|
||
+x0RegNo = 0
|
||
+
|
||
+-- | return address register
|
||
+x1RegNo, raRegNo :: RegNo
|
||
+x1RegNo = 1
|
||
+raRegNo = x1RegNo
|
||
+
|
||
+x5RegNo, t0RegNo :: RegNo
|
||
+x5RegNo = 5
|
||
+t0RegNo = x5RegNo
|
||
+
|
||
+x7RegNo, t2RegNo :: RegNo
|
||
+x7RegNo = 7
|
||
+t2RegNo = x7RegNo
|
||
+
|
||
+x28RegNo, t3RegNo :: RegNo
|
||
+x28RegNo = 28
|
||
+t3RegNo = x28RegNo
|
||
+
|
||
+-- | Last integer register number. Used as TMP (IP) register.
|
||
+x31RegNo, t6RegNo, tmpRegNo :: RegNo
|
||
+x31RegNo = 31
|
||
+t6RegNo = x31RegNo
|
||
+tmpRegNo = x31RegNo
|
||
+
|
||
+-- | First floating point register.
|
||
+d0RegNo, ft0RegNo :: RegNo
|
||
+d0RegNo = 32
|
||
+ft0RegNo = d0RegNo
|
||
+
|
||
+d7RegNo, ft7RegNo :: RegNo
|
||
+d7RegNo = 39
|
||
+ft7RegNo = d7RegNo
|
||
+
|
||
+-- | Last floating point register.
|
||
+d31RegNo :: RegNo
|
||
+d31RegNo = 63
|
||
+
|
||
+a0RegNo, x10RegNo :: RegNo
|
||
+x10RegNo = 10
|
||
+a0RegNo = x10RegNo
|
||
+
|
||
+a7RegNo, x17RegNo :: RegNo
|
||
+x17RegNo = 17
|
||
+a7RegNo = x17RegNo
|
||
+
|
||
+fa0RegNo, d10RegNo :: RegNo
|
||
+d10RegNo = 42
|
||
+fa0RegNo = d10RegNo
|
||
+
|
||
+fa7RegNo, d17RegNo :: RegNo
|
||
+d17RegNo = 49
|
||
+fa7RegNo = d17RegNo
|
||
+
|
||
+-- Note [The made-up RISCV64 TMP (IP) register]
|
||
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+--
|
||
+-- RISCV64 has no inter-procedural register in its ABI. However, we need one to
|
||
+-- make register spills/loads to/from high number slots. I.e. slot numbers that
|
||
+-- do not fit in a 12bit integer which is used as immediate in the arithmetic
|
||
+-- operations. Thus, we're marking one additional register (x31) as permanently
|
||
+-- non-free and call it TMP.
|
||
+--
|
||
+-- TMP can be used as temporary register in all operations. Just be aware that
|
||
+-- it may be clobbered as soon as you loose direct control over it (i.e. using
|
||
+-- TMP by-passes the register allocation/spilling mechanisms.) It should be fine
|
||
+-- to use it as temporary register in a MachOp translation as long as you don't
|
||
+-- rely on its value beyond this limited scope.
|
||
+--
|
||
+-- X31 is a caller-saved register. I.e. there are no guarantees about what the
|
||
+-- callee does with it. That's exactly what we want here.
|
||
+
|
||
+zeroReg, raReg, spMachReg, tmpReg :: Reg
|
||
+zeroReg = regSingle x0RegNo
|
||
+raReg = regSingle 1
|
||
+
|
||
+-- | Not to be confused with the `CmmReg` `spReg`
|
||
+spMachReg = regSingle 2
|
||
+
|
||
+tmpReg = regSingle tmpRegNo
|
||
+
|
||
+-- | All machine register numbers.
|
||
+allMachRegNos :: [RegNo]
|
||
+allMachRegNos = intRegs ++ fpRegs
|
||
+ where
|
||
+ intRegs = [x0RegNo .. x31RegNo]
|
||
+ fpRegs = [d0RegNo .. d31RegNo]
|
||
+
|
||
+-- | Registers available to the register allocator.
|
||
+--
|
||
+-- These are all registers minus those with a fixed role in RISCV ABI (zero, lr,
|
||
+-- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6,
|
||
+-- D1..D6.)
|
||
+allocatableRegs :: Platform -> [RealReg]
|
||
+allocatableRegs platform =
|
||
+ let isFree = freeReg platform
|
||
+ in map RealRegSingle $ filter isFree allMachRegNos
|
||
+
|
||
+-- | Integer argument registers according to the calling convention
|
||
+allGpArgRegs :: [Reg]
|
||
+allGpArgRegs = map regSingle [a0RegNo .. a7RegNo]
|
||
+
|
||
+-- | Floating point argument registers according to the calling convention
|
||
+allFpArgRegs :: [Reg]
|
||
+allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo]
|
||
+
|
||
+-- * Addressing modes
|
||
+
|
||
+-- | Addressing modes
|
||
+data AddrMode
|
||
+ = -- | A register plus some immediate integer, e.g. @8(sp)@ or @-16(sp)@. The
|
||
+ -- offset needs to fit into 12bits.
|
||
+ AddrRegImm Reg Imm
|
||
+ | -- | A register
|
||
+ AddrReg Reg
|
||
+ deriving (Eq, Show)
|
||
+
|
||
+-- * Immediates
|
||
+
|
||
+data Imm
|
||
+ = ImmInt Int
|
||
+ | ImmInteger Integer -- Sigh.
|
||
+ | ImmCLbl CLabel -- AbstractC Label (with baggage)
|
||
+ | ImmLit FastString
|
||
+ | ImmIndex CLabel Int
|
||
+ | ImmFloat Rational
|
||
+ | ImmDouble Rational
|
||
+ | ImmConstantSum Imm Imm
|
||
+ | ImmConstantDiff Imm Imm
|
||
+ deriving (Eq, Show)
|
||
+
|
||
+-- | Map `CmmLit` to `Imm`
|
||
+--
|
||
+-- N.B. this is a partial function, because not all `CmmLit`s have an immediate
|
||
+-- representation.
|
||
+litToImm :: CmmLit -> Imm
|
||
+litToImm (CmmInt i w) = ImmInteger (narrowS w i)
|
||
+-- narrow to the width: a CmmInt might be out of
|
||
+-- range, but we assume that ImmInteger only contains
|
||
+-- in-range values. A signed value should be fine here.
|
||
+litToImm (CmmFloat f W32) = ImmFloat f
|
||
+litToImm (CmmFloat f W64) = ImmDouble f
|
||
+litToImm (CmmLabel l) = ImmCLbl l
|
||
+litToImm (CmmLabelOff l off) = ImmIndex l off
|
||
+litToImm (CmmLabelDiffOff l1 l2 off _) =
|
||
+ ImmConstantSum
|
||
+ (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
|
||
+ (ImmInt off)
|
||
+litToImm l = panic $ "RV64.Regs.litToImm: no match for " ++ show l
|
||
+
|
||
+-- == To satisfy GHC.CmmToAsm.Reg.Target =======================================
|
||
+
|
||
+-- squeese functions for the graph allocator -----------------------------------
|
||
+
|
||
+-- | regSqueeze_class reg
|
||
+-- Calculate the maximum number of register colors that could be
|
||
+-- denied to a node of this class due to having this reg
|
||
+-- as a neighbour.
|
||
+{-# INLINE virtualRegSqueeze #-}
|
||
+virtualRegSqueeze :: RegClass -> VirtualReg -> Int
|
||
+virtualRegSqueeze cls vr =
|
||
+ case cls of
|
||
+ RcInteger ->
|
||
+ case vr of
|
||
+ VirtualRegI {} -> 1
|
||
+ VirtualRegHi {} -> 1
|
||
+ _other -> 0
|
||
+ RcDouble ->
|
||
+ case vr of
|
||
+ VirtualRegD {} -> 1
|
||
+ VirtualRegF {} -> 0
|
||
+ _other -> 0
|
||
+ _other -> 0
|
||
+
|
||
+{-# INLINE realRegSqueeze #-}
|
||
+realRegSqueeze :: RegClass -> RealReg -> Int
|
||
+realRegSqueeze cls rr =
|
||
+ case cls of
|
||
+ RcInteger ->
|
||
+ case rr of
|
||
+ RealRegSingle regNo
|
||
+ | regNo < d0RegNo -> 1
|
||
+ | otherwise -> 0
|
||
+ RcDouble ->
|
||
+ case rr of
|
||
+ RealRegSingle regNo
|
||
+ | regNo < d0RegNo -> 0
|
||
+ | otherwise -> 1
|
||
+ _other -> 0
|
||
+
|
||
+mkVirtualReg :: Unique -> Format -> VirtualReg
|
||
+mkVirtualReg u format
|
||
+ | not (isFloatFormat format) = VirtualRegI u
|
||
+ | otherwise =
|
||
+ case format of
|
||
+ FF32 -> VirtualRegD u
|
||
+ FF64 -> VirtualRegD u
|
||
+ _ -> panic "RV64.mkVirtualReg"
|
||
+
|
||
+{-# INLINE classOfRealReg #-}
|
||
+classOfRealReg :: RealReg -> RegClass
|
||
+classOfRealReg (RealRegSingle i)
|
||
+ | i < d0RegNo = RcInteger
|
||
+ | otherwise = RcDouble
|
||
+
|
||
+regDotColor :: RealReg -> SDoc
|
||
+regDotColor reg =
|
||
+ case classOfRealReg reg of
|
||
+ RcInteger -> text "blue"
|
||
+ RcFloat -> text "red"
|
||
+ RcDouble -> text "green"
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
|
||
@@ -119,7 +119,7 @@ trivColorable platform virtualRegSqueeze
|
||
ArchMipseb -> panic "trivColorable ArchMipseb"
|
||
ArchMipsel -> panic "trivColorable ArchMipsel"
|
||
ArchS390X -> panic "trivColorable ArchS390X"
|
||
- ArchRISCV64 -> panic "trivColorable ArchRISCV64"
|
||
+ ArchRISCV64 -> 14
|
||
ArchLoongArch64->panic "trivColorable ArchLoongArch64"
|
||
ArchJavaScript-> panic "trivColorable ArchJavaScript"
|
||
ArchWasm32 -> panic "trivColorable ArchWasm32"
|
||
@@ -154,7 +154,7 @@ trivColorable platform virtualRegSqueeze
|
||
ArchMipseb -> panic "trivColorable ArchMipseb"
|
||
ArchMipsel -> panic "trivColorable ArchMipsel"
|
||
ArchS390X -> panic "trivColorable ArchS390X"
|
||
- ArchRISCV64 -> panic "trivColorable ArchRISCV64"
|
||
+ ArchRISCV64 -> 0
|
||
ArchLoongArch64->panic "trivColorable ArchLoongArch64"
|
||
ArchJavaScript-> panic "trivColorable ArchJavaScript"
|
||
ArchWasm32 -> panic "trivColorable ArchWasm32"
|
||
@@ -188,7 +188,7 @@ trivColorable platform virtualRegSqueeze
|
||
ArchMipseb -> panic "trivColorable ArchMipseb"
|
||
ArchMipsel -> panic "trivColorable ArchMipsel"
|
||
ArchS390X -> panic "trivColorable ArchS390X"
|
||
- ArchRISCV64 -> panic "trivColorable ArchRISCV64"
|
||
+ ArchRISCV64 -> 20
|
||
ArchLoongArch64->panic "trivColorable ArchLoongArch64"
|
||
ArchJavaScript-> panic "trivColorable ArchJavaScript"
|
||
ArchWasm32 -> panic "trivColorable ArchWasm32"
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Linear.hs
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear.hs
|
||
@@ -112,6 +112,7 @@ import qualified GHC.CmmToAsm.Reg.Linear
|
||
import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
|
||
import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
|
||
import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64
|
||
+import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64
|
||
import GHC.CmmToAsm.Reg.Target
|
||
import GHC.CmmToAsm.Reg.Liveness
|
||
import GHC.CmmToAsm.Reg.Utils
|
||
@@ -221,7 +222,7 @@ linearRegAlloc config entry_ids block_li
|
||
ArchAlpha -> panic "linearRegAlloc ArchAlpha"
|
||
ArchMipseb -> panic "linearRegAlloc ArchMipseb"
|
||
ArchMipsel -> panic "linearRegAlloc ArchMipsel"
|
||
- ArchRISCV64 -> panic "linearRegAlloc ArchRISCV64"
|
||
+ ArchRISCV64 -> go (frInitFreeRegs platform :: RV64.FreeRegs)
|
||
ArchLoongArch64-> panic "linearRegAlloc ArchLoongArch64"
|
||
ArchJavaScript -> panic "linearRegAlloc ArchJavaScript"
|
||
ArchWasm32 -> panic "linearRegAlloc ArchWasm32"
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
|
||
@@ -29,10 +29,12 @@ import qualified GHC.CmmToAsm.Reg.Linear
|
||
import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
|
||
import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
|
||
import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64
|
||
+import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64
|
||
|
||
import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr
|
||
import qualified GHC.CmmToAsm.X86.Instr as X86.Instr
|
||
import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr
|
||
+import qualified GHC.CmmToAsm.RV64.Instr as RV64.Instr
|
||
|
||
class Show freeRegs => FR freeRegs where
|
||
frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs
|
||
@@ -64,6 +66,12 @@ instance FR AArch64.FreeRegs where
|
||
frInitFreeRegs = AArch64.initFreeRegs
|
||
frReleaseReg = \_ -> AArch64.releaseReg
|
||
|
||
+instance FR RV64.FreeRegs where
|
||
+ frAllocateReg = const RV64.allocateReg
|
||
+ frGetFreeRegs = const RV64.getFreeRegs
|
||
+ frInitFreeRegs = RV64.initFreeRegs
|
||
+ frReleaseReg = const RV64.releaseReg
|
||
+
|
||
maxSpillSlots :: NCGConfig -> Int
|
||
maxSpillSlots config = case platformArch (ncgPlatform config) of
|
||
ArchX86 -> X86.Instr.maxSpillSlots config
|
||
@@ -76,7 +84,7 @@ maxSpillSlots config = case platformArch
|
||
ArchAlpha -> panic "maxSpillSlots ArchAlpha"
|
||
ArchMipseb -> panic "maxSpillSlots ArchMipseb"
|
||
ArchMipsel -> panic "maxSpillSlots ArchMipsel"
|
||
- ArchRISCV64 -> panic "maxSpillSlots ArchRISCV64"
|
||
+ ArchRISCV64 -> RV64.Instr.maxSpillSlots config
|
||
ArchLoongArch64->panic "maxSpillSlots ArchLoongArch64"
|
||
ArchJavaScript-> panic "maxSpillSlots ArchJavaScript"
|
||
ArchWasm32 -> panic "maxSpillSlots ArchWasm32"
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
|
||
@@ -0,0 +1,96 @@
|
||
+-- | Functions to implement the @FR@ (as in "free regs") type class.
|
||
+--
|
||
+-- For LLVM GHC calling convention (used registers), see
|
||
+-- https://github.com/llvm/llvm-project/blob/6ab900f8746e7d8e24afafb5886a40801f6799f4/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L13638-L13685
|
||
+module GHC.CmmToAsm.Reg.Linear.RV64
|
||
+ ( allocateReg,
|
||
+ getFreeRegs,
|
||
+ initFreeRegs,
|
||
+ releaseReg,
|
||
+ FreeRegs (..),
|
||
+ )
|
||
+where
|
||
+
|
||
+import Data.Word
|
||
+import GHC.CmmToAsm.RV64.Regs
|
||
+import GHC.Platform
|
||
+import GHC.Platform.Reg
|
||
+import GHC.Platform.Reg.Class
|
||
+import GHC.Prelude
|
||
+import GHC.Stack
|
||
+import GHC.Utils.Outputable
|
||
+import GHC.Utils.Panic
|
||
+
|
||
+-- | Bitmaps to indicate which registers are free (currently unused)
|
||
+--
|
||
+-- The bit index represents the `RegNo`, in case of floating point registers
|
||
+-- with an offset of 32. The register is free when the bit is set.
|
||
+data FreeRegs
|
||
+ = FreeRegs
|
||
+ -- | integer/general purpose registers (`RcInteger`)
|
||
+ !Word32
|
||
+ -- | floating point registers (`RcDouble`)
|
||
+ !Word32
|
||
+
|
||
+instance Show FreeRegs where
|
||
+ show (FreeRegs g f) = "FreeRegs 0b" ++ showBits g ++ " 0b" ++ showBits f
|
||
+
|
||
+-- | Show bits as a `String` of @1@s and @0@s
|
||
+showBits :: Word32 -> String
|
||
+showBits w = map (\i -> if testBit w i then '1' else '0') [0 .. 31]
|
||
+
|
||
+instance Outputable FreeRegs where
|
||
+ ppr (FreeRegs g f) =
|
||
+ text " "
|
||
+ <+> foldr (\i x -> pad_int i <+> x) (text "") [0 .. 31]
|
||
+ $$ text "GPR"
|
||
+ <+> foldr (\i x -> show_bit g i <+> x) (text "") [0 .. 31]
|
||
+ $$ text "FPR"
|
||
+ <+> foldr (\i x -> show_bit f i <+> x) (text "") [0 .. 31]
|
||
+ where
|
||
+ pad_int i | i < 10 = char ' ' <> int i
|
||
+ pad_int i = int i
|
||
+ -- remember bit = 1 means it's available.
|
||
+ show_bit bits bit | testBit bits bit = text " "
|
||
+ show_bit _ _ = text " x"
|
||
+
|
||
+-- | Set bits of all allocatable registers to 1
|
||
+initFreeRegs :: Platform -> FreeRegs
|
||
+initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
|
||
+ where
|
||
+ noFreeRegs :: FreeRegs
|
||
+ noFreeRegs = FreeRegs 0 0
|
||
+
|
||
+-- | Get all free `RealReg`s (i.e. those where the corresponding bit is 1)
|
||
+getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
|
||
+getFreeRegs cls (FreeRegs g f)
|
||
+ | RcFloat <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted.
|
||
+ | RcDouble <- cls = go 32 f allocatableDoubleRegs
|
||
+ | RcInteger <- cls = go 0 g allocatableIntRegs
|
||
+ where
|
||
+ go _ _ [] = []
|
||
+ go off x (i : is)
|
||
+ | testBit x i = RealRegSingle (off + i) : (go off x $! is)
|
||
+ | otherwise = go off x $! is
|
||
+ -- The lists of allocatable registers are manually crafted: Register
|
||
+ -- allocation is pretty hot code. We don't want to iterate and map like
|
||
+ -- `initFreeRegs` all the time! (The register mappings aren't supposed to
|
||
+ -- change often.)
|
||
+ allocatableIntRegs = [5 .. 7] ++ [10 .. 17] ++ [28 .. 30]
|
||
+ allocatableDoubleRegs = [0 .. 7] ++ [10 .. 17] ++ [28 .. 31]
|
||
+
|
||
+-- | Set corresponding register bit to 0
|
||
+allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
|
||
+allocateReg (RealRegSingle r) (FreeRegs g f)
|
||
+ | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32))
|
||
+ | r < 32 && testBit g r = FreeRegs (clearBit g r) f
|
||
+ | r > 31 = panic $ "Linear.RV64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f
|
||
+ | otherwise = pprPanic "Linear.RV64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g)
|
||
+
|
||
+-- | Set corresponding register bit to 1
|
||
+releaseReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
|
||
+releaseReg (RealRegSingle r) (FreeRegs g f)
|
||
+ | r > 31 && testBit f (r - 32) = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32))
|
||
+ | r < 32 && testBit g r = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg x" <> int r)
|
||
+ | r > 31 = FreeRegs g (setBit f (r - 32))
|
||
+ | otherwise = FreeRegs (setBit g r) f
|
||
Index: ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Target.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/CmmToAsm/Reg/Target.hs
|
||
+++ ghc-9.10.1/compiler/GHC/CmmToAsm/Reg/Target.hs
|
||
@@ -34,7 +34,7 @@ import qualified GHC.CmmToAsm.X86.Regs
|
||
import qualified GHC.CmmToAsm.X86.RegInfo as X86
|
||
import qualified GHC.CmmToAsm.PPC.Regs as PPC
|
||
import qualified GHC.CmmToAsm.AArch64.Regs as AArch64
|
||
-
|
||
+import qualified GHC.CmmToAsm.RV64.Regs as RV64
|
||
|
||
targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int
|
||
targetVirtualRegSqueeze platform
|
||
@@ -49,7 +49,7 @@ targetVirtualRegSqueeze platform
|
||
ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha"
|
||
ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb"
|
||
ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel"
|
||
- ArchRISCV64 -> panic "targetVirtualRegSqueeze ArchRISCV64"
|
||
+ ArchRISCV64 -> RV64.virtualRegSqueeze
|
||
ArchLoongArch64->panic "targetVirtualRegSqueeze ArchLoongArch64"
|
||
ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript"
|
||
ArchWasm32 -> panic "targetVirtualRegSqueeze ArchWasm32"
|
||
@@ -69,7 +69,7 @@ targetRealRegSqueeze platform
|
||
ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha"
|
||
ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb"
|
||
ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel"
|
||
- ArchRISCV64 -> panic "targetRealRegSqueeze ArchRISCV64"
|
||
+ ArchRISCV64 -> RV64.realRegSqueeze
|
||
ArchLoongArch64->panic "targetRealRegSqueeze ArchLoongArch64"
|
||
ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript"
|
||
ArchWasm32 -> panic "targetRealRegSqueeze ArchWasm32"
|
||
@@ -88,7 +88,7 @@ targetClassOfRealReg platform
|
||
ArchAlpha -> panic "targetClassOfRealReg ArchAlpha"
|
||
ArchMipseb -> panic "targetClassOfRealReg ArchMipseb"
|
||
ArchMipsel -> panic "targetClassOfRealReg ArchMipsel"
|
||
- ArchRISCV64 -> panic "targetClassOfRealReg ArchRISCV64"
|
||
+ ArchRISCV64 -> RV64.classOfRealReg
|
||
ArchLoongArch64->panic "targetClassOfRealReg ArchLoongArch64"
|
||
ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript"
|
||
ArchWasm32 -> panic "targetClassOfRealReg ArchWasm32"
|
||
@@ -107,7 +107,7 @@ targetMkVirtualReg platform
|
||
ArchAlpha -> panic "targetMkVirtualReg ArchAlpha"
|
||
ArchMipseb -> panic "targetMkVirtualReg ArchMipseb"
|
||
ArchMipsel -> panic "targetMkVirtualReg ArchMipsel"
|
||
- ArchRISCV64 -> panic "targetMkVirtualReg ArchRISCV64"
|
||
+ ArchRISCV64 -> RV64.mkVirtualReg
|
||
ArchLoongArch64->panic "targetMkVirtualReg ArchLoongArch64"
|
||
ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript"
|
||
ArchWasm32 -> panic "targetMkVirtualReg ArchWasm32"
|
||
@@ -126,7 +126,7 @@ targetRegDotColor platform
|
||
ArchAlpha -> panic "targetRegDotColor ArchAlpha"
|
||
ArchMipseb -> panic "targetRegDotColor ArchMipseb"
|
||
ArchMipsel -> panic "targetRegDotColor ArchMipsel"
|
||
- ArchRISCV64 -> panic "targetRegDotColor ArchRISCV64"
|
||
+ ArchRISCV64 -> RV64.regDotColor
|
||
ArchLoongArch64->panic "targetRegDotColor ArchLoongArch64"
|
||
ArchJavaScript-> panic "targetRegDotColor ArchJavaScript"
|
||
ArchWasm32 -> panic "targetRegDotColor ArchWasm32"
|
||
Index: ghc-9.10.1/compiler/GHC/Driver/Backend.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/Driver/Backend.hs
|
||
+++ ghc-9.10.1/compiler/GHC/Driver/Backend.hs
|
||
@@ -213,6 +213,7 @@ platformNcgSupported platform = if
|
||
ArchPPC_64 {} -> True
|
||
ArchAArch64 -> True
|
||
ArchWasm32 -> True
|
||
+ ArchRISCV64 -> True
|
||
_ -> False
|
||
|
||
-- | Is the platform supported by the JS backend?
|
||
Index: ghc-9.10.1/compiler/GHC/Driver/DynFlags.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/Driver/DynFlags.hs
|
||
+++ ghc-9.10.1/compiler/GHC/Driver/DynFlags.hs
|
||
@@ -1325,6 +1325,7 @@ default_PIC platform =
|
||
(OSDarwin, ArchAArch64) -> [Opt_PIC]
|
||
(OSLinux, ArchAArch64) -> [Opt_PIC, Opt_ExternalDynamicRefs]
|
||
(OSLinux, ArchARM {}) -> [Opt_PIC, Opt_ExternalDynamicRefs]
|
||
+ (OSLinux, ArchRISCV64 {}) -> [Opt_PIC, Opt_ExternalDynamicRefs]
|
||
(OSOpenBSD, ArchX86_64) -> [Opt_PIC] -- Due to PIE support in
|
||
-- OpenBSD since 5.3 release
|
||
-- (1 May 2013) we need to
|
||
Index: ghc-9.10.1/compiler/GHC/Platform.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/GHC/Platform.hs
|
||
+++ ghc-9.10.1/compiler/GHC/Platform.hs
|
||
@@ -250,7 +250,6 @@ platformHasRTSLinker p = case archOS_arc
|
||
ArchPPC_64 ELF_V1 -> False -- powerpc64
|
||
ArchPPC_64 ELF_V2 -> False -- powerpc64le
|
||
ArchS390X -> False
|
||
- ArchRISCV64 -> False
|
||
ArchLoongArch64 -> False
|
||
ArchJavaScript -> False
|
||
ArchWasm32 -> False
|
||
Index: ghc-9.10.1/compiler/ghc.cabal.in
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/compiler/ghc.cabal.in
|
||
+++ ghc-9.10.1/compiler/ghc.cabal.in
|
||
@@ -290,6 +290,7 @@ Library
|
||
GHC.CmmToAsm.Reg.Linear.FreeRegs
|
||
GHC.CmmToAsm.Reg.Linear.JoinToTargets
|
||
GHC.CmmToAsm.Reg.Linear.PPC
|
||
+ GHC.CmmToAsm.Reg.Linear.RV64
|
||
GHC.CmmToAsm.Reg.Linear.StackMap
|
||
GHC.CmmToAsm.Reg.Linear.State
|
||
GHC.CmmToAsm.Reg.Linear.Stats
|
||
@@ -298,6 +299,13 @@ Library
|
||
GHC.CmmToAsm.Reg.Liveness
|
||
GHC.CmmToAsm.Reg.Target
|
||
GHC.CmmToAsm.Reg.Utils
|
||
+ GHC.CmmToAsm.RV64
|
||
+ GHC.CmmToAsm.RV64.CodeGen
|
||
+ GHC.CmmToAsm.RV64.Cond
|
||
+ GHC.CmmToAsm.RV64.Instr
|
||
+ GHC.CmmToAsm.RV64.Ppr
|
||
+ GHC.CmmToAsm.RV64.RegInfo
|
||
+ GHC.CmmToAsm.RV64.Regs
|
||
GHC.CmmToAsm.Types
|
||
GHC.CmmToAsm.Utils
|
||
GHC.CmmToAsm.X86
|
||
Index: ghc-9.10.1/hadrian/bindist/config.mk.in
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/hadrian/bindist/config.mk.in
|
||
+++ ghc-9.10.1/hadrian/bindist/config.mk.in
|
||
@@ -152,7 +152,7 @@ GhcWithSMP := $(strip $(if $(filter YESN
|
||
# Whether to include GHCi in the compiler. Depends on whether the RTS linker
|
||
# has support for this OS/ARCH combination.
|
||
OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu)))
|
||
-ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64)))
|
||
+ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64 riscv64)))
|
||
|
||
ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES"
|
||
GhcWithInterpreter=YES
|
||
Index: ghc-9.10.1/hadrian/src/Settings/Builders/RunTest.hs
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/hadrian/src/Settings/Builders/RunTest.hs
|
||
+++ ghc-9.10.1/hadrian/src/Settings/Builders/RunTest.hs
|
||
@@ -118,7 +118,7 @@ inTreeCompilerArgs stg = do
|
||
|
||
os <- queryHostTarget queryOS
|
||
arch <- queryTargetTarget queryArch
|
||
- let codegen_arches = ["x86_64", "i386", "powerpc", "powerpc64", "powerpc64le", "aarch64", "wasm32"]
|
||
+ let codegen_arches = ["x86_64", "i386", "powerpc", "powerpc64", "powerpc64le", "aarch64", "wasm32", "riscv64"]
|
||
let withNativeCodeGen
|
||
| unregisterised = False
|
||
| arch `elem` codegen_arches = True
|
||
@@ -139,7 +139,7 @@ inTreeCompilerArgs stg = do
|
||
-- For this information, we need to query ghc --info, however, that would
|
||
-- require building ghc, which we don't want to do here. Therefore, the
|
||
-- logic from `platformHasRTSLinker` is duplicated here.
|
||
- let rtsLinker = not $ arch `elem` ["powerpc", "powerpc64", "powerpc64le", "s390x", "riscv64", "loongarch64", "javascript", "wasm32"]
|
||
+ let rtsLinker = not $ arch `elem` ["powerpc", "powerpc64", "powerpc64le", "s390x", "loongarch64", "javascript", "wasm32"]
|
||
|
||
return TestCompilerArgs{..}
|
||
|
||
Index: ghc-9.10.1/rts/LinkerInternals.h
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/LinkerInternals.h
|
||
+++ ghc-9.10.1/rts/LinkerInternals.h
|
||
@@ -208,7 +208,7 @@ typedef struct _Segment {
|
||
int n_sections;
|
||
} Segment;
|
||
|
||
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
|
||
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
|
||
#define NEED_SYMBOL_EXTRAS 1
|
||
#endif
|
||
|
||
@@ -220,8 +220,9 @@ typedef struct _Segment {
|
||
#define NEED_M32 1
|
||
#endif
|
||
|
||
-/* Jump Islands are sniplets of machine code required for relative
|
||
- * address relocations on the PowerPC, x86_64 and ARM.
|
||
+/* Jump Islands are sniplets of machine code required for relative address
|
||
+ * relocations on the PowerPC, x86_64 and ARM. On RISCV64 we use symbolextras
|
||
+ * like a GOT for locals where SymbolExtra represents one entry.
|
||
*/
|
||
typedef struct {
|
||
#if defined(powerpc_HOST_ARCH)
|
||
@@ -237,6 +238,8 @@ typedef struct {
|
||
uint8_t jumpIsland[8];
|
||
#elif defined(arm_HOST_ARCH)
|
||
uint8_t jumpIsland[16];
|
||
+#elif defined(riscv64_HOST_ARCH)
|
||
+ uint64_t addr;
|
||
#endif
|
||
} SymbolExtra;
|
||
|
||
Index: ghc-9.10.1/rts/RtsSymbols.c
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/RtsSymbols.c
|
||
+++ ghc-9.10.1/rts/RtsSymbols.c
|
||
@@ -980,6 +980,17 @@ extern char **environ;
|
||
#define RTS_LIBGCC_SYMBOLS
|
||
#endif
|
||
|
||
+#if defined(riscv64_HOST_ARCH)
|
||
+// See https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html as
|
||
+// reference for the following built-ins. __clzdi2 and __ctzdi2 probably relate
|
||
+// to __builtin-s in libraries/ghc-prim/cbits/ctz.c.
|
||
+#define RTS_ARCH_LIBGCC_SYMBOLS \
|
||
+ SymI_NeedsProto(__clzdi2) \
|
||
+ SymI_NeedsProto(__ctzdi2)
|
||
+#else
|
||
+#define RTS_ARCH_LIBGCC_SYMBOLS
|
||
+#endif
|
||
+
|
||
// Symbols defined by libgcc/compiler-rt for AArch64's outline atomics.
|
||
#if defined(HAVE_ARM_OUTLINE_ATOMICS)
|
||
#include "ARMOutlineAtomicsSymbols.h"
|
||
@@ -1032,6 +1043,7 @@ RTS_DARWIN_ONLY_SYMBOLS
|
||
RTS_OPENBSD_ONLY_SYMBOLS
|
||
RTS_LIBC_SYMBOLS
|
||
RTS_LIBGCC_SYMBOLS
|
||
+RTS_ARCH_LIBGCC_SYMBOLS
|
||
RTS_FINI_ARRAY_SYMBOLS
|
||
RTS_LIBFFI_SYMBOLS
|
||
RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
|
||
@@ -1074,6 +1086,7 @@ RtsSymbolVal rtsSyms[] = {
|
||
RTS_DARWIN_ONLY_SYMBOLS
|
||
RTS_OPENBSD_ONLY_SYMBOLS
|
||
RTS_LIBGCC_SYMBOLS
|
||
+ RTS_ARCH_LIBGCC_SYMBOLS
|
||
RTS_FINI_ARRAY_SYMBOLS
|
||
RTS_LIBFFI_SYMBOLS
|
||
RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
|
||
Index: ghc-9.10.1/rts/adjustor/LibffiAdjustor.c
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/adjustor/LibffiAdjustor.c
|
||
+++ ghc-9.10.1/rts/adjustor/LibffiAdjustor.c
|
||
@@ -12,6 +12,7 @@
|
||
#include "Adjustor.h"
|
||
|
||
#include "rts/ghc_ffi.h"
|
||
+#include <stdint.h>
|
||
#include <string.h>
|
||
|
||
// Note that ffi_alloc_prep_closure is a non-standard libffi closure
|
||
@@ -187,5 +188,21 @@ createAdjustor (int cconv,
|
||
barf("createAdjustor: failed to allocate memory");
|
||
}
|
||
|
||
- return (void*)code;
|
||
+#if defined(riscv64_HOST_ARCH)
|
||
+ // Synchronize the memory and instruction cache to prevent illegal
|
||
+ // instruction exceptions.
|
||
+
|
||
+ // We expect two instructions for address loading, one for the jump.
|
||
+ int instrCount = 3;
|
||
+ // On Linux the parameters of __builtin___clear_cache are currently unused.
|
||
+ // Add them anyways for future compatibility. (I.e. the parameters couldn't
|
||
+ // be checked during development.)
|
||
+ // TODO: Check the upper boundary e.g. with a debugger.
|
||
+ __builtin___clear_cache((void *)code,
|
||
+ (void *)((uint64_t *) code + instrCount));
|
||
+ // Memory barrier to ensure nothing circumvents the fence.i / cache flush.
|
||
+ SEQ_CST_FENCE();
|
||
+#endif
|
||
+
|
||
+ return (void *)code;
|
||
}
|
||
Index: ghc-9.10.1/rts/linker/Elf.c
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/Elf.c
|
||
+++ ghc-9.10.1/rts/linker/Elf.c
|
||
@@ -103,7 +103,8 @@
|
||
|
||
#include "elf_got.h"
|
||
|
||
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
|
||
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
|
||
+# define NEED_GOT
|
||
# define NEED_PLT
|
||
# include "elf_plt.h"
|
||
# include "elf_reloc.h"
|
||
@@ -430,10 +431,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
|
||
case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break;
|
||
#endif
|
||
#if defined(EM_RISCV)
|
||
- case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" ));
|
||
- errorBelch("%s: RTS linker not implemented on riscv",
|
||
- oc->fileName);
|
||
- return 0;
|
||
+ case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); break;
|
||
#endif
|
||
#if defined(EM_LOONGARCH)
|
||
case EM_LOONGARCH: IF_DEBUG(linker,debugBelch( "loongarch64" ));
|
||
@@ -1130,9 +1128,10 @@ end:
|
||
return result;
|
||
}
|
||
|
||
-// the aarch64 linker uses relocacteObjectCodeAarch64,
|
||
-// see elf_reloc_aarch64.{h,c}
|
||
-#if !defined(aarch64_HOST_ARCH)
|
||
+// the aarch64 and riscv64 linkers use relocateObjectCodeAarch64() and
|
||
+// relocateObjectCodeRISCV64() (respectively), see elf_reloc_aarch64.{h,c} and
|
||
+// elf_reloc_riscv64.{h,c}
|
||
+#if !defined(aarch64_HOST_ARCH) && !defined(riscv64_HOST_ARCH)
|
||
|
||
/* Do ELF relocations which lack an explicit addend. All x86-linux
|
||
and arm-linux relocations appear to be of this form. */
|
||
@@ -1359,7 +1358,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc,
|
||
/* try to locate an existing stub for this target */
|
||
if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) {
|
||
/* didn't find any. Need to create one */
|
||
- if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) {
|
||
+ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 0)) {
|
||
errorBelch("Unable to create veneer for ARM_CALL\n");
|
||
return 0;
|
||
}
|
||
@@ -1451,7 +1450,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc,
|
||
/* try to locate an existing stub for this target */
|
||
if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) {
|
||
/* didn't find any. Need to create one */
|
||
- if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) {
|
||
+ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 1)) {
|
||
errorBelch("Unable to create veneer for ARM_THM_CALL\n");
|
||
return 0;
|
||
}
|
||
@@ -1991,7 +1990,7 @@ ocResolve_ELF ( ObjectCode* oc )
|
||
(void) shnum;
|
||
(void) shdr;
|
||
|
||
-#if defined(aarch64_HOST_ARCH)
|
||
+#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
|
||
/* use new relocation design */
|
||
if(relocateObjectCode( oc ))
|
||
return 0;
|
||
@@ -2014,6 +2013,9 @@ ocResolve_ELF ( ObjectCode* oc )
|
||
|
||
#if defined(powerpc_HOST_ARCH)
|
||
ocFlushInstructionCache( oc );
|
||
+#elif defined(riscv64_HOST_ARCH)
|
||
+ /* New-style pseudo-polymorph (by architecture) call */
|
||
+ flushInstructionCache( oc );
|
||
#endif
|
||
|
||
return ocMprotect_Elf(oc);
|
||
Index: ghc-9.10.1/rts/linker/ElfTypes.h
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/ElfTypes.h
|
||
+++ ghc-9.10.1/rts/linker/ElfTypes.h
|
||
@@ -150,6 +150,7 @@ typedef
|
||
struct _Stub {
|
||
void * addr;
|
||
void * target;
|
||
+ void* got_addr;
|
||
/* flags can hold architecture specific information they are used during
|
||
* lookup of stubs as well. Thus two stubs for the same target with
|
||
* different flags are considered unequal.
|
||
Index: ghc-9.10.1/rts/linker/SymbolExtras.c
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/SymbolExtras.c
|
||
+++ ghc-9.10.1/rts/linker/SymbolExtras.c
|
||
@@ -153,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc)
|
||
}
|
||
|
||
|
||
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
|
||
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
|
||
SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
|
||
unsigned long symbolNumber,
|
||
unsigned long target )
|
||
@@ -189,9 +189,12 @@ SymbolExtra* makeSymbolExtra( ObjectCode
|
||
extra->addr = target;
|
||
memcpy(extra->jumpIsland, jmp, 8);
|
||
#endif /* x86_64_HOST_ARCH */
|
||
-
|
||
+#if defined(riscv64_HOST_ARCH)
|
||
+ // Fake GOT entry (used like GOT, but located in symbol extras)
|
||
+ extra->addr = target;
|
||
+#endif
|
||
return extra;
|
||
}
|
||
-#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */
|
||
+#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH || riscv64_HOST_ARCH */
|
||
#endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */
|
||
#endif // NEED_SYMBOL_EXTRAS
|
||
Index: ghc-9.10.1/rts/linker/SymbolExtras.h
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/SymbolExtras.h
|
||
+++ ghc-9.10.1/rts/linker/SymbolExtras.h
|
||
@@ -16,7 +16,7 @@ SymbolExtra* makeArmSymbolExtra( ObjectC
|
||
unsigned long target,
|
||
bool fromThumb,
|
||
bool toThumb );
|
||
-#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
|
||
+#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
|
||
SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
|
||
unsigned long symbolNumber,
|
||
unsigned long target );
|
||
Index: ghc-9.10.1/rts/linker/elf_plt.c
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/elf_plt.c
|
||
+++ ghc-9.10.1/rts/linker/elf_plt.c
|
||
@@ -5,7 +5,7 @@
|
||
#include <stdint.h>
|
||
#include <stdlib.h>
|
||
|
||
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
|
||
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
|
||
#if defined(OBJFORMAT_ELF)
|
||
|
||
#define STRINGIFY(x) #x
|
||
@@ -49,11 +49,13 @@ findStub(Section * section,
|
||
bool
|
||
makeStub(Section * section,
|
||
void* * addr,
|
||
+ void* got_addr,
|
||
uint8_t flags) {
|
||
|
||
Stub * s = calloc(1, sizeof(Stub));
|
||
ASSERT(s != NULL);
|
||
s->target = *addr;
|
||
+ s->got_addr = got_addr;
|
||
s->flags = flags;
|
||
s->next = NULL;
|
||
s->addr = (uint8_t *)section->info->stub_offset + 8
|
||
Index: ghc-9.10.1/rts/linker/elf_plt.h
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/elf_plt.h
|
||
+++ ghc-9.10.1/rts/linker/elf_plt.h
|
||
@@ -4,8 +4,9 @@
|
||
|
||
#include "elf_plt_arm.h"
|
||
#include "elf_plt_aarch64.h"
|
||
+#include "elf_plt_riscv64.h"
|
||
|
||
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
|
||
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
|
||
|
||
#if defined(OBJFORMAT_ELF)
|
||
|
||
@@ -21,6 +22,8 @@
|
||
#define __suffix__ Arm
|
||
#elif defined(__mips__)
|
||
#define __suffix__ Mips
|
||
+#elif defined(__riscv)
|
||
+#define __suffix__ RISCV64
|
||
#else
|
||
#error "unknown architecture"
|
||
#endif
|
||
@@ -34,10 +37,10 @@ unsigned numberOfStubsForSection( Objec
|
||
#define STUB_SIZE ADD_SUFFIX(stubSize)
|
||
|
||
bool findStub(Section * section, void* * addr, uint8_t flags);
|
||
-bool makeStub(Section * section, void* * addr, uint8_t flags);
|
||
+bool makeStub(Section * section, void* * addr, void* got_addr, uint8_t flags);
|
||
|
||
void freeStubs(Section * section);
|
||
|
||
#endif // OBJECTFORMAT_ELF
|
||
|
||
-#endif // arm/aarch64_HOST_ARCH
|
||
+#endif // arm/aarch64_HOST_ARCH/riscv64_HOST_ARCH
|
||
Index: ghc-9.10.1/rts/linker/elf_plt_riscv64.c
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/rts/linker/elf_plt_riscv64.c
|
||
@@ -0,0 +1,90 @@
|
||
+#include "Rts.h"
|
||
+#include "elf_compat.h"
|
||
+#include "elf_plt_riscv64.h"
|
||
+#include "rts/Messages.h"
|
||
+#include "linker/ElfTypes.h"
|
||
+
|
||
+#include <stdint.h>
|
||
+#include <stdlib.h>
|
||
+
|
||
+#if defined(riscv64_HOST_ARCH)
|
||
+
|
||
+#if defined(OBJFORMAT_ELF)
|
||
+
|
||
+const size_t instSizeRISCV64 = 4;
|
||
+const size_t stubSizeRISCV64 = 3 * instSizeRISCV64;
|
||
+
|
||
+bool needStubForRelRISCV64(Elf_Rel *rel) {
|
||
+ switch (ELF64_R_TYPE(rel->r_info)) {
|
||
+ case R_RISCV_CALL:
|
||
+ case R_RISCV_CALL_PLT:
|
||
+ return true;
|
||
+ default:
|
||
+ return false;
|
||
+ }
|
||
+}
|
||
+
|
||
+bool needStubForRelaRISCV64(Elf_Rela *rela) {
|
||
+ switch (ELF64_R_TYPE(rela->r_info)) {
|
||
+ case R_RISCV_CALL:
|
||
+ case R_RISCV_CALL_PLT:
|
||
+ return true;
|
||
+ default:
|
||
+ return false;
|
||
+ }
|
||
+}
|
||
+
|
||
+// After the global offset table (GOT) has been set up, we can use these three
|
||
+// instructions to jump to the target address / function:
|
||
+//
|
||
+// 1. AUIPC ip, %pcrel_hi(addr)
|
||
+// 2. LD ip, %pcrel_lo(addr)(ip)
|
||
+// 3. JARL x0, ip, 0
|
||
+//
|
||
+// We could use the absolute address of the target (because we know it), but
|
||
+// that would require loading a 64-bit constant which is a nightmare to do in
|
||
+// riscv64 assembly. (See
|
||
+// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/5ffe5b5aeedb37b1c1c0c3d94641267d9ad4795a/riscv-elf.adoc#procedure-linkage-table)
|
||
+//
|
||
+// So far, PC-relative addressing seems to be good enough. If it ever turns out
|
||
+// to be not, one could (additionally for out-of-range cases?) encode absolute
|
||
+// addressing here.
|
||
+bool makeStubRISCV64(Stub *s) {
|
||
+ uint32_t *P = (uint32_t *)s->addr;
|
||
+ int32_t addr = (uint64_t)s->got_addr - (uint64_t)P;
|
||
+
|
||
+ uint64_t hi = (addr + 0x800) >> 12;
|
||
+ uint64_t lo = addr - (hi << 12);
|
||
+
|
||
+ IF_DEBUG(
|
||
+ linker,
|
||
+ debugBelch(
|
||
+ "makeStubRISCV64: P = %p, got_addr = %p, target = %p, addr = 0x%x "
|
||
+ ", hi = 0x%lx, lo = 0x%lx\n",
|
||
+ P, s->got_addr, s->target, addr, hi, lo));
|
||
+
|
||
+ // AUIPC ip, %pcrel_hi(addr)
|
||
+ uint32_t auipcInst = 0b0010111; // opcode
|
||
+ auipcInst |= 0x1f << 7; // rd = ip (x31)
|
||
+ auipcInst |= hi << 12; // imm[31:12]
|
||
+
|
||
+ // LD ip, %pcrel_lo(addr)(ip)
|
||
+ uint32_t ldInst = 0b0000011; // opcode
|
||
+ ldInst |= 0x1f << 7; // rd = ip (x31)
|
||
+ ldInst |= 0x1f << 15; // rs = ip (x31)
|
||
+ ldInst |= 0b11 << 12; // funct3 = 0x3 (LD)
|
||
+ ldInst |= lo << 20; // imm[11:0]
|
||
+
|
||
+ // JARL x0, ip, 0
|
||
+ uint32_t jalrInst = 0b1100111; // opcode
|
||
+ jalrInst |= 0x1f << 15; // rs = ip (x31)
|
||
+
|
||
+ P[0] = auipcInst;
|
||
+ P[1] = ldInst;
|
||
+ P[2] = jalrInst;
|
||
+
|
||
+ return EXIT_SUCCESS;
|
||
+}
|
||
+
|
||
+#endif
|
||
+#endif
|
||
Index: ghc-9.10.1/rts/linker/elf_plt_riscv64.h
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/rts/linker/elf_plt_riscv64.h
|
||
@@ -0,0 +1,12 @@
|
||
+#pragma once
|
||
+
|
||
+#include "LinkerInternals.h"
|
||
+
|
||
+#if defined(OBJFORMAT_ELF)
|
||
+
|
||
+extern const size_t stubSizeRISCV64;
|
||
+bool needStubForRelRISCV64(Elf_Rel * rel);
|
||
+bool needStubForRelaRISCV64(Elf_Rela * rel);
|
||
+bool makeStubRISCV64(Stub * s);
|
||
+
|
||
+#endif
|
||
Index: ghc-9.10.1/rts/linker/elf_reloc.c
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/elf_reloc.c
|
||
+++ ghc-9.10.1/rts/linker/elf_reloc.c
|
||
@@ -4,13 +4,18 @@
|
||
|
||
#if defined(OBJFORMAT_ELF)
|
||
|
||
-/* we currently only use this abstraction for elf/aarch64 */
|
||
-#if defined(aarch64_HOST_ARCH)
|
||
+/* we currently only use this abstraction for elf/aarch64 and elf/riscv64 */
|
||
+#if defined(aarch64_HOST_ARCH) | defined(riscv64_HOST_ARCH)
|
||
|
||
bool
|
||
relocateObjectCode(ObjectCode * oc) {
|
||
return ADD_SUFFIX(relocateObjectCode)(oc);
|
||
}
|
||
+
|
||
+
|
||
+void flushInstructionCache(ObjectCode * oc){
|
||
+ return ADD_SUFFIX(flushInstructionCache)(oc);
|
||
+}
|
||
#endif
|
||
|
||
#endif
|
||
Index: ghc-9.10.1/rts/linker/elf_reloc.h
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/elf_reloc.h
|
||
+++ ghc-9.10.1/rts/linker/elf_reloc.h
|
||
@@ -5,9 +5,10 @@
|
||
#if defined(OBJFORMAT_ELF)
|
||
|
||
#include "elf_reloc_aarch64.h"
|
||
+#include "elf_reloc_riscv64.h"
|
||
|
||
bool
|
||
relocateObjectCode(ObjectCode * oc);
|
||
|
||
-
|
||
+void flushInstructionCache(ObjectCode *oc);
|
||
#endif /* OBJETFORMAT_ELF */
|
||
Index: ghc-9.10.1/rts/linker/elf_reloc_aarch64.c
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/elf_reloc_aarch64.c
|
||
+++ ghc-9.10.1/rts/linker/elf_reloc_aarch64.c
|
||
@@ -240,7 +240,7 @@ computeAddend(Section * section, Elf_Rel
|
||
/* check if we already have that stub */
|
||
if(findStub(section, (void**)&S, 0)) {
|
||
/* did not find it. Crete a new stub. */
|
||
- if(makeStub(section, (void**)&S, 0)) {
|
||
+ if(makeStub(section, (void**)&S, NULL, 0)) {
|
||
abort(/* could not find or make stub */);
|
||
}
|
||
}
|
||
@@ -339,5 +339,10 @@ relocateObjectCodeAarch64(ObjectCode * o
|
||
return EXIT_SUCCESS;
|
||
}
|
||
|
||
+void flushInstructionCacheAarch64(ObjectCode * oc STG_UNUSED) {
|
||
+ // Looks like we don't need this on Aarch64.
|
||
+ /* no-op */
|
||
+}
|
||
+
|
||
#endif /* OBJECTFORMAT_ELF */
|
||
#endif /* aarch64_HOST_ARCH */
|
||
Index: ghc-9.10.1/rts/linker/elf_reloc_aarch64.h
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/linker/elf_reloc_aarch64.h
|
||
+++ ghc-9.10.1/rts/linker/elf_reloc_aarch64.h
|
||
@@ -7,4 +7,5 @@
|
||
bool
|
||
relocateObjectCodeAarch64(ObjectCode * oc);
|
||
|
||
+void flushInstructionCacheAarch64(ObjectCode *oc);
|
||
#endif /* OBJETFORMAT_ELF */
|
||
Index: ghc-9.10.1/rts/linker/elf_reloc_riscv64.c
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/rts/linker/elf_reloc_riscv64.c
|
||
@@ -0,0 +1,693 @@
|
||
+#include "elf_reloc_riscv64.h"
|
||
+#include "LinkerInternals.h"
|
||
+#include "Rts.h"
|
||
+#include "Stg.h"
|
||
+#include "SymbolExtras.h"
|
||
+#include "linker/ElfTypes.h"
|
||
+#include "elf_plt.h"
|
||
+#include "elf_util.h"
|
||
+#include "rts/Messages.h"
|
||
+#include "util.h"
|
||
+
|
||
+#include <stdint.h>
|
||
+#include <stdlib.h>
|
||
+
|
||
+#if defined(riscv64_HOST_ARCH)
|
||
+
|
||
+#if defined(OBJFORMAT_ELF)
|
||
+
|
||
+typedef uint64_t addr_t;
|
||
+
|
||
+/* regular instructions are 32bit */
|
||
+typedef uint32_t inst_t;
|
||
+
|
||
+/* compressed instructions are 16bit */
|
||
+typedef uint16_t cinst_t;
|
||
+
|
||
+// TODO: These instances could be static. They are not yet, because we might
|
||
+// need their debugging symbols.
|
||
+char *relocationTypeToString(Elf64_Xword type);
|
||
+int32_t decodeAddendRISCV64(Section *section, Elf_Rel *rel);
|
||
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend);
|
||
+void write8le(uint8_t *p, uint8_t v);
|
||
+uint8_t read8le(const uint8_t *P);
|
||
+void write16le(cinst_t *p, uint16_t v);
|
||
+uint16_t read16le(const cinst_t *P);
|
||
+uint32_t read32le(const inst_t *P);
|
||
+void write32le(inst_t *p, uint32_t v);
|
||
+uint64_t read64le(const uint64_t *P);
|
||
+void write64le(uint64_t *p, uint64_t v);
|
||
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end);
|
||
+void setCJType(cinst_t *loc, uint32_t val);
|
||
+void setCBType(cinst_t *loc, uint32_t val);
|
||
+void setBType(inst_t *loc, uint32_t val);
|
||
+void setSType(inst_t *loc, uint32_t val);
|
||
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
|
||
+ int64_t addend, ObjectCode *oc);
|
||
+void setJType(inst_t *loc, uint32_t val);
|
||
+void setIType(inst_t *loc, int32_t val);
|
||
+void checkInt(inst_t *loc, int32_t v, int n);
|
||
+void setUType(inst_t *loc, int32_t val);
|
||
+
|
||
+
|
||
+char *relocationTypeToString(Elf64_Xword type) {
|
||
+ switch (ELF64_R_TYPE(type)) {
|
||
+ case R_RISCV_NONE:
|
||
+ return "R_RISCV_NONE";
|
||
+ case R_RISCV_32:
|
||
+ return "R_RISCV_32";
|
||
+ case R_RISCV_64:
|
||
+ return "R_RISCV_64";
|
||
+ case R_RISCV_RELATIVE:
|
||
+ return "R_RISCV_RELATIVE";
|
||
+ case R_RISCV_COPY:
|
||
+ return "R_RISCV_COPY";
|
||
+ case R_RISCV_JUMP_SLOT:
|
||
+ return "R_RISCV_JUMP_SLOT";
|
||
+ case R_RISCV_TLS_DTPMOD32:
|
||
+ return "R_RISCV_TLS_DTPMOD32";
|
||
+ case R_RISCV_TLS_DTPMOD64:
|
||
+ return "R_RISCV_TLS_DTPMOD64";
|
||
+ case R_RISCV_TLS_DTPREL32:
|
||
+ return "R_RISCV_TLS_DTPREL32";
|
||
+ case R_RISCV_TLS_DTPREL64:
|
||
+ return "R_RISCV_TLS_DTPREL64";
|
||
+ case R_RISCV_TLS_TPREL32:
|
||
+ return "R_RISCV_TLS_TPREL32";
|
||
+ case R_RISCV_TLS_TPREL64:
|
||
+ return "R_RISCV_TLS_TPREL64";
|
||
+ case R_RISCV_BRANCH:
|
||
+ return "R_RISCV_BRANCH";
|
||
+ case R_RISCV_JAL:
|
||
+ return "R_RISCV_JAL";
|
||
+ case R_RISCV_CALL:
|
||
+ return "R_RISCV_CALL";
|
||
+ case R_RISCV_CALL_PLT:
|
||
+ return "R_RISCV_CALL_PLT";
|
||
+ case R_RISCV_GOT_HI20:
|
||
+ return "R_RISCV_GOT_HI20";
|
||
+ case R_RISCV_PCREL_HI20:
|
||
+ return "R_RISCV_PCREL_HI20";
|
||
+ case R_RISCV_LO12_I:
|
||
+ return "R_RISCV_LO12_I";
|
||
+ case R_RISCV_PCREL_LO12_I:
|
||
+ return "R_RISCV_PCREL_LO12_I";
|
||
+ case R_RISCV_HI20:
|
||
+ return "R_RISCV_HI20";
|
||
+ case R_RISCV_LO12_S:
|
||
+ return "R_RISCV_LO12_S";
|
||
+ case R_RISCV_PCREL_LO12_S:
|
||
+ return "R_RISCV_PCREL_LO12_S";
|
||
+ case R_RISCV_RELAX:
|
||
+ return "R_RISCV_RELAX";
|
||
+ case R_RISCV_RVC_BRANCH:
|
||
+ return "R_RISCV_RVC_BRANCH";
|
||
+ case R_RISCV_RVC_JUMP:
|
||
+ return "R_RISCV_RVC_JUMP";
|
||
+ default:
|
||
+ return "Unknown relocation type";
|
||
+ }
|
||
+}
|
||
+
|
||
+STG_NORETURN
|
||
+int32_t decodeAddendRISCV64(Section *section STG_UNUSED,
|
||
+ Elf_Rel *rel STG_UNUSED) {
|
||
+ barf("decodeAddendRISCV64: Relocations with explicit addend are not supported."
|
||
+ " Please open a ticket; providing the causing code/binary.");
|
||
+}
|
||
+
|
||
+// Make sure that V can be represented as an N bit signed integer.
|
||
+void checkInt(inst_t *loc, int32_t v, int n) {
|
||
+ if (!isInt(n, v)) {
|
||
+ barf("Relocation at 0x%x is out of range. value: 0x%x (%d), "
|
||
+ "sign-extended value: 0x%x (%d), max bits 0x%x (%d)\n",
|
||
+ *loc, v, v, signExtend32(v, n), signExtend32(v, n), n, n);
|
||
+ }
|
||
+}
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+void write8le(uint8_t *p, uint8_t v) { *p = v; }
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+uint8_t read8le(const uint8_t *p) { return *p; }
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+void write16le(cinst_t *p, uint16_t v) { *p = v; }
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+uint16_t read16le(const cinst_t *p) { return *p; }
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+uint32_t read32le(const inst_t *p) { return *p; }
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+void write32le(inst_t *p, uint32_t v) { *p = v; }
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+uint64_t read64le(const uint64_t *p) { return *p; }
|
||
+
|
||
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
|
||
+void write64le(uint64_t *p, uint64_t v) { *p = v; }
|
||
+
|
||
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
|
||
+ return (v & ((1ULL << (begin + 1)) - 1)) >> end;
|
||
+}
|
||
+
|
||
+// Set immediate val in the instruction at *loc. In U-type instructions the
|
||
+// upper 20bits carry the upper 20bits of the immediate.
|
||
+void setUType(inst_t *loc, int32_t val) {
|
||
+ const unsigned bits = 32;
|
||
+ uint32_t hi = val + 0x800;
|
||
+ checkInt(loc, signExtend32(hi, bits) >> 12, 20);
|
||
+ IF_DEBUG(linker, debugBelch("setUType: hi 0x%x val 0x%x\n", hi, val));
|
||
+
|
||
+ uint32_t imm = hi & 0xFFFFF000;
|
||
+ write32le(loc, (read32le(loc) & 0xFFF) | imm);
|
||
+}
|
||
+
|
||
+// Set immediate val in the instruction at *loc. In I-type instructions the
|
||
+// upper 12bits carry the lower 12bit of the immediate.
|
||
+void setIType(inst_t *loc, int32_t val) {
|
||
+ uint64_t hi = (val + 0x800) >> 12;
|
||
+ uint64_t lo = val - (hi << 12);
|
||
+
|
||
+ IF_DEBUG(linker, debugBelch("setIType: hi 0x%lx lo 0x%lx\n", hi, lo));
|
||
+ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc 0x%x val 0x%x\n", loc,
|
||
+ *loc, val));
|
||
+
|
||
+ uint32_t imm = lo & 0xfff;
|
||
+ uint32_t instr = (read32le(loc) & 0xfffff) | (imm << 20);
|
||
+
|
||
+ IF_DEBUG(linker, debugBelch("setIType: insn 0x%x\n", instr));
|
||
+ write32le(loc, instr);
|
||
+ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc' 0x%x val 0x%x\n", loc,
|
||
+ *loc, val));
|
||
+}
|
||
+
|
||
+// Set immediate val in the instruction at *loc. In S-type instructions the
|
||
+// lower 12 bits of the immediate are at bits 7 to 11 ([0:4]) and 25 to 31
|
||
+// ([5:11]).
|
||
+void setSType(inst_t *loc, uint32_t val) {
|
||
+ uint64_t hi = (val + 0x800) >> 12;
|
||
+ uint64_t lo = val - (hi << 12);
|
||
+
|
||
+ uint32_t imm = lo;
|
||
+ uint32_t instr = (read32le(loc) & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) |
|
||
+ (extractBits(imm, 4, 0) << 7);
|
||
+
|
||
+ write32le(loc, instr);
|
||
+}
|
||
+
|
||
+// Set immediate val in the instruction at *loc. In J-type instructions the
|
||
+// immediate has 20bits which are pretty scattered:
|
||
+// instr bit -> imm bit
|
||
+// 31 -> 20
|
||
+// [30:21] -> [10:1]
|
||
+// 20 -> 11
|
||
+// [19:12] -> [19:12]
|
||
+//
|
||
+// N.B. bit 0 of the immediate is missing!
|
||
+void setJType(inst_t *loc, uint32_t val) {
|
||
+ checkInt(loc, val, 21);
|
||
+
|
||
+ uint32_t insn = read32le(loc) & 0xFFF;
|
||
+ uint32_t imm20 = extractBits(val, 20, 20) << 31;
|
||
+ uint32_t imm10_1 = extractBits(val, 10, 1) << 21;
|
||
+ uint32_t imm11 = extractBits(val, 11, 11) << 20;
|
||
+ uint32_t imm19_12 = extractBits(val, 19, 12) << 12;
|
||
+ insn |= imm20 | imm10_1 | imm11 | imm19_12;
|
||
+
|
||
+ write32le(loc, insn);
|
||
+}
|
||
+
|
||
+// Set immediate val in the instruction at *loc. In B-type instructions the
|
||
+// immediate has 12bits which are pretty scattered:
|
||
+// instr bit -> imm bit
|
||
+// 31 -> 12
|
||
+// [30:25] -> [10:5]
|
||
+// [11:8] -> [4:1]
|
||
+// 7 -> 11
|
||
+//
|
||
+// N.B. bit 0 of the immediate is missing!
|
||
+void setBType(inst_t *loc, uint32_t val) {
|
||
+ checkInt(loc, val, 13);
|
||
+
|
||
+ uint32_t insn = read32le(loc) & 0x1FFF07F;
|
||
+ uint32_t imm12 = extractBits(val, 12, 12) << 31;
|
||
+ uint32_t imm10_5 = extractBits(val, 10, 5) << 25;
|
||
+ uint32_t imm4_1 = extractBits(val, 4, 1) << 8;
|
||
+ uint32_t imm11 = extractBits(val, 11, 11) << 7;
|
||
+ insn |= imm12 | imm10_5 | imm4_1 | imm11;
|
||
+
|
||
+ write32le(loc, insn);
|
||
+}
|
||
+
|
||
+
|
||
+// Set immediate val in the instruction at *loc. CB-type instructions have a
|
||
+// lenght of 16 bits (half-word, compared to the usual 32bit/word instructions.)
|
||
+// The immediate has 8bits which are pretty scattered:
|
||
+// instr bit -> imm bit
|
||
+// 12 -> 8
|
||
+// [11:10] -> [4:3]
|
||
+// [6:5] -> [7:6]
|
||
+// [4:3] -> [2:1]
|
||
+// 2 -> 5
|
||
+//
|
||
+// N.B. bit 0 of the immediate is missing!
|
||
+void setCBType(cinst_t *loc, uint32_t val) {
|
||
+ checkInt((inst_t *)loc, val, 9);
|
||
+ uint16_t insn = read16le(loc) & 0xE383;
|
||
+ uint16_t imm8 = extractBits(val, 8, 8) << 12;
|
||
+ uint16_t imm4_3 = extractBits(val, 4, 3) << 10;
|
||
+ uint16_t imm7_6 = extractBits(val, 7, 6) << 5;
|
||
+ uint16_t imm2_1 = extractBits(val, 2, 1) << 3;
|
||
+ uint16_t imm5 = extractBits(val, 5, 5) << 2;
|
||
+ insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5;
|
||
+
|
||
+ write16le(loc, insn);
|
||
+}
|
||
+
|
||
+// Set immediate val in the instruction at *loc. CJ-type instructions have a
|
||
+// lenght of 16 bits (half-word, compared to the usual 32bit/word instructions.)
|
||
+// The immediate has 11bits which are pretty scattered:
|
||
+// instr bit -> imm bit
|
||
+// 12 -> 11
|
||
+// 11 -> 4
|
||
+// [10:9] ->[9:8]
|
||
+// 8 -> 10
|
||
+// 7 -> 6
|
||
+// 6 -> 7
|
||
+// [5:3] -> [3:1]
|
||
+// 2 -> 5
|
||
+//
|
||
+// N.B. bit 0 of the immediate is missing!
|
||
+void setCJType(cinst_t *loc, uint32_t val) {
|
||
+ checkInt((inst_t *)loc, val, 12);
|
||
+ uint16_t insn = read16le(loc) & 0xE003;
|
||
+ uint16_t imm11 = extractBits(val, 11, 11) << 12;
|
||
+ uint16_t imm4 = extractBits(val, 4, 4) << 11;
|
||
+ uint16_t imm9_8 = extractBits(val, 9, 8) << 9;
|
||
+ uint16_t imm10 = extractBits(val, 10, 10) << 8;
|
||
+ uint16_t imm6 = extractBits(val, 6, 6) << 7;
|
||
+ uint16_t imm7 = extractBits(val, 7, 7) << 6;
|
||
+ uint16_t imm3_1 = extractBits(val, 3, 1) << 3;
|
||
+ uint16_t imm5 = extractBits(val, 5, 5) << 2;
|
||
+ insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5;
|
||
+
|
||
+ write16le(loc, insn);
|
||
+}
|
||
+
|
||
+// Encode the addend according to the relocaction into the instruction.
|
||
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend) {
|
||
+ // instruction to rewrite (P: Position of the relocation)
|
||
+ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
|
||
+ IF_DEBUG(linker,
|
||
+ debugBelch(
|
||
+ "Relocation type %s 0x%lx (%lu) symbol 0x%lx addend 0x%x (%u / "
|
||
+ "%d) P 0x%lx\n",
|
||
+ relocationTypeToString(rel->r_info), ELF64_R_TYPE(rel->r_info),
|
||
+ ELF64_R_TYPE(rel->r_info), ELF64_R_SYM(rel->r_info), addend,
|
||
+ addend, addend, P));
|
||
+ switch (ELF64_R_TYPE(rel->r_info)) {
|
||
+ case R_RISCV_32_PCREL:
|
||
+ case R_RISCV_32:
|
||
+ write32le((inst_t *)P, addend);
|
||
+ break;
|
||
+ case R_RISCV_64:
|
||
+ write64le((uint64_t *)P, addend);
|
||
+ break;
|
||
+ case R_RISCV_GOT_HI20:
|
||
+ case R_RISCV_PCREL_HI20:
|
||
+ case R_RISCV_HI20: {
|
||
+ setUType((inst_t *)P, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_PCREL_LO12_I:
|
||
+ case R_RISCV_LO12_I: {
|
||
+ setIType((inst_t *)P, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_RVC_JUMP: {
|
||
+ setCJType((cinst_t *)P, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_RVC_BRANCH: {
|
||
+ setCBType((cinst_t *)P, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_BRANCH: {
|
||
+ setBType((inst_t *)P, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_CALL:
|
||
+ case R_RISCV_CALL_PLT: {
|
||
+ // We could relax more (in some cases) but right now most important is to
|
||
+ // make it work.
|
||
+ setUType((inst_t *)P, addend);
|
||
+ setIType(((inst_t *)P) + 1, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_JAL: {
|
||
+ setJType((inst_t *)P, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_ADD8:
|
||
+ write8le((uint8_t *)P, read8le((uint8_t *)P) + addend);
|
||
+ break;
|
||
+ case R_RISCV_ADD16:
|
||
+ write16le((cinst_t *)P, read16le((cinst_t *)P) + addend);
|
||
+ break;
|
||
+ case R_RISCV_ADD32:
|
||
+ write32le((inst_t *)P, read32le((inst_t *)P) + addend);
|
||
+ break;
|
||
+ case R_RISCV_ADD64:
|
||
+ write64le((uint64_t *)P, read64le((uint64_t *)P) + addend);
|
||
+ break;
|
||
+ case R_RISCV_SUB6: {
|
||
+ uint8_t keep = *((uint8_t *)P) & 0xc0;
|
||
+ uint8_t imm = (((*(uint8_t *)P) & 0x3f) - addend) & 0x3f;
|
||
+
|
||
+ write8le((uint8_t *)P, keep | imm);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_SUB8:
|
||
+ write8le((uint8_t *)P, read8le((uint8_t *)P) - addend);
|
||
+ break;
|
||
+ case R_RISCV_SUB16:
|
||
+ write16le((cinst_t *)P, read16le((cinst_t *)P) - addend);
|
||
+ break;
|
||
+ case R_RISCV_SUB32:
|
||
+ write32le((inst_t *)P, read32le((inst_t *)P) - addend);
|
||
+ break;
|
||
+ case R_RISCV_SUB64:
|
||
+ write64le((uint64_t *)P, read64le((uint64_t *)P) - addend);
|
||
+ break;
|
||
+ case R_RISCV_SET6: {
|
||
+ uint8_t keep = *((uint8_t *)P) & 0xc0;
|
||
+ uint8_t imm = (addend & 0x3f) & 0x3f;
|
||
+
|
||
+ write8le((uint8_t *)P, keep | imm);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_SET8:
|
||
+ write8le((uint8_t *)P, addend);
|
||
+ break;
|
||
+ case R_RISCV_SET16:
|
||
+ write16le((cinst_t *)P, addend);
|
||
+ break;
|
||
+ case R_RISCV_SET32:
|
||
+ write32le((inst_t *)P, addend);
|
||
+ break;
|
||
+ case R_RISCV_PCREL_LO12_S:
|
||
+ case R_RISCV_TPREL_LO12_S:
|
||
+ case R_RISCV_LO12_S: {
|
||
+ setSType((inst_t *)P, addend);
|
||
+ break;
|
||
+ }
|
||
+ case R_RISCV_RELAX:
|
||
+ case R_RISCV_ALIGN:
|
||
+ // Implementing relaxations (rewriting instructions to more efficient ones)
|
||
+ // could be implemented in future. As the code already is aligned and we do
|
||
+ // not change the instruction sizes, we should get away with not aligning
|
||
+ // (though, that is cheating.) To align or change the instruction count, we
|
||
+ // would need machinery to squeeze or extend memory at the current location.
|
||
+ break;
|
||
+ default:
|
||
+ barf("Missing relocation 0x%lx\n", ELF64_R_TYPE(rel->r_info));
|
||
+ }
|
||
+ return EXIT_SUCCESS;
|
||
+}
|
||
+
|
||
+/**
|
||
+ * Compute the *new* addend for a relocation, given a pre-existing addend.
|
||
+ * @param section The section the relocation is in.
|
||
+ * @param rel The Relocation struct.
|
||
+ * @param symbol The target symbol.
|
||
+ * @param addend The existing addend. Either explicit or implicit.
|
||
+ * @return The new computed addend.
|
||
+ */
|
||
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
|
||
+ int64_t addend, ObjectCode *oc) {
|
||
+ Section * section = &oc->sections[relaTab->targetSectionIndex];
|
||
+
|
||
+ // instruction to rewrite (P: Position of the relocation)
|
||
+ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
|
||
+
|
||
+ CHECK(0x0 != P);
|
||
+ CHECK((uint64_t)section->start <= P);
|
||
+ CHECK(P <= (uint64_t)section->start + section->size);
|
||
+ // S: Value of the symbol in the symbol table
|
||
+ addr_t S = (addr_t)symbol->addr;
|
||
+ /* GOT slot for the symbol (G + GOT) */
|
||
+ addr_t GOT_S = (addr_t)symbol->got_addr;
|
||
+
|
||
+ // A: Addend field in the relocation entry associated with the symbol
|
||
+ int64_t A = addend;
|
||
+
|
||
+ IF_DEBUG(linker, debugBelch("%s: P 0x%lx S 0x%lx %s GOT_S 0x%lx A 0x%lx relNo %u\n",
|
||
+ relocationTypeToString(rel->r_info), P, S,
|
||
+ symbol->name, GOT_S, A, relNo));
|
||
+ switch (ELF64_R_TYPE(rel->r_info)) {
|
||
+ case R_RISCV_32:
|
||
+ return S + A;
|
||
+ case R_RISCV_64:
|
||
+ return S + A;
|
||
+ case R_RISCV_HI20:
|
||
+ return S + A;
|
||
+ case R_RISCV_JUMP_SLOT:
|
||
+ return S;
|
||
+ case R_RISCV_JAL:
|
||
+ return S + A - P;
|
||
+ case R_RISCV_PCREL_HI20:
|
||
+ return S + A - P;
|
||
+ case R_RISCV_LO12_I:
|
||
+ return S + A;
|
||
+ // Quoting LLVM docs: For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}),
|
||
+ // the symbol actually points the corresponding R_RISCV_PCREL_HI20
|
||
+ // relocation, and the target VA is calculated using PCREL_HI20's symbol.
|
||
+ case R_RISCV_PCREL_LO12_S:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_PCREL_LO12_I: {
|
||
+ // Lookup related HI20 relocation and use that value. I'm still confused why
|
||
+ // relocations aren't self-contained, but this is how LLVM does it. And,
|
||
+ // calculating the lower 12 bit without any relationship to the GOT entry's
|
||
+ // address makes no sense either.
|
||
+ for (int64_t i = relNo; i >= 0 ; i--) {
|
||
+ Elf_Rela *rel_prime = &relaTab->relocations[i];
|
||
+
|
||
+ addr_t P_prime =
|
||
+ (addr_t)((uint8_t *)section->start + rel_prime->r_offset);
|
||
+
|
||
+ if (P_prime != S) {
|
||
+ // S points to the P of the corresponding *_HI20 relocation.
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ ElfSymbol *symbol_prime =
|
||
+ findSymbol(oc, relaTab->sectionHeader->sh_link,
|
||
+ ELF64_R_SYM((Elf64_Xword)rel_prime->r_info));
|
||
+
|
||
+ CHECK(0x0 != symbol_prime);
|
||
+
|
||
+ /* take explicit addend */
|
||
+ int64_t addend_prime = rel_prime->r_addend;
|
||
+
|
||
+ uint64_t type_prime = ELF64_R_TYPE(rel_prime->r_info);
|
||
+
|
||
+ if (type_prime == R_RISCV_PCREL_HI20 ||
|
||
+ type_prime == R_RISCV_GOT_HI20 ||
|
||
+ type_prime == R_RISCV_TLS_GD_HI20 ||
|
||
+ type_prime == R_RISCV_TLS_GOT_HI20) {
|
||
+ IF_DEBUG(linker,
|
||
+ debugBelch(
|
||
+ "Found matching relocation: %s (P: 0x%lx, S: 0x%lx, "
|
||
+ "sym-name: %s) -> %s (P: 0x%lx, S: %p, sym-name: %s, relNo: %ld)",
|
||
+ relocationTypeToString(rel->r_info), P, S, symbol->name,
|
||
+ relocationTypeToString(rel_prime->r_info), P_prime,
|
||
+ symbol_prime->addr, symbol_prime->name, i));
|
||
+ int32_t result = computeAddend(relaTab, i, (Elf_Rel *)rel_prime,
|
||
+ symbol_prime, addend_prime, oc);
|
||
+ IF_DEBUG(linker, debugBelch("Result of computeAddend: 0x%x (%d)\n",
|
||
+ result, result));
|
||
+ return result;
|
||
+ }
|
||
+ }
|
||
+ debugBelch("Missing HI relocation for %s: P 0x%lx S 0x%lx %s\n",
|
||
+ relocationTypeToString(rel->r_info), P, S, symbol->name);
|
||
+ abort();
|
||
+ }
|
||
+
|
||
+ case R_RISCV_RVC_JUMP:
|
||
+ return S + A - P;
|
||
+ case R_RISCV_RVC_BRANCH:
|
||
+ return S + A - P;
|
||
+ case R_RISCV_BRANCH:
|
||
+ return S + A - P;
|
||
+ case R_RISCV_CALL:
|
||
+ case R_RISCV_CALL_PLT: {
|
||
+ addr_t GOT_Target;
|
||
+ if (GOT_S != 0) {
|
||
+ // 1. Public symbol with GOT entry.
|
||
+ GOT_Target = GOT_S;
|
||
+ } else {
|
||
+ // 2. Fake GOT entry with symbol extra entry.
|
||
+ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
|
||
+ addr_t* FAKE_GOT_S = &symbolExtra->addr;
|
||
+ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT w/ SymbolExtra = %p , "
|
||
+ "entry = %p\n",
|
||
+ symbolExtra, FAKE_GOT_S));
|
||
+ GOT_Target = (addr_t) FAKE_GOT_S;
|
||
+ }
|
||
+
|
||
+ if (findStub(section, (void **)&S, 0)) {
|
||
+ /* did not find it. Crete a new stub. */
|
||
+ if (makeStub(section, (void **)&S, (void *)GOT_Target, 0)) {
|
||
+ abort(/* could not find or make stub */);
|
||
+ }
|
||
+ }
|
||
+ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT: S = 0x%lx A = 0x%lx P = "
|
||
+ "0x%lx (S + A) - P = 0x%lx \n",
|
||
+ S, A, P, (S + A) - P));
|
||
+ return (S + A) - P;
|
||
+ }
|
||
+ case R_RISCV_ADD8:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_ADD16:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_ADD32:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_ADD64:
|
||
+ return S + A; // Add V when the value is set
|
||
+ case R_RISCV_SUB6:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_SUB8:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_SUB16:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_SUB32:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_SUB64:
|
||
+ return S + A; // Subtract from V when value is set
|
||
+ case R_RISCV_SET6:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_SET8:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_SET16:
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_SET32:
|
||
+ return S + A;
|
||
+ case R_RISCV_RELAX:
|
||
+ // This "relocation" has no addend.
|
||
+ FALLTHROUGH;
|
||
+ case R_RISCV_ALIGN:
|
||
+ // I guess we don't need to implement this relaxation. Otherwise, this
|
||
+ // should return the number of blank bytes to insert via NOPs.
|
||
+ return 0;
|
||
+ case R_RISCV_32_PCREL:
|
||
+ return S + A - P;
|
||
+ case R_RISCV_GOT_HI20: {
|
||
+ // TODO: Allocating extra memory for every symbol just to play this trick
|
||
+ // seems to be a bit obscene. (GOT relocations hitting local symbols
|
||
+ // happens, but not very often.) It would be better to allocate only what we
|
||
+ // really need.
|
||
+
|
||
+ // There are two cases here: 1. The symbol is public and has an entry in the
|
||
+ // GOT. 2. It's local and has no corresponding GOT entry. The first case is
|
||
+ // easy: We simply calculate the addend with the GOT address. In the second
|
||
+ // case we create a symbol extra entry and pretend it's the GOT.
|
||
+ if (GOT_S != 0) {
|
||
+ // 1. Public symbol with GOT entry.
|
||
+ return GOT_S + A - P;
|
||
+ } else {
|
||
+ // 2. Fake GOT entry with symbol extra entry.
|
||
+ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
|
||
+ addr_t* FAKE_GOT_S = &symbolExtra->addr;
|
||
+ addr_t res = (addr_t) FAKE_GOT_S + A - P;
|
||
+ IF_DEBUG(linker, debugBelch("R_RISCV_GOT_HI20 w/ SymbolExtra = %p , "
|
||
+ "entry = %p , reloc-addend = 0x%lu ",
|
||
+ symbolExtra, FAKE_GOT_S, res));
|
||
+ return res;
|
||
+ }
|
||
+ }
|
||
+ default:
|
||
+ barf("Unimplemented relocation: 0x%lx\n (%lu)",
|
||
+ ELF64_R_TYPE(rel->r_info), ELF64_R_TYPE(rel->r_info));
|
||
+ }
|
||
+ barf("This should never happen!");
|
||
+}
|
||
+
|
||
+// Iterate over all relocations and perform them.
|
||
+bool relocateObjectCodeRISCV64(ObjectCode *oc) {
|
||
+ for (ElfRelocationTable *relTab = oc->info->relTable; relTab != NULL;
|
||
+ relTab = relTab->next) {
|
||
+ /* only relocate interesting sections */
|
||
+ if (SECTIONKIND_OTHER == oc->sections[relTab->targetSectionIndex].kind)
|
||
+ continue;
|
||
+
|
||
+ Section *targetSection = &oc->sections[relTab->targetSectionIndex];
|
||
+
|
||
+ for (unsigned i = 0; i < relTab->n_relocations; i++) {
|
||
+ Elf_Rel *rel = &relTab->relocations[i];
|
||
+
|
||
+ ElfSymbol *symbol = findSymbol(oc, relTab->sectionHeader->sh_link,
|
||
+ ELF64_R_SYM((Elf64_Xword)rel->r_info));
|
||
+
|
||
+ CHECK(0x0 != symbol);
|
||
+
|
||
+ // This always fails, because we don't support Rel locations, yet: Do we
|
||
+ // need this case? Leaving it in to spot the potential bug when it
|
||
+ // appears.
|
||
+ /* decode implicit addend */
|
||
+ int64_t addend = decodeAddendRISCV64(targetSection, rel);
|
||
+
|
||
+ addend = computeAddend((ElfRelocationATable*) relTab, i, rel, symbol, addend, oc);
|
||
+ encodeAddendRISCV64(targetSection, rel, addend);
|
||
+ }
|
||
+ }
|
||
+ for (ElfRelocationATable *relaTab = oc->info->relaTable; relaTab != NULL;
|
||
+ relaTab = relaTab->next) {
|
||
+ /* only relocate interesting sections */
|
||
+ if (SECTIONKIND_OTHER == oc->sections[relaTab->targetSectionIndex].kind)
|
||
+ continue;
|
||
+
|
||
+ Section *targetSection = &oc->sections[relaTab->targetSectionIndex];
|
||
+
|
||
+ for (unsigned i = 0; i < relaTab->n_relocations; i++) {
|
||
+
|
||
+ Elf_Rela *rel = &relaTab->relocations[i];
|
||
+
|
||
+ ElfSymbol *symbol = findSymbol(oc, relaTab->sectionHeader->sh_link,
|
||
+ ELF64_R_SYM((Elf64_Xword)rel->r_info));
|
||
+
|
||
+ CHECK(0x0 != symbol);
|
||
+
|
||
+ /* take explicit addend */
|
||
+ int64_t addend = rel->r_addend;
|
||
+
|
||
+ addend = computeAddend(relaTab, i, (Elf_Rel *)rel, symbol, addend, oc);
|
||
+ encodeAddendRISCV64(targetSection, (Elf_Rel *)rel, addend);
|
||
+ }
|
||
+ }
|
||
+ return EXIT_SUCCESS;
|
||
+}
|
||
+
|
||
+void flushInstructionCacheRISCV64(ObjectCode *oc) {
|
||
+ // Synchronize the memory and instruction cache to prevent illegal instruction
|
||
+ // exceptions. On Linux the parameters of __builtin___clear_cache are
|
||
+ // currently unused. Add them anyways for future compatibility. (I.e. the
|
||
+ // parameters couldn't be checked during development.)
|
||
+
|
||
+ /* The main object code */
|
||
+ void *codeBegin = oc->image + oc->misalignment;
|
||
+ __builtin___clear_cache(codeBegin, (void*) ((uint64_t*) codeBegin + oc->fileSize));
|
||
+
|
||
+ /* Jump Islands */
|
||
+ __builtin___clear_cache((void *)oc->symbol_extras,
|
||
+ (void *)(oc->symbol_extras + oc->n_symbol_extras));
|
||
+
|
||
+ // Memory barrier to ensure nothing circumvents the fence.i / cache flushes.
|
||
+ SEQ_CST_FENCE();
|
||
+}
|
||
+
|
||
+#endif /* OBJECTFORMAT_ELF */
|
||
+#endif /* riscv64_HOST_ARCH */
|
||
Index: ghc-9.10.1/rts/linker/elf_reloc_riscv64.h
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/rts/linker/elf_reloc_riscv64.h
|
||
@@ -0,0 +1,11 @@
|
||
+#pragma once
|
||
+
|
||
+#include "LinkerInternals.h"
|
||
+
|
||
+#if defined(OBJFORMAT_ELF)
|
||
+
|
||
+bool
|
||
+relocateObjectCodeRISCV64(ObjectCode * oc);
|
||
+
|
||
+void flushInstructionCacheRISCV64(ObjectCode *oc);
|
||
+#endif /* OBJETFORMAT_ELF */
|
||
Index: ghc-9.10.1/rts/rts.cabal
|
||
===================================================================
|
||
--- ghc-9.10.1.orig/rts/rts.cabal
|
||
+++ ghc-9.10.1/rts/rts.cabal
|
||
@@ -468,9 +468,11 @@ library
|
||
linker/elf_got.c
|
||
linker/elf_plt.c
|
||
linker/elf_plt_aarch64.c
|
||
+ linker/elf_plt_riscv64.c
|
||
linker/elf_plt_arm.c
|
||
linker/elf_reloc.c
|
||
linker/elf_reloc_aarch64.c
|
||
+ linker/elf_reloc_riscv64.c
|
||
linker/elf_tlsgd.c
|
||
linker/elf_util.c
|
||
sm/BlockAlloc.c
|
||
Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.hs
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.hs
|
||
@@ -0,0 +1,123 @@
|
||
+{-# LANGUAGE ForeignFunctionInterface #-}
|
||
+{-# LANGUAGE GHCForeignImportPrim #-}
|
||
+{-# LANGUAGE MagicHash #-}
|
||
+{-# LANGUAGE UnboxedTuples #-}
|
||
+{-# LANGUAGE UnliftedFFITypes #-}
|
||
+
|
||
+-- | This test ensures that sub-word signed and unsigned parameters are correctly
|
||
+-- handed over to C functions. I.e. it asserts the calling-convention.
|
||
+--
|
||
+-- The number of parameters is currently shaped for the RISCV64 calling-convention.
|
||
+-- You may need to add more parameters to the C functions in case there are more
|
||
+-- registers reserved for parameters in your architecture.
|
||
+module Main where
|
||
+
|
||
+import Data.Word
|
||
+import GHC.Exts
|
||
+import GHC.Int
|
||
+
|
||
+foreign import ccall "fun8"
|
||
+ fun8 ::
|
||
+ Int8# -> -- a0
|
||
+ Word8# -> -- a1
|
||
+ Int8# -> -- a2
|
||
+ Int8# -> -- a3
|
||
+ Int8# -> -- a4
|
||
+ Int8# -> -- a5
|
||
+ Int8# -> -- a6
|
||
+ Int8# -> -- a7
|
||
+ Word8# -> -- s0
|
||
+ Int8# -> -- s1
|
||
+ Int64# -- result
|
||
+
|
||
+foreign import ccall "fun16"
|
||
+ fun16 ::
|
||
+ Int16# -> -- a0
|
||
+ Word16# -> -- a1
|
||
+ Int16# -> -- a2
|
||
+ Int16# -> -- a3
|
||
+ Int16# -> -- a4
|
||
+ Int16# -> -- a5
|
||
+ Int16# -> -- a6
|
||
+ Int16# -> -- a7
|
||
+ Word16# -> -- s0
|
||
+ Int16# -> -- s1
|
||
+ Int64# -- result
|
||
+
|
||
+foreign import ccall "fun32"
|
||
+ fun32 ::
|
||
+ Int32# -> -- a0
|
||
+ Word32# -> -- a1
|
||
+ Int32# -> -- a2
|
||
+ Int32# -> -- a3
|
||
+ Int32# -> -- a4
|
||
+ Int32# -> -- a5
|
||
+ Int32# -> -- a6
|
||
+ Int32# -> -- a7
|
||
+ Word32# -> -- s0
|
||
+ Int32# -> -- s1
|
||
+ Int64# -- result
|
||
+
|
||
+foreign import ccall "funFloat"
|
||
+ funFloat ::
|
||
+ Float# -> -- a0
|
||
+ Float# -> -- a1
|
||
+ Float# -> -- a2
|
||
+ Float# -> -- a3
|
||
+ Float# -> -- a4
|
||
+ Float# -> -- a5
|
||
+ Float# -> -- a6
|
||
+ Float# -> -- a7
|
||
+ Float# -> -- s0
|
||
+ Float# -> -- s1
|
||
+ Float# -- result
|
||
+
|
||
+foreign import ccall "funDouble"
|
||
+ funDouble ::
|
||
+ Double# -> -- a0
|
||
+ Double# -> -- a1
|
||
+ Double# -> -- a2
|
||
+ Double# -> -- a3
|
||
+ Double# -> -- a4
|
||
+ Double# -> -- a5
|
||
+ Double# -> -- a6
|
||
+ Double# -> -- a7
|
||
+ Double# -> -- s0
|
||
+ Double# -> -- s1
|
||
+ Double# -- result
|
||
+
|
||
+main :: IO ()
|
||
+main =
|
||
+ -- N.B. the values here aren't choosen by accident: -1 means all bits one in
|
||
+ -- twos-complement, which is the same as the max word value.
|
||
+ let i8 :: Int8# = intToInt8# (-1#)
|
||
+ w8 :: Word8# = wordToWord8# (255##)
|
||
+ res8 :: Int64# = fun8 i8 w8 i8 i8 i8 i8 i8 i8 w8 i8
|
||
+ expected_res8 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word8) + 8 * (-1)
|
||
+ i16 :: Int16# = intToInt16# (-1#)
|
||
+ w16 :: Word16# = wordToWord16# (65535##)
|
||
+ res16 :: Int64# = fun16 i16 w16 i16 i16 i16 i16 i16 i16 w16 i16
|
||
+ expected_res16 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word16) + 8 * (-1)
|
||
+ i32 :: Int32# = intToInt32# (-1#)
|
||
+ w32 :: Word32# = wordToWord32# (4294967295##)
|
||
+ res32 :: Int64# = fun32 i32 w32 i32 i32 i32 i32 i32 i32 w32 i32
|
||
+ expected_res32 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word32) + 8 * (-1)
|
||
+ resFloat :: Float = F# (funFloat 1.0# 1.1# 1.2# 1.3# 1.4# 1.5# 1.6# 1.7# 1.8# 1.9#)
|
||
+ resDouble :: Double = D# (funDouble 1.0## 1.1## 1.2## 1.3## 1.4## 1.5## 1.6## 1.7## 1.8## 1.9##)
|
||
+ in do
|
||
+ print $ "fun8 result:" ++ show (I64# res8)
|
||
+ assertEqual expected_res8 (I64# res8)
|
||
+ print $ "fun16 result:" ++ show (I64# res16)
|
||
+ assertEqual expected_res16 (I64# res16)
|
||
+ print $ "fun32 result:" ++ show (I64# res32)
|
||
+ assertEqual expected_res32 (I64# res32)
|
||
+ print $ "funFloat result:" ++ show resFloat
|
||
+ assertEqual (14.5 :: Float) resFloat
|
||
+ print $ "funDouble result:" ++ show resDouble
|
||
+ assertEqual (14.5 :: Double) resDouble
|
||
+
|
||
+assertEqual :: (Eq a, Show a) => a -> a -> IO ()
|
||
+assertEqual a b =
|
||
+ if a == b
|
||
+ then pure ()
|
||
+ else error $ show a ++ " =/= " ++ show b
|
||
Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.stdout
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv.stdout
|
||
@@ -0,0 +1,60 @@
|
||
+"fun8 result:502"
|
||
+"fun16 result:131062"
|
||
+"fun32 result:8589934582"
|
||
+"funFloat result:14.5"
|
||
+"funDouble result:14.5"
|
||
+fun32:
|
||
+a0: 0xffffffff -1
|
||
+a1: 0xffffffff 4294967295
|
||
+a2: 0xffffffff -1
|
||
+a3: 0xffffffff -1
|
||
+a4: 0xffffffff -1
|
||
+a5: 0xffffffff -1
|
||
+a6: 0xffffffff -1
|
||
+a7: 0xffffffff -1
|
||
+s0: 0xffffffff -1
|
||
+s1: 0xffffffff 4294967295
|
||
+fun16:
|
||
+a0: 0xffffffff -1
|
||
+a1: 0xffff 65535
|
||
+a2: 0xffffffff -1
|
||
+a3: 0xffffffff -1
|
||
+a4: 0xffffffff -1
|
||
+a5: 0xffffffff -1
|
||
+a6: 0xffffffff -1
|
||
+a7: 0xffffffff -1
|
||
+s0: 0xffffffff -1
|
||
+s1: 0xffff 65535
|
||
+fun8:
|
||
+a0: 0xffffffff -1
|
||
+a1: 0xff 255
|
||
+a2: 0xffffffff -1
|
||
+a3: 0xffffffff -1
|
||
+a4: 0xffffffff -1
|
||
+a5: 0xffffffff -1
|
||
+a6: 0xffffffff -1
|
||
+a7: 0xffffffff -1
|
||
+s0: 0xffffffff -1
|
||
+s1: 0xff 255
|
||
+funFloat:
|
||
+a0: 1.000000
|
||
+a1: 1.100000
|
||
+a2: 1.200000
|
||
+a3: 1.300000
|
||
+a4: 1.400000
|
||
+a5: 1.500000
|
||
+a6: 1.600000
|
||
+a7: 1.700000
|
||
+s0: 1.800000
|
||
+s1: 1.900000
|
||
+funDouble:
|
||
+a0: 1.000000
|
||
+a1: 1.100000
|
||
+a2: 1.200000
|
||
+a3: 1.300000
|
||
+a4: 1.400000
|
||
+a5: 1.500000
|
||
+a6: 1.600000
|
||
+a7: 1.700000
|
||
+s0: 1.800000
|
||
+s1: 1.900000
|
||
Index: ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv_c.c
|
||
===================================================================
|
||
--- /dev/null
|
||
+++ ghc-9.10.1/testsuite/tests/codeGen/should_run/CCallConv_c.c
|
||
@@ -0,0 +1,91 @@
|
||
+#include "stdint.h"
|
||
+#include "stdio.h"
|
||
+
|
||
+int64_t fun8(int8_t a0, uint8_t a1, int8_t a2, int8_t a3, int8_t a4, int8_t a5,
|
||
+ int8_t a6, int8_t a7, int8_t s0, uint8_t s1) {
|
||
+ printf("fun8:\n");
|
||
+ printf("a0: %#x %hhd\n", a0, a0);
|
||
+ printf("a1: %#x %hhu\n", a1, a1);
|
||
+ printf("a2: %#x %hhd\n", a2, a2);
|
||
+ printf("a3: %#x %hhd\n", a3, a3);
|
||
+ printf("a4: %#x %hhd\n", a4, a4);
|
||
+ printf("a5: %#x %hhd\n", a5, a5);
|
||
+ printf("a6: %#x %hhd\n", a6, a6);
|
||
+ printf("a7: %#x %hhd\n", a7, a7);
|
||
+ printf("s0: %#x %hhd\n", s0, s0);
|
||
+ printf("s1: %#x %hhu\n", s1, s1);
|
||
+
|
||
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
|
||
+}
|
||
+
|
||
+int64_t fun16(int16_t a0, uint16_t a1, int16_t a2, int16_t a3, int16_t a4,
|
||
+ int16_t a5, int16_t a6, int16_t a7, int16_t s0, uint16_t s1) {
|
||
+ printf("fun16:\n");
|
||
+ printf("a0: %#x %hd\n", a0, a0);
|
||
+ printf("a1: %#x %hu\n", a1, a1);
|
||
+ printf("a2: %#x %hd\n", a2, a2);
|
||
+ printf("a3: %#x %hd\n", a3, a3);
|
||
+ printf("a4: %#x %hd\n", a4, a4);
|
||
+ printf("a5: %#x %hd\n", a5, a5);
|
||
+ printf("a6: %#x %hd\n", a6, a6);
|
||
+ printf("a7: %#x %hd\n", a7, a7);
|
||
+ printf("s0: %#x %hd\n", s0, s0);
|
||
+ printf("s1: %#x %hu\n", s1, s1);
|
||
+
|
||
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
|
||
+}
|
||
+
|
||
+int64_t fun32(int32_t a0, uint32_t a1, int32_t a2, int32_t a3, int32_t a4,
|
||
+ int32_t a5, int32_t a6, int32_t a7, int32_t s0, uint32_t s1) {
|
||
+ printf("fun32:\n");
|
||
+ printf("a0: %#x %d\n", a0, a0);
|
||
+ printf("a1: %#x %u\n", a1, a1);
|
||
+ printf("a2: %#x %d\n", a2, a2);
|
||
+ printf("a3: %#x %d\n", a3, a3);
|
||
+ printf("a4: %#x %d\n", a4, a4);
|
||
+ printf("a5: %#x %d\n", a5, a5);
|
||
+ printf("a6: %#x %d\n", a6, a6);
|
||
+ printf("a7: %#x %d\n", a7, a7);
|
||
+ printf("s0: %#x %d\n", s0, s0);
|
||
+ printf("s1: %#x %u\n", s1, s1);
|
||
+
|
||
+ // Ensure the addition happens in long int (not just int) precission.
|
||
+ // Otherwise, the result is truncated during the operation.
|
||
+ int64_t force_int64_precission = 0;
|
||
+ return force_int64_precission + a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 +
|
||
+ s1;
|
||
+}
|
||
+
|
||
+float funFloat(float a0, float a1, float a2, float a3, float a4, float a5,
|
||
+ float a6, float a7, float s0, float s1) {
|
||
+ printf("funFloat:\n");
|
||
+ printf("a0: %f\n", a0);
|
||
+ printf("a1: %f\n", a1);
|
||
+ printf("a2: %f\n", a2);
|
||
+ printf("a3: %f\n", a3);
|
||
+ printf("a4: %f\n", a4);
|
||
+ printf("a5: %f\n", a5);
|
||
+ printf("a6: %f\n", a6);
|
||
+ printf("a7: %f\n", a7);
|
||
+ printf("s0: %f\n", s0);
|
||
+ printf("s1: %f\n", s1);
|
||
+
|
||
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
|
||
+}
|
||
+
|
||
+double funDouble(double a0, double a1, double a2, double a3, double a4, double a5,
|
||
+ double a6, double a7, double s0, double s1) {
|
||
+ printf("funDouble:\n");
|
||
+ printf("a0: %f\n", a0);
|
||
+ printf("a1: %f\n", a1);
|
||
+ printf("a2: %f\n", a2);
|
||
+ printf("a3: %f\n", a3);
|
||
+ printf("a4: %f\n", a4);
|
||
+ printf("a5: %f\n", a5);
|
||
+ printf("a6: %f\n", a6);
|
||
+ printf("a7: %f\n", a7);
|
||
+ printf("s0: %f\n", s0);
|
||
+ printf("s1: %f\n", s1);
|
||
+
|
||
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
|
||
+}
|