Add ppc64le native code generator. Back port of my upstream patch for ghc 7.12. OBS-URL: https://build.opensuse.org/request/show/309163 OBS-URL: https://build.opensuse.org/package/show/devel:languages:haskell/ghc?expand=0&rev=177
2499 lines
106 KiB
Diff
2499 lines
106 KiB
Diff
Index: ghc-7.8.4/aclocal.m4
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/aclocal.m4
|
|
+++ ghc-7.8.4/aclocal.m4
|
|
@@ -188,7 +188,10 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_V
|
|
test -z "[$]2" || eval "[$]2=ArchPPC"
|
|
;;
|
|
powerpc64)
|
|
- test -z "[$]2" || eval "[$]2=ArchPPC_64"
|
|
+ test -z "[$]2" || eval "[$]2=\"ArchPPC_64 {ppc_64ABI = ELF_V1}\""
|
|
+ ;;
|
|
+ powerpc64le)
|
|
+ test -z "[$]2" || eval "[$]2=\"ArchPPC_64 {ppc_64ABI = ELF_V2}\""
|
|
;;
|
|
sparc)
|
|
test -z "[$]2" || eval "[$]2=ArchSPARC"
|
|
@@ -206,7 +209,7 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_V
|
|
mipsel)
|
|
test -z "[$]2" || eval "[$]2=ArchMipsel"
|
|
;;
|
|
- hppa|hppa1_1|ia64|m68k|powerpc64le|rs6000|s390|s390x|sparc64|vax)
|
|
+ hppa|hppa1_1|ia64|m68k|rs6000|s390|s390x|sparc64|vax)
|
|
test -z "[$]2" || eval "[$]2=ArchUnknown"
|
|
;;
|
|
*)
|
|
Index: ghc-7.8.4/compiler/cmm/CLabel.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/cmm/CLabel.hs
|
|
+++ ghc-7.8.4/compiler/cmm/CLabel.hs
|
|
@@ -1158,16 +1158,24 @@ pprDynamicLinkerAsmLabel platform dllInf
|
|
else if osElfTarget (platformOS platform)
|
|
then if platformArch platform == ArchPPC
|
|
then case dllInfo of
|
|
- CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
|
|
- ppr lbl <> text "+32768@plt"
|
|
- SymbolPtr -> text ".LC_" <> ppr lbl
|
|
- _ -> panic "pprDynamicLinkerAsmLabel"
|
|
+ CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
|
|
+ ppr lbl <> text "+32768@plt"
|
|
+ SymbolPtr -> text ".LC_" <> ppr lbl
|
|
+ _ -> panic "pprDynamicLinkerAsmLabel"
|
|
else if platformArch platform == ArchX86_64
|
|
then case dllInfo of
|
|
CodeStub -> ppr lbl <> text "@plt"
|
|
GotSymbolPtr -> ppr lbl <> text "@gotpcrel"
|
|
GotSymbolOffset -> ppr lbl
|
|
SymbolPtr -> text ".LC_" <> ppr lbl
|
|
+ else if platformArch platform == ArchPPC_64 ELF_V1
|
|
+ || platformArch platform == ArchPPC_64 ELF_V2
|
|
+ then case dllInfo of
|
|
+ GotSymbolPtr -> text ".LC_" <> ppr lbl
|
|
+ <> text "@toc"
|
|
+ GotSymbolOffset -> ppr lbl
|
|
+ SymbolPtr -> text ".LC_" <> ppr lbl
|
|
+ _ -> panic "pprDynamicLinkerAsmLabel"
|
|
else case dllInfo of
|
|
CodeStub -> ppr lbl <> text "@plt"
|
|
SymbolPtr -> text ".LC_" <> ppr lbl
|
|
Index: ghc-7.8.4/compiler/codeGen/CodeGen/Platform.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/codeGen/CodeGen/Platform.hs
|
|
+++ ghc-7.8.4/compiler/codeGen/CodeGen/Platform.hs
|
|
@@ -29,7 +29,7 @@ callerSaves platform
|
|
ArchSPARC -> SPARC.callerSaves
|
|
ArchARM {} -> ARM.callerSaves
|
|
arch
|
|
- | arch `elem` [ArchPPC, ArchPPC_64] ->
|
|
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
|
|
case platformOS platform of
|
|
OSDarwin -> PPC_Darwin.callerSaves
|
|
_ -> PPC.callerSaves
|
|
@@ -51,7 +51,7 @@ activeStgRegs platform
|
|
ArchSPARC -> SPARC.activeStgRegs
|
|
ArchARM {} -> ARM.activeStgRegs
|
|
arch
|
|
- | arch `elem` [ArchPPC, ArchPPC_64] ->
|
|
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
|
|
case platformOS platform of
|
|
OSDarwin -> PPC_Darwin.activeStgRegs
|
|
_ -> PPC.activeStgRegs
|
|
@@ -68,7 +68,7 @@ haveRegBase platform
|
|
ArchSPARC -> SPARC.haveRegBase
|
|
ArchARM {} -> ARM.haveRegBase
|
|
arch
|
|
- | arch `elem` [ArchPPC, ArchPPC_64] ->
|
|
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
|
|
case platformOS platform of
|
|
OSDarwin -> PPC_Darwin.haveRegBase
|
|
_ -> PPC.haveRegBase
|
|
@@ -85,7 +85,7 @@ globalRegMaybe platform
|
|
ArchSPARC -> SPARC.globalRegMaybe
|
|
ArchARM {} -> ARM.globalRegMaybe
|
|
arch
|
|
- | arch `elem` [ArchPPC, ArchPPC_64] ->
|
|
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
|
|
case platformOS platform of
|
|
OSDarwin -> PPC_Darwin.globalRegMaybe
|
|
_ -> PPC.globalRegMaybe
|
|
@@ -102,7 +102,7 @@ freeReg platform
|
|
ArchSPARC -> SPARC.freeReg
|
|
ArchARM {} -> ARM.freeReg
|
|
arch
|
|
- | arch `elem` [ArchPPC, ArchPPC_64] ->
|
|
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
|
|
case platformOS platform of
|
|
OSDarwin -> PPC_Darwin.freeReg
|
|
_ -> PPC.freeReg
|
|
Index: ghc-7.8.4/compiler/nativeGen/PIC.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/PIC.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/PIC.hs
|
|
@@ -158,7 +158,14 @@ cmmMakePicReference dflags lbl
|
|
-- everything gets relocated at runtime
|
|
| OSMinGW32 <- platformOS $ targetPlatform dflags
|
|
= CmmLit $ CmmLabel lbl
|
|
-
|
|
+ -- both ABI versions default to medium code model
|
|
+ | ArchPPC_64 _ <- platformArch $ targetPlatform dflags
|
|
+ = CmmMachOp (MO_Add W32) -- code model medium
|
|
+ [ CmmReg (CmmGlobal PicBaseReg)
|
|
+ , CmmLit $ picRelative
|
|
+ (platformArch $ targetPlatform dflags)
|
|
+ (platformOS $ targetPlatform dflags)
|
|
+ lbl ]
|
|
|
|
| (gopt Opt_PIC dflags || not (gopt Opt_Static dflags)) && absoluteLabel lbl
|
|
= CmmMachOp (MO_Add (wordWidth dflags))
|
|
@@ -293,13 +300,17 @@ howToAccessLabel dflags arch OSDarwin th
|
|
-- from position independent code. It is also required from the main program
|
|
-- when dynamic libraries containing Haskell code are used.
|
|
|
|
-howToAccessLabel _ ArchPPC_64 os _ kind _
|
|
+howToAccessLabel _ (ArchPPC_64 _) os _ kind _
|
|
| osElfTarget os
|
|
- = if kind == DataReference
|
|
- -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
|
|
- then AccessViaSymbolPtr
|
|
- -- actually, .label instead of label
|
|
- else AccessDirectly
|
|
+ = case kind of
|
|
+ -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
|
|
+ DataReference -> AccessViaSymbolPtr
|
|
+ -- RTLD does not generate stubs for function descriptors
|
|
+ -- in tail calls. Create a symbol pointer and generate
|
|
+ -- the code to load the function descriptor at the call site.
|
|
+ JumpReference -> AccessViaSymbolPtr
|
|
+ -- regular calls are handled by the runtime linker
|
|
+ _ -> AccessDirectly
|
|
|
|
howToAccessLabel dflags _ os _ _ _
|
|
-- no PIC -> the dynamic linker does everything for us;
|
|
@@ -430,9 +441,14 @@ needImportedSymbols dflags arch os
|
|
, arch == ArchPPC
|
|
= gopt Opt_PIC dflags || not (gopt Opt_Static dflags)
|
|
|
|
+ -- PowerPC 64 Linux: always
|
|
+ | osElfTarget os
|
|
+ , arch == ArchPPC_64 ELF_V1 || arch == ArchPPC_64 ELF_V2
|
|
+ = True
|
|
+
|
|
-- i386 (and others?): -dynamic but not -fPIC
|
|
| osElfTarget os
|
|
- , arch /= ArchPPC_64
|
|
+ , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
|
|
= not (gopt Opt_Static dflags) && not (gopt Opt_PIC dflags)
|
|
|
|
| otherwise
|
|
@@ -467,16 +483,30 @@ pprGotDeclaration dflags ArchX86 OSDarwi
|
|
pprGotDeclaration _ _ OSDarwin
|
|
= empty
|
|
|
|
+-- PPC 64 ELF v1needs a Table Of Contents (TOC) on Linux
|
|
+pprGotDeclaration _ (ArchPPC_64 ELF_V1) OSLinux
|
|
+ = ptext (sLit ".section \".toc\",\"aw\"")
|
|
+-- In ELF v2 we also need to tell the assembler that we want ABI
|
|
+-- version 2. This would normally be done at the top of the file
|
|
+-- right after a file directive, but I could not figure out how
|
|
+-- to do that.
|
|
+pprGotDeclaration _ (ArchPPC_64 ELF_V2) OSLinux
|
|
+ = vcat [ ptext (sLit ".abiversion 2"),
|
|
+ ptext (sLit ".section \".toc\",\"aw\"")
|
|
+ ]
|
|
+pprGotDeclaration _ (ArchPPC_64 _) _
|
|
+ = panic "pprGotDeclaration: ArchPPC_64 only Linux supported"
|
|
+
|
|
-- Emit GOT declaration
|
|
-- Output whatever needs to be output once per .s file.
|
|
pprGotDeclaration dflags arch os
|
|
| osElfTarget os
|
|
- , arch /= ArchPPC_64
|
|
+ , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
|
|
, not (gopt Opt_PIC dflags)
|
|
= empty
|
|
|
|
| osElfTarget os
|
|
- , arch /= ArchPPC_64
|
|
+ , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
|
|
= vcat [
|
|
-- See Note [.LCTOC1 in PPC PIC code]
|
|
ptext (sLit ".section \".got2\",\"aw\""),
|
|
@@ -635,9 +665,16 @@ pprImportedSymbol _ (Platform { platform
|
|
-- the NCG will keep track of all DynamicLinkerLabels it uses
|
|
-- and output each of them using pprImportedSymbol.
|
|
|
|
-pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 }) _
|
|
+pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 _ })
|
|
+ importedLbl
|
|
| osElfTarget (platformOS platform)
|
|
- = empty
|
|
+ = case dynamicLinkerLabelInfo importedLbl of
|
|
+ Just (SymbolPtr, lbl)
|
|
+ -> vcat [
|
|
+ ptext (sLit ".section \".toc\", \"aw\""),
|
|
+ ptext (sLit ".LC_") <> pprCLabel platform lbl <> char ':',
|
|
+ ptext (sLit "\t.quad") <+> pprCLabel platform lbl ]
|
|
+ _ -> empty
|
|
|
|
pprImportedSymbol dflags platform importedLbl
|
|
| osElfTarget (platformOS platform)
|
|
@@ -735,6 +772,28 @@ initializePicBase_ppc ArchPPC OSDarwin p
|
|
where BasicBlock bID insns = entry
|
|
b' = BasicBlock bID (PPC.FETCHPC picReg : insns)
|
|
|
|
+-------------------------------------------------------------------------
|
|
+-- Load TOC into register 2
|
|
+-- PowerPC 64-bit ELF ABI 2.0 requires the address of the callee
|
|
+-- in register 12.
|
|
+-- We pass the label to FETCHTOC and create a .localentry too.
|
|
+-- TODO: Explain this better and refer to ABI spec!
|
|
+{-
|
|
+We would like to do approximately this, but spill slot allocation
|
|
+might be added before the first BasicBlock. That violates the ABI.
|
|
+
|
|
+For now we will emit the prologue code in the pretty printer,
|
|
+which is also what we do for ELF v1.
|
|
+initializePicBase_ppc (ArchPPC_64 ELF_V2) OSLinux picReg
|
|
+ (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
|
|
+ = do
|
|
+ bID <-getUniqueM
|
|
+ return (CmmProc info lab live (ListGraph (b':entry:blocks))
|
|
+ : statics)
|
|
+ where BasicBlock entryID _ = entry
|
|
+ b' = BasicBlock bID [PPC.FETCHTOC picReg lab,
|
|
+ PPC.BCC PPC.ALWAYS entryID]
|
|
+-}
|
|
|
|
initializePicBase_ppc _ _ _ _
|
|
= panic "initializePicBase_ppc: not needed"
|
|
Index: ghc-7.8.4/compiler/nativeGen/PPC/CodeGen.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/PPC/CodeGen.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/PPC/CodeGen.hs
|
|
@@ -77,14 +77,24 @@ cmmTopCodeGen
|
|
cmmTopCodeGen (CmmProc info lab live graph) = do
|
|
let blocks = toBlockListEntryFirst graph
|
|
(nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
|
|
- picBaseMb <- getPicBaseMaybeNat
|
|
dflags <- getDynFlags
|
|
let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
|
|
tops = proc : concat statics
|
|
os = platformOS $ targetPlatform dflags
|
|
- case picBaseMb of
|
|
- Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
|
|
- Nothing -> return tops
|
|
+ arch = platformArch $ targetPlatform dflags
|
|
+ case arch of
|
|
+ ArchPPC -> do
|
|
+ picBaseMb <- getPicBaseMaybeNat
|
|
+ case picBaseMb of
|
|
+ Just picBase -> initializePicBase_ppc arch os picBase tops
|
|
+ Nothing -> return tops
|
|
+ ArchPPC_64 ELF_V1 -> return tops
|
|
+ -- generating function descriptor is handled in
|
|
+ -- pretty printer
|
|
+ ArchPPC_64 ELF_V2 -> return tops
|
|
+ -- generating function prologue is handled in
|
|
+ -- pretty printer
|
|
+ _ -> panic "PPC.cmmTopCodeGen: unknown arch"
|
|
|
|
cmmTopCodeGen (CmmData sec dat) = do
|
|
return [CmmData sec dat] -- no translation, we just use CmmStatic
|
|
@@ -194,26 +204,6 @@ getRegisterReg platform (CmmGlobal mid)
|
|
-- ones which map to a real machine register on this
|
|
-- platform. Hence ...
|
|
|
|
-
|
|
-{-
|
|
-Now, given a tree (the argument to an CmmLoad) that references memory,
|
|
-produce a suitable addressing mode.
|
|
-
|
|
-A Rule of the Game (tm) for Amodes: use of the addr bit must
|
|
-immediately follow use of the code part, since the code part puts
|
|
-values in registers which the addr then refers to. So you can't put
|
|
-anything in between, lest it overwrite some of those registers. If
|
|
-you need to do some other computation between the code part and use of
|
|
-the addr bit, first store the effective address from the amode in a
|
|
-temporary, then do the other computation, and then use the temporary:
|
|
-
|
|
- code
|
|
- LEA amode, tmp
|
|
- ... other computation ...
|
|
- ... (tmp) ...
|
|
--}
|
|
-
|
|
-
|
|
-- | Convert a BlockId to some CmmStatic data
|
|
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
|
|
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
|
|
@@ -261,7 +251,7 @@ data ChildCode64 -- a.k.a "Regist
|
|
-- Reg may be modified
|
|
|
|
|
|
--- | The dual to getAnyReg: compute an expression into a register, but
|
|
+-- | Compute an expression into a register, but
|
|
-- we don't mind which one it is.
|
|
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
|
|
getSomeReg expr = do
|
|
@@ -275,7 +265,7 @@ getSomeReg expr = do
|
|
|
|
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
|
|
getI64Amodes addrTree = do
|
|
- Amode hi_addr addr_code <- getAmode addrTree
|
|
+ Amode hi_addr addr_code <- getAmode D addrTree
|
|
case addrOffset hi_addr 4 of
|
|
Just lo_addr -> return (hi_addr, lo_addr, addr_code)
|
|
Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
|
|
@@ -386,10 +376,12 @@ getRegister e = do dflags <- getDynFlags
|
|
|
|
getRegister' :: DynFlags -> CmmExpr -> NatM Register
|
|
|
|
-getRegister' _ (CmmReg (CmmGlobal PicBaseReg))
|
|
- = do
|
|
- reg <- getPicBaseNat archWordSize
|
|
- return (Fixed archWordSize reg nilOL)
|
|
+getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
|
|
+ | target32Bit (targetPlatform dflags) = do
|
|
+ reg <- getPicBaseNat $ archWordSize (target32Bit (targetPlatform dflags))
|
|
+ return (Fixed (archWordSize (target32Bit (targetPlatform dflags)))
|
|
+ reg nilOL)
|
|
+ | otherwise = return (Fixed II64 toc nilOL)
|
|
|
|
getRegister' dflags (CmmReg reg)
|
|
= return (Fixed (cmmTypeSize (cmmRegType dflags reg))
|
|
@@ -424,30 +416,54 @@ getRegister' dflags (CmmMachOp (MO_SS_Co
|
|
return $ Fixed II32 rlo code
|
|
|
|
getRegister' dflags (CmmLoad mem pk)
|
|
- | not (isWord64 pk)
|
|
- = do
|
|
+ | not (isWord64 pk) = do
|
|
let platform = targetPlatform dflags
|
|
- Amode addr addr_code <- getAmode mem
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
|
|
addr_code `snocOL` LD size dst addr
|
|
return (Any size code)
|
|
+ | not (target32Bit (targetPlatform dflags)) = do
|
|
+ Amode addr addr_code <- getAmode DS mem
|
|
+ let code dst = addr_code `snocOL` LD II64 dst addr
|
|
+ return (Any II64 code)
|
|
+
|
|
where size = cmmTypeSize pk
|
|
|
|
-- catch simple cases of zero- or sign-extended load
|
|
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
|
|
- Amode addr addr_code <- getAmode mem
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
|
|
|
|
+getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
|
|
+
|
|
-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
|
|
|
|
getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
|
|
- Amode addr addr_code <- getAmode mem
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
|
|
|
|
getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
|
|
- Amode addr addr_code <- getAmode mem
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
|
|
|
|
+getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
|
|
+
|
|
+getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
|
|
+
|
|
+getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
|
|
+
|
|
+getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode D mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
|
|
+
|
|
getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
|
|
= case mop of
|
|
MO_Not rep -> triv_ucode_int rep NOT
|
|
@@ -465,7 +481,16 @@ getRegister' dflags (CmmMachOp mop [x])
|
|
| from == to -> conversionNop (intSize to) x
|
|
|
|
-- narrowing is a nop: we treat the high bits as undefined
|
|
- MO_SS_Conv W32 to -> conversionNop (intSize to) x
|
|
+ MO_SS_Conv W64 to
|
|
+ | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
|
|
+ | otherwise -> conversionNop (intSize to) x
|
|
+ MO_SS_Conv W32 to
|
|
+ | arch32 -> conversionNop (intSize to) x
|
|
+ | otherwise -> case to of
|
|
+ W64 -> triv_ucode_int to (EXTS II32)
|
|
+ W16 -> conversionNop II16 x
|
|
+ W8 -> conversionNop II8 x
|
|
+ _ -> panic "PPC.CodeGen.getRegister: no match"
|
|
MO_SS_Conv W16 W8 -> conversionNop II8 x
|
|
MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
|
|
MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
|
|
@@ -473,7 +498,17 @@ getRegister' dflags (CmmMachOp mop [x])
|
|
MO_UU_Conv from to
|
|
| from == to -> conversionNop (intSize to) x
|
|
-- narrowing is a nop: we treat the high bits as undefined
|
|
- MO_UU_Conv W32 to -> conversionNop (intSize to) x
|
|
+ MO_UU_Conv W64 to
|
|
+ | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
|
|
+ | otherwise -> conversionNop (intSize to) x
|
|
+ MO_UU_Conv W32 to
|
|
+ | arch32 -> conversionNop (intSize to) x
|
|
+ | otherwise ->
|
|
+ case to of
|
|
+ W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
|
|
+ W16 -> conversionNop II16 x
|
|
+ W8 -> conversionNop II8 x
|
|
+ _ -> panic "PPC.CodeGen.getRegister: no match"
|
|
MO_UU_Conv W16 W8 -> conversionNop II8 x
|
|
MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
|
|
MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
|
|
@@ -486,8 +521,9 @@ getRegister' dflags (CmmMachOp mop [x])
|
|
conversionNop new_size expr
|
|
= do e_code <- getRegister' dflags expr
|
|
return (swizzleRegisterRep e_code new_size)
|
|
+ arch32 = target32Bit $ targetPlatform dflags
|
|
|
|
-getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
|
|
+getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
|
|
= case mop of
|
|
MO_F_Eq _ -> condFltReg EQQ x y
|
|
MO_F_Ne _ -> condFltReg NE x y
|
|
@@ -496,18 +532,28 @@ getRegister' _ (CmmMachOp mop [x, y]) --
|
|
MO_F_Lt _ -> condFltReg LTT x y
|
|
MO_F_Le _ -> condFltReg LE x y
|
|
|
|
- MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
|
|
-
|
|
- MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
|
|
-
|
|
- MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
|
|
+ MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+
|
|
+ MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+
|
|
+ MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
|
|
MO_F_Add w -> triv_float w FADD
|
|
MO_F_Sub w -> triv_float w FSUB
|
|
@@ -538,32 +584,53 @@ getRegister' _ (CmmMachOp mop [x, y]) --
|
|
-> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
|
|
_ -> trivialCodeNoImm' (intSize rep) SUBF y x
|
|
|
|
- MO_Mul rep -> trivialCode rep True MULLW x y
|
|
+ MO_Mul rep
|
|
+ | arch32 -> trivialCode rep True MULLW x y
|
|
+ | otherwise -> trivialCode rep True MULLD x y
|
|
|
|
MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
|
|
+ MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
|
|
|
|
- MO_S_MulMayOflo _ -> panic "S_MulMayOflo (rep /= II32): not implemented"
|
|
+ MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
|
|
MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
|
|
|
|
- MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
|
|
-
|
|
- MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
|
|
+ MO_S_Quot rep
|
|
+ | arch32 -> trivialCodeNoImm' (intSize rep) DIVW
|
|
+ (extendSExpr dflags rep x) (extendSExpr dflags rep y)
|
|
+ | otherwise -> trivialCodeNoImm' (intSize rep) DIVD
|
|
+ (extendSExpr dflags rep x) (extendSExpr dflags rep y)
|
|
+ MO_U_Quot rep
|
|
+ | arch32 -> trivialCodeNoImm' (intSize rep) DIVWU
|
|
+ (extendUExpr dflags rep x) (extendUExpr dflags rep y)
|
|
+ | otherwise -> trivialCodeNoImm' (intSize rep) DIVDU
|
|
+ (extendUExpr dflags rep x) (extendUExpr dflags rep y)
|
|
+
|
|
+ MO_S_Rem rep
|
|
+ | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Rem rep
|
|
+ | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
|
|
MO_And rep -> trivialCode rep False AND x y
|
|
MO_Or rep -> trivialCode rep False OR x y
|
|
MO_Xor rep -> trivialCode rep False XOR x y
|
|
|
|
- MO_Shl rep -> trivialCode rep False SLW x y
|
|
- MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
|
|
- MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
|
|
+ MO_Shl rep -> shiftCode rep SL x y
|
|
+ MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
|
|
+ MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
|
|
_ -> panic "PPC.CodeGen.getRegister: no match"
|
|
|
|
where
|
|
triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
|
|
triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
|
|
|
|
+ arch32 = target32Bit $ targetPlatform dflags
|
|
+
|
|
getRegister' _ (CmmLit (CmmInt i rep))
|
|
| Just imm <- makeImmediate rep True i
|
|
= let
|
|
@@ -575,7 +642,7 @@ getRegister' _ (CmmLit (CmmFloat f frep)
|
|
lbl <- getNewLabelNat
|
|
dflags <- getDynFlags
|
|
dynRef <- cmmMakeDynamicReference dflags DataReference lbl
|
|
- Amode addr addr_code <- getAmode dynRef
|
|
+ Amode addr addr_code <- getAmode D dynRef
|
|
let size = floatSize frep
|
|
code dst =
|
|
LDATA ReadOnlyData (Statics lbl
|
|
@@ -584,6 +651,7 @@ getRegister' _ (CmmLit (CmmFloat f frep)
|
|
return (Any size code)
|
|
|
|
getRegister' dflags (CmmLit lit)
|
|
+ | target32Bit (targetPlatform dflags)
|
|
= let rep = cmmLitType dflags lit
|
|
imm = litToImm lit
|
|
code dst = toOL [
|
|
@@ -591,18 +659,46 @@ getRegister' dflags (CmmLit lit)
|
|
ADD dst dst (RIImm (LO imm))
|
|
]
|
|
in return (Any (cmmTypeSize rep) code)
|
|
+ | otherwise
|
|
+ = do lbl <- getNewLabelNat
|
|
+ dflags <- getDynFlags
|
|
+ dynRef <- cmmMakeDynamicReference dflags DataReference lbl
|
|
+ Amode addr addr_code <- getAmode D dynRef
|
|
+ let rep = cmmLitType dflags lit
|
|
+ size = cmmTypeSize rep
|
|
+ code dst =
|
|
+ LDATA ReadOnlyData (Statics lbl
|
|
+ [CmmStaticLit lit])
|
|
+ `consOL` (addr_code `snocOL` LD size dst addr)
|
|
+ return (Any size code)
|
|
|
|
getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
|
|
|
|
-- extend?Rep: wrap integer expression of type rep
|
|
- -- in a conversion to II32
|
|
-extendSExpr :: Width -> CmmExpr -> CmmExpr
|
|
-extendSExpr W32 x = x
|
|
-extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
|
|
-
|
|
-extendUExpr :: Width -> CmmExpr -> CmmExpr
|
|
-extendUExpr W32 x = x
|
|
-extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
|
|
+ -- in a conversion to II32 or II64 resp.
|
|
+extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
|
|
+extendSExpr dflags W32 x
|
|
+ | target32Bit (targetPlatform dflags) = x
|
|
+
|
|
+extendSExpr dflags W64 x
|
|
+ | not (target32Bit (targetPlatform dflags)) = x
|
|
+
|
|
+extendSExpr dflags rep x =
|
|
+ let size = if target32Bit $ targetPlatform dflags
|
|
+ then W32
|
|
+ else W64
|
|
+ in CmmMachOp (MO_SS_Conv rep size) [x]
|
|
+
|
|
+extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
|
|
+extendUExpr dflags W32 x
|
|
+ | target32Bit (targetPlatform dflags) = x
|
|
+extendUExpr dflags W64 x
|
|
+ | not (target32Bit (targetPlatform dflags)) = x
|
|
+extendUExpr dflags rep x =
|
|
+ let size = if target32Bit $ targetPlatform dflags
|
|
+ then W32
|
|
+ else W64
|
|
+ in CmmMachOp (MO_UU_Conv rep size) [x]
|
|
|
|
-- -----------------------------------------------------------------------------
|
|
-- The 'Amode' type: Memory addressing modes passed up the tree.
|
|
@@ -628,26 +724,68 @@ temporary, then do the other computation
|
|
... (tmp) ...
|
|
-}
|
|
|
|
-getAmode :: CmmExpr -> NatM Amode
|
|
-getAmode tree@(CmmRegOff _ _) = do dflags <- getDynFlags
|
|
- getAmode (mangleIndexTree dflags tree)
|
|
+data InstrForm = D | DS
|
|
|
|
-getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
|
|
+getAmode :: InstrForm -> CmmExpr -> NatM Amode
|
|
+getAmode inf tree@(CmmRegOff _ _)
|
|
+ = do dflags <- getDynFlags
|
|
+ getAmode inf (mangleIndexTree dflags tree)
|
|
+
|
|
+getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
|
|
| Just off <- makeImmediate W32 True (-i)
|
|
= do
|
|
(reg, code) <- getSomeReg x
|
|
return (Amode (AddrRegImm reg off) code)
|
|
|
|
|
|
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
|
|
+getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
|
|
| Just off <- makeImmediate W32 True i
|
|
= do
|
|
(reg, code) <- getSomeReg x
|
|
return (Amode (AddrRegImm reg off) code)
|
|
|
|
+getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True (-i)
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ return (Amode (AddrRegImm reg off) code)
|
|
+
|
|
+
|
|
+getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True i
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ return (Amode (AddrRegImm reg off) code)
|
|
+
|
|
+getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True (-i)
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ (reg', off', code') <-
|
|
+ if i `mod` 4 == 0
|
|
+ then do return (reg, off, code)
|
|
+ else do
|
|
+ tmp <- getNewRegNat II64
|
|
+ return (tmp, ImmInt 0,
|
|
+ code `snocOL` ADD tmp reg (RIImm off))
|
|
+ return (Amode (AddrRegImm reg' off') code')
|
|
+
|
|
+getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True i
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ (reg', off', code') <-
|
|
+ if i `mod` 4 == 0
|
|
+ then do return (reg, off, code)
|
|
+ else do
|
|
+ tmp <- getNewRegNat II64
|
|
+ return (tmp, ImmInt 0,
|
|
+ code `snocOL` ADD tmp reg (RIImm off))
|
|
+ return (Amode (AddrRegImm reg' off') code')
|
|
+
|
|
-- optimize addition with 32-bit immediate
|
|
-- (needed for PIC)
|
|
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
|
|
+getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
|
|
= do
|
|
tmp <- getNewRegNat II32
|
|
(src, srcCode) <- getSomeReg x
|
|
@@ -655,20 +793,40 @@ getAmode (CmmMachOp (MO_Add W32) [x, Cmm
|
|
code = srcCode `snocOL` ADDIS tmp src (HA imm)
|
|
return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
|
|
-getAmode (CmmLit lit)
|
|
+getAmode _ (CmmLit lit)
|
|
= do
|
|
- tmp <- getNewRegNat II32
|
|
- let imm = litToImm lit
|
|
- code = unitOL (LIS tmp (HA imm))
|
|
- return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+ dflags <- getDynFlags
|
|
+ case platformArch $ targetPlatform dflags of
|
|
+ ArchPPC -> do
|
|
+ tmp <- getNewRegNat II32
|
|
+ let imm = litToImm lit
|
|
+ code = unitOL (LIS tmp (HA imm))
|
|
+ return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+ _ -> do -- TODO: Load from TOC,
|
|
+ -- see getRegister' _ (CmmLit lit)
|
|
+ tmp <- getNewRegNat II64
|
|
+ let imm = litToImm lit
|
|
+ code = toOL [
|
|
+ LIS tmp (HIGHESTA imm),
|
|
+ OR tmp tmp (RIImm (HIGHERA imm)),
|
|
+ SL II64 tmp tmp (RIImm (ImmInt 32)),
|
|
+ ORIS tmp tmp (HA imm)
|
|
+ ]
|
|
+ return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+
|
|
+getAmode _ (CmmMachOp (MO_Add W32) [x, y])
|
|
+ = do
|
|
+ (regX, codeX) <- getSomeReg x
|
|
+ (regY, codeY) <- getSomeReg y
|
|
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
|
|
|
|
-getAmode (CmmMachOp (MO_Add W32) [x, y])
|
|
+getAmode _ (CmmMachOp (MO_Add W64) [x, y])
|
|
= do
|
|
(regX, codeX) <- getSomeReg x
|
|
(regY, codeY) <- getSomeReg y
|
|
return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
|
|
|
|
-getAmode other
|
|
+getAmode _ other
|
|
= do
|
|
(reg, code) <- getSomeReg other
|
|
let
|
|
@@ -676,7 +834,6 @@ getAmode other
|
|
return (Amode (AddrRegImm reg off) code)
|
|
|
|
|
|
-
|
|
-- The 'CondCode' type: Condition codes passed up the tree.
|
|
data CondCode
|
|
= CondCode Bool Cond InstrBlock
|
|
@@ -686,10 +843,12 @@ data CondCode
|
|
getCondCode :: CmmExpr -> NatM CondCode
|
|
|
|
-- almost the same as everywhere else - but we need to
|
|
--- extend small integers to 32 bit first
|
|
+-- extend small integers to 32 bit or 64 bit first
|
|
|
|
getCondCode (CmmMachOp mop [x, y])
|
|
- = case mop of
|
|
+ = do
|
|
+ dflags <- getDynFlags
|
|
+ case mop of
|
|
MO_F_Eq W32 -> condFltCode EQQ x y
|
|
MO_F_Ne W32 -> condFltCode NE x y
|
|
MO_F_Gt W32 -> condFltCode GTT x y
|
|
@@ -704,18 +863,28 @@ getCondCode (CmmMachOp mop [x, y])
|
|
MO_F_Lt W64 -> condFltCode LTT x y
|
|
MO_F_Le W64 -> condFltCode LE x y
|
|
|
|
- MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
|
|
-
|
|
- MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
|
|
-
|
|
- MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
|
|
+ MO_Eq rep -> condIntCode EQQ (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_Ne rep -> condIntCode NE (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+
|
|
+ MO_S_Gt rep -> condIntCode GTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Ge rep -> condIntCode GE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Lt rep -> condIntCode LTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Le rep -> condIntCode LE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+
|
|
+ MO_U_Gt rep -> condIntCode GU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Ge rep -> condIntCode GEU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Lt rep -> condIntCode LU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Le rep -> condIntCode LEU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
|
|
_ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
|
|
|
|
@@ -729,21 +898,24 @@ getCondCode _ = panic "getCondCode(2)(po
|
|
condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
|
|
|
|
-- ###FIXME: I16 and I8!
|
|
+-- TODO: Is this still an issue? All arguments are extend?Expr'd.
|
|
condIntCode cond x (CmmLit (CmmInt y rep))
|
|
| Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
|
|
= do
|
|
(src1, code) <- getSomeReg x
|
|
- let
|
|
+ dflags <- getDynFlags
|
|
+ let size = archWordSize $ target32Bit $ targetPlatform dflags
|
|
code' = code `snocOL`
|
|
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
|
|
+ (if condUnsigned cond then CMPL else CMP) size src1 (RIImm src2)
|
|
return (CondCode False cond code')
|
|
|
|
condIntCode cond x y = do
|
|
(src1, code1) <- getSomeReg x
|
|
(src2, code2) <- getSomeReg y
|
|
- let
|
|
+ dflags <- getDynFlags
|
|
+ let size = archWordSize $ target32Bit $ targetPlatform dflags
|
|
code' = code1 `appOL` code2 `snocOL`
|
|
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
|
|
+ (if condUnsigned cond then CMPL else CMP) size src1 (RIReg src2)
|
|
return (CondCode False cond code')
|
|
|
|
condFltCode cond x y = do
|
|
@@ -781,7 +953,9 @@ assignReg_FltCode :: Size -> CmmReg ->
|
|
|
|
assignMem_IntCode pk addr src = do
|
|
(srcReg, code) <- getSomeReg src
|
|
- Amode dstAddr addr_code <- getAmode addr
|
|
+ Amode dstAddr addr_code <- case pk of
|
|
+ II64 -> getAmode DS addr
|
|
+ _ -> getAmode D addr
|
|
return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
|
|
|
|
-- dst is a reg, but src could be anything
|
|
@@ -809,9 +983,42 @@ genJump (CmmLit (CmmLabel lbl))
|
|
|
|
genJump tree
|
|
= do
|
|
+ dflags <- getDynFlags
|
|
+ let platform = targetPlatform dflags
|
|
+ case platformOS platform of
|
|
+ OSLinux -> case platformArch platform of
|
|
+ ArchPPC -> genJump' tree GCPLinux
|
|
+ ArchPPC_64 ELF_V1 -> genJump' tree (GCPLinux64ELF 1)
|
|
+ ArchPPC_64 ELF_V2 -> genJump' tree (GCPLinux64ELF 2)
|
|
+ _ -> panic "PPC.CodeGen.genJump: Unknown Linux"
|
|
+ OSDarwin -> genJump' tree GCPDarwin
|
|
+ _ -> panic "PPC.CodeGen.genJump: not defined for this os"
|
|
+
|
|
+
|
|
+genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
|
|
+
|
|
+genJump' tree (GCPLinux64ELF 1)
|
|
+ = do
|
|
(target,code) <- getSomeReg tree
|
|
- return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
|
|
+ return (code
|
|
+ `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
|
|
+ `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
|
|
+ `snocOL` MTCTR r11
|
|
+ `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
|
|
+ `snocOL` BCTR [] Nothing)
|
|
+
|
|
+genJump' tree (GCPLinux64ELF 2)
|
|
+ = do
|
|
+ (target,code) <- getSomeReg tree
|
|
+ return (code
|
|
+ `snocOL` MR r12 target
|
|
+ `snocOL` MTCTR r12
|
|
+ `snocOL` BCTR [] Nothing)
|
|
|
|
+genJump' tree _
|
|
+ = do
|
|
+ (target,code) <- getSomeReg tree
|
|
+ return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
|
|
|
|
-- -----------------------------------------------------------------------------
|
|
-- Unconditional branches
|
|
@@ -867,11 +1074,18 @@ genCCall target dest_regs argsAndHints
|
|
= do dflags <- getDynFlags
|
|
let platform = targetPlatform dflags
|
|
case platformOS platform of
|
|
- OSLinux -> genCCall' dflags GCPLinux target dest_regs argsAndHints
|
|
- OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints
|
|
- _ -> panic "PPC.CodeGen.genCCall: not defined for this os"
|
|
+ OSLinux -> case platformArch platform of
|
|
+ ArchPPC -> genCCall' dflags GCPLinux
|
|
+ target dest_regs argsAndHints
|
|
+ ArchPPC_64 ELF_V1 -> genCCall' dflags (GCPLinux64ELF 1)
|
|
+ target dest_regs argsAndHints
|
|
+ ArchPPC_64 ELF_V2 -> genCCall' dflags (GCPLinux64ELF 2)
|
|
+ target dest_regs argsAndHints
|
|
+ _ -> panic "PPC.CodeGen.genCCall: Unknown Linux"
|
|
+ OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints
|
|
+ _ -> panic "PPC.CodeGen.genCCall: not defined for this os"
|
|
|
|
-data GenCCallPlatform = GCPLinux | GCPDarwin
|
|
+data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF Int
|
|
|
|
genCCall'
|
|
:: DynFlags
|
|
@@ -910,7 +1124,11 @@ genCCall'
|
|
* The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
|
|
PowerPC Linux does not agree, so neither do we.
|
|
|
|
- According to both conventions, The parameter area should be part of the
|
|
+ PowerPC 64 Linux uses the System V Release 4 Calling Convention for
|
|
+ 64-bit PowerPC. It is specified in
|
|
+ "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".
|
|
+
|
|
+ According to all conventions, the parameter area should be part of the
|
|
caller's stack frame, allocated in the caller's prologue code (large enough
|
|
to hold the parameter lists for all called routines). The NCG already
|
|
uses the stack for register spilling, leaving 64 bytes free at the top.
|
|
@@ -949,39 +1167,66 @@ genCCall' dflags gcp target dest_regs ar
|
|
PrimTarget mop -> outOfLineMachOp mop
|
|
|
|
let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
|
|
- codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
|
|
+ `appOL` toc_before
|
|
+ codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
|
|
+ `appOL` moveResult reduceToFF32
|
|
|
|
case labelOrExpr of
|
|
- Left lbl -> do
|
|
+ Left lbl -> do -- the linker does all the work for us
|
|
return ( codeBefore
|
|
`snocOL` BL lbl usedRegs
|
|
`appOL` codeAfter)
|
|
- Right dyn -> do
|
|
+ Right dyn -> do -- implement call through function pointer
|
|
(dynReg, dynCode) <- getSomeReg dyn
|
|
- return ( dynCode
|
|
- `snocOL` MTCTR dynReg
|
|
- `appOL` codeBefore
|
|
- `snocOL` BCTRL usedRegs
|
|
- `appOL` codeAfter)
|
|
+ case gcp of
|
|
+ GCPLinux64ELF 1 -> return ( dynCode
|
|
+ `appOL` codeBefore
|
|
+ `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
|
|
+ `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
|
|
+ `snocOL` MTCTR r11
|
|
+ `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
|
|
+ `snocOL` BCTRL usedRegs
|
|
+ `appOL` codeAfter)
|
|
+ GCPLinux64ELF 2 -> return ( dynCode
|
|
+ `appOL` codeBefore
|
|
+ `snocOL` MR r12 dynReg
|
|
+ `snocOL` MTCTR r12
|
|
+ `snocOL` BCTRL usedRegs
|
|
+ `appOL` codeAfter)
|
|
+ _ -> return ( dynCode
|
|
+ `snocOL` MTCTR dynReg
|
|
+ `appOL` codeBefore
|
|
+ `snocOL` BCTRL usedRegs
|
|
+ `appOL` codeAfter)
|
|
where
|
|
platform = targetPlatform dflags
|
|
|
|
uses_pic_base_implicitly = do
|
|
-- See Note [implicit register in PPC PIC code]
|
|
-- on why we claim to use PIC register here
|
|
- when (gopt Opt_PIC dflags) $ do
|
|
- _ <- getPicBaseNat archWordSize
|
|
+ when (gopt Opt_PIC dflags && target32Bit platform) $ do
|
|
+ _ <- getPicBaseNat $ archWordSize True
|
|
return ()
|
|
|
|
initialStackOffset = case gcp of
|
|
- GCPDarwin -> 24
|
|
- GCPLinux -> 8
|
|
+ GCPDarwin -> 24
|
|
+ GCPLinux -> 8
|
|
+ GCPLinux64ELF 1 -> 48
|
|
+ GCPLinux64ELF 2 -> 32
|
|
+ _ -> panic "genCall': unknown calling convention"
|
|
-- size of linkage area + size of arguments, in bytes
|
|
stackDelta finalStack = case gcp of
|
|
GCPDarwin ->
|
|
roundTo 16 $ (24 +) $ max 32 $ sum $
|
|
map (widthInBytes . typeWidth) argReps
|
|
GCPLinux -> roundTo 16 finalStack
|
|
+ GCPLinux64ELF 1 ->
|
|
+ roundTo 16 $ (48 +) $ max 64 $ sum $
|
|
+ map (widthInBytes . typeWidth) argReps
|
|
+ GCPLinux64ELF 2 ->
|
|
+ roundTo 16 $ (32 +) $ max 64 $ sum $
|
|
+ map (widthInBytes . typeWidth) argReps
|
|
+ _ -> panic "genCall': unknown calling conv."
|
|
|
|
-- need to remove alignment information
|
|
args | PrimTarget mop <- target,
|
|
@@ -998,14 +1243,34 @@ genCCall' dflags gcp target dest_regs ar
|
|
roundTo a x | x `mod` a == 0 = x
|
|
| otherwise = x + a - (x `mod` a)
|
|
|
|
+ spSize = if target32Bit platform then II32 else II64
|
|
+
|
|
move_sp_down finalStack
|
|
| delta > 64 =
|
|
- toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
|
|
+ toOL [STU spSize sp (AddrRegImm sp (ImmInt (-delta))),
|
|
DELTA (-delta)]
|
|
| otherwise = nilOL
|
|
where delta = stackDelta finalStack
|
|
+ toc_before = case gcp of
|
|
+ GCPLinux64ELF 1 -> unitOL $ ST spSize toc (AddrRegImm sp (ImmInt 40))
|
|
+ GCPLinux64ELF 2 -> unitOL $ ST spSize toc (AddrRegImm sp (ImmInt 24))
|
|
+ _ -> nilOL
|
|
+ toc_after labelOrExpr = case gcp of
|
|
+ GCPLinux64ELF 1 -> case labelOrExpr of
|
|
+ Left _ -> toOL [ NOP ]
|
|
+ Right _ -> toOL [ LD spSize toc
|
|
+ (AddrRegImm sp
|
|
+ (ImmInt 40))
|
|
+ ]
|
|
+ GCPLinux64ELF 2 -> case labelOrExpr of
|
|
+ Left _ -> toOL [ NOP ]
|
|
+ Right _ -> toOL [ LD spSize toc
|
|
+ (AddrRegImm sp
|
|
+ (ImmInt 24))
|
|
+ ]
|
|
+ _ -> nilOL
|
|
move_sp_up finalStack
|
|
- | delta > 64 =
|
|
+ | delta > 64 = -- TODO: fix-up stack back-chain
|
|
toOL [ADD sp sp (RIImm (ImmInt delta)),
|
|
DELTA 0]
|
|
| otherwise = nilOL
|
|
@@ -1014,7 +1279,8 @@ genCCall' dflags gcp target dest_regs ar
|
|
|
|
passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
|
|
passArguments ((arg,arg_ty):args) gprs fprs stackOffset
|
|
- accumCode accumUsed | isWord64 arg_ty =
|
|
+ accumCode accumUsed | isWord64 arg_ty
|
|
+ && target32Bit (targetPlatform dflags) =
|
|
do
|
|
ChildCode64 code vr_lo <- iselExpr64 arg
|
|
let vr_hi = getHiVRegFromLo vr_lo
|
|
@@ -1052,6 +1318,7 @@ genCCall' dflags gcp target dest_regs ar
|
|
_ -> -- only one or no regs left
|
|
passArguments args [] fprs (stackOffset'+8)
|
|
stackCode accumUsed
|
|
+ GCPLinux64ELF _ -> panic "passArguments: 32 bit code"
|
|
|
|
passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
|
|
| reg : _ <- regs = do
|
|
@@ -1063,8 +1330,10 @@ genCCall' dflags gcp target dest_regs ar
|
|
-- The Darwin ABI requires that we reserve
|
|
-- stack slots for register parameters
|
|
GCPDarwin -> stackOffset + stackBytes
|
|
- -- ... the SysV ABI doesn't.
|
|
+ -- ... the SysV ABI 32-bit doesn't.
|
|
GCPLinux -> stackOffset
|
|
+ -- ... but SysV ABI 64-bit does.
|
|
+ GCPLinux64ELF _ -> stackOffset + stackBytes
|
|
passArguments args
|
|
(drop nGprs gprs)
|
|
(drop nFprs fprs)
|
|
@@ -1092,6 +1361,11 @@ genCCall' dflags gcp target dest_regs ar
|
|
roundTo 8 stackOffset
|
|
| otherwise ->
|
|
stackOffset
|
|
+ GCPLinux64ELF _ ->
|
|
+ -- everything on the stack is 8-byte
|
|
+ -- aligned on a 64 bit system
|
|
+ -- (except vector status, not used now)
|
|
+ stackOffset
|
|
stackSlot = AddrRegImm sp (ImmInt stackOffset')
|
|
(nGprs, nFprs, stackBytes, regs)
|
|
= case gcp of
|
|
@@ -1117,6 +1391,18 @@ genCCall' dflags gcp target dest_regs ar
|
|
FF64 -> (0, 1, 8, fprs)
|
|
II64 -> panic "genCCall' passArguments II64"
|
|
FF80 -> panic "genCCall' passArguments FF80"
|
|
+ GCPLinux64ELF _ ->
|
|
+ case cmmTypeSize rep of
|
|
+ II8 -> (1, 0, 8, gprs)
|
|
+ II16 -> (1, 0, 8, gprs)
|
|
+ II32 -> (1, 0, 8, gprs)
|
|
+ II64 -> (1, 0, 8, gprs)
|
|
+ -- The ELFv1 ABI requires that we skip a
|
|
+ -- corresponding number of GPRs when we use
|
|
+ -- the FPRs.
|
|
+ FF32 -> (1, 1, 8, fprs)
|
|
+ FF64 -> (1, 1, 8, fprs)
|
|
+ FF80 -> panic "genCCall' passArguments FF80"
|
|
|
|
moveResult reduceToFF32 =
|
|
case dest_regs of
|
|
@@ -1124,8 +1410,9 @@ genCCall' dflags gcp target dest_regs ar
|
|
[dest]
|
|
| reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
|
|
| isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
|
|
- | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
|
|
- MR r_dest r4]
|
|
+ | isWord64 rep && target32Bit (targetPlatform dflags)
|
|
+ -> toOL [MR (getHiVRegFromLo r_dest) r3,
|
|
+ MR r_dest r4]
|
|
| otherwise -> unitOL (MR r_dest r3)
|
|
where rep = cmmRegType dflags (CmmLocal dest)
|
|
r_dest = getRegisterReg platform (CmmLocal dest)
|
|
@@ -1201,17 +1488,19 @@ genCCall' dflags gcp target dest_regs ar
|
|
|
|
genSwitch :: DynFlags -> CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
|
|
genSwitch dflags expr ids
|
|
- | gopt Opt_PIC dflags
|
|
+ | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
|
|
= do
|
|
(reg,e_code) <- getSomeReg expr
|
|
- tmp <- getNewRegNat II32
|
|
+ let sz = archWordSize $ target32Bit $ targetPlatform dflags
|
|
+ sha = if target32Bit $ targetPlatform dflags then 2 else 3
|
|
+ tmp <- getNewRegNat sz
|
|
lbl <- getNewLabelNat
|
|
dflags <- getDynFlags
|
|
dynRef <- cmmMakeDynamicReference dflags DataReference lbl
|
|
(tableReg,t_code) <- getSomeReg $ dynRef
|
|
let code = e_code `appOL` t_code `appOL` toOL [
|
|
- SLW tmp reg (RIImm (ImmInt 2)),
|
|
- LD II32 tmp (AddrRegReg tableReg tmp),
|
|
+ SL sz tmp reg (RIImm (ImmInt sha)),
|
|
+ LD sz tmp (AddrRegReg tableReg tmp),
|
|
ADD tmp tmp (RIReg tableReg),
|
|
MTCTR tmp,
|
|
BCTR ids (Just lbl)
|
|
@@ -1220,12 +1509,14 @@ genSwitch dflags expr ids
|
|
| otherwise
|
|
= do
|
|
(reg,e_code) <- getSomeReg expr
|
|
- tmp <- getNewRegNat II32
|
|
+ let sz = archWordSize $ target32Bit $ targetPlatform dflags
|
|
+ sha = if target32Bit $ targetPlatform dflags then 2 else 3
|
|
+ tmp <- getNewRegNat sz
|
|
lbl <- getNewLabelNat
|
|
let code = e_code `appOL` toOL [
|
|
- SLW tmp reg (RIImm (ImmInt 2)),
|
|
+ SL sz tmp reg (RIImm (ImmInt 2)),
|
|
ADDIS tmp tmp (HA (ImmCLbl lbl)),
|
|
- LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
|
|
+ LD sz tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
|
|
MTCTR tmp,
|
|
BCTR ids (Just lbl)
|
|
]
|
|
@@ -1235,7 +1526,9 @@ generateJumpTableForInstr :: DynFlags ->
|
|
-> Maybe (NatCmmDecl CmmStatics Instr)
|
|
generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
|
|
let jumpTable
|
|
- | gopt Opt_PIC dflags = map jumpTableEntryRel ids
|
|
+ | (gopt Opt_PIC dflags)
|
|
+ || (not $ target32Bit $ targetPlatform dflags)
|
|
+ = map jumpTableEntryRel ids
|
|
| otherwise = map (jumpTableEntry dflags) ids
|
|
where jumpTableEntryRel Nothing
|
|
= CmmStaticLit (CmmInt 0 (wordWidth dflags))
|
|
@@ -1250,25 +1543,14 @@ generateJumpTableForInstr _ _ = Nothing
|
|
|
|
-- Turn those condition codes into integers now (when they appear on
|
|
-- the right hand side of an assignment).
|
|
---
|
|
--- (If applicable) Do not fill the delay slots here; you will confuse the
|
|
--- register allocator.
|
|
|
|
condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
|
|
|
|
condReg :: NatM CondCode -> NatM Register
|
|
condReg getCond = do
|
|
CondCode _ cond cond_code <- getCond
|
|
+ dflags <- getDynFlags
|
|
let
|
|
-{- code dst = cond_code `appOL` toOL [
|
|
- BCC cond lbl1,
|
|
- LI dst (ImmInt 0),
|
|
- BCC ALWAYS lbl2,
|
|
- NEWBLOCK lbl1,
|
|
- LI dst (ImmInt 1),
|
|
- BCC ALWAYS lbl2,
|
|
- NEWBLOCK lbl2
|
|
- ]-}
|
|
code dst = cond_code
|
|
`appOL` negate_code
|
|
`appOL` toOL [
|
|
@@ -1294,7 +1576,8 @@ condReg getCond = do
|
|
GU -> (1, False)
|
|
_ -> panic "PPC.CodeGen.codeReg: no match"
|
|
|
|
- return (Any II32 code)
|
|
+ size = archWordSize $ target32Bit $ targetPlatform dflags
|
|
+ return (Any size code)
|
|
|
|
condIntReg cond x y = condReg (condIntCode cond x y)
|
|
condFltReg cond x y = condReg (condFltCode cond x y)
|
|
@@ -1363,6 +1646,27 @@ trivialCode rep _ instr x y = do
|
|
let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
|
|
return (Any (intSize rep) code)
|
|
|
|
+shiftCode
|
|
+ :: Width
|
|
+ -> (Size-> Reg -> Reg -> RI -> Instr)
|
|
+ -> CmmExpr
|
|
+ -> CmmExpr
|
|
+ -> NatM Register
|
|
+shiftCode width instr x (CmmLit (CmmInt y _))
|
|
+ | Just imm <- makeImmediate width False y
|
|
+ = do
|
|
+ (src1, code1) <- getSomeReg x
|
|
+ let size = intSize width
|
|
+ let code dst = code1 `snocOL` instr size dst src1 (RIImm imm)
|
|
+ return (Any size code)
|
|
+
|
|
+shiftCode width instr x y = do
|
|
+ (src1, code1) <- getSomeReg x
|
|
+ (src2, code2) <- getSomeReg y
|
|
+ let size = intSize width
|
|
+ let code dst = code1 `appOL` code2 `snocOL` instr size dst src1 (RIReg src2)
|
|
+ return (Any size code)
|
|
+
|
|
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
|
|
-> CmmExpr -> CmmExpr -> NatM Register
|
|
trivialCodeNoImm' size instr x y = do
|
|
@@ -1393,25 +1697,33 @@ trivialUCode rep instr x = do
|
|
remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
|
|
-> CmmExpr -> CmmExpr -> NatM Register
|
|
remainderCode rep div x y = do
|
|
+ dflags <- getDynFlags
|
|
+ let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
|
|
+ else MULLD
|
|
(src1, code1) <- getSomeReg x
|
|
(src2, code2) <- getSomeReg y
|
|
let code dst = code1 `appOL` code2 `appOL` toOL [
|
|
div dst src1 src2,
|
|
- MULLW dst dst (RIReg src2),
|
|
+ mull_instr dst dst (RIReg src2),
|
|
SUBF dst dst src1
|
|
]
|
|
return (Any (intSize rep) code)
|
|
|
|
-
|
|
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
|
|
coerceInt2FP fromRep toRep x = do
|
|
+ dflags <- getDynFlags
|
|
+ let arch = platformArch $ targetPlatform dflags
|
|
+ coerceInt2FP' arch fromRep toRep x
|
|
+
|
|
+coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
|
|
+coerceInt2FP' ArchPPC fromRep toRep x = do
|
|
(src, code) <- getSomeReg x
|
|
lbl <- getNewLabelNat
|
|
itmp <- getNewRegNat II32
|
|
ftmp <- getNewRegNat FF64
|
|
dflags <- getDynFlags
|
|
dynRef <- cmmMakeDynamicReference dflags DataReference lbl
|
|
- Amode addr addr_code <- getAmode dynRef
|
|
+ Amode addr addr_code <- getAmode D dynRef
|
|
let
|
|
code' dst = code `appOL` maybe_exts `appOL` toOL [
|
|
LDATA ReadOnlyData $ Statics lbl
|
|
@@ -1441,8 +1753,46 @@ coerceInt2FP fromRep toRep x = do
|
|
|
|
return (Any (floatSize toRep) code')
|
|
|
|
+-- On an ELF v1 Linux we use the compiler doubleword in the stack frame
|
|
+-- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
|
|
+-- set right before a call and restored right after return from the call.
|
|
+-- So it is fine.
|
|
+coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
|
|
+ (src, code) <- getSomeReg x
|
|
+ dflags <- getDynFlags
|
|
+ let
|
|
+ code' dst = code `appOL` maybe_exts `appOL` toOL [
|
|
+ ST II64 src (spRel dflags 3),
|
|
+ LD FF64 dst (spRel dflags 3),
|
|
+ FCFID dst dst
|
|
+ ] `appOL` maybe_frsp dst
|
|
+
|
|
+ maybe_exts = case fromRep of
|
|
+ W8 -> unitOL $ EXTS II8 src src
|
|
+ W16 -> unitOL $ EXTS II16 src src
|
|
+ W32 -> unitOL $ EXTS II32 src src
|
|
+ W64 -> nilOL
|
|
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
|
|
+
|
|
+ maybe_frsp dst
|
|
+ = case toRep of
|
|
+ W32 -> unitOL $ FRSP dst dst
|
|
+ W64 -> nilOL
|
|
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
|
|
+
|
|
+ return (Any (floatSize toRep) code')
|
|
+
|
|
+coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
|
|
+
|
|
+
|
|
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
|
|
-coerceFP2Int _ toRep x = do
|
|
+coerceFP2Int fromRep toRep x = do
|
|
+ dflags <- getDynFlags
|
|
+ let arch = platformArch $ targetPlatform dflags
|
|
+ coerceFP2Int' arch fromRep toRep x
|
|
+
|
|
+coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
|
|
+coerceFP2Int' ArchPPC _ toRep x = do
|
|
dflags <- getDynFlags
|
|
-- the reps don't really matter: F*->FF64 and II32->I* are no-ops
|
|
(src, code) <- getSomeReg x
|
|
@@ -1457,6 +1807,22 @@ coerceFP2Int _ toRep x = do
|
|
LD II32 dst (spRel dflags 3)]
|
|
return (Any (intSize toRep) code')
|
|
|
|
+coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
|
|
+ dflags <- getDynFlags
|
|
+ -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
|
|
+ (src, code) <- getSomeReg x
|
|
+ tmp <- getNewRegNat FF64
|
|
+ let
|
|
+ code' dst = code `appOL` toOL [
|
|
+ -- convert to int in FP reg
|
|
+ FCTIDZ tmp src,
|
|
+ -- store value (64bit) from FP to compiler word on stack
|
|
+ ST FF64 tmp (spRel dflags 3),
|
|
+ LD II64 dst (spRel dflags 3)]
|
|
+ return (Any (intSize toRep) code')
|
|
+
|
|
+coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
|
|
+
|
|
-- Note [.LCTOC1 in PPC PIC code]
|
|
-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
|
|
-- to make the most of the PPC's 16-bit displacements.
|
|
Index: ghc-7.8.4/compiler/nativeGen/PPC/Instr.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/PPC/Instr.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/PPC/Instr.hs
|
|
@@ -47,8 +47,10 @@ import Data.Maybe (fromMaybe)
|
|
--------------------------------------------------------------------------------
|
|
-- Size of a PPC memory address, in bytes.
|
|
--
|
|
-archWordSize :: Size
|
|
-archWordSize = II32
|
|
+archWordSize :: Bool -> Size
|
|
+archWordSize is32Bit
|
|
+ | is32Bit = II32
|
|
+ | otherwise = II64
|
|
|
|
|
|
-- | Instruction instance for powerpc
|
|
@@ -72,16 +74,18 @@ instance Instruction Instr where
|
|
ppc_mkStackAllocInstr :: Platform -> Int -> Instr
|
|
ppc_mkStackAllocInstr platform amount
|
|
= case platformArch platform of
|
|
- ArchPPC -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
- ADD sp sp (RIImm (ImmInt (-amount)))
|
|
- arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch
|
|
+ ArchPPC -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
+ ADD sp sp (RIImm (ImmInt (-amount)))
|
|
+ ArchPPC_64 _ -> STU II64 sp (AddrRegImm sp (ImmInt (-amount)))
|
|
+ arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch
|
|
|
|
ppc_mkStackDeallocInstr :: Platform -> Int -> Instr
|
|
ppc_mkStackDeallocInstr platform amount
|
|
= case platformArch platform of
|
|
- ArchPPC -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
- ADD sp sp (RIImm (ImmInt amount))
|
|
- arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch
|
|
+ ArchPPC -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
+ ADD sp sp (RIImm (ImmInt amount))
|
|
+ ArchPPC_64 _ -> ADD sp sp (RIImm (ImmInt amount))
|
|
+ arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch
|
|
|
|
--
|
|
-- See note [extra spill slots] in X86/Instr.hs
|
|
@@ -208,9 +212,12 @@ data Instr
|
|
| SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
|
|
| SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1
|
|
| SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1
|
|
+ | MULLD Reg Reg RI
|
|
| MULLW Reg Reg RI
|
|
| DIVW Reg Reg Reg
|
|
+ | DIVD Reg Reg Reg
|
|
| DIVWU Reg Reg Reg
|
|
+ | DIVDU Reg Reg Reg
|
|
|
|
| MULLW_MayOflo Reg Reg Reg
|
|
-- dst = 1 if src1 * src2 overflows
|
|
@@ -218,9 +225,16 @@ data Instr
|
|
-- mullwo. dst, src1, src2
|
|
-- mfxer dst
|
|
-- rlwinm dst, dst, 2, 31,31
|
|
+ | MULLD_MayOflo Reg Reg Reg
|
|
+ -- dst = 1 if src1 * src2 overflows
|
|
+ -- pseudo-instruction; pretty-printed as:
|
|
+ -- mulldo. dst, src1, src2
|
|
+ -- mfxer dst
|
|
+ -- rlwinm dst, dst, 2, 31,31
|
|
|
|
| AND Reg Reg RI -- dst, src1, src2
|
|
| OR Reg Reg RI -- dst, src1, src2
|
|
+ | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2
|
|
| XOR Reg Reg RI -- dst, src1, src2
|
|
| XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
|
|
|
|
@@ -229,9 +243,9 @@ data Instr
|
|
| NEG Reg Reg
|
|
| NOT Reg Reg
|
|
|
|
- | SLW Reg Reg RI -- shift left word
|
|
- | SRW Reg Reg RI -- shift right word
|
|
- | SRAW Reg Reg RI -- shift right arithmetic word
|
|
+ | SL Size Reg Reg RI -- shift left
|
|
+ | SR Size Reg Reg RI -- shift right
|
|
+ | SRA Size Reg Reg RI -- shift right arithmetic
|
|
|
|
| RLWINM Reg Reg Int Int Int -- Rotate Left Word Immediate then AND with Mask
|
|
|
|
@@ -244,6 +258,8 @@ data Instr
|
|
| FCMP Reg Reg
|
|
|
|
| FCTIWZ Reg Reg -- convert to integer word
|
|
+ | FCTIDZ Reg Reg -- convert to integer double word
|
|
+ | FCFID Reg Reg -- convert from integer double word
|
|
| FRSP Reg Reg -- reduce to single precision
|
|
-- (but destination is a FP register)
|
|
|
|
@@ -253,9 +269,13 @@ data Instr
|
|
| MFLR Reg -- move from link register
|
|
| FETCHPC Reg -- pseudo-instruction:
|
|
-- bcl to next insn, mflr reg
|
|
-
|
|
+ | FETCHTOC Reg CLabel -- pseudo-instruction
|
|
+ -- add TOC offset to address in r12
|
|
+ -- print .localentry for label
|
|
| LWSYNC -- memory barrier
|
|
-
|
|
+ | NOP -- no operation, PowerPC 64 bit
|
|
+ -- needs this as place holder to
|
|
+ -- reload TOC pointer
|
|
|
|
-- | Get the registers that are being used by this instruction.
|
|
-- regUsage doesn't need to do any trickery for jumps and such.
|
|
@@ -290,22 +310,28 @@ ppc_regUsageOfInstr platform instr
|
|
SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
+ MULLD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
+ DIVD reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
+ DIVDU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
|
|
MULLW_MayOflo reg1 reg2 reg3
|
|
-> usage ([reg2,reg3], [reg1])
|
|
+ MULLD_MayOflo reg1 reg2 reg3
|
|
+ -> usage ([reg2,reg3], [reg1])
|
|
AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ ORIS reg1 reg2 _ -> usage ([reg2], [reg1])
|
|
XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
|
|
EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
|
|
NEG reg1 reg2 -> usage ([reg2], [reg1])
|
|
NOT reg1 reg2 -> usage ([reg2], [reg1])
|
|
- SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
- SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
- SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ SL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ SR _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ SRA _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1])
|
|
|
|
FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
|
|
@@ -315,10 +341,13 @@ ppc_regUsageOfInstr platform instr
|
|
FNEG r1 r2 -> usage ([r2], [r1])
|
|
FCMP r1 r2 -> usage ([r1,r2], [])
|
|
FCTIWZ r1 r2 -> usage ([r2], [r1])
|
|
+ FCTIDZ r1 r2 -> usage ([r2], [r1])
|
|
+ FCFID r1 r2 -> usage ([r2], [r1])
|
|
FRSP r1 r2 -> usage ([r2], [r1])
|
|
MFCR reg -> usage ([], [reg])
|
|
MFLR reg -> usage ([], [reg])
|
|
FETCHPC reg -> usage ([], [reg])
|
|
+ FETCHTOC reg _ -> usage ([], [reg])
|
|
_ -> noUsage
|
|
where
|
|
usage (src, dst) = RU (filter (interesting platform) src)
|
|
@@ -367,21 +396,27 @@ ppc_patchRegsOfInstr instr env
|
|
SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3)
|
|
SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3)
|
|
SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3)
|
|
+ MULLD reg1 reg2 ri -> MULLD (env reg1) (env reg2) (fixRI ri)
|
|
MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
|
|
DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3)
|
|
+ DIVD reg1 reg2 reg3 -> DIVD (env reg1) (env reg2) (env reg3)
|
|
DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3)
|
|
+ DIVDU reg1 reg2 reg3 -> DIVDU (env reg1) (env reg2) (env reg3)
|
|
MULLW_MayOflo reg1 reg2 reg3
|
|
-> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
|
|
+ MULLD_MayOflo reg1 reg2 reg3
|
|
+ -> MULLD_MayOflo (env reg1) (env reg2) (env reg3)
|
|
AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
|
|
OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
|
|
+ ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm
|
|
XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
|
|
XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
|
|
EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
|
|
NEG reg1 reg2 -> NEG (env reg1) (env reg2)
|
|
NOT reg1 reg2 -> NOT (env reg1) (env reg2)
|
|
- SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri)
|
|
- SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri)
|
|
- SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
|
|
+ SL sz reg1 reg2 ri -> SL sz (env reg1) (env reg2) (fixRI ri)
|
|
+ SR sz reg1 reg2 ri -> SR sz (env reg1) (env reg2) (fixRI ri)
|
|
+ SRA sz reg1 reg2 ri -> SRA sz (env reg1) (env reg2) (fixRI ri)
|
|
RLWINM reg1 reg2 sh mb me
|
|
-> RLWINM (env reg1) (env reg2) sh mb me
|
|
FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3)
|
|
@@ -391,10 +426,13 @@ ppc_patchRegsOfInstr instr env
|
|
FNEG r1 r2 -> FNEG (env r1) (env r2)
|
|
FCMP r1 r2 -> FCMP (env r1) (env r2)
|
|
FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
|
|
+ FCTIDZ r1 r2 -> FCTIDZ (env r1) (env r2)
|
|
+ FCFID r1 r2 -> FCFID (env r1) (env r2)
|
|
FRSP r1 r2 -> FRSP (env r1) (env r2)
|
|
MFCR reg -> MFCR (env reg)
|
|
MFLR reg -> MFLR (env reg)
|
|
FETCHPC reg -> FETCHPC (env reg)
|
|
+ FETCHTOC reg lab -> FETCHTOC (env reg) lab
|
|
_ -> instr
|
|
where
|
|
fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
|
|
@@ -457,11 +495,14 @@ ppc_mkSpillInstr
|
|
ppc_mkSpillInstr dflags reg delta slot
|
|
= let platform = targetPlatform dflags
|
|
off = spillSlotToOffset slot
|
|
+ arch = platformArch platform
|
|
in
|
|
let sz = case targetClassOfReg platform reg of
|
|
- RcInteger -> II32
|
|
+ RcInteger -> case arch of
|
|
+ ArchPPC -> II32
|
|
+ _ -> II64
|
|
RcDouble -> FF64
|
|
- _ -> panic "PPC.Instr.mkSpillInstr: no match"
|
|
+ _ -> panic "PPC.Instr.mkSpillInstr: no match"
|
|
in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
|
|
|
|
@@ -475,9 +516,12 @@ ppc_mkLoadInstr
|
|
ppc_mkLoadInstr dflags reg delta slot
|
|
= let platform = targetPlatform dflags
|
|
off = spillSlotToOffset slot
|
|
+ arch = platformArch platform
|
|
in
|
|
let sz = case targetClassOfReg platform reg of
|
|
- RcInteger -> II32
|
|
+ RcInteger -> case arch of
|
|
+ ArchPPC -> II32
|
|
+ _ -> II64
|
|
RcDouble -> FF64
|
|
_ -> panic "PPC.Instr.mkLoadInstr: no match"
|
|
in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
@@ -498,8 +542,8 @@ maxSpillSlots dflags
|
|
-- = 0 -- useful for testing allocMoreStack
|
|
|
|
-- | The number of bytes that the stack pointer should be aligned
|
|
--- to. This is 16 both on PPC32 and PPC64 at least for Darwin, but I'm
|
|
--- not sure this is correct for other OSes.
|
|
+-- to. This is 16 both on PPC32 and PPC64 at least for Darwin, and
|
|
+-- Linux (see ELF processor specific supplements).
|
|
stackAlign :: Int
|
|
stackAlign = 16
|
|
|
|
Index: ghc-7.8.4/compiler/nativeGen/PPC/Ppr.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/PPC/Ppr.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/PPC/Ppr.hs
|
|
@@ -39,11 +39,11 @@ import Unique ( pprUnique
|
|
import Platform
|
|
import FastString
|
|
import Outputable
|
|
+import DynFlags
|
|
|
|
import Data.Word
|
|
import Data.Bits
|
|
|
|
-
|
|
-- -----------------------------------------------------------------------------
|
|
-- Printing this stuff out
|
|
|
|
@@ -54,12 +54,17 @@ pprNatCmmDecl (CmmData section dats) =
|
|
pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
|
|
case topInfoTable proc of
|
|
Nothing ->
|
|
+ sdocWithPlatform $ \platform ->
|
|
case blocks of
|
|
[] -> -- special case for split markers:
|
|
pprLabel lbl
|
|
blocks -> -- special case for code without info table:
|
|
pprSectionHeader Text $$
|
|
- pprLabel lbl $$ -- blocks guaranteed not null, so label needed
|
|
+ (case platformArch platform of
|
|
+ ArchPPC_64 ELF_V1 -> pprFunctionDescriptor lbl
|
|
+ ArchPPC_64 ELF_V2 -> pprFunctionPrologue lbl
|
|
+ _ -> pprLabel lbl) $$ -- blocks guaranteed not null,
|
|
+ -- so label needed
|
|
vcat (map (pprBasicBlock top_info) blocks)
|
|
|
|
Just (Statics info_lbl _) ->
|
|
@@ -86,6 +91,35 @@ pprNatCmmDecl proc@(CmmProc top_info lbl
|
|
else empty)
|
|
|
|
|
|
+pprFunctionDescriptor :: CLabel -> SDoc
|
|
+pprFunctionDescriptor lab = pprGloblDecl lab
|
|
+ $$ text ".section \".opd\",\"aw\""
|
|
+ $$ text ".align 3"
|
|
+ $$ ppr lab <> char ':'
|
|
+ $$ text ".quad ."
|
|
+ <> ppr lab
|
|
+ <> text ",.TOC.@tocbase,0"
|
|
+ $$ text ".previous"
|
|
+ $$ text ".type "
|
|
+ <> ppr lab
|
|
+ <> text ", @function"
|
|
+ $$ char '.'
|
|
+ <> ppr lab
|
|
+ <> char ':'
|
|
+
|
|
+pprFunctionPrologue :: CLabel ->SDoc
|
|
+pprFunctionPrologue lab = pprGloblDecl lab
|
|
+ $$ text ".type "
|
|
+ <> ppr lab
|
|
+ <> text ", @function"
|
|
+ $$ ppr lab <> char ':'
|
|
+ $$ text "0:\taddis\t" <> pprReg toc
|
|
+ <> text ",12,.TOC.-0b@ha"
|
|
+ $$ text "\taddi\t" <> pprReg toc
|
|
+ <> char ',' <> pprReg toc <> text ",.TOC.-0b@l"
|
|
+ $$ text "\t.localentry\t" <> ppr lab
|
|
+ <> text ",.-" <> ppr lab
|
|
+
|
|
pprBasicBlock :: BlockEnv CmmStatics -> NatBasicBlock Instr -> SDoc
|
|
pprBasicBlock info_env (BasicBlock blockid instrs)
|
|
= maybe_infotable $$
|
|
@@ -213,6 +247,7 @@ pprSize x
|
|
II8 -> sLit "b"
|
|
II16 -> sLit "h"
|
|
II32 -> sLit "w"
|
|
+ II64 -> sLit "d"
|
|
FF32 -> sLit "fs"
|
|
FF64 -> sLit "fd"
|
|
_ -> panic "PPC.Ppr.pprSize: no match")
|
|
@@ -262,6 +297,18 @@ pprImm (HA i)
|
|
then hcat [ text "ha16(", pprImm i, rparen ]
|
|
else pprImm i <> text "@ha"
|
|
|
|
+pprImm (HIGHERA i)
|
|
+ = sdocWithPlatform $ \platform ->
|
|
+ if platformOS platform == OSDarwin
|
|
+ then panic "PPC.pprImm: highera not implemented on Darwin"
|
|
+ else pprImm i <> text "@highera"
|
|
+
|
|
+pprImm (HIGHESTA i)
|
|
+ = sdocWithPlatform $ \platform ->
|
|
+ if platformOS platform == OSDarwin
|
|
+ then panic "PPC.pprImm: highesta not implemented on Darwin"
|
|
+ else pprImm i <> text "@highesta"
|
|
+
|
|
|
|
pprAddr :: AddrMode -> SDoc
|
|
pprAddr (AddrRegReg r1 r2)
|
|
@@ -276,17 +323,23 @@ pprSectionHeader :: Section -> SDoc
|
|
pprSectionHeader seg
|
|
= sdocWithPlatform $ \platform ->
|
|
let osDarwin = platformOS platform == OSDarwin
|
|
+ ppc64 = not $ target32Bit platform
|
|
in case seg of
|
|
Text -> ptext (sLit ".text\n.align 2")
|
|
- Data -> ptext (sLit ".data\n.align 2")
|
|
+ Data
|
|
+ | ppc64 -> ptext (sLit ".data\n.align 3")
|
|
+ | otherwise -> ptext (sLit ".data\n.align 2")
|
|
ReadOnlyData
|
|
| osDarwin -> ptext (sLit ".const\n.align 2")
|
|
+ | ppc64 -> ptext (sLit ".section .rodata\n\t.align 3")
|
|
| otherwise -> ptext (sLit ".section .rodata\n\t.align 2")
|
|
RelocatableReadOnlyData
|
|
| osDarwin -> ptext (sLit ".const_data\n.align 2")
|
|
+ | ppc64 -> ptext (sLit ".data\n\t.align 3")
|
|
| otherwise -> ptext (sLit ".data\n\t.align 2")
|
|
UninitialisedData
|
|
| osDarwin -> ptext (sLit ".const_data\n.align 2")
|
|
+ | ppc64 -> ptext (sLit ".section .bss\n\t.align 3")
|
|
| otherwise -> ptext (sLit ".section .bss\n\t.align 2")
|
|
ReadOnlyData16
|
|
| osDarwin -> ptext (sLit ".const\n.align 4")
|
|
@@ -298,32 +351,38 @@ pprSectionHeader seg
|
|
pprDataItem :: CmmLit -> SDoc
|
|
pprDataItem lit
|
|
= sdocWithDynFlags $ \dflags ->
|
|
- vcat (ppr_item (cmmTypeSize $ cmmLitType dflags lit) lit)
|
|
+ vcat (ppr_item (cmmTypeSize $ cmmLitType dflags lit) lit dflags)
|
|
where
|
|
imm = litToImm lit
|
|
+ archPPC_64 dflags = not $ target32Bit $ targetPlatform dflags
|
|
|
|
- ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
|
|
+ ppr_item II8 _ _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
|
|
|
|
- ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
|
|
+ ppr_item II32 _ _ = [ptext (sLit "\t.long\t") <> pprImm imm]
|
|
|
|
- ppr_item FF32 (CmmFloat r _)
|
|
+ ppr_item II64 _ dflags
|
|
+ | archPPC_64 dflags = [ptext (sLit "\t.quad\t") <> pprImm imm]
|
|
+
|
|
+
|
|
+ ppr_item FF32 (CmmFloat r _) _
|
|
= let bs = floatToBytes (fromRational r)
|
|
in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
|
|
|
|
- ppr_item FF64 (CmmFloat r _)
|
|
+ ppr_item FF64 (CmmFloat r _) _
|
|
= let bs = doubleToBytes (fromRational r)
|
|
in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
|
|
|
|
- ppr_item II16 _ = [ptext (sLit "\t.short\t") <> pprImm imm]
|
|
+ ppr_item II16 _ _ = [ptext (sLit "\t.short\t") <> pprImm imm]
|
|
|
|
- ppr_item II64 (CmmInt x _) =
|
|
+ ppr_item II64 (CmmInt x _) dflags
|
|
+ | not(archPPC_64 dflags) =
|
|
[ptext (sLit "\t.long\t")
|
|
<> int (fromIntegral
|
|
(fromIntegral (x `shiftR` 32) :: Word32)),
|
|
ptext (sLit "\t.long\t")
|
|
<> int (fromIntegral (fromIntegral x :: Word32))]
|
|
|
|
- ppr_item _ _
|
|
+ ppr_item _ _ _
|
|
= panic "PPC.Ppr.pprDataItem: no match"
|
|
|
|
|
|
@@ -370,6 +429,7 @@ pprInstr (LD sz reg addr) = hcat [
|
|
II8 -> sLit "bz"
|
|
II16 -> sLit "hz"
|
|
II32 -> sLit "wz"
|
|
+ II64 -> sLit "d"
|
|
FF32 -> sLit "fs"
|
|
FF64 -> sLit "fd"
|
|
_ -> panic "PPC.Ppr.pprInstr: no match"
|
|
@@ -388,6 +448,7 @@ pprInstr (LA sz reg addr) = hcat [
|
|
II8 -> sLit "ba"
|
|
II16 -> sLit "ha"
|
|
II32 -> sLit "wa"
|
|
+ II64 -> sLit "d"
|
|
FF32 -> sLit "fs"
|
|
FF64 -> sLit "fd"
|
|
_ -> panic "PPC.Ppr.pprInstr: no match"
|
|
@@ -556,10 +617,14 @@ pprInstr (ADDE reg1 reg2 reg3) = pprLogi
|
|
pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3)
|
|
pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3)
|
|
pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3)
|
|
+pprInstr (MULLD reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mulld") reg1 reg2 ri
|
|
pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri
|
|
+pprInstr (MULLD reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
|
|
pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
|
|
pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3)
|
|
+pprInstr (DIVD reg1 reg2 reg3) = pprLogic (sLit "divd") reg1 reg2 (RIReg reg3)
|
|
pprInstr (DIVWU reg1 reg2 reg3) = pprLogic (sLit "divwu") reg1 reg2 (RIReg reg3)
|
|
+pprInstr (DIVDU reg1 reg2 reg3) = pprLogic (sLit "divdu") reg1 reg2 (RIReg reg3)
|
|
|
|
pprInstr (MULLW_MayOflo reg1 reg2 reg3) = vcat [
|
|
hcat [ ptext (sLit "\tmullwo\t"), pprReg reg1, ptext (sLit ", "),
|
|
@@ -570,8 +635,17 @@ pprInstr (MULLW_MayOflo reg1 reg2 reg3)
|
|
pprReg reg1, ptext (sLit ", "),
|
|
ptext (sLit "2, 31, 31") ]
|
|
]
|
|
+pprInstr (MULLD_MayOflo reg1 reg2 reg3) = vcat [
|
|
+ hcat [ ptext (sLit "\tmulldo\t"), pprReg reg1, ptext (sLit ", "),
|
|
+ pprReg reg2, ptext (sLit ", "),
|
|
+ pprReg reg3 ],
|
|
+ hcat [ ptext (sLit "\tmfxer\t"), pprReg reg1 ],
|
|
+ hcat [ ptext (sLit "\trlwinm\t"), pprReg reg1, ptext (sLit ", "),
|
|
+ pprReg reg1, ptext (sLit ", "),
|
|
+ ptext (sLit "2, 31, 31") ]
|
|
+ ]
|
|
|
|
- -- for some reason, "andi" doesn't exist.
|
|
+ -- for some reason, "andi" doesn't exist.
|
|
-- we'll use "andi." instead.
|
|
pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
|
|
char '\t',
|
|
@@ -588,6 +662,17 @@ pprInstr (AND reg1 reg2 ri) = pprLogic (
|
|
pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri
|
|
pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri
|
|
|
|
+pprInstr (ORIS reg1 reg2 imm) = hcat [
|
|
+ char '\t',
|
|
+ ptext (sLit "oris"),
|
|
+ char '\t',
|
|
+ pprReg reg1,
|
|
+ ptext (sLit ", "),
|
|
+ pprReg reg2,
|
|
+ ptext (sLit ", "),
|
|
+ pprImm imm
|
|
+ ]
|
|
+
|
|
pprInstr (XORIS reg1 reg2 imm) = hcat [
|
|
char '\t',
|
|
ptext (sLit "xoris"),
|
|
@@ -612,17 +697,33 @@ pprInstr (EXTS sz reg1 reg2) = hcat [
|
|
pprInstr (NEG reg1 reg2) = pprUnary (sLit "neg") reg1 reg2
|
|
pprInstr (NOT reg1 reg2) = pprUnary (sLit "not") reg1 reg2
|
|
|
|
-pprInstr (SLW reg1 reg2 ri) = pprLogic (sLit "slw") reg1 reg2 (limitShiftRI ri)
|
|
+pprInstr (SL sz reg1 reg2 ri) =
|
|
+ let op = case sz of
|
|
+ II32 -> "slw"
|
|
+ II64 -> "sld"
|
|
+ _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
|
|
+ in pprLogic (sLit op) reg1 reg2 (limitShiftRI sz ri)
|
|
|
|
-pprInstr (SRW reg1 reg2 (RIImm (ImmInt i))) | i > 31 || i < 0 =
|
|
+pprInstr (SR II32 reg1 reg2 (RIImm (ImmInt i))) | i > 31 || i < 0 =
|
|
-- Handle the case where we are asked to shift a 32 bit register by
|
|
-- less than zero or more than 31 bits. We convert this into a clear
|
|
-- of the destination register.
|
|
-- Fixes ticket http://ghc.haskell.org/trac/ghc/ticket/5900
|
|
pprInstr (XOR reg1 reg2 (RIReg reg2))
|
|
-pprInstr (SRW reg1 reg2 ri) = pprLogic (sLit "srw") reg1 reg2 (limitShiftRI ri)
|
|
+pprInstr (SR sz reg1 reg2 ri) =
|
|
+ let op = case sz of
|
|
+ II32 -> "srw"
|
|
+ II64 -> "srd"
|
|
+ _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
|
|
+ in pprLogic (sLit op) reg1 reg2 (limitShiftRI sz ri)
|
|
+
|
|
+pprInstr (SRA sz reg1 reg2 ri) =
|
|
+ let op = case sz of
|
|
+ II32 -> "sraw"
|
|
+ II64 -> "srad"
|
|
+ _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
|
|
+ in pprLogic (sLit op) reg1 reg2 (limitShiftRI sz ri)
|
|
|
|
-pprInstr (SRAW reg1 reg2 ri) = pprLogic (sLit "sraw") reg1 reg2 (limitShiftRI ri)
|
|
pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [
|
|
ptext (sLit "\trlwinm\t"),
|
|
pprReg reg1,
|
|
@@ -654,6 +755,8 @@ pprInstr (FCMP reg1 reg2) = hcat [
|
|
]
|
|
|
|
pprInstr (FCTIWZ reg1 reg2) = pprUnary (sLit "fctiwz") reg1 reg2
|
|
+pprInstr (FCTIDZ reg1 reg2) = pprUnary (sLit "fctidz") reg1 reg2
|
|
+pprInstr (FCFID reg1 reg2) = pprUnary (sLit "fcfid") reg1 reg2
|
|
pprInstr (FRSP reg1 reg2) = pprUnary (sLit "frsp") reg1 reg2
|
|
|
|
pprInstr (CRNOR dst src1 src2) = hcat [
|
|
@@ -684,8 +787,22 @@ pprInstr (FETCHPC reg) = vcat [
|
|
hcat [ ptext (sLit "1:\tmflr\t"), pprReg reg ]
|
|
]
|
|
|
|
+pprInstr (FETCHTOC reg lab) = vcat [
|
|
+ hcat [ ptext (sLit "0:\taddis\t"), pprReg reg,
|
|
+ ptext (sLit ",12,.TOC.-0b@ha") ],
|
|
+ hcat [ ptext (sLit "\taddi\t"), pprReg reg,
|
|
+ char ',', pprReg reg,
|
|
+ ptext (sLit ",.TOC.-0b@l") ],
|
|
+ hcat [ ptext (sLit "\t.localentry\t"),
|
|
+ ppr lab,
|
|
+ ptext (sLit ",.-"),
|
|
+ ppr lab]
|
|
+ ]
|
|
+
|
|
pprInstr LWSYNC = ptext (sLit "\tlwsync")
|
|
|
|
+pprInstr NOP = ptext (sLit "\tnop")
|
|
+
|
|
-- pprInstr _ = panic "pprInstr (ppc)"
|
|
|
|
|
|
@@ -739,9 +856,12 @@ pprFSize FF64 = empty
|
|
pprFSize FF32 = char 's'
|
|
pprFSize _ = panic "PPC.Ppr.pprFSize: no match"
|
|
|
|
- -- limit immediate argument for shift instruction to range 0..31
|
|
-limitShiftRI :: RI -> RI
|
|
-limitShiftRI (RIImm (ImmInt i)) | i > 31 || i < 0 =
|
|
+ -- limit immediate argument for shift instruction to range 0..63
|
|
+ -- for 64 bit size and 0..32 otherwise
|
|
+limitShiftRI :: Size -> RI -> RI
|
|
+limitShiftRI II64 (RIImm (ImmInt i)) | i > 63 || i < 0 =
|
|
panic $ "PPC.Ppr: Shift by " ++ show i ++ " bits is not allowed."
|
|
-limitShiftRI x = x
|
|
+limitShiftRI II32 (RIImm (ImmInt i)) | i > 31 || i < 0 =
|
|
+ panic $ "PPC.Ppr: 32 bit: Shift by " ++ show i ++ " bits is not allowed."
|
|
+limitShiftRI _ x = x
|
|
|
|
Index: ghc-7.8.4/compiler/nativeGen/PPC/Regs.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/PPC/Regs.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/PPC/Regs.hs
|
|
@@ -35,7 +35,7 @@ module PPC.Regs (
|
|
fits16Bits,
|
|
makeImmediate,
|
|
fReg,
|
|
- sp, r3, r4, r27, r28, r30,
|
|
+ sp, toc, r3, r4, r11, r12, r27, r28, r30,
|
|
f1, f20, f21,
|
|
|
|
allocatableRegs
|
|
@@ -62,8 +62,8 @@ import FastBool
|
|
import FastTypes
|
|
import Platform
|
|
|
|
-import Data.Word ( Word8, Word16, Word32 )
|
|
-import Data.Int ( Int8, Int16, Int32 )
|
|
+import Data.Word ( Word8, Word16, Word32, Word64 )
|
|
+import Data.Int ( Int8, Int16, Int32, Int64 )
|
|
|
|
|
|
-- squeese functions for the graph allocator -----------------------------------
|
|
@@ -145,6 +145,8 @@ data Imm
|
|
| LO Imm
|
|
| HI Imm
|
|
| HA Imm {- high halfword adjusted -}
|
|
+ | HIGHERA Imm
|
|
+ | HIGHESTA Imm
|
|
|
|
|
|
strImmLit :: String -> Imm
|
|
@@ -267,9 +269,11 @@ fits16Bits x = x >= -32768 && x < 32768
|
|
makeImmediate :: Integral a => Width -> Bool -> a -> Maybe Imm
|
|
makeImmediate rep signed x = fmap ImmInt (toI16 rep signed)
|
|
where
|
|
+ narrow W64 False = fromIntegral (fromIntegral x :: Word64)
|
|
narrow W32 False = fromIntegral (fromIntegral x :: Word32)
|
|
narrow W16 False = fromIntegral (fromIntegral x :: Word16)
|
|
narrow W8 False = fromIntegral (fromIntegral x :: Word8)
|
|
+ narrow W64 True = fromIntegral (fromIntegral x :: Int64)
|
|
narrow W32 True = fromIntegral (fromIntegral x :: Int32)
|
|
narrow W16 True = fromIntegral (fromIntegral x :: Int16)
|
|
narrow W8 True = fromIntegral (fromIntegral x :: Int8)
|
|
@@ -283,6 +287,12 @@ makeImmediate rep signed x = fmap ImmInt
|
|
toI16 W32 False
|
|
| narrowed >= 0 && narrowed < 65536 = Just narrowed
|
|
| otherwise = Nothing
|
|
+ toI16 W64 True
|
|
+ | narrowed >= -32768 && narrowed < 32768 = Just narrowed
|
|
+ | otherwise = Nothing
|
|
+ toI16 W64 False
|
|
+ | narrowed >= 0 && narrowed < 65536 = Just narrowed
|
|
+ | otherwise = Nothing
|
|
toI16 _ _ = Just narrowed
|
|
|
|
|
|
@@ -294,10 +304,13 @@ point registers.
|
|
fReg :: Int -> RegNo
|
|
fReg x = (32 + x)
|
|
|
|
-sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg
|
|
+sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
|
|
sp = regSingle 1
|
|
+toc = regSingle 2
|
|
r3 = regSingle 3
|
|
r4 = regSingle 4
|
|
+r11 = regSingle 11
|
|
+r12 = regSingle 12
|
|
r27 = regSingle 27
|
|
r28 = regSingle 28
|
|
r30 = regSingle 30
|
|
Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
|
|
@@ -111,7 +111,7 @@ trivColorable platform virtualRegSqueeze
|
|
ArchX86_64 -> 5
|
|
ArchPPC -> 16
|
|
ArchSPARC -> 14
|
|
- ArchPPC_64 -> panic "trivColorable ArchPPC_64"
|
|
+ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
|
|
ArchARM _ _ _ -> panic "trivColorable ArchARM"
|
|
ArchAlpha -> panic "trivColorable ArchAlpha"
|
|
ArchMipseb -> panic "trivColorable ArchMipseb"
|
|
@@ -135,7 +135,7 @@ trivColorable platform virtualRegSqueeze
|
|
ArchX86_64 -> 0
|
|
ArchPPC -> 0
|
|
ArchSPARC -> 22
|
|
- ArchPPC_64 -> panic "trivColorable ArchPPC_64"
|
|
+ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
|
|
ArchARM _ _ _ -> panic "trivColorable ArchARM"
|
|
ArchAlpha -> panic "trivColorable ArchAlpha"
|
|
ArchMipseb -> panic "trivColorable ArchMipseb"
|
|
@@ -159,7 +159,7 @@ trivColorable platform virtualRegSqueeze
|
|
ArchX86_64 -> 0
|
|
ArchPPC -> 26
|
|
ArchSPARC -> 11
|
|
- ArchPPC_64 -> panic "trivColorable ArchPPC_64"
|
|
+ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
|
|
ArchARM _ _ _ -> panic "trivColorable ArchARM"
|
|
ArchAlpha -> panic "trivColorable ArchAlpha"
|
|
ArchMipseb -> panic "trivColorable ArchMipseb"
|
|
@@ -183,7 +183,7 @@ trivColorable platform virtualRegSqueeze
|
|
ArchX86_64 -> 10
|
|
ArchPPC -> 0
|
|
ArchSPARC -> 0
|
|
- ArchPPC_64 -> panic "trivColorable ArchPPC_64"
|
|
+ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
|
|
ArchARM _ _ _ -> panic "trivColorable ArchARM"
|
|
ArchAlpha -> panic "trivColorable ArchAlpha"
|
|
ArchMipseb -> panic "trivColorable ArchMipseb"
|
|
Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
|
|
@@ -74,7 +74,7 @@ maxSpillSlots dflags
|
|
ArchPPC -> PPC.Instr.maxSpillSlots dflags
|
|
ArchSPARC -> SPARC.Instr.maxSpillSlots dflags
|
|
ArchARM _ _ _ -> panic "maxSpillSlots ArchARM"
|
|
- ArchPPC_64 -> panic "maxSpillSlots ArchPPC_64"
|
|
+ ArchPPC_64 _ -> PPC.Instr.maxSpillSlots dflags
|
|
ArchAlpha -> panic "maxSpillSlots ArchAlpha"
|
|
ArchMipseb -> panic "maxSpillSlots ArchMipseb"
|
|
ArchMipsel -> panic "maxSpillSlots ArchMipsel"
|
|
Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/Main.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Linear/Main.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/Main.hs
|
|
@@ -207,7 +207,7 @@ linearRegAlloc dflags entry_ids block_li
|
|
ArchSPARC -> linearRegAlloc' dflags (frInitFreeRegs platform :: SPARC.FreeRegs) entry_ids block_live sccs
|
|
ArchPPC -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs
|
|
ArchARM _ _ _ -> panic "linearRegAlloc ArchARM"
|
|
- ArchPPC_64 -> panic "linearRegAlloc ArchPPC_64"
|
|
+ ArchPPC_64 _ -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs
|
|
ArchAlpha -> panic "linearRegAlloc ArchAlpha"
|
|
ArchMipseb -> panic "linearRegAlloc ArchMipseb"
|
|
ArchMipsel -> panic "linearRegAlloc ArchMipsel"
|
|
Index: ghc-7.8.4/compiler/nativeGen/TargetReg.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/TargetReg.hs
|
|
+++ ghc-7.8.4/compiler/nativeGen/TargetReg.hs
|
|
@@ -52,7 +52,7 @@ targetVirtualRegSqueeze platform
|
|
ArchX86_64 -> X86.virtualRegSqueeze
|
|
ArchPPC -> PPC.virtualRegSqueeze
|
|
ArchSPARC -> SPARC.virtualRegSqueeze
|
|
- ArchPPC_64 -> panic "targetVirtualRegSqueeze ArchPPC_64"
|
|
+ ArchPPC_64 _ -> PPC.virtualRegSqueeze
|
|
ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM"
|
|
ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha"
|
|
ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb"
|
|
@@ -68,7 +68,7 @@ targetRealRegSqueeze platform
|
|
ArchX86_64 -> X86.realRegSqueeze
|
|
ArchPPC -> PPC.realRegSqueeze
|
|
ArchSPARC -> SPARC.realRegSqueeze
|
|
- ArchPPC_64 -> panic "targetRealRegSqueeze ArchPPC_64"
|
|
+ ArchPPC_64 _ -> PPC.realRegSqueeze
|
|
ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM"
|
|
ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha"
|
|
ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb"
|
|
@@ -83,7 +83,7 @@ targetClassOfRealReg platform
|
|
ArchX86_64 -> X86.classOfRealReg platform
|
|
ArchPPC -> PPC.classOfRealReg
|
|
ArchSPARC -> SPARC.classOfRealReg
|
|
- ArchPPC_64 -> panic "targetClassOfRealReg ArchPPC_64"
|
|
+ ArchPPC_64 _ -> PPC.classOfRealReg
|
|
ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM"
|
|
ArchAlpha -> panic "targetClassOfRealReg ArchAlpha"
|
|
ArchMipseb -> panic "targetClassOfRealReg ArchMipseb"
|
|
@@ -98,7 +98,7 @@ targetMkVirtualReg platform
|
|
ArchX86_64 -> X86.mkVirtualReg
|
|
ArchPPC -> PPC.mkVirtualReg
|
|
ArchSPARC -> SPARC.mkVirtualReg
|
|
- ArchPPC_64 -> panic "targetMkVirtualReg ArchPPC_64"
|
|
+ ArchPPC_64 _ -> PPC.mkVirtualReg
|
|
ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM"
|
|
ArchAlpha -> panic "targetMkVirtualReg ArchAlpha"
|
|
ArchMipseb -> panic "targetMkVirtualReg ArchMipseb"
|
|
@@ -113,7 +113,7 @@ targetRegDotColor platform
|
|
ArchX86_64 -> X86.regDotColor platform
|
|
ArchPPC -> PPC.regDotColor
|
|
ArchSPARC -> SPARC.regDotColor
|
|
- ArchPPC_64 -> panic "targetRegDotColor ArchPPC_64"
|
|
+ ArchPPC_64 _ -> PPC.regDotColor
|
|
ArchARM _ _ _ -> panic "targetRegDotColor ArchARM"
|
|
ArchAlpha -> panic "targetRegDotColor ArchAlpha"
|
|
ArchMipseb -> panic "targetRegDotColor ArchMipseb"
|
|
Index: ghc-7.8.4/compiler/utils/Platform.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/utils/Platform.hs
|
|
+++ ghc-7.8.4/compiler/utils/Platform.hs
|
|
@@ -8,6 +8,7 @@ module Platform (
|
|
ArmISA(..),
|
|
ArmISAExt(..),
|
|
ArmABI(..),
|
|
+ PPC_64ABI(..),
|
|
|
|
target32Bit,
|
|
isARM,
|
|
@@ -46,6 +47,8 @@ data Arch
|
|
| ArchX86_64
|
|
| ArchPPC
|
|
| ArchPPC_64
|
|
+ { ppc_64ABI :: PPC_64ABI
|
|
+ }
|
|
| ArchSPARC
|
|
| ArchARM
|
|
{ armISA :: ArmISA
|
|
@@ -104,10 +107,18 @@ data ArmABI
|
|
| HARD
|
|
deriving (Read, Show, Eq)
|
|
|
|
+-- | PowerPC 64-bit ABI
|
|
+--
|
|
+data PPC_64ABI
|
|
+ = ELF_V1
|
|
+ | ELF_V2
|
|
+ deriving (Read, Show, Eq)
|
|
+
|
|
+-- | This predicate tells us whether the platform is 32-bit.
|
|
target32Bit :: Platform -> Bool
|
|
target32Bit p = platformWordSize p == 4
|
|
|
|
--- | This predicates tells us whether the OS supports ELF-like shared libraries.
|
|
+-- | This predicate tells us whether the OS supports ELF-like shared libraries.
|
|
osElfTarget :: OS -> Bool
|
|
osElfTarget OSLinux = True
|
|
osElfTarget OSFreeBSD = True
|
|
Index: ghc-7.8.4/configure.ac
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/configure.ac
|
|
+++ ghc-7.8.4/configure.ac
|
|
@@ -288,7 +288,7 @@ AC_SUBST(SOLARIS_BROKEN_SHLD)
|
|
dnl ** Do an unregisterised build?
|
|
dnl --------------------------------------------------------------
|
|
case "$HostArch" in
|
|
- i386|x86_64|powerpc|arm)
|
|
+ i386|x86_64|powerpc|powerpc64|powerpc64le|arm)
|
|
UnregisterisedDefault=NO
|
|
;;
|
|
*)
|
|
Index: ghc-7.8.4/includes/CodeGen.Platform.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/includes/CodeGen.Platform.hs
|
|
+++ ghc-7.8.4/includes/CodeGen.Platform.hs
|
|
@@ -804,6 +804,9 @@ freeReg 1 = fastBool False -- The Stack
|
|
# if !MACHREGS_darwin
|
|
-- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
|
|
freeReg 2 = fastBool False
|
|
+-- TODO: make this conditonal for ppc64 ELF
|
|
+freeReg 13 = fastBool False -- reserved for system thread ID
|
|
+-- TODO: do not reserve r30 in ppc64 ELF
|
|
-- at least linux in -fPIC relies on r30 in PLT stubs
|
|
freeReg 30 = fastBool False
|
|
# endif
|
|
Index: ghc-7.8.4/includes/stg/HaskellMachRegs.h
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/includes/stg/HaskellMachRegs.h
|
|
+++ ghc-7.8.4/includes/stg/HaskellMachRegs.h
|
|
@@ -35,7 +35,8 @@
|
|
|
|
#define MACHREGS_i386 i386_TARGET_ARCH
|
|
#define MACHREGS_x86_64 x86_64_TARGET_ARCH
|
|
-#define MACHREGS_powerpc (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH)
|
|
+#define MACHREGS_powerpc (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH \
|
|
+ || powerpc64le_TARGET_ARCH || rs6000_TARGET_ARCH)
|
|
#define MACHREGS_sparc sparc_TARGET_ARCH
|
|
#define MACHREGS_arm arm_TARGET_ARCH
|
|
#define MACHREGS_darwin darwin_TARGET_OS
|
|
Index: ghc-7.8.4/includes/stg/RtsMachRegs.h
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/includes/stg/RtsMachRegs.h
|
|
+++ ghc-7.8.4/includes/stg/RtsMachRegs.h
|
|
@@ -41,7 +41,8 @@
|
|
|
|
#define MACHREGS_i386 i386_HOST_ARCH
|
|
#define MACHREGS_x86_64 x86_64_HOST_ARCH
|
|
-#define MACHREGS_powerpc (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH)
|
|
+#define MACHREGS_powerpc (powerpc_HOST_ARCH || powerpc64_HOST_ARCH \
|
|
+ || powerpc64le_HOST_ARCH || rs6000_HOST_ARCH)
|
|
#define MACHREGS_sparc sparc_HOST_ARCH
|
|
#define MACHREGS_arm arm_HOST_ARCH
|
|
#define MACHREGS_darwin darwin_HOST_OS
|
|
Index: ghc-7.8.4/includes/stg/SMP.h
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/includes/stg/SMP.h
|
|
+++ ghc-7.8.4/includes/stg/SMP.h
|
|
@@ -124,6 +124,14 @@ xchg(StgPtr p, StgWord w)
|
|
:"=&r" (result)
|
|
:"r" (w), "r" (p)
|
|
);
|
|
+#elif powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH
|
|
+ __asm__ __volatile__ (
|
|
+ "1: ldarx %0, 0, %2\n"
|
|
+ " stdcx. %1, 0, %2\n"
|
|
+ " bne- 1b"
|
|
+ :"=&r" (result)
|
|
+ :"r" (w), "r" (p)
|
|
+ );
|
|
#elif sparc_HOST_ARCH
|
|
result = w;
|
|
__asm__ __volatile__ (
|
|
@@ -190,6 +198,20 @@ cas(StgVolatilePtr p, StgWord o, StgWord
|
|
:"cc", "memory"
|
|
);
|
|
return result;
|
|
+#elif powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH
|
|
+ StgWord result;
|
|
+ __asm__ __volatile__ (
|
|
+ "1: ldarx %0, 0, %3\n"
|
|
+ " cmpd %0, %1\n"
|
|
+ " bne 2f\n"
|
|
+ " stdcx. %2, 0, %3\n"
|
|
+ " bne- 1b\n"
|
|
+ "2:"
|
|
+ :"=&r" (result)
|
|
+ :"r" (o), "r" (n), "r" (p)
|
|
+ :"cc", "memory"
|
|
+ );
|
|
+ return result;
|
|
#elif sparc_HOST_ARCH
|
|
__asm__ __volatile__ (
|
|
"cas [%1], %2, %0"
|
|
@@ -304,7 +326,7 @@ EXTERN_INLINE void
|
|
write_barrier(void) {
|
|
#if i386_HOST_ARCH || x86_64_HOST_ARCH
|
|
__asm__ __volatile__ ("" : : : "memory");
|
|
-#elif powerpc_HOST_ARCH
|
|
+#elif powerpc_HOST_ARCH || powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH
|
|
__asm__ __volatile__ ("lwsync" : : : "memory");
|
|
#elif sparc_HOST_ARCH
|
|
/* Sparc in TSO mode does not require store/store barriers. */
|
|
@@ -326,7 +348,7 @@ store_load_barrier(void) {
|
|
__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
|
|
#elif x86_64_HOST_ARCH
|
|
__asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
|
|
-#elif powerpc_HOST_ARCH
|
|
+#elif powerpc_HOST_ARCH || powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH
|
|
__asm__ __volatile__ ("sync" : : : "memory");
|
|
#elif sparc_HOST_ARCH
|
|
__asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
|
|
@@ -345,7 +367,7 @@ load_load_barrier(void) {
|
|
__asm__ __volatile__ ("" : : : "memory");
|
|
#elif x86_64_HOST_ARCH
|
|
__asm__ __volatile__ ("" : : : "memory");
|
|
-#elif powerpc_HOST_ARCH
|
|
+#elif powerpc_HOST_ARCH || powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH
|
|
__asm__ __volatile__ ("lwsync" : : : "memory");
|
|
#elif sparc_HOST_ARCH
|
|
/* Sparc in TSO mode does not require load/load barriers. */
|
|
Index: ghc-7.8.4/mk/config.mk.in
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/mk/config.mk.in
|
|
+++ ghc-7.8.4/mk/config.mk.in
|
|
@@ -161,9 +161,9 @@ GhcUnregisterised=@Unregisterised@
|
|
# (as well as a C backend)
|
|
#
|
|
# Target platforms supported:
|
|
-# i386, powerpc
|
|
+# i386, powerpc, powerpc64, sparc
|
|
# IOS and AIX are not supported
|
|
-ArchSupportsNCG=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc sparc)))
|
|
+ArchSupportsNCG=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc)))
|
|
OsSupportsNCG=$(strip $(patsubst $(TargetOS_CPP), YES, $(patsubst ios,,$(patsubst aix,,$(TargetOS_CPP)))))
|
|
|
|
GhcWithNativeCodeGen := $(strip\
|
|
@@ -174,7 +174,7 @@ HaveLibDL = @HaveLibDL@
|
|
|
|
# ArchSupportsSMP should be set iff there is support for that arch in
|
|
# includes/stg/SMP.h
|
|
-ArchSupportsSMP=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 sparc powerpc arm)))
|
|
+ArchSupportsSMP=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 sparc powerpc powerpc64 powerpc64le arm)))
|
|
|
|
GhcWithSMP := $(strip $(if $(filter YESNO, $(ArchSupportsSMP)$(GhcUnregisterised)),YES,NO))
|
|
|
|
@@ -182,7 +182,7 @@ GhcWithSMP := $(strip $(if $(filter YESN
|
|
# has support for this OS/ARCH combination.
|
|
|
|
OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 cygwin32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu)))
|
|
-ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc sparc sparc64 arm)))
|
|
+ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm)))
|
|
|
|
ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES"
|
|
GhcWithInterpreter=YES
|
|
@@ -195,7 +195,7 @@ endif
|
|
# (see TABLES_NEXT_TO_CODE in the RTS). Whether we actually compile for
|
|
# TABLES_NEXT_TO_CODE depends on whether we're building unregisterised
|
|
# code or not, which may be decided by options to the compiler later.
|
|
-ifneq "$(findstring $(TargetArch_CPP)X, ia64X powerpc64X)" ""
|
|
+ifneq "$(findstring $(TargetArch_CPP)X, ia64X powerpc64X powerpc64leX)" ""
|
|
GhcEnableTablesNextToCode=NO
|
|
else
|
|
GhcEnableTablesNextToCode=YES
|
|
Index: ghc-7.8.4/rts/StgCRun.c
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/rts/StgCRun.c
|
|
+++ ghc-7.8.4/rts/StgCRun.c
|
|
@@ -662,11 +662,19 @@ StgRunIsImplementedInAssembler(void)
|
|
}
|
|
|
|
#else // linux_HOST_OS
|
|
-#error Only linux support for power64 right now.
|
|
+#error Only Linux support for power64 right now.
|
|
#endif
|
|
|
|
#endif
|
|
|
|
+#ifdef powerpc64le_HOST_ARCH
|
|
+/* -----------------------------------------------------------------------------
|
|
+ PowerPC 64 little endian architecture
|
|
+
|
|
+ Really everything is in assembler, so we don't have to deal with GCC...
|
|
+ -------------------------------------------------------------------------- */
|
|
+#endif
|
|
+
|
|
/* -----------------------------------------------------------------------------
|
|
ARM architecture
|
|
-------------------------------------------------------------------------- */
|
|
Index: ghc-7.8.4/rts/StgCRunAsm.S
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ ghc-7.8.4/rts/StgCRunAsm.S
|
|
@@ -0,0 +1,114 @@
|
|
+#include "ghcconfig.h"
|
|
+#include "rts/Constants.h"
|
|
+#ifdef powerpc64le_HOST_ARCH
|
|
+#ifdef linux_HOST_OS
|
|
+#define STACK_FRAME_SIZE RESERVED_C_STACK_BYTES+304
|
|
+ .file "StgCRun.c"
|
|
+ .abiversion 2
|
|
+ .section ".toc","aw"
|
|
+ .section ".text"
|
|
+ .align 2
|
|
+.globl StgRun
|
|
+.hidden StgRun
|
|
+.type StgRun,@function
|
|
+StgRun:
|
|
+.localentry StgRun,.-StgRun
|
|
+ mflr 0
|
|
+ mr 5, 1
|
|
+ std 0, 16(1)
|
|
+ stdu 1, -(STACK_FRAME_SIZE)(1)
|
|
+ std 2, -296(5)
|
|
+ std 14, -288(5)
|
|
+ std 15, -280(5)
|
|
+ std 16, -272(5)
|
|
+ std 17, -264(5)
|
|
+ std 18, -256(5)
|
|
+ std 19, -248(5)
|
|
+ std 20, -240(5)
|
|
+ std 21, -232(5)
|
|
+ std 22, -224(5)
|
|
+ std 23, -216(5)
|
|
+ std 24, -208(5)
|
|
+ std 25, -200(5)
|
|
+ std 26, -192(5)
|
|
+ std 27, -184(5)
|
|
+ std 28, -176(5)
|
|
+ std 29, -168(5)
|
|
+ std 30, -160(5)
|
|
+ std 31, -152(5)
|
|
+ stfd 14, -144(5)
|
|
+ stfd 15, -136(5)
|
|
+ stfd 16, -128(5)
|
|
+ stfd 17, -120(5)
|
|
+ stfd 18, -112(5)
|
|
+ stfd 19, -104(5)
|
|
+ stfd 20, -96(5)
|
|
+ stfd 21, -88(5)
|
|
+ stfd 22, -80(5)
|
|
+ stfd 23, -72(5)
|
|
+ stfd 24, -64(5)
|
|
+ stfd 25, -56(5)
|
|
+ stfd 26, -48(5)
|
|
+ stfd 27, -40(5)
|
|
+ stfd 28, -32(5)
|
|
+ stfd 29, -24(5)
|
|
+ stfd 30, -16(5)
|
|
+ stfd 31, -8(5)
|
|
+ mr 27, 4
|
|
+ mtctr 3
|
|
+ mr 12, 3
|
|
+ bctr
|
|
+.globl StgReturn
|
|
+.type StgReturn,@function
|
|
+StgReturn:
|
|
+.localentry StgReturn,.-StgReturn
|
|
+ mr 3,14
|
|
+ la 5, STACK_FRAME_SIZE(1)
|
|
+ ld 2, -296(5)
|
|
+ ld 14, -288(5)
|
|
+ ld 15, -280(5)
|
|
+ ld 16, -272(5)
|
|
+ ld 17, -264(5)
|
|
+ ld 18, -256(5)
|
|
+ ld 19, -248(5)
|
|
+ ld 20, -240(5)
|
|
+ ld 21, -232(5)
|
|
+ ld 22, -224(5)
|
|
+ ld 23, -216(5)
|
|
+ ld 24, -208(5)
|
|
+ ld 25, -200(5)
|
|
+ ld 26, -192(5)
|
|
+ ld 27, -184(5)
|
|
+ ld 28, -176(5)
|
|
+ ld 29, -168(5)
|
|
+ ld 30, -160(5)
|
|
+ ld 31, -152(5)
|
|
+ lfd 14, -144(5)
|
|
+ lfd 15, -136(5)
|
|
+ lfd 16, -128(5)
|
|
+ lfd 17, -120(5)
|
|
+ lfd 18, -112(5)
|
|
+ lfd 19, -104(5)
|
|
+ lfd 20, -96(5)
|
|
+ lfd 21, -88(5)
|
|
+ lfd 22, -80(5)
|
|
+ lfd 23, -72(5)
|
|
+ lfd 24, -64(5)
|
|
+ lfd 25, -56(5)
|
|
+ lfd 26, -48(5)
|
|
+ lfd 27, -40(5)
|
|
+ lfd 28, -32(5)
|
|
+ lfd 29, -24(5)
|
|
+ lfd 30, -16(5)
|
|
+ lfd 31, -8(5)
|
|
+ mr 1, 5
|
|
+ ld 0, 16(1)
|
|
+ mtlr 0
|
|
+ blr
|
|
+
|
|
+ .section .note.GNU-stack,"",@progbits
|
|
+#else // linux_HOST_OS
|
|
+#error Only Linux support for power64 little endian right now.
|
|
+#endif
|
|
+
|
|
+#endif
|
|
Index: ghc-7.8.4/rts/ghc.mk
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/rts/ghc.mk
|
|
+++ ghc-7.8.4/rts/ghc.mk
|
|
@@ -45,6 +45,9 @@ ifneq "$(PORTING_HOST)" "YES"
|
|
ifneq "$(findstring $(TargetArch_CPP), i386 powerpc powerpc64)" ""
|
|
rts_S_SRCS += rts/AdjustorAsm.S
|
|
endif
|
|
+ifneq "$(findstring $(TargetArch_CPP), powerpc64le)" ""
|
|
+rts_S_SRCS += rts/StgCRunAsm.S
|
|
+endif
|
|
endif
|
|
|
|
ifeq "$(GhcUnregisterised)" "YES"
|
|
Index: ghc-7.8.4/compiler/nativeGen/AsmCodeGen.lhs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/nativeGen/AsmCodeGen.lhs
|
|
+++ ghc-7.8.4/compiler/nativeGen/AsmCodeGen.lhs
|
|
@@ -166,7 +166,7 @@ nativeCodeGen dflags this_mod h us cmms
|
|
ArchPPC -> nCG' (ppcNcgImpl dflags)
|
|
ArchSPARC -> nCG' (sparcNcgImpl dflags)
|
|
ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
|
|
- ArchPPC_64 -> panic "nativeCodeGen: No NCG for PPC 64"
|
|
+ ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
|
|
ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
|
|
ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
|
|
ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
|
|
Index: ghc-7.8.4/compiler/llvmGen/LlvmCodeGen/Ppr.hs
|
|
===================================================================
|
|
--- ghc-7.8.4.orig/compiler/llvmGen/LlvmCodeGen/Ppr.hs
|
|
+++ ghc-7.8.4/compiler/llvmGen/LlvmCodeGen/Ppr.hs
|
|
@@ -64,7 +64,7 @@ moduleLayout = sdocWithPlatform $ \platf
|
|
Platform { platformArch = ArchX86, platformOS = OSiOS } ->
|
|
text "target datalayout = \"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32\""
|
|
$+$ text "target triple = \"i386-apple-darwin11\""
|
|
- Platform { platformArch = ArchPPC_64 , platformOS = OSLinux } ->
|
|
+ Platform { platformArch = ArchPPC_64 ELF_V1, platformOS = OSLinux } ->
|
|
text "target datalayout = \"E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64\""
|
|
$+$ text "target triple = \"powerpc64-unknown-linux-gnu\""
|
|
_ ->
|