Compile to assembly language directly and not through C on ppc64 (cuts compile time roughly in half). Patch 19 is a backport of the patch that is currently under review upstream for inclusion into v. 7.12 OBS-URL: https://build.opensuse.org/request/show/283228 OBS-URL: https://build.opensuse.org/package/show/devel:languages:haskell/ghc?expand=0&rev=158
1918 lines
84 KiB
Diff
1918 lines
84 KiB
Diff
From 465e2fc248ea0689a9665eb6b40360051c7985d1 Mon Sep 17 00:00:00 2001
|
|
From: Peter Trommler <ptrommler@acm.org>
|
|
Date: Mon, 12 Jan 2015 12:20:31 +0100
|
|
Subject: [PATCH] implement native code generator for ppc64
|
|
|
|
The implementation follows the ELF specification and hence
|
|
is neither optimized for intra-module calls nor does it require
|
|
nasty linker tricks.
|
|
---
|
|
compiler/cmm/CLabel.hs | 6 +
|
|
compiler/nativeGen/AsmCodeGen.lhs | 3 +-
|
|
compiler/nativeGen/PIC.hs | 64 ++-
|
|
compiler/nativeGen/PPC/CodeGen.hs | 614 ++++++++++++++++++++-----
|
|
compiler/nativeGen/PPC/Instr.hs | 87 +++-
|
|
compiler/nativeGen/PPC/Ppr.hs | 149 ++++--
|
|
compiler/nativeGen/PPC/Regs.hs | 21 +-
|
|
compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs | 3 +-
|
|
compiler/nativeGen/RegAlloc/Linear/Main.hs | 3 +-
|
|
compiler/nativeGen/TargetReg.hs | 11 +-
|
|
configure.ac | 2 +-
|
|
includes/CodeGen.Platform.hs | 4 +
|
|
mk/config.mk.in | 4 +-
|
|
13 files changed, 784 insertions(+), 187 deletions(-)
|
|
|
|
Index: ghc-7.8.3/compiler/cmm/CLabel.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/cmm/CLabel.hs
|
|
+++ ghc-7.8.3/compiler/cmm/CLabel.hs
|
|
@@ -1168,6 +1168,12 @@ pprDynamicLinkerAsmLabel platform dllInf
|
|
GotSymbolPtr -> ppr lbl <> text "@gotpcrel"
|
|
GotSymbolOffset -> ppr lbl
|
|
SymbolPtr -> text ".LC_" <> ppr lbl
|
|
+ else if platformArch platform == ArchPPC_64
|
|
+ then case dllInfo of
|
|
+ CodeStub -> text ".Lstub." <> ppr lbl
|
|
+ GotSymbolPtr -> text ".LC_" <> ppr lbl <> text "@toc"
|
|
+ GotSymbolOffset -> ppr lbl
|
|
+ SymbolPtr -> text ".LC_" <> ppr lbl
|
|
else case dllInfo of
|
|
CodeStub -> ppr lbl <> text "@plt"
|
|
SymbolPtr -> text ".LC_" <> ppr lbl
|
|
Index: ghc-7.8.3/compiler/nativeGen/AsmCodeGen.lhs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/AsmCodeGen.lhs
|
|
+++ ghc-7.8.3/compiler/nativeGen/AsmCodeGen.lhs
|
|
@@ -166,7 +166,7 @@ nativeCodeGen dflags this_mod h us cmms
|
|
ArchPPC -> nCG' (ppcNcgImpl dflags)
|
|
ArchSPARC -> nCG' (sparcNcgImpl dflags)
|
|
ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
|
|
- ArchPPC_64 -> panic "nativeCodeGen: No NCG for PPC 64"
|
|
+ ArchPPC_64 -> nCG' (ppcNcgImpl dflags)
|
|
ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
|
|
ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
|
|
ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
|
|
Index: ghc-7.8.3/compiler/nativeGen/PIC.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/PIC.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/PIC.hs
|
|
@@ -159,6 +159,13 @@ cmmMakePicReference dflags lbl
|
|
| OSMinGW32 <- platformOS $ targetPlatform dflags
|
|
= CmmLit $ CmmLabel lbl
|
|
|
|
+ | ArchPPC_64 <- platformArch $ targetPlatform dflags
|
|
+ = CmmMachOp (MO_Add W32) -- code model medium
|
|
+ [ CmmReg (CmmGlobal PicBaseReg)
|
|
+ , CmmLit $ picRelative
|
|
+ (platformArch $ targetPlatform dflags)
|
|
+ (platformOS $ targetPlatform dflags)
|
|
+ lbl ]
|
|
|
|
| (gopt Opt_PIC dflags || not (gopt Opt_Static dflags)) && absoluteLabel lbl
|
|
= CmmMachOp (MO_Add (wordWidth dflags))
|
|
@@ -295,11 +302,15 @@ howToAccessLabel dflags arch OSDarwin th
|
|
|
|
howToAccessLabel _ ArchPPC_64 os _ kind _
|
|
| osElfTarget os
|
|
- = if kind == DataReference
|
|
- -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
|
|
- then AccessViaSymbolPtr
|
|
- -- actually, .label instead of label
|
|
- else AccessDirectly
|
|
+ = case kind of
|
|
+ -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
|
|
+ DataReference -> AccessViaSymbolPtr
|
|
+ -- RTLD does not generate stubs for function descriptors
|
|
+ -- in tail calls. Create a symbol pointer and generate
|
|
+ -- the code to load the function descriptor at the call site.
|
|
+ JumpReference -> AccessViaSymbolPtr
|
|
+ -- regular calls are handled by the runtime linker
|
|
+ _ -> AccessDirectly
|
|
|
|
howToAccessLabel dflags _ os _ _ _
|
|
-- no PIC -> the dynamic linker does everything for us;
|
|
@@ -430,6 +441,11 @@ needImportedSymbols dflags arch os
|
|
, arch == ArchPPC
|
|
= gopt Opt_PIC dflags || not (gopt Opt_Static dflags)
|
|
|
|
+ -- PowerPC 64 Linux: always
|
|
+ | osElfTarget os
|
|
+ , arch == ArchPPC_64
|
|
+ = True
|
|
+
|
|
-- i386 (and others?): -dynamic but not -fPIC
|
|
| osElfTarget os
|
|
, arch /= ArchPPC_64
|
|
@@ -467,6 +483,10 @@ pprGotDeclaration dflags ArchX86 OSDarwi
|
|
pprGotDeclaration _ _ OSDarwin
|
|
= empty
|
|
|
|
+-- PPC 64 needs a Table Of Contents (TOC)
|
|
+pprGotDeclaration _ ArchPPC_64 OSLinux
|
|
+ = ptext (sLit ".section \".toc\",\"aw\"")
|
|
+
|
|
-- Emit GOT declaration
|
|
-- Output whatever needs to be output once per .s file.
|
|
pprGotDeclaration dflags arch os
|
|
@@ -635,9 +655,36 @@ pprImportedSymbol _ (Platform { platform
|
|
-- the NCG will keep track of all DynamicLinkerLabels it uses
|
|
-- and output each of them using pprImportedSymbol.
|
|
|
|
-pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 }) _
|
|
+pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 })
|
|
+ importedLbl
|
|
| osElfTarget (platformOS platform)
|
|
- = empty
|
|
+ = case dynamicLinkerLabelInfo importedLbl of
|
|
+ Just (SymbolPtr, lbl)
|
|
+ -> vcat [
|
|
+ ptext (sLit ".section \".toc\", \"aw\""),
|
|
+ ptext (sLit ".LC_") <> pprCLabel platform lbl <> char ':',
|
|
+ ptext (sLit "\t.quad") <+> pprCLabel platform lbl ]
|
|
+ -- generate code stubs for tail calls
|
|
+ Just (CodeStub, lbl)
|
|
+ -> vcat [
|
|
+ ptext (sLit ".section \".toc\", \"aw\""),
|
|
+ ptext (sLit ".LC_") <> pprCLabel platform lbl <> char ':',
|
|
+ ptext (sLit "\t.quad") <+> pprCLabel platform lbl,
|
|
+ ptext (sLit ".text"),
|
|
+ ptext (sLit ".Lstub.") <> ppr lbl <> char ':',
|
|
+ hcat [ ptext (sLit "\taddis\t12,2,.LC_"),
|
|
+ ppr lbl, ptext(sLit"@toc@ha") ],
|
|
+ hcat [ ptext (sLit "\taddi\t12,12,.LC_"),
|
|
+ ppr lbl, ptext(sLit"@toc@l") ],
|
|
+ ptext (sLit "\tld\t12,0(12)"),
|
|
+ ptext (sLit "\tld\t11,0(12)"),
|
|
+ ptext (sLit "\tld\t2,8(12)"),
|
|
+ ptext (sLit "\tmtctr\t11"),
|
|
+ ptext (sLit "\tld\t11,16(12)"),
|
|
+ ptext (sLit "\tbctr")
|
|
+ ]
|
|
+
|
|
+ _ -> empty
|
|
|
|
pprImportedSymbol dflags platform importedLbl
|
|
| osElfTarget (platformOS platform)
|
|
@@ -735,7 +782,6 @@ initializePicBase_ppc ArchPPC OSDarwin p
|
|
where BasicBlock bID insns = entry
|
|
b' = BasicBlock bID (PPC.FETCHPC picReg : insns)
|
|
|
|
-
|
|
initializePicBase_ppc _ _ _ _
|
|
= panic "initializePicBase_ppc: not needed"
|
|
|
|
Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/CodeGen.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs
|
|
@@ -77,14 +77,20 @@ cmmTopCodeGen
|
|
cmmTopCodeGen (CmmProc info lab live graph) = do
|
|
let blocks = toBlockListEntryFirst graph
|
|
(nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
|
|
- picBaseMb <- getPicBaseMaybeNat
|
|
dflags <- getDynFlags
|
|
let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
|
|
tops = proc : concat statics
|
|
os = platformOS $ targetPlatform dflags
|
|
- case picBaseMb of
|
|
- Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
|
|
- Nothing -> return tops
|
|
+ arch = platformArch $ targetPlatform dflags
|
|
+ case arch of
|
|
+ ArchPPC -> do
|
|
+ picBaseMb <- getPicBaseMaybeNat
|
|
+ case picBaseMb of
|
|
+ Just picBase -> initializePicBase_ppc arch os picBase tops
|
|
+ Nothing -> return tops
|
|
+ ArchPPC_64 -> return tops -- generating function descriptor handled in
|
|
+ -- pretty printer
|
|
+ _ -> panic "PPC.cmmTopCodeGen: unknown arch"
|
|
|
|
cmmTopCodeGen (CmmData sec dat) = do
|
|
return [CmmData sec dat] -- no translation, we just use CmmStatic
|
|
@@ -194,26 +200,6 @@ getRegisterReg platform (CmmGlobal mid)
|
|
-- ones which map to a real machine register on this
|
|
-- platform. Hence ...
|
|
|
|
-
|
|
-{-
|
|
-Now, given a tree (the argument to an CmmLoad) that references memory,
|
|
-produce a suitable addressing mode.
|
|
-
|
|
-A Rule of the Game (tm) for Amodes: use of the addr bit must
|
|
-immediately follow use of the code part, since the code part puts
|
|
-values in registers which the addr then refers to. So you can't put
|
|
-anything in between, lest it overwrite some of those registers. If
|
|
-you need to do some other computation between the code part and use of
|
|
-the addr bit, first store the effective address from the amode in a
|
|
-temporary, then do the other computation, and then use the temporary:
|
|
-
|
|
- code
|
|
- LEA amode, tmp
|
|
- ... other computation ...
|
|
- ... (tmp) ...
|
|
--}
|
|
-
|
|
-
|
|
-- | Convert a BlockId to some CmmStatic data
|
|
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
|
|
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
|
|
@@ -261,7 +247,7 @@ data ChildCode64 -- a.k.a "Regist
|
|
-- Reg may be modified
|
|
|
|
|
|
--- | The dual to getAnyReg: compute an expression into a register, but
|
|
+-- | Compute an expression into a register, but
|
|
-- we don't mind which one it is.
|
|
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
|
|
getSomeReg expr = do
|
|
@@ -386,10 +372,14 @@ getRegister e = do dflags <- getDynFlags
|
|
|
|
getRegister' :: DynFlags -> CmmExpr -> NatM Register
|
|
|
|
-getRegister' _ (CmmReg (CmmGlobal PicBaseReg))
|
|
+getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
|
|
+ | ArchPPC_64 == (platformArch $ targetPlatform dflags)
|
|
+ = return (Fixed II64 toc nilOL)
|
|
+
|
|
+getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
|
|
= do
|
|
- reg <- getPicBaseNat archWordSize
|
|
- return (Fixed archWordSize reg nilOL)
|
|
+ reg <- getPicBaseNat $ archWordSize (target32Bit (targetPlatform dflags))
|
|
+ return (Fixed (archWordSize (target32Bit (targetPlatform dflags))) reg nilOL)
|
|
|
|
getRegister' dflags (CmmReg reg)
|
|
= return (Fixed (cmmTypeSize (cmmRegType dflags reg))
|
|
@@ -433,11 +423,23 @@ getRegister' dflags (CmmLoad mem pk)
|
|
return (Any size code)
|
|
where size = cmmTypeSize pk
|
|
|
|
+getRegister' dflags (CmmLoad mem pk)
|
|
+ | isWord64 pk && not (target32Bit (targetPlatform dflags))
|
|
+ = do
|
|
+ Amode addr addr_code <- getAmodeDS mem
|
|
+ let code dst = addr_code `snocOL` LD size dst addr
|
|
+ return (Any size code)
|
|
+ where size = cmmTypeSize pk
|
|
+
|
|
-- catch simple cases of zero- or sign-extended load
|
|
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
|
|
Amode addr addr_code <- getAmode mem
|
|
return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
|
|
|
|
+getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
|
|
+
|
|
-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
|
|
|
|
getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
|
|
@@ -448,6 +450,22 @@ getRegister' _ (CmmMachOp (MO_SS_Conv W1
|
|
Amode addr addr_code <- getAmode mem
|
|
return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
|
|
|
|
+getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
|
|
+
|
|
+getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
|
|
+
|
|
+getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
|
|
+
|
|
+getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
|
|
+ Amode addr addr_code <- getAmode mem
|
|
+ return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
|
|
+
|
|
getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
|
|
= case mop of
|
|
MO_Not rep -> triv_ucode_int rep NOT
|
|
@@ -465,7 +483,16 @@ getRegister' dflags (CmmMachOp mop [x])
|
|
| from == to -> conversionNop (intSize to) x
|
|
|
|
-- narrowing is a nop: we treat the high bits as undefined
|
|
- MO_SS_Conv W32 to -> conversionNop (intSize to) x
|
|
+ MO_SS_Conv W64 to
|
|
+ | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
|
|
+ | otherwise -> conversionNop (intSize to) x
|
|
+ MO_SS_Conv W32 to
|
|
+ | arch32 -> conversionNop (intSize to) x
|
|
+ | otherwise -> case to of
|
|
+ W64 -> triv_ucode_int to (EXTS II32)
|
|
+ W16 -> conversionNop II16 x
|
|
+ W8 -> conversionNop II8 x
|
|
+ _ -> panic "PPC.CodeGen.getRegister: no match"
|
|
MO_SS_Conv W16 W8 -> conversionNop II8 x
|
|
MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
|
|
MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
|
|
@@ -473,7 +500,16 @@ getRegister' dflags (CmmMachOp mop [x])
|
|
MO_UU_Conv from to
|
|
| from == to -> conversionNop (intSize to) x
|
|
-- narrowing is a nop: we treat the high bits as undefined
|
|
- MO_UU_Conv W32 to -> conversionNop (intSize to) x
|
|
+ MO_UU_Conv W64 to
|
|
+ | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
|
|
+ | otherwise -> conversionNop (intSize to) x
|
|
+ MO_UU_Conv W32 to
|
|
+ | arch32 -> conversionNop (intSize to) x
|
|
+ | otherwise -> case to of
|
|
+ W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
|
|
+ W16 -> conversionNop II16 x
|
|
+ W8 -> conversionNop II8 x
|
|
+ _ -> panic "PPC.CodeGen.getRegister: no match"
|
|
MO_UU_Conv W16 W8 -> conversionNop II8 x
|
|
MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
|
|
MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
|
|
@@ -486,8 +522,9 @@ getRegister' dflags (CmmMachOp mop [x])
|
|
conversionNop new_size expr
|
|
= do e_code <- getRegister' dflags expr
|
|
return (swizzleRegisterRep e_code new_size)
|
|
+ arch32 = target32Bit $ targetPlatform dflags
|
|
|
|
-getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
|
|
+getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
|
|
= case mop of
|
|
MO_F_Eq _ -> condFltReg EQQ x y
|
|
MO_F_Ne _ -> condFltReg NE x y
|
|
@@ -496,18 +533,28 @@ getRegister' _ (CmmMachOp mop [x, y]) --
|
|
MO_F_Lt _ -> condFltReg LTT x y
|
|
MO_F_Le _ -> condFltReg LE x y
|
|
|
|
- MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
|
|
-
|
|
- MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
|
|
-
|
|
- MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
|
|
+ MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+
|
|
+ MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+
|
|
+ MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
|
|
MO_F_Add w -> triv_float w FADD
|
|
MO_F_Sub w -> triv_float w FSUB
|
|
@@ -538,32 +585,53 @@ getRegister' _ (CmmMachOp mop [x, y]) --
|
|
-> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
|
|
_ -> trivialCodeNoImm' (intSize rep) SUBF y x
|
|
|
|
- MO_Mul rep -> trivialCode rep True MULLW x y
|
|
+ MO_Mul rep
|
|
+ | arch32 -> trivialCode rep True MULLW x y
|
|
+ | otherwise -> trivialCode rep True MULLD x y
|
|
|
|
MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
|
|
+ MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
|
|
|
|
MO_S_MulMayOflo _ -> panic "S_MulMayOflo (rep /= II32): not implemented"
|
|
MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
|
|
|
|
- MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
|
|
-
|
|
- MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
|
|
+ MO_S_Quot rep
|
|
+ | arch32 -> trivialCodeNoImm' (intSize rep) DIVW
|
|
+ (extendSExpr dflags rep x) (extendSExpr dflags rep y)
|
|
+ | otherwise -> trivialCodeNoImm' (intSize rep) DIVD
|
|
+ (extendSExpr dflags rep x) (extendSExpr dflags rep y)
|
|
+ MO_U_Quot rep
|
|
+ | arch32 -> trivialCodeNoImm' (intSize rep) DIVWU
|
|
+ (extendUExpr dflags rep x) (extendUExpr dflags rep y)
|
|
+ | otherwise -> trivialCodeNoImm' (intSize rep) DIVDU
|
|
+ (extendUExpr dflags rep x) (extendUExpr dflags rep y)
|
|
+
|
|
+ MO_S_Rem rep
|
|
+ | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Rem rep
|
|
+ | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
|
|
MO_And rep -> trivialCode rep False AND x y
|
|
MO_Or rep -> trivialCode rep False OR x y
|
|
MO_Xor rep -> trivialCode rep False XOR x y
|
|
|
|
- MO_Shl rep -> trivialCode rep False SLW x y
|
|
- MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
|
|
- MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
|
|
+ MO_Shl rep -> shiftCode rep SL x y
|
|
+ MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
|
|
+ MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
|
|
_ -> panic "PPC.CodeGen.getRegister: no match"
|
|
|
|
where
|
|
triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
|
|
triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
|
|
|
|
+ arch32 = target32Bit $ targetPlatform dflags
|
|
+
|
|
getRegister' _ (CmmLit (CmmInt i rep))
|
|
| Just imm <- makeImmediate rep True i
|
|
= let
|
|
@@ -584,6 +652,7 @@ getRegister' _ (CmmLit (CmmFloat f frep)
|
|
return (Any size code)
|
|
|
|
getRegister' dflags (CmmLit lit)
|
|
+ | ArchPPC == (platformArch $ targetPlatform dflags)
|
|
= let rep = cmmLitType dflags lit
|
|
imm = litToImm lit
|
|
code dst = toOL [
|
|
@@ -591,18 +660,46 @@ getRegister' dflags (CmmLit lit)
|
|
ADD dst dst (RIImm (LO imm))
|
|
]
|
|
in return (Any (cmmTypeSize rep) code)
|
|
+ | otherwise
|
|
+ = do lbl <- getNewLabelNat
|
|
+ dflags <- getDynFlags
|
|
+ dynRef <- cmmMakeDynamicReference dflags DataReference lbl
|
|
+ Amode addr addr_code <- getAmode dynRef
|
|
+ let rep = cmmLitType dflags lit
|
|
+ size = cmmTypeSize rep
|
|
+ code dst =
|
|
+ LDATA ReadOnlyData (Statics lbl
|
|
+ [CmmStaticLit lit])
|
|
+ `consOL` (addr_code `snocOL` LD size dst addr)
|
|
+ return (Any size code)
|
|
|
|
getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
|
|
|
|
-- extend?Rep: wrap integer expression of type rep
|
|
- -- in a conversion to II32
|
|
-extendSExpr :: Width -> CmmExpr -> CmmExpr
|
|
-extendSExpr W32 x = x
|
|
-extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
|
|
-
|
|
-extendUExpr :: Width -> CmmExpr -> CmmExpr
|
|
-extendUExpr W32 x = x
|
|
-extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
|
|
+ -- in a conversion to II32 or II64 resp.
|
|
+extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
|
|
+extendSExpr dflags W32 x
|
|
+ | (platformArch $ targetPlatform dflags) == ArchPPC = x
|
|
+
|
|
+extendSExpr dflags W64 x
|
|
+ | (platformArch $ targetPlatform dflags) == ArchPPC_64 = x
|
|
+
|
|
+extendSExpr dflags rep x =
|
|
+ let size = case platformArch $ targetPlatform dflags of
|
|
+ ArchPPC -> W32
|
|
+ _ -> W64
|
|
+ in CmmMachOp (MO_SS_Conv rep size) [x]
|
|
+
|
|
+extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
|
|
+extendUExpr dflags W32 x
|
|
+ | (platformArch $ targetPlatform dflags) == ArchPPC = x
|
|
+extendUExpr dflags W64 x
|
|
+ | (platformArch $ targetPlatform dflags) == ArchPPC_64 = x
|
|
+extendUExpr dflags rep x =
|
|
+ let size = case platformArch $ targetPlatform dflags of
|
|
+ ArchPPC -> W32
|
|
+ _ -> W64
|
|
+ in CmmMachOp (MO_UU_Conv rep size) [x]
|
|
|
|
-- -----------------------------------------------------------------------------
|
|
-- The 'Amode' type: Memory addressing modes passed up the tree.
|
|
@@ -633,7 +730,7 @@ getAmode tree@(CmmRegOff _ _) = do dflag
|
|
getAmode (mangleIndexTree dflags tree)
|
|
|
|
getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
|
|
- | Just off <- makeImmediate W32 True (-i)
|
|
+ | Just off <- makeImmediate W32 True (-i)
|
|
= do
|
|
(reg, code) <- getSomeReg x
|
|
return (Amode (AddrRegImm reg off) code)
|
|
@@ -645,6 +742,19 @@ getAmode (CmmMachOp (MO_Add W32) [x, Cmm
|
|
(reg, code) <- getSomeReg x
|
|
return (Amode (AddrRegImm reg off) code)
|
|
|
|
+getAmode (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True (-i)
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ return (Amode (AddrRegImm reg off) code)
|
|
+
|
|
+
|
|
+getAmode (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True i
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ return (Amode (AddrRegImm reg off) code)
|
|
+
|
|
-- optimize addition with 32-bit immediate
|
|
-- (needed for PIC)
|
|
getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
|
|
@@ -657,17 +767,37 @@ getAmode (CmmMachOp (MO_Add W32) [x, Cmm
|
|
|
|
getAmode (CmmLit lit)
|
|
= do
|
|
- tmp <- getNewRegNat II32
|
|
- let imm = litToImm lit
|
|
- code = unitOL (LIS tmp (HA imm))
|
|
- return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
-
|
|
+ dflags <- getDynFlags
|
|
+ case platformArch $ targetPlatform dflags of
|
|
+ ArchPPC -> do
|
|
+ tmp <- getNewRegNat II32
|
|
+ let imm = litToImm lit
|
|
+ code = unitOL (LIS tmp (HA imm))
|
|
+ return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+ _ -> do -- TODO: Load from TOC,
|
|
+ -- see getRegister' _ (CmmLit lit)
|
|
+ tmp <- getNewRegNat II64
|
|
+ let imm = litToImm lit
|
|
+ code = toOL [
|
|
+ LIS tmp (HIGHESTA imm),
|
|
+ OR tmp tmp (RIImm (HIGHERA imm)),
|
|
+ SL II64 tmp tmp (RIImm (ImmInt 32)),
|
|
+ ORIS tmp tmp (HA imm)
|
|
+ ]
|
|
+ return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+
|
|
getAmode (CmmMachOp (MO_Add W32) [x, y])
|
|
= do
|
|
(regX, codeX) <- getSomeReg x
|
|
(regY, codeY) <- getSomeReg y
|
|
return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
|
|
|
|
+getAmode (CmmMachOp (MO_Add W64) [x, y])
|
|
+ = do
|
|
+ (regX, codeX) <- getSomeReg x
|
|
+ (regY, codeY) <- getSomeReg y
|
|
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
|
|
+
|
|
getAmode other
|
|
= do
|
|
(reg, code) <- getSomeReg other
|
|
@@ -675,7 +805,70 @@ getAmode other
|
|
off = ImmInt 0
|
|
return (Amode (AddrRegImm reg off) code)
|
|
|
|
+-- 64 bit load and store operations require offsets be a multiple of 4
|
|
+
|
|
+getAmodeDS :: CmmExpr -> NatM Amode
|
|
+getAmodeDS tree@(CmmRegOff _ _) = do dflags <- getDynFlags
|
|
+ getAmodeDS (mangleIndexTree
|
|
+ dflags tree)
|
|
+
|
|
+getAmodeDS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True (-i) , i `mod` 4 == 0
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ return (Amode (AddrRegImm reg off) code)
|
|
+
|
|
|
|
+getAmodeDS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
|
|
+ | Just off <- makeImmediate W64 True i , i `mod` 4 == 0
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg x
|
|
+ return (Amode (AddrRegImm reg off) code)
|
|
+
|
|
+ -- optimize addition with 32-bit immediate
|
|
+ -- (needed for PIC)
|
|
+getAmodeDS (CmmMachOp (MO_Add W32) [x, CmmLit lit])
|
|
+ = do
|
|
+ tmp <- getNewRegNat II64
|
|
+ (src, srcCode) <- getSomeReg x
|
|
+ let imm = litToImm lit
|
|
+ code = srcCode `snocOL` ADDIS tmp src (HA imm)
|
|
+ return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+
|
|
+getAmodeDS (CmmLit lit)
|
|
+ = do
|
|
+ dflags <- getDynFlags
|
|
+ case platformArch $ targetPlatform dflags of
|
|
+ ArchPPC -> do
|
|
+ tmp <- getNewRegNat II32
|
|
+ let imm = litToImm lit
|
|
+ code = unitOL (LIS tmp (HA imm))
|
|
+ return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+ _ -> do -- TODO: Load from TOC,
|
|
+ -- see getRegister' _ (CmmLit lit)
|
|
+ tmp <- getNewRegNat II64
|
|
+ let imm = litToImm lit
|
|
+ code = toOL [
|
|
+ LIS tmp (HIGHESTA imm),
|
|
+ OR tmp tmp (RIImm (HIGHERA imm)),
|
|
+ SL II64 tmp tmp (RIImm (ImmInt 32)),
|
|
+ ORIS tmp tmp (HA imm)
|
|
+ ]
|
|
+ return (Amode (AddrRegImm tmp (LO imm)) code)
|
|
+
|
|
+getAmodeDS (CmmMachOp (MO_Add _) [x, y])
|
|
+ = do
|
|
+ (regX, codeX) <- getSomeReg x
|
|
+ (regY, codeY) <- getSomeReg y
|
|
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
|
|
+
|
|
+getAmodeDS other
|
|
+ = do
|
|
+ (reg, code) <- getSomeReg other
|
|
+ let
|
|
+ off = ImmInt 0
|
|
+ return (Amode (AddrRegImm reg off) code)
|
|
+
|
|
|
|
-- The 'CondCode' type: Condition codes passed up the tree.
|
|
data CondCode
|
|
@@ -686,10 +879,12 @@ data CondCode
|
|
getCondCode :: CmmExpr -> NatM CondCode
|
|
|
|
-- almost the same as everywhere else - but we need to
|
|
--- extend small integers to 32 bit first
|
|
+-- extend small integers to 32 bit or 64 bit first
|
|
|
|
getCondCode (CmmMachOp mop [x, y])
|
|
- = case mop of
|
|
+ = do
|
|
+ dflags <- getDynFlags
|
|
+ case mop of
|
|
MO_F_Eq W32 -> condFltCode EQQ x y
|
|
MO_F_Ne W32 -> condFltCode NE x y
|
|
MO_F_Gt W32 -> condFltCode GTT x y
|
|
@@ -704,18 +899,28 @@ getCondCode (CmmMachOp mop [x, y])
|
|
MO_F_Lt W64 -> condFltCode LTT x y
|
|
MO_F_Le W64 -> condFltCode LE x y
|
|
|
|
- MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
|
|
-
|
|
- MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
|
|
- MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
|
|
-
|
|
- MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
|
|
- MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
|
|
+ MO_Eq rep -> condIntCode EQQ (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+ MO_Ne rep -> condIntCode NE (extendUExpr dflags rep x)
|
|
+ (extendUExpr dflags rep y)
|
|
+
|
|
+ MO_S_Gt rep -> condIntCode GTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Ge rep -> condIntCode GE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Lt rep -> condIntCode LTT (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_S_Le rep -> condIntCode LE (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+
|
|
+ MO_U_Gt rep -> condIntCode GU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Ge rep -> condIntCode GEU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Lt rep -> condIntCode LU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
+ MO_U_Le rep -> condIntCode LEU (extendSExpr dflags rep x)
|
|
+ (extendSExpr dflags rep y)
|
|
|
|
_ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
|
|
|
|
@@ -729,21 +934,31 @@ getCondCode _ = panic "getCondCode(2)(po
|
|
condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
|
|
|
|
-- ###FIXME: I16 and I8!
|
|
+-- TODO: Is this still an issue? All arguments are extend?Expr'd.
|
|
condIntCode cond x (CmmLit (CmmInt y rep))
|
|
| Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
|
|
= do
|
|
(src1, code) <- getSomeReg x
|
|
+ dflags <- getDynFlags
|
|
+ let size = case platformArch $ targetPlatform dflags of
|
|
+ ArchPPC -> II32
|
|
+ _ -> II64
|
|
let
|
|
code' = code `snocOL`
|
|
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
|
|
+ (if condUnsigned cond then CMPL else CMP) size src1 (RIImm src2)
|
|
return (CondCode False cond code')
|
|
|
|
condIntCode cond x y = do
|
|
(src1, code1) <- getSomeReg x
|
|
(src2, code2) <- getSomeReg y
|
|
+ dflags <- getDynFlags
|
|
+ let size = case platformArch $ targetPlatform dflags of
|
|
+ ArchPPC -> II32
|
|
+ _ -> II64
|
|
let
|
|
code' = code1 `appOL` code2 `snocOL`
|
|
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
|
|
+ (if condUnsigned cond then CMPL else CMP) size src1
|
|
+ (RIReg src2)
|
|
return (CondCode False cond code')
|
|
|
|
condFltCode cond x y = do
|
|
@@ -781,7 +996,9 @@ assignReg_FltCode :: Size -> CmmReg ->
|
|
|
|
assignMem_IntCode pk addr src = do
|
|
(srcReg, code) <- getSomeReg src
|
|
- Amode dstAddr addr_code <- getAmode addr
|
|
+ Amode dstAddr addr_code <- case pk of
|
|
+ II64 -> getAmodeDS addr
|
|
+ _ -> getAmode addr
|
|
return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
|
|
|
|
-- dst is a reg, but src could be anything
|
|
@@ -809,9 +1026,34 @@ genJump (CmmLit (CmmLabel lbl))
|
|
|
|
genJump tree
|
|
= do
|
|
+ dflags <- getDynFlags
|
|
+ let platform = targetPlatform dflags
|
|
+ case platformOS platform of
|
|
+ OSLinux -> case platformArch platform of
|
|
+ ArchPPC -> genJump' tree GCPLinux
|
|
+ ArchPPC_64 -> genJump' tree GCPLinux64ELF1
|
|
+ _ -> panic "PPC.CodeGen.genJump: Unknown Linux"
|
|
+ OSDarwin -> genJump' tree GCPDarwin
|
|
+ _ -> panic "PPC.CodeGen.genJump: not defined for this os"
|
|
+
|
|
+
|
|
+genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
|
|
+
|
|
+genJump' tree GCPLinux64ELF1
|
|
+ = do
|
|
(target,code) <- getSomeReg tree
|
|
- return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
|
|
+ return (code -- TODO load function descriptor into r12 directly
|
|
+ `snocOL` MR r12 target
|
|
+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 0))
|
|
+ `snocOL` LD II64 toc (AddrRegImm r12 (ImmInt 8))
|
|
+ `snocOL` MTCTR r11
|
|
+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 16))
|
|
+ `snocOL` BCTR [] Nothing)
|
|
|
|
+genJump' tree _
|
|
+ = do
|
|
+ (target,code) <- getSomeReg tree
|
|
+ return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
|
|
|
|
-- -----------------------------------------------------------------------------
|
|
-- Unconditional branches
|
|
@@ -867,11 +1109,16 @@ genCCall target dest_regs argsAndHints
|
|
= do dflags <- getDynFlags
|
|
let platform = targetPlatform dflags
|
|
case platformOS platform of
|
|
- OSLinux -> genCCall' dflags GCPLinux target dest_regs argsAndHints
|
|
+ OSLinux -> case platformArch platform of
|
|
+ ArchPPC -> genCCall' dflags GCPLinux
|
|
+ target dest_regs argsAndHints
|
|
+ ArchPPC_64 -> genCCall' dflags GCPLinux64ELF1
|
|
+ target dest_regs argsAndHints
|
|
+ _ -> panic "PPC.CodeGen.genCCall: Unknown Linux"
|
|
OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints
|
|
_ -> panic "PPC.CodeGen.genCCall: not defined for this os"
|
|
|
|
-data GenCCallPlatform = GCPLinux | GCPDarwin
|
|
+data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF1
|
|
|
|
genCCall'
|
|
:: DynFlags
|
|
@@ -910,7 +1157,11 @@ genCCall'
|
|
* The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
|
|
PowerPC Linux does not agree, so neither do we.
|
|
|
|
- According to both conventions, The parameter area should be part of the
|
|
+ PowerPC 64 Linux uses the System V Release 4 Calling Convention for
|
|
+ 64-bit PowerPC. It is specified in
|
|
+ "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".
|
|
+
|
|
+ According to both conventions, the parameter area should be part of the
|
|
caller's stack frame, allocated in the caller's prologue code (large enough
|
|
to hold the parameter lists for all called routines). The NCG already
|
|
uses the stack for register spilling, leaving 64 bytes free at the top.
|
|
@@ -949,7 +1200,9 @@ genCCall' dflags gcp target dest_regs ar
|
|
PrimTarget mop -> outOfLineMachOp mop
|
|
|
|
let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
|
|
- codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
|
|
+ `appOL` toc_before
|
|
+ codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
|
|
+ `appOL` moveResult reduceToFF32
|
|
|
|
case labelOrExpr of
|
|
Left lbl -> do
|
|
@@ -958,30 +1211,45 @@ genCCall' dflags gcp target dest_regs ar
|
|
`appOL` codeAfter)
|
|
Right dyn -> do
|
|
(dynReg, dynCode) <- getSomeReg dyn
|
|
- return ( dynCode
|
|
- `snocOL` MTCTR dynReg
|
|
- `appOL` codeBefore
|
|
- `snocOL` BCTRL usedRegs
|
|
- `appOL` codeAfter)
|
|
+ case gcp of
|
|
+ GCPLinux64ELF1 -> return ( dynCode
|
|
+ `appOL` codeBefore
|
|
+ `snocOL` MR r12 dynReg
|
|
+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 0))
|
|
+ `snocOL` LD II64 toc (AddrRegImm r12 (ImmInt 8))
|
|
+ `snocOL` MTCTR r11
|
|
+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 16))
|
|
+ `snocOL` BCTRL usedRegs
|
|
+ `appOL` codeAfter)
|
|
+
|
|
+ _ -> return ( dynCode
|
|
+ `snocOL` MTCTR dynReg
|
|
+ `appOL` codeBefore
|
|
+ `snocOL` BCTRL usedRegs
|
|
+ `appOL` codeAfter)
|
|
where
|
|
platform = targetPlatform dflags
|
|
|
|
uses_pic_base_implicitly = do
|
|
-- See Note [implicit register in PPC PIC code]
|
|
-- on why we claim to use PIC register here
|
|
- when (gopt Opt_PIC dflags) $ do
|
|
- _ <- getPicBaseNat archWordSize
|
|
+ when (gopt Opt_PIC dflags && target32Bit platform) $ do
|
|
+ _ <- getPicBaseNat $ archWordSize (target32Bit (targetPlatform dflags))
|
|
return ()
|
|
|
|
initialStackOffset = case gcp of
|
|
GCPDarwin -> 24
|
|
GCPLinux -> 8
|
|
+ GCPLinux64ELF1 -> 48
|
|
-- size of linkage area + size of arguments, in bytes
|
|
stackDelta finalStack = case gcp of
|
|
GCPDarwin ->
|
|
roundTo 16 $ (24 +) $ max 32 $ sum $
|
|
map (widthInBytes . typeWidth) argReps
|
|
GCPLinux -> roundTo 16 finalStack
|
|
+ GCPLinux64ELF1 ->
|
|
+ roundTo 16 $ (48 +) $ max 64 $ sum $
|
|
+ map (widthInBytes . typeWidth) argReps
|
|
|
|
-- need to remove alignment information
|
|
args | PrimTarget mop <- target,
|
|
@@ -998,12 +1266,25 @@ genCCall' dflags gcp target dest_regs ar
|
|
roundTo a x | x `mod` a == 0 = x
|
|
| otherwise = x + a - (x `mod` a)
|
|
|
|
+ spSize = if target32Bit platform then II32 else II64
|
|
+
|
|
move_sp_down finalStack
|
|
| delta > 64 =
|
|
- toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
|
|
+ toOL [STU spSize sp (AddrRegImm sp (ImmInt (-delta))),
|
|
DELTA (-delta)]
|
|
| otherwise = nilOL
|
|
where delta = stackDelta finalStack
|
|
+ toc_before = case gcp of
|
|
+ GCPLinux64ELF1 -> unitOL $ ST spSize toc (AddrRegImm sp (ImmInt 40))
|
|
+ _ -> nilOL
|
|
+ toc_after labelOrExpr = case gcp of
|
|
+ GCPLinux64ELF1 -> case labelOrExpr of
|
|
+ Left _ -> toOL [ NOP ]
|
|
+ Right _ -> toOL [ LD spSize toc
|
|
+ (AddrRegImm sp
|
|
+ (ImmInt 40))
|
|
+ ]
|
|
+ _ -> nilOL
|
|
move_sp_up finalStack
|
|
| delta > 64 =
|
|
toOL [ADD sp sp (RIImm (ImmInt delta)),
|
|
@@ -1014,7 +1295,8 @@ genCCall' dflags gcp target dest_regs ar
|
|
|
|
passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
|
|
passArguments ((arg,arg_ty):args) gprs fprs stackOffset
|
|
- accumCode accumUsed | isWord64 arg_ty =
|
|
+ accumCode accumUsed | isWord64 arg_ty
|
|
+ && target32Bit (targetPlatform dflags) =
|
|
do
|
|
ChildCode64 code vr_lo <- iselExpr64 arg
|
|
let vr_hi = getHiVRegFromLo vr_lo
|
|
@@ -1052,6 +1334,7 @@ genCCall' dflags gcp target dest_regs ar
|
|
_ -> -- only one or no regs left
|
|
passArguments args [] fprs (stackOffset'+8)
|
|
stackCode accumUsed
|
|
+ GCPLinux64ELF1 -> panic "passArguments: 32 bit code"
|
|
|
|
passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
|
|
| reg : _ <- regs = do
|
|
@@ -1065,6 +1348,8 @@ genCCall' dflags gcp target dest_regs ar
|
|
GCPDarwin -> stackOffset + stackBytes
|
|
-- ... the SysV ABI doesn't.
|
|
GCPLinux -> stackOffset
|
|
+ -- ... but ELFv1 does.
|
|
+ GCPLinux64ELF1 -> stackOffset + stackBytes
|
|
passArguments args
|
|
(drop nGprs gprs)
|
|
(drop nFprs fprs)
|
|
@@ -1092,6 +1377,10 @@ genCCall' dflags gcp target dest_regs ar
|
|
roundTo 8 stackOffset
|
|
| otherwise ->
|
|
stackOffset
|
|
+ GCPLinux64ELF1 ->
|
|
+ -- everything on the stack is 8-byte aligned
|
|
+ -- on a 64 bit system (vector status excepted)
|
|
+ stackOffset
|
|
stackSlot = AddrRegImm sp (ImmInt stackOffset')
|
|
(nGprs, nFprs, stackBytes, regs)
|
|
= case gcp of
|
|
@@ -1117,6 +1406,18 @@ genCCall' dflags gcp target dest_regs ar
|
|
FF64 -> (0, 1, 8, fprs)
|
|
II64 -> panic "genCCall' passArguments II64"
|
|
FF80 -> panic "genCCall' passArguments FF80"
|
|
+ GCPLinux64ELF1 ->
|
|
+ case cmmTypeSize rep of
|
|
+ II8 -> (1, 0, 8, gprs)
|
|
+ II16 -> (1, 0, 8, gprs)
|
|
+ II32 -> (1, 0, 8, gprs)
|
|
+ II64 -> (1, 0, 8, gprs)
|
|
+ -- The ELFv1 ABI requires that we skip a
|
|
+ -- corresponding number of GPRs when we use
|
|
+ -- the FPRs.
|
|
+ FF32 -> (1, 1, 8, fprs)
|
|
+ FF64 -> (1, 1, 8, fprs)
|
|
+ FF80 -> panic "genCCall' passArguments FF80"
|
|
|
|
moveResult reduceToFF32 =
|
|
case dest_regs of
|
|
@@ -1124,8 +1425,9 @@ genCCall' dflags gcp target dest_regs ar
|
|
[dest]
|
|
| reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
|
|
| isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
|
|
- | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
|
|
- MR r_dest r4]
|
|
+ | isWord64 rep && target32Bit (targetPlatform dflags)
|
|
+ -> toOL [MR (getHiVRegFromLo r_dest) r3,
|
|
+ MR r_dest r4]
|
|
| otherwise -> unitOL (MR r_dest r3)
|
|
where rep = cmmRegType dflags (CmmLocal dest)
|
|
r_dest = getRegisterReg platform (CmmLocal dest)
|
|
@@ -1201,17 +1503,18 @@ genCCall' dflags gcp target dest_regs ar
|
|
|
|
genSwitch :: DynFlags -> CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
|
|
genSwitch dflags expr ids
|
|
- | gopt Opt_PIC dflags
|
|
+ | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
|
|
= do
|
|
(reg,e_code) <- getSomeReg expr
|
|
- tmp <- getNewRegNat II32
|
|
+ let sz = archWordSize $ target32Bit $ targetPlatform dflags
|
|
+ sha = if target32Bit $ targetPlatform dflags then 2 else 3
|
|
+ tmp <- getNewRegNat sz
|
|
lbl <- getNewLabelNat
|
|
- dflags <- getDynFlags
|
|
dynRef <- cmmMakeDynamicReference dflags DataReference lbl
|
|
(tableReg,t_code) <- getSomeReg $ dynRef
|
|
let code = e_code `appOL` t_code `appOL` toOL [
|
|
- SLW tmp reg (RIImm (ImmInt 2)),
|
|
- LD II32 tmp (AddrRegReg tableReg tmp),
|
|
+ SL sz tmp reg (RIImm (ImmInt sha)),
|
|
+ LD sz tmp (AddrRegReg tableReg tmp),
|
|
ADD tmp tmp (RIReg tableReg),
|
|
MTCTR tmp,
|
|
BCTR ids (Just lbl)
|
|
@@ -1220,12 +1523,14 @@ genSwitch dflags expr ids
|
|
| otherwise
|
|
= do
|
|
(reg,e_code) <- getSomeReg expr
|
|
- tmp <- getNewRegNat II32
|
|
+ let sz = archWordSize $ target32Bit $ targetPlatform dflags
|
|
+ sha = if target32Bit $ targetPlatform dflags then 2 else 3
|
|
+ tmp <- getNewRegNat sz
|
|
lbl <- getNewLabelNat
|
|
let code = e_code `appOL` toOL [
|
|
- SLW tmp reg (RIImm (ImmInt 2)),
|
|
+ SL sz tmp reg (RIImm (ImmInt sha)),
|
|
ADDIS tmp tmp (HA (ImmCLbl lbl)),
|
|
- LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
|
|
+ LD sz tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
|
|
MTCTR tmp,
|
|
BCTR ids (Just lbl)
|
|
]
|
|
@@ -1235,7 +1540,9 @@ generateJumpTableForInstr :: DynFlags ->
|
|
-> Maybe (NatCmmDecl CmmStatics Instr)
|
|
generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
|
|
let jumpTable
|
|
- | gopt Opt_PIC dflags = map jumpTableEntryRel ids
|
|
+ | (gopt Opt_PIC dflags)
|
|
+ || (not $ target32Bit $ targetPlatform dflags)
|
|
+ = map jumpTableEntryRel ids
|
|
| otherwise = map (jumpTableEntry dflags) ids
|
|
where jumpTableEntryRel Nothing
|
|
= CmmStaticLit (CmmInt 0 (wordWidth dflags))
|
|
@@ -1259,6 +1566,7 @@ condIntReg, condFltReg :: Cond -> CmmExp
|
|
condReg :: NatM CondCode -> NatM Register
|
|
condReg getCond = do
|
|
CondCode _ cond cond_code <- getCond
|
|
+ dflags <- getDynFlags
|
|
let
|
|
{- code dst = cond_code `appOL` toOL [
|
|
BCC cond lbl1,
|
|
@@ -1294,7 +1602,8 @@ condReg getCond = do
|
|
GU -> (1, False)
|
|
_ -> panic "PPC.CodeGen.codeReg: no match"
|
|
|
|
- return (Any II32 code)
|
|
+ size = archWordSize $ target32Bit $ targetPlatform dflags
|
|
+ return (Any size code)
|
|
|
|
condIntReg cond x y = condReg (condIntCode cond x y)
|
|
condFltReg cond x y = condReg (condFltCode cond x y)
|
|
@@ -1363,6 +1672,27 @@ trivialCode rep _ instr x y = do
|
|
let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
|
|
return (Any (intSize rep) code)
|
|
|
|
+shiftCode
|
|
+ :: Width
|
|
+ -> (Size-> Reg -> Reg -> RI -> Instr)
|
|
+ -> CmmExpr
|
|
+ -> CmmExpr
|
|
+ -> NatM Register
|
|
+shiftCode width instr x (CmmLit (CmmInt y _))
|
|
+ | Just imm <- makeImmediate width False y
|
|
+ = do
|
|
+ (src1, code1) <- getSomeReg x
|
|
+ let size = intSize width
|
|
+ let code dst = code1 `snocOL` instr size dst src1 (RIImm imm)
|
|
+ return (Any size code)
|
|
+
|
|
+shiftCode width instr x y = do
|
|
+ (src1, code1) <- getSomeReg x
|
|
+ (src2, code2) <- getSomeReg y
|
|
+ let size = intSize width
|
|
+ let code dst = code1 `appOL` code2 `snocOL` instr size dst src1 (RIReg src2)
|
|
+ return (Any size code)
|
|
+
|
|
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
|
|
-> CmmExpr -> CmmExpr -> NatM Register
|
|
trivialCodeNoImm' size instr x y = do
|
|
@@ -1393,18 +1723,26 @@ trivialUCode rep instr x = do
|
|
remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
|
|
-> CmmExpr -> CmmExpr -> NatM Register
|
|
remainderCode rep div x y = do
|
|
+ dflags <- getDynFlags
|
|
+ let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
|
|
+ else MULLD
|
|
(src1, code1) <- getSomeReg x
|
|
(src2, code2) <- getSomeReg y
|
|
let code dst = code1 `appOL` code2 `appOL` toOL [
|
|
div dst src1 src2,
|
|
- MULLW dst dst (RIReg src2),
|
|
+ mull_instr dst dst (RIReg src2),
|
|
SUBF dst dst src1
|
|
]
|
|
return (Any (intSize rep) code)
|
|
|
|
-
|
|
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
|
|
coerceInt2FP fromRep toRep x = do
|
|
+ dflags <- getDynFlags
|
|
+ let arch = platformArch $ targetPlatform dflags
|
|
+ coerceInt2FP' arch fromRep toRep x
|
|
+
|
|
+coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
|
|
+coerceInt2FP' ArchPPC fromRep toRep x = do
|
|
(src, code) <- getSomeReg x
|
|
lbl <- getNewLabelNat
|
|
itmp <- getNewRegNat II32
|
|
@@ -1441,8 +1779,42 @@ coerceInt2FP fromRep toRep x = do
|
|
|
|
return (Any (floatSize toRep) code')
|
|
|
|
+coerceInt2FP' ArchPPC_64 fromRep toRep x = do
|
|
+ (src, code) <- getSomeReg x
|
|
+ dflags <- getDynFlags
|
|
+ let
|
|
+ code' dst = code `appOL` maybe_exts `appOL` toOL [
|
|
+ ST II64 src (spRel dflags 2),
|
|
+ LD FF64 dst (spRel dflags 2),
|
|
+ FCFID dst dst
|
|
+ ] `appOL` maybe_frsp dst
|
|
+
|
|
+ maybe_exts = case fromRep of
|
|
+ W8 -> unitOL $ EXTS II8 src src
|
|
+ W16 -> unitOL $ EXTS II16 src src
|
|
+ W32 -> unitOL $ EXTS II32 src src
|
|
+ W64 -> nilOL
|
|
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
|
|
+
|
|
+ maybe_frsp dst
|
|
+ = case toRep of
|
|
+ W32 -> unitOL $ FRSP dst dst
|
|
+ W64 -> nilOL
|
|
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
|
|
+
|
|
+ return (Any (floatSize toRep) code')
|
|
+
|
|
+coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
|
|
+
|
|
+
|
|
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
|
|
-coerceFP2Int _ toRep x = do
|
|
+coerceFP2Int fromRep toRep x = do
|
|
+ dflags <- getDynFlags
|
|
+ let arch = platformArch $ targetPlatform dflags
|
|
+ coerceFP2Int' arch fromRep toRep x
|
|
+
|
|
+coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
|
|
+coerceFP2Int' ArchPPC _ toRep x = do
|
|
dflags <- getDynFlags
|
|
-- the reps don't really matter: F*->FF64 and II32->I* are no-ops
|
|
(src, code) <- getSomeReg x
|
|
@@ -1457,6 +1829,23 @@ coerceFP2Int _ toRep x = do
|
|
LD II32 dst (spRel dflags 3)]
|
|
return (Any (intSize toRep) code')
|
|
|
|
+coerceFP2Int' ArchPPC_64 _ toRep x = do
|
|
+ dflags <- getDynFlags
|
|
+ -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
|
|
+ (src, code) <- getSomeReg x
|
|
+ tmp <- getNewRegNat FF64
|
|
+ let
|
|
+ code' dst = code `appOL` toOL [
|
|
+ -- convert to int in FP reg
|
|
+ FCTIDZ tmp src,
|
|
+ -- store value (64bit) from FP to stack
|
|
+ -- TODO: verify that we can really use 16(r1) as temp
|
|
+ ST FF64 tmp (spRel dflags 2),
|
|
+ LD II64 dst (spRel dflags 2)]
|
|
+ return (Any (intSize toRep) code')
|
|
+
|
|
+coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
|
|
+
|
|
-- Note [.LCTOC1 in PPC PIC code]
|
|
-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
|
|
-- to make the most of the PPC's 16-bit displacements.
|
|
Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Instr.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs
|
|
@@ -47,8 +47,10 @@ import Data.Maybe (fromMaybe)
|
|
--------------------------------------------------------------------------------
|
|
-- Size of a PPC memory address, in bytes.
|
|
--
|
|
-archWordSize :: Size
|
|
-archWordSize = II32
|
|
+archWordSize :: Bool -> Size
|
|
+archWordSize is32Bit
|
|
+ | is32Bit = II32
|
|
+ | otherwise = II64
|
|
|
|
|
|
-- | Instruction instance for powerpc
|
|
@@ -72,16 +74,18 @@ instance Instruction Instr where
|
|
ppc_mkStackAllocInstr :: Platform -> Int -> Instr
|
|
ppc_mkStackAllocInstr platform amount
|
|
= case platformArch platform of
|
|
- ArchPPC -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
- ADD sp sp (RIImm (ImmInt (-amount)))
|
|
- arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch
|
|
+ ArchPPC -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
+ ADD sp sp (RIImm (ImmInt (-amount)))
|
|
+ ArchPPC_64 -> STU II64 sp (AddrRegImm sp (ImmInt (-amount)))
|
|
+ arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch
|
|
|
|
ppc_mkStackDeallocInstr :: Platform -> Int -> Instr
|
|
ppc_mkStackDeallocInstr platform amount
|
|
= case platformArch platform of
|
|
- ArchPPC -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
- ADD sp sp (RIImm (ImmInt amount))
|
|
- arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch
|
|
+ ArchPPC -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
+ ADD sp sp (RIImm (ImmInt amount))
|
|
+ ArchPPC_64 -> ADD sp sp (RIImm (ImmInt amount))
|
|
+ arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch
|
|
|
|
--
|
|
-- See note [extra spill slots] in X86/Instr.hs
|
|
@@ -208,9 +212,12 @@ data Instr
|
|
| SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
|
|
| SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1
|
|
| SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1
|
|
+ | MULLD Reg Reg RI
|
|
| MULLW Reg Reg RI
|
|
| DIVW Reg Reg Reg
|
|
+ | DIVD Reg Reg Reg
|
|
| DIVWU Reg Reg Reg
|
|
+ | DIVDU Reg Reg Reg
|
|
|
|
| MULLW_MayOflo Reg Reg Reg
|
|
-- dst = 1 if src1 * src2 overflows
|
|
@@ -218,9 +225,16 @@ data Instr
|
|
-- mullwo. dst, src1, src2
|
|
-- mfxer dst
|
|
-- rlwinm dst, dst, 2, 31,31
|
|
+ | MULLD_MayOflo Reg Reg Reg
|
|
+ -- dst = 1 if src1 * src2 overflows
|
|
+ -- pseudo-instruction; pretty-printed as:
|
|
+ -- mulldo. dst, src1, src2
|
|
+ -- mfxer dst
|
|
+ -- rlwinm dst, dst, 2, 31,31
|
|
|
|
| AND Reg Reg RI -- dst, src1, src2
|
|
| OR Reg Reg RI -- dst, src1, src2
|
|
+ | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2
|
|
| XOR Reg Reg RI -- dst, src1, src2
|
|
| XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
|
|
|
|
@@ -229,9 +243,9 @@ data Instr
|
|
| NEG Reg Reg
|
|
| NOT Reg Reg
|
|
|
|
- | SLW Reg Reg RI -- shift left word
|
|
- | SRW Reg Reg RI -- shift right word
|
|
- | SRAW Reg Reg RI -- shift right arithmetic word
|
|
+ | SL Size Reg Reg RI -- shift left
|
|
+ | SR Size Reg Reg RI -- shift right
|
|
+ | SRA Size Reg Reg RI -- shift right arithmetic
|
|
|
|
| RLWINM Reg Reg Int Int Int -- Rotate Left Word Immediate then AND with Mask
|
|
|
|
@@ -244,6 +258,8 @@ data Instr
|
|
| FCMP Reg Reg
|
|
|
|
| FCTIWZ Reg Reg -- convert to integer word
|
|
+ | FCTIDZ Reg Reg -- convert to integer double word
|
|
+ | FCFID Reg Reg -- convert from integer double word
|
|
| FRSP Reg Reg -- reduce to single precision
|
|
-- (but destination is a FP register)
|
|
|
|
@@ -253,9 +269,10 @@ data Instr
|
|
| MFLR Reg -- move from link register
|
|
| FETCHPC Reg -- pseudo-instruction:
|
|
-- bcl to next insn, mflr reg
|
|
-
|
|
| LWSYNC -- memory barrier
|
|
-
|
|
+ | NOP -- no operation, PowerPC 64 bit
|
|
+ -- needs this as place holder to
|
|
+ -- reload TOC pointer
|
|
|
|
-- | Get the registers that are being used by this instruction.
|
|
-- regUsage doesn't need to do any trickery for jumps and such.
|
|
@@ -290,22 +307,28 @@ ppc_regUsageOfInstr platform instr
|
|
SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
+ MULLD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
+ DIVD reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
+ DIVDU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
|
|
|
|
MULLW_MayOflo reg1 reg2 reg3
|
|
-> usage ([reg2,reg3], [reg1])
|
|
+ MULLD_MayOflo reg1 reg2 reg3
|
|
+ -> usage ([reg2,reg3], [reg1])
|
|
AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ ORIS reg1 reg2 _ -> usage ([reg2], [reg1])
|
|
XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
|
|
EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
|
|
NEG reg1 reg2 -> usage ([reg2], [reg1])
|
|
NOT reg1 reg2 -> usage ([reg2], [reg1])
|
|
- SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
- SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
- SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ SL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ SR _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
+ SRA _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
|
|
RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1])
|
|
|
|
FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
|
|
@@ -315,6 +338,8 @@ ppc_regUsageOfInstr platform instr
|
|
FNEG r1 r2 -> usage ([r2], [r1])
|
|
FCMP r1 r2 -> usage ([r1,r2], [])
|
|
FCTIWZ r1 r2 -> usage ([r2], [r1])
|
|
+ FCTIDZ r1 r2 -> usage ([r2], [r1])
|
|
+ FCFID r1 r2 -> usage ([r2], [r1])
|
|
FRSP r1 r2 -> usage ([r2], [r1])
|
|
MFCR reg -> usage ([], [reg])
|
|
MFLR reg -> usage ([], [reg])
|
|
@@ -367,21 +392,27 @@ ppc_patchRegsOfInstr instr env
|
|
SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3)
|
|
SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3)
|
|
SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3)
|
|
+ MULLD reg1 reg2 ri -> MULLD (env reg1) (env reg2) (fixRI ri)
|
|
MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
|
|
DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3)
|
|
+ DIVD reg1 reg2 reg3 -> DIVD (env reg1) (env reg2) (env reg3)
|
|
DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3)
|
|
+ DIVDU reg1 reg2 reg3 -> DIVDU (env reg1) (env reg2) (env reg3)
|
|
MULLW_MayOflo reg1 reg2 reg3
|
|
-> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
|
|
+ MULLD_MayOflo reg1 reg2 reg3
|
|
+ -> MULLD_MayOflo (env reg1) (env reg2) (env reg3)
|
|
AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
|
|
OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
|
|
+ ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm
|
|
XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
|
|
XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
|
|
EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
|
|
NEG reg1 reg2 -> NEG (env reg1) (env reg2)
|
|
NOT reg1 reg2 -> NOT (env reg1) (env reg2)
|
|
- SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri)
|
|
- SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri)
|
|
- SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
|
|
+ SL sz reg1 reg2 ri -> SL sz (env reg1) (env reg2) (fixRI ri)
|
|
+ SR sz reg1 reg2 ri -> SR sz (env reg1) (env reg2) (fixRI ri)
|
|
+ SRA sz reg1 reg2 ri -> SRA sz (env reg1) (env reg2) (fixRI ri)
|
|
RLWINM reg1 reg2 sh mb me
|
|
-> RLWINM (env reg1) (env reg2) sh mb me
|
|
FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3)
|
|
@@ -391,6 +422,8 @@ ppc_patchRegsOfInstr instr env
|
|
FNEG r1 r2 -> FNEG (env r1) (env r2)
|
|
FCMP r1 r2 -> FCMP (env r1) (env r2)
|
|
FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
|
|
+ FCTIDZ r1 r2 -> FCTIDZ (env r1) (env r2)
|
|
+ FCFID r1 r2 -> FCFID (env r1) (env r2)
|
|
FRSP r1 r2 -> FRSP (env r1) (env r2)
|
|
MFCR reg -> MFCR (env reg)
|
|
MFLR reg -> MFLR (env reg)
|
|
@@ -457,11 +490,14 @@ ppc_mkSpillInstr
|
|
ppc_mkSpillInstr dflags reg delta slot
|
|
= let platform = targetPlatform dflags
|
|
off = spillSlotToOffset slot
|
|
+ arch = platformArch platform
|
|
in
|
|
let sz = case targetClassOfReg platform reg of
|
|
- RcInteger -> II32
|
|
+ RcInteger -> case arch of
|
|
+ ArchPPC -> II32
|
|
+ _ -> II64
|
|
RcDouble -> FF64
|
|
- _ -> panic "PPC.Instr.mkSpillInstr: no match"
|
|
+ _ -> panic "PPC.Instr.mkSpillInstr: no match"
|
|
in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
|
|
|
|
@@ -475,9 +511,12 @@ ppc_mkLoadInstr
|
|
ppc_mkLoadInstr dflags reg delta slot
|
|
= let platform = targetPlatform dflags
|
|
off = spillSlotToOffset slot
|
|
+ arch = platformArch platform
|
|
in
|
|
let sz = case targetClassOfReg platform reg of
|
|
- RcInteger -> II32
|
|
+ RcInteger -> case arch of
|
|
+ ArchPPC -> II32
|
|
+ _ -> II64
|
|
RcDouble -> FF64
|
|
_ -> panic "PPC.Instr.mkLoadInstr: no match"
|
|
in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
@@ -498,8 +537,8 @@ maxSpillSlots dflags
|
|
-- = 0 -- useful for testing allocMoreStack
|
|
|
|
-- | The number of bytes that the stack pointer should be aligned
|
|
--- to. This is 16 both on PPC32 and PPC64 at least for Darwin, but I'm
|
|
--- not sure this is correct for other OSes.
|
|
+-- to. This is 16 both on PPC32 and PPC64 at least for Darwin, and
|
|
+-- Linux (see ELF processor specific supplements).
|
|
stackAlign :: Int
|
|
stackAlign = 16
|
|
|
|
Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Ppr.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs
|
|
@@ -39,11 +39,13 @@ import Unique ( pprUnique
|
|
import Platform
|
|
import FastString
|
|
import Outputable
|
|
+import DynFlags
|
|
|
|
import Data.Word
|
|
import Data.Bits
|
|
|
|
-
|
|
+-- temporary import to help debug
|
|
+import PprCmm ( pprLit )
|
|
-- -----------------------------------------------------------------------------
|
|
-- Printing this stuff out
|
|
|
|
@@ -54,12 +56,15 @@ pprNatCmmDecl (CmmData section dats) =
|
|
pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
|
|
case topInfoTable proc of
|
|
Nothing ->
|
|
+ sdocWithPlatform $ \platform ->
|
|
case blocks of
|
|
[] -> -- special case for split markers:
|
|
pprLabel lbl
|
|
blocks -> -- special case for code without info table:
|
|
pprSectionHeader Text $$
|
|
- pprLabel lbl $$ -- blocks guaranteed not null, so label needed
|
|
+ (if platformArch platform == ArchPPC_64
|
|
+ then pprFunctionDescriptor lbl
|
|
+ else pprLabel lbl) $$ -- blocks guaranteed not null, so label needed
|
|
vcat (map (pprBasicBlock top_info) blocks)
|
|
|
|
Just (Statics info_lbl _) ->
|
|
@@ -86,6 +91,22 @@ pprNatCmmDecl proc@(CmmProc top_info lbl
|
|
else empty)
|
|
|
|
|
|
+pprFunctionDescriptor :: CLabel -> SDoc
|
|
+pprFunctionDescriptor lab = pprGloblDecl lab
|
|
+ $$ text ".section \".opd\",\"aw\""
|
|
+ $$ text ".align 3"
|
|
+ $$ ppr lab <> char ':'
|
|
+ $$ text ".quad ."
|
|
+ <> ppr lab
|
|
+ <> text ",.TOC.@tocbase,0"
|
|
+ $$ text ".previous"
|
|
+ $$ text ".type "
|
|
+ <> ppr lab
|
|
+ <> text ", @function"
|
|
+ $$ char '.'
|
|
+ <> ppr lab
|
|
+ <> char ':'
|
|
+
|
|
pprBasicBlock :: BlockEnv CmmStatics -> NatBasicBlock Instr -> SDoc
|
|
pprBasicBlock info_env (BasicBlock blockid instrs)
|
|
= maybe_infotable $$
|
|
@@ -213,6 +234,7 @@ pprSize x
|
|
II8 -> sLit "b"
|
|
II16 -> sLit "h"
|
|
II32 -> sLit "w"
|
|
+ II64 -> sLit "d"
|
|
FF32 -> sLit "fs"
|
|
FF64 -> sLit "fd"
|
|
_ -> panic "PPC.Ppr.pprSize: no match")
|
|
@@ -262,6 +284,18 @@ pprImm (HA i)
|
|
then hcat [ text "ha16(", pprImm i, rparen ]
|
|
else pprImm i <> text "@ha"
|
|
|
|
+pprImm (HIGHERA i)
|
|
+ = sdocWithPlatform $ \platform ->
|
|
+ if platformOS platform == OSDarwin
|
|
+ then panic "PPC.pprImm: highera not implemented on Darwin"
|
|
+ else pprImm i <> text "@highera"
|
|
+
|
|
+pprImm (HIGHESTA i)
|
|
+ = sdocWithPlatform $ \platform ->
|
|
+ if platformOS platform == OSDarwin
|
|
+ then panic "PPC.pprImm: highesta not implemented on Darwin"
|
|
+ else pprImm i <> text "@highesta"
|
|
+
|
|
|
|
pprAddr :: AddrMode -> SDoc
|
|
pprAddr (AddrRegReg r1 r2)
|
|
@@ -276,17 +310,23 @@ pprSectionHeader :: Section -> SDoc
|
|
pprSectionHeader seg
|
|
= sdocWithPlatform $ \platform ->
|
|
let osDarwin = platformOS platform == OSDarwin
|
|
+ ppc64 = platformArch platform == ArchPPC_64
|
|
in case seg of
|
|
Text -> ptext (sLit ".text\n.align 2")
|
|
- Data -> ptext (sLit ".data\n.align 2")
|
|
+ Data
|
|
+ | ppc64 -> ptext (sLit ".data\n.align 3")
|
|
+ | otherwise -> ptext (sLit ".data\n.align 2")
|
|
ReadOnlyData
|
|
| osDarwin -> ptext (sLit ".const\n.align 2")
|
|
+ | ppc64 -> ptext (sLit ".section .rodata\n\t.align 3")
|
|
| otherwise -> ptext (sLit ".section .rodata\n\t.align 2")
|
|
RelocatableReadOnlyData
|
|
| osDarwin -> ptext (sLit ".const_data\n.align 2")
|
|
+ | ppc64 -> ptext (sLit ".data\n\t.align 3")
|
|
| otherwise -> ptext (sLit ".data\n\t.align 2")
|
|
UninitialisedData
|
|
| osDarwin -> ptext (sLit ".const_data\n.align 2")
|
|
+ | ppc64 -> ptext (sLit ".section .bss\n\t.align 3")
|
|
| otherwise -> ptext (sLit ".section .bss\n\t.align 2")
|
|
ReadOnlyData16
|
|
| osDarwin -> ptext (sLit ".const\n.align 4")
|
|
@@ -298,33 +338,39 @@ pprSectionHeader seg
|
|
pprDataItem :: CmmLit -> SDoc
|
|
pprDataItem lit
|
|
= sdocWithDynFlags $ \dflags ->
|
|
- vcat (ppr_item (cmmTypeSize $ cmmLitType dflags lit) lit)
|
|
+ vcat (ppr_item (cmmTypeSize $ cmmLitType dflags lit) lit dflags)
|
|
where
|
|
imm = litToImm lit
|
|
+ archPPC_64 dflags = (platformArch $ targetPlatform dflags) == ArchPPC_64
|
|
+
|
|
+ ppr_item II8 _ _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
|
|
|
|
- ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
|
|
+ ppr_item II32 _ _ = [ptext (sLit "\t.long\t") <> pprImm imm]
|
|
|
|
- ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
|
|
+ ppr_item II64 _ dflags
|
|
+ | archPPC_64 dflags = [ptext (sLit "\t.quad\t") <> pprImm imm]
|
|
|
|
- ppr_item FF32 (CmmFloat r _)
|
|
+
|
|
+ ppr_item FF32 (CmmFloat r _) _
|
|
= let bs = floatToBytes (fromRational r)
|
|
in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
|
|
|
|
- ppr_item FF64 (CmmFloat r _)
|
|
+ ppr_item FF64 (CmmFloat r _) _
|
|
= let bs = doubleToBytes (fromRational r)
|
|
in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
|
|
|
|
- ppr_item II16 _ = [ptext (sLit "\t.short\t") <> pprImm imm]
|
|
+ ppr_item II16 _ _ = [ptext (sLit "\t.short\t") <> pprImm imm]
|
|
|
|
- ppr_item II64 (CmmInt x _) =
|
|
+ ppr_item II64 (CmmInt x _) dflags
|
|
+ | not(archPPC_64 dflags) =
|
|
[ptext (sLit "\t.long\t")
|
|
<> int (fromIntegral
|
|
(fromIntegral (x `shiftR` 32) :: Word32)),
|
|
ptext (sLit "\t.long\t")
|
|
<> int (fromIntegral (fromIntegral x :: Word32))]
|
|
|
|
- ppr_item _ _
|
|
- = panic "PPC.Ppr.pprDataItem: no match"
|
|
+ ppr_item _ expr _
|
|
+ = pprPanic "PPC.Ppr.pprDataItem: no match" (pprLit expr)
|
|
|
|
|
|
pprInstr :: Instr -> SDoc
|
|
@@ -370,6 +416,7 @@ pprInstr (LD sz reg addr) = hcat [
|
|
II8 -> sLit "bz"
|
|
II16 -> sLit "hz"
|
|
II32 -> sLit "wz"
|
|
+ II64 -> sLit "d"
|
|
FF32 -> sLit "fs"
|
|
FF64 -> sLit "fd"
|
|
_ -> panic "PPC.Ppr.pprInstr: no match"
|
|
@@ -388,6 +435,7 @@ pprInstr (LA sz reg addr) = hcat [
|
|
II8 -> sLit "ba"
|
|
II16 -> sLit "ha"
|
|
II32 -> sLit "wa"
|
|
+ II64 -> sLit "d"
|
|
FF32 -> sLit "fs"
|
|
FF64 -> sLit "fd"
|
|
_ -> panic "PPC.Ppr.pprInstr: no match"
|
|
@@ -505,12 +553,12 @@ pprInstr (BCCFAR cond blockid) = vcat [
|
|
where lbl = mkAsmTempLabel (getUnique blockid)
|
|
|
|
pprInstr (JMP lbl) = hcat [ -- an alias for b that takes a CLabel
|
|
- char '\t',
|
|
- ptext (sLit "b"),
|
|
- char '\t',
|
|
- ppr lbl
|
|
- ]
|
|
-
|
|
+ char '\t',
|
|
+ ptext (sLit "b"),
|
|
+ char '\t',
|
|
+ ppr lbl
|
|
+ ]
|
|
+
|
|
pprInstr (MTCTR reg) = hcat [
|
|
char '\t',
|
|
ptext (sLit "mtctr"),
|
|
@@ -556,10 +604,14 @@ pprInstr (ADDE reg1 reg2 reg3) = pprLogi
|
|
pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3)
|
|
pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3)
|
|
pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3)
|
|
+pprInstr (MULLD reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mulld") reg1 reg2 ri
|
|
pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri
|
|
+pprInstr (MULLD reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
|
|
pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
|
|
pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3)
|
|
+pprInstr (DIVD reg1 reg2 reg3) = pprLogic (sLit "divd") reg1 reg2 (RIReg reg3)
|
|
pprInstr (DIVWU reg1 reg2 reg3) = pprLogic (sLit "divwu") reg1 reg2 (RIReg reg3)
|
|
+pprInstr (DIVDU reg1 reg2 reg3) = pprLogic (sLit "divdu") reg1 reg2 (RIReg reg3)
|
|
|
|
pprInstr (MULLW_MayOflo reg1 reg2 reg3) = vcat [
|
|
hcat [ ptext (sLit "\tmullwo\t"), pprReg reg1, ptext (sLit ", "),
|
|
@@ -570,8 +622,17 @@ pprInstr (MULLW_MayOflo reg1 reg2 reg3)
|
|
pprReg reg1, ptext (sLit ", "),
|
|
ptext (sLit "2, 31, 31") ]
|
|
]
|
|
+pprInstr (MULLD_MayOflo reg1 reg2 reg3) = vcat [
|
|
+ hcat [ ptext (sLit "\tmulldo\t"), pprReg reg1, ptext (sLit ", "),
|
|
+ pprReg reg2, ptext (sLit ", "),
|
|
+ pprReg reg3 ],
|
|
+ hcat [ ptext (sLit "\tmfxer\t"), pprReg reg1 ],
|
|
+ hcat [ ptext (sLit "\trlwinm\t"), pprReg reg1, ptext (sLit ", "),
|
|
+ pprReg reg1, ptext (sLit ", "),
|
|
+ ptext (sLit "2, 31, 31") ]
|
|
+ ]
|
|
|
|
- -- for some reason, "andi" doesn't exist.
|
|
+ -- for some reason, "andi" doesn't exist.
|
|
-- we'll use "andi." instead.
|
|
pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
|
|
char '\t',
|
|
@@ -588,6 +649,17 @@ pprInstr (AND reg1 reg2 ri) = pprLogic (
|
|
pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri
|
|
pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri
|
|
|
|
+pprInstr (ORIS reg1 reg2 imm) = hcat [
|
|
+ char '\t',
|
|
+ ptext (sLit "oris"),
|
|
+ char '\t',
|
|
+ pprReg reg1,
|
|
+ ptext (sLit ", "),
|
|
+ pprReg reg2,
|
|
+ ptext (sLit ", "),
|
|
+ pprImm imm
|
|
+ ]
|
|
+
|
|
pprInstr (XORIS reg1 reg2 imm) = hcat [
|
|
char '\t',
|
|
ptext (sLit "xoris"),
|
|
@@ -612,17 +684,33 @@ pprInstr (EXTS sz reg1 reg2) = hcat [
|
|
pprInstr (NEG reg1 reg2) = pprUnary (sLit "neg") reg1 reg2
|
|
pprInstr (NOT reg1 reg2) = pprUnary (sLit "not") reg1 reg2
|
|
|
|
-pprInstr (SLW reg1 reg2 ri) = pprLogic (sLit "slw") reg1 reg2 (limitShiftRI ri)
|
|
+pprInstr (SL sz reg1 reg2 ri) =
|
|
+ let op = case sz of
|
|
+ II32 -> "slw"
|
|
+ II64 -> "sld"
|
|
+ _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
|
|
+ in pprLogic (sLit op) reg1 reg2 (limitShiftRI sz ri)
|
|
|
|
-pprInstr (SRW reg1 reg2 (RIImm (ImmInt i))) | i > 31 || i < 0 =
|
|
+pprInstr (SR II32 reg1 reg2 (RIImm (ImmInt i))) | i > 31 || i < 0 =
|
|
-- Handle the case where we are asked to shift a 32 bit register by
|
|
-- less than zero or more than 31 bits. We convert this into a clear
|
|
-- of the destination register.
|
|
-- Fixes ticket http://ghc.haskell.org/trac/ghc/ticket/5900
|
|
pprInstr (XOR reg1 reg2 (RIReg reg2))
|
|
-pprInstr (SRW reg1 reg2 ri) = pprLogic (sLit "srw") reg1 reg2 (limitShiftRI ri)
|
|
+pprInstr (SR sz reg1 reg2 ri) =
|
|
+ let op = case sz of
|
|
+ II32 -> "srw"
|
|
+ II64 -> "srd"
|
|
+ _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
|
|
+ in pprLogic (sLit op) reg1 reg2 (limitShiftRI sz ri)
|
|
+
|
|
+pprInstr (SRA sz reg1 reg2 ri) =
|
|
+ let op = case sz of
|
|
+ II32 -> "sraw"
|
|
+ II64 -> "srad"
|
|
+ _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
|
|
+ in pprLogic (sLit op) reg1 reg2 (limitShiftRI sz ri)
|
|
|
|
-pprInstr (SRAW reg1 reg2 ri) = pprLogic (sLit "sraw") reg1 reg2 (limitShiftRI ri)
|
|
pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [
|
|
ptext (sLit "\trlwinm\t"),
|
|
pprReg reg1,
|
|
@@ -654,6 +742,8 @@ pprInstr (FCMP reg1 reg2) = hcat [
|
|
]
|
|
|
|
pprInstr (FCTIWZ reg1 reg2) = pprUnary (sLit "fctiwz") reg1 reg2
|
|
+pprInstr (FCTIDZ reg1 reg2) = pprUnary (sLit "fctidz") reg1 reg2
|
|
+pprInstr (FCFID reg1 reg2) = pprUnary (sLit "fcfid") reg1 reg2
|
|
pprInstr (FRSP reg1 reg2) = pprUnary (sLit "frsp") reg1 reg2
|
|
|
|
pprInstr (CRNOR dst src1 src2) = hcat [
|
|
@@ -686,6 +776,8 @@ pprInstr (FETCHPC reg) = vcat [
|
|
|
|
pprInstr LWSYNC = ptext (sLit "\tlwsync")
|
|
|
|
+pprInstr NOP = ptext (sLit "\tnop")
|
|
+
|
|
-- pprInstr _ = panic "pprInstr (ppc)"
|
|
|
|
|
|
@@ -739,9 +831,12 @@ pprFSize FF64 = empty
|
|
pprFSize FF32 = char 's'
|
|
pprFSize _ = panic "PPC.Ppr.pprFSize: no match"
|
|
|
|
- -- limit immediate argument for shift instruction to range 0..31
|
|
-limitShiftRI :: RI -> RI
|
|
-limitShiftRI (RIImm (ImmInt i)) | i > 31 || i < 0 =
|
|
+ -- limit immediate argument for shift instruction to range 0..63
|
|
+ -- for 64 bit size and 0..32 otherwise
|
|
+limitShiftRI :: Size -> RI -> RI
|
|
+limitShiftRI II64 (RIImm (ImmInt i)) | i > 63 || i < 0 =
|
|
panic $ "PPC.Ppr: Shift by " ++ show i ++ " bits is not allowed."
|
|
-limitShiftRI x = x
|
|
+limitShiftRI II32 (RIImm (ImmInt i)) | i > 31 || i < 0 =
|
|
+ panic $ "PPC.Ppr: 32 bit: Shift by " ++ show i ++ " bits is not allowed."
|
|
+limitShiftRI _ x = x
|
|
|
|
Index: ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Regs.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs
|
|
@@ -35,7 +35,7 @@ module PPC.Regs (
|
|
fits16Bits,
|
|
makeImmediate,
|
|
fReg,
|
|
- sp, r3, r4, r27, r28, r30,
|
|
+ sp, toc, r3, r4, r11, r12, r27, r28, r30,
|
|
f1, f20, f21,
|
|
|
|
allocatableRegs
|
|
@@ -62,8 +62,8 @@ import FastBool
|
|
import FastTypes
|
|
import Platform
|
|
|
|
-import Data.Word ( Word8, Word16, Word32 )
|
|
-import Data.Int ( Int8, Int16, Int32 )
|
|
+import Data.Word ( Word8, Word16, Word32, Word64 )
|
|
+import Data.Int ( Int8, Int16, Int32, Int64 )
|
|
|
|
|
|
-- squeese functions for the graph allocator -----------------------------------
|
|
@@ -145,6 +145,8 @@ data Imm
|
|
| LO Imm
|
|
| HI Imm
|
|
| HA Imm {- high halfword adjusted -}
|
|
+ | HIGHERA Imm
|
|
+ | HIGHESTA Imm
|
|
|
|
|
|
strImmLit :: String -> Imm
|
|
@@ -267,9 +269,11 @@ fits16Bits x = x >= -32768 && x < 32768
|
|
makeImmediate :: Integral a => Width -> Bool -> a -> Maybe Imm
|
|
makeImmediate rep signed x = fmap ImmInt (toI16 rep signed)
|
|
where
|
|
+ narrow W64 False = fromIntegral (fromIntegral x :: Word64)
|
|
narrow W32 False = fromIntegral (fromIntegral x :: Word32)
|
|
narrow W16 False = fromIntegral (fromIntegral x :: Word16)
|
|
narrow W8 False = fromIntegral (fromIntegral x :: Word8)
|
|
+ narrow W64 True = fromIntegral (fromIntegral x :: Int64)
|
|
narrow W32 True = fromIntegral (fromIntegral x :: Int32)
|
|
narrow W16 True = fromIntegral (fromIntegral x :: Int16)
|
|
narrow W8 True = fromIntegral (fromIntegral x :: Int8)
|
|
@@ -283,6 +287,12 @@ makeImmediate rep signed x = fmap ImmInt
|
|
toI16 W32 False
|
|
| narrowed >= 0 && narrowed < 65536 = Just narrowed
|
|
| otherwise = Nothing
|
|
+ toI16 W64 True
|
|
+ | narrowed >= -32768 && narrowed < 32768 = Just narrowed
|
|
+ | otherwise = Nothing
|
|
+ toI16 W64 False
|
|
+ | narrowed >= 0 && narrowed < 65536 = Just narrowed
|
|
+ | otherwise = Nothing
|
|
toI16 _ _ = Just narrowed
|
|
|
|
|
|
@@ -294,10 +304,13 @@ point registers.
|
|
fReg :: Int -> RegNo
|
|
fReg x = (32 + x)
|
|
|
|
-sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg
|
|
+sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
|
|
sp = regSingle 1
|
|
+toc = regSingle 2
|
|
r3 = regSingle 3
|
|
r4 = regSingle 4
|
|
+r11 = regSingle 11
|
|
+r12 = regSingle 12
|
|
r27 = regSingle 27
|
|
r28 = regSingle 28
|
|
r30 = regSingle 30
|
|
Index: ghc-7.8.3/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
|
|
@@ -74,7 +74,7 @@ maxSpillSlots dflags
|
|
ArchPPC -> PPC.Instr.maxSpillSlots dflags
|
|
ArchSPARC -> SPARC.Instr.maxSpillSlots dflags
|
|
ArchARM _ _ _ -> panic "maxSpillSlots ArchARM"
|
|
- ArchPPC_64 -> panic "maxSpillSlots ArchPPC_64"
|
|
+ ArchPPC_64 -> PPC.Instr.maxSpillSlots dflags
|
|
ArchAlpha -> panic "maxSpillSlots ArchAlpha"
|
|
ArchMipseb -> panic "maxSpillSlots ArchMipseb"
|
|
ArchMipsel -> panic "maxSpillSlots ArchMipsel"
|
|
Index: ghc-7.8.3/compiler/nativeGen/RegAlloc/Linear/Main.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/RegAlloc/Linear/Main.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/RegAlloc/Linear/Main.hs
|
|
@@ -207,7 +207,7 @@ linearRegAlloc dflags first_id block_liv
|
|
ArchSPARC -> linearRegAlloc' dflags (frInitFreeRegs platform :: SPARC.FreeRegs) first_id block_live sccs
|
|
ArchPPC -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) first_id block_live sccs
|
|
ArchARM _ _ _ -> panic "linearRegAlloc ArchARM"
|
|
- ArchPPC_64 -> panic "linearRegAlloc ArchPPC_64"
|
|
+ ArchPPC_64 -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) first_id block_live sccs
|
|
ArchAlpha -> panic "linearRegAlloc ArchAlpha"
|
|
ArchMipseb -> panic "linearRegAlloc ArchMipseb"
|
|
ArchMipsel -> panic "linearRegAlloc ArchMipsel"
|
|
Index: ghc-7.8.3/compiler/nativeGen/TargetReg.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/compiler/nativeGen/TargetReg.hs
|
|
+++ ghc-7.8.3/compiler/nativeGen/TargetReg.hs
|
|
@@ -52,7 +52,7 @@ targetVirtualRegSqueeze platform
|
|
ArchX86_64 -> X86.virtualRegSqueeze
|
|
ArchPPC -> PPC.virtualRegSqueeze
|
|
ArchSPARC -> SPARC.virtualRegSqueeze
|
|
- ArchPPC_64 -> panic "targetVirtualRegSqueeze ArchPPC_64"
|
|
+ ArchPPC_64 -> PPC.virtualRegSqueeze
|
|
ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM"
|
|
ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha"
|
|
ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb"
|
|
@@ -68,7 +68,7 @@ targetRealRegSqueeze platform
|
|
ArchX86_64 -> X86.realRegSqueeze
|
|
ArchPPC -> PPC.realRegSqueeze
|
|
ArchSPARC -> SPARC.realRegSqueeze
|
|
- ArchPPC_64 -> panic "targetRealRegSqueeze ArchPPC_64"
|
|
+ ArchPPC_64 -> PPC.realRegSqueeze
|
|
ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM"
|
|
ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha"
|
|
ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb"
|
|
@@ -83,7 +83,7 @@ targetClassOfRealReg platform
|
|
ArchX86_64 -> X86.classOfRealReg platform
|
|
ArchPPC -> PPC.classOfRealReg
|
|
ArchSPARC -> SPARC.classOfRealReg
|
|
- ArchPPC_64 -> panic "targetClassOfRealReg ArchPPC_64"
|
|
+ ArchPPC_64 -> PPC.classOfRealReg
|
|
ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM"
|
|
ArchAlpha -> panic "targetClassOfRealReg ArchAlpha"
|
|
ArchMipseb -> panic "targetClassOfRealReg ArchMipseb"
|
|
@@ -98,7 +98,7 @@ targetMkVirtualReg platform
|
|
ArchX86_64 -> X86.mkVirtualReg
|
|
ArchPPC -> PPC.mkVirtualReg
|
|
ArchSPARC -> SPARC.mkVirtualReg
|
|
- ArchPPC_64 -> panic "targetMkVirtualReg ArchPPC_64"
|
|
+ ArchPPC_64 -> PPC.mkVirtualReg
|
|
ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM"
|
|
ArchAlpha -> panic "targetMkVirtualReg ArchAlpha"
|
|
ArchMipseb -> panic "targetMkVirtualReg ArchMipseb"
|
|
@@ -113,7 +113,7 @@ targetRegDotColor platform
|
|
ArchX86_64 -> X86.regDotColor platform
|
|
ArchPPC -> PPC.regDotColor
|
|
ArchSPARC -> SPARC.regDotColor
|
|
- ArchPPC_64 -> panic "targetRegDotColor ArchPPC_64"
|
|
+ ArchPPC_64 -> PPC.regDotColor
|
|
ArchARM _ _ _ -> panic "targetRegDotColor ArchARM"
|
|
ArchAlpha -> panic "targetRegDotColor ArchAlpha"
|
|
ArchMipseb -> panic "targetRegDotColor ArchMipseb"
|
|
Index: ghc-7.8.3/configure.ac
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/configure.ac
|
|
+++ ghc-7.8.3/configure.ac
|
|
@@ -238,7 +238,7 @@ AC_SUBST(SOLARIS_BROKEN_SHLD)
|
|
dnl ** Do an unregisterised build?
|
|
dnl --------------------------------------------------------------
|
|
case "$HostArch" in
|
|
- i386|x86_64|powerpc|arm)
|
|
+ i386|x86_64|powerpc|powerpc64|arm)
|
|
UnregisterisedDefault=NO
|
|
;;
|
|
*)
|
|
Index: ghc-7.8.3/includes/CodeGen.Platform.hs
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/includes/CodeGen.Platform.hs
|
|
+++ ghc-7.8.3/includes/CodeGen.Platform.hs
|
|
@@ -801,6 +801,10 @@ freeRegBase _ = fastBool True
|
|
freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns,
|
|
-- but it's actually free
|
|
freeReg 1 = fastBool False -- The Stack Pointer
|
|
+
|
|
+-- TODO: make this conditonal for ppc64 ELF
|
|
+freeReg 13 = fastBool False -- reserved for system thread ID
|
|
+
|
|
# if !MACHREGS_darwin
|
|
-- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
|
|
freeReg 2 = fastBool False
|
|
Index: ghc-7.8.3/mk/config.mk.in
|
|
===================================================================
|
|
--- ghc-7.8.3.orig/mk/config.mk.in
|
|
+++ ghc-7.8.3/mk/config.mk.in
|
|
@@ -161,9 +161,9 @@ GhcUnregisterised=@Unregisterised@
|
|
# (as well as a C backend)
|
|
#
|
|
# Target platforms supported:
|
|
-# i386, powerpc
|
|
+# i386, powerpc, powerpc64, sparc
|
|
# IOS and AIX are not supported
|
|
-ArchSupportsNCG=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc sparc)))
|
|
+ArchSupportsNCG=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 sparc)))
|
|
OsSupportsNCG=$(strip $(patsubst $(TargetOS_CPP), YES, $(patsubst ios,,$(patsubst aix,,$(TargetOS_CPP)))))
|
|
|
|
GhcWithNativeCodeGen := $(strip\
|