ghc/D560.patch

321 lines
13 KiB
Diff

Index: ghc-7.8.3/compiler/cmm/CLabel.hs
===================================================================
--- ghc-7.8.3.orig/compiler/cmm/CLabel.hs
+++ ghc-7.8.3/compiler/cmm/CLabel.hs
@@ -1158,7 +1158,8 @@ pprDynamicLinkerAsmLabel platform dllInf
else if osElfTarget (platformOS platform)
then if platformArch platform == ArchPPC
then case dllInfo of
- CodeStub -> ppr lbl <> text "@plt"
+ CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
+ ppr lbl <> text "+32768@plt"
SymbolPtr -> text ".LC_" <> ppr lbl
_ -> panic "pprDynamicLinkerAsmLabel"
else if platformArch platform == ArchX86_64
Index: ghc-7.8.3/compiler/nativeGen/PIC.hs
===================================================================
--- ghc-7.8.3.orig/compiler/nativeGen/PIC.hs
+++ ghc-7.8.3/compiler/nativeGen/PIC.hs
@@ -54,7 +54,6 @@ import qualified X86.Instr as X86
import Platform
import Instruction
-import Size
import Reg
import NCGMonad
@@ -468,11 +467,8 @@ pprGotDeclaration dflags ArchX86 OSDarwi
pprGotDeclaration _ _ OSDarwin
= empty
--- pprGotDeclaration
+-- Emit GOT declaration
-- Output whatever needs to be output once per .s file.
--- The .LCTOC1 label is defined to point 32768 bytes into the table,
--- to make the most of the PPC's 16-bit displacements.
--- Only needed for PIC.
pprGotDeclaration dflags arch os
| osElfTarget os
, arch /= ArchPPC_64
@@ -482,6 +478,7 @@ pprGotDeclaration dflags arch os
| osElfTarget os
, arch /= ArchPPC_64
= vcat [
+ -- See Note [.LCTOC1 in PPC PIC code]
ptext (sLit ".section \".got2\",\"aw\""),
ptext (sLit ".LCTOC1 = .+32768") ]
@@ -688,12 +685,7 @@ pprImportedSymbol _ _ _
-- Get a pointer to our own fake GOT, which is defined on a per-module basis.
--- This is exactly how GCC does it, and it's quite horrible:
--- We first fetch the address of a local label (mkPicBaseLabel).
--- Then we add a 16-bit offset to that to get the address of a .long that we
--- define in .text space right next to the proc. This .long literal contains
--- the (32-bit) offset from our local label to our global offset table
--- (.LCTOC1 aka gotOffLabel).
+-- This is exactly how GCC does it in linux.
initializePicBase_ppc
:: Arch -> OS -> Reg
@@ -704,18 +696,9 @@ initializePicBase_ppc ArchPPC os picReg
(CmmProc info lab live (ListGraph blocks) : statics)
| osElfTarget os
= do
- dflags <- getDynFlags
- gotOffLabel <- getNewLabelNat
- tmp <- getNewRegNat $ intSize (wordWidth dflags)
let
- gotOffset = CmmData Text $ Statics gotOffLabel [
- CmmStaticLit (CmmLabelDiffOff gotLabel
- mkPicBaseLabel
- 0)
- ]
- offsetToOffset
- = PPC.ImmConstantDiff
- (PPC.ImmCLbl gotOffLabel)
+ gotOffset = PPC.ImmConstantDiff
+ (PPC.ImmCLbl gotLabel)
(PPC.ImmCLbl mkPicBaseLabel)
blocks' = case blocks of
@@ -726,15 +709,23 @@ initializePicBase_ppc ArchPPC os picReg
| bID `mapMember` info = fetchPC b
| otherwise = b
+ -- GCC does PIC prologs thusly:
+ -- bcl 20,31,.L1
+ -- .L1:
+ -- mflr 30
+ -- addis 30,30,.LCTOC1-.L1@ha
+ -- addi 30,30,.LCTOC1-.L1@l
+ -- TODO: below we use it over temporary register,
+ -- it can and should be optimised by picking
+ -- correct PIC reg.
fetchPC (BasicBlock bID insns) =
BasicBlock bID (PPC.FETCHPC picReg
- : PPC.ADDIS tmp picReg (PPC.HI offsetToOffset)
- : PPC.LD PPC.archWordSize tmp
- (PPC.AddrRegImm tmp (PPC.LO offsetToOffset))
- : PPC.ADD picReg picReg (PPC.RIReg picReg)
+ : PPC.ADDIS picReg picReg (PPC.HA gotOffset)
+ : PPC.ADDI picReg picReg (PPC.LO gotOffset)
+ : PPC.MR PPC.r30 picReg
: insns)
- return (CmmProc info lab live (ListGraph blocks') : gotOffset : statics)
+ return (CmmProc info lab live (ListGraph blocks') : statics)
initializePicBase_ppc ArchPPC OSDarwin picReg
Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs
===================================================================
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/CodeGen.hs
+++ ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs
@@ -54,7 +54,7 @@ import Outputable
import Unique
import DynFlags
-import Control.Monad ( mapAndUnzipM )
+import Control.Monad ( mapAndUnzipM, when )
import Data.Bits
import Data.Word
@@ -355,6 +355,19 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]
ADDE rhi r1hi r2hi ]
return (ChildCode64 code rlo)
+iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ SUBFC rlo r2lo r1lo,
+ SUBFE rhi r2hi r1hi ]
+ return (ChildCode64 code rlo)
+
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
(expr_reg,expr_code) <- getSomeReg expr
(rlo, rhi) <- getNewRegPairNat II32
@@ -927,8 +940,12 @@ genCCall' dflags gcp target dest_regs ar
(toOL []) []
(labelOrExpr, reduceToFF32) <- case target of
- ForeignTarget (CmmLit (CmmLabel lbl)) _ -> return (Left lbl, False)
- ForeignTarget expr _ -> return (Right expr, False)
+ ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
+ uses_pic_base_implicitly
+ return (Left lbl, False)
+ ForeignTarget expr _ -> do
+ uses_pic_base_implicitly
+ return (Right expr, False)
PrimTarget mop -> outOfLineMachOp mop
let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
@@ -949,6 +966,13 @@ genCCall' dflags gcp target dest_regs ar
where
platform = targetPlatform dflags
+ uses_pic_base_implicitly = do
+ -- See Note [implicit register in PPC PIC code]
+ -- on why we claim to use PIC register here
+ when (gopt Opt_PIC dflags) $ do
+ _ <- getPicBaseNat archWordSize
+ return ()
+
initialStackOffset = case gcp of
GCPDarwin -> 24
GCPLinux -> 8
@@ -1432,3 +1456,21 @@ coerceFP2Int _ toRep x = do
-- read low word of value (high word is undefined)
LD II32 dst (spRel dflags 3)]
return (Any (intSize toRep) code')
+
+-- Note [.LCTOC1 in PPC PIC code]
+-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
+-- to make the most of the PPC's 16-bit displacements.
+-- As 16-bit signed offset is used (usually via addi/lwz instructions)
+-- first element will have '-32768' offset against .LCTOC1.
+
+-- Note [implicit register in PPC PIC code]
+-- PPC generates calls by labels in assembly
+-- in form of:
+-- bl puts+32768@plt
+-- in this form it's not seen directly (by GHC NCG)
+-- that r30 (PicBaseReg) is used,
+-- but r30 is a required part of PLT code setup:
+-- puts+32768@plt:
+-- lwz r11,-30484(r30) ; offset in .LCTOC1
+-- mtctr r11
+-- bctr
Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs
===================================================================
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Instr.hs
+++ ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs
@@ -203,8 +203,11 @@ data Instr
| ADD Reg Reg RI -- dst, src1, src2
| ADDC Reg Reg Reg -- (carrying) dst, src1, src2
| ADDE Reg Reg Reg -- (extend) dst, src1, src2
+ | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2
| ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
| SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
+ | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1
+ | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1
| MULLW Reg Reg RI
| DIVW Reg Reg Reg
| DIVWU Reg Reg Reg
@@ -282,8 +285,11 @@ ppc_regUsageOfInstr platform instr
ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ ADDI reg1 reg2 _ -> usage ([reg2], [reg1])
ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
@@ -356,8 +362,11 @@ ppc_patchRegsOfInstr instr env
ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3)
ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3)
+ ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm
ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3)
+ SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3)
+ SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3)
MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3)
DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3)
Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs
===================================================================
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Ppr.hs
+++ ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs
@@ -530,6 +530,16 @@ pprInstr (BCTRL _) = hcat [
ptext (sLit "bctrl")
]
pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri
+pprInstr (ADDI reg1 reg2 imm) = hcat [
+ char '\t',
+ ptext (sLit "addi"),
+ char '\t',
+ pprReg reg1,
+ ptext (sLit ", "),
+ pprReg reg2,
+ ptext (sLit ", "),
+ pprImm imm
+ ]
pprInstr (ADDIS reg1 reg2 imm) = hcat [
char '\t',
ptext (sLit "addis"),
@@ -544,6 +554,8 @@ pprInstr (ADDIS reg1 reg2 imm) = hcat [
pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3)
pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3)
pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3)
+pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3)
+pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3)
pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri
pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3)
Index: ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs
===================================================================
--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Regs.hs
+++ ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs
@@ -35,7 +35,8 @@ module PPC.Regs (
fits16Bits,
makeImmediate,
fReg,
- sp, r3, r4, r27, r28, f1, f20, f21,
+ sp, r3, r4, r27, r28, r30,
+ f1, f20, f21,
allocatableRegs
@@ -293,12 +294,13 @@ point registers.
fReg :: Int -> RegNo
fReg x = (32 + x)
-sp, r3, r4, r27, r28, f1, f20, f21 :: Reg
+sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg
sp = regSingle 1
r3 = regSingle 3
r4 = regSingle 4
r27 = regSingle 27
r28 = regSingle 28
+r30 = regSingle 30
f1 = regSingle $ fReg 1
f20 = regSingle $ fReg 20
f21 = regSingle $ fReg 21
Index: ghc-7.8.3/includes/CodeGen.Platform.hs
===================================================================
--- ghc-7.8.3.orig/includes/CodeGen.Platform.hs
+++ ghc-7.8.3/includes/CodeGen.Platform.hs
@@ -804,6 +804,8 @@ freeReg 1 = fastBool False -- The Stack
# if !MACHREGS_darwin
-- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
freeReg 2 = fastBool False
+-- at least linux in -fPIC relies on r30 in PLT stubs
+freeReg 30 = fastBool False
# endif
# ifdef REG_Base
freeReg REG_Base = fastBool False
Index: ghc-7.8.3/mk/config.mk.in
===================================================================
--- ghc-7.8.3.orig/mk/config.mk.in
+++ ghc-7.8.3/mk/config.mk.in
@@ -95,7 +95,7 @@ TargetElf = YES
endif
# Some platforms don't support shared libraries
-NoSharedLibsPlatformList = powerpc-unknown-linux \
+NoSharedLibsPlatformList = \
x86_64-unknown-mingw32 \
i386-unknown-mingw32 \
sparc-sun-solaris2 \