Index: ghc-7.8.3/compiler/cmm/CLabel.hs =================================================================== --- ghc-7.8.3.orig/compiler/cmm/CLabel.hs +++ ghc-7.8.3/compiler/cmm/CLabel.hs @@ -1158,7 +1158,8 @@ pprDynamicLinkerAsmLabel platform dllInf else if osElfTarget (platformOS platform) then if platformArch platform == ArchPPC then case dllInfo of - CodeStub -> ppr lbl <> text "@plt" + CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] + ppr lbl <> text "+32768@plt" SymbolPtr -> text ".LC_" <> ppr lbl _ -> panic "pprDynamicLinkerAsmLabel" else if platformArch platform == ArchX86_64 Index: ghc-7.8.3/compiler/nativeGen/PIC.hs =================================================================== --- ghc-7.8.3.orig/compiler/nativeGen/PIC.hs +++ ghc-7.8.3/compiler/nativeGen/PIC.hs @@ -54,7 +54,6 @@ import qualified X86.Instr as X86 import Platform import Instruction -import Size import Reg import NCGMonad @@ -468,11 +467,8 @@ pprGotDeclaration dflags ArchX86 OSDarwi pprGotDeclaration _ _ OSDarwin = empty --- pprGotDeclaration +-- Emit GOT declaration -- Output whatever needs to be output once per .s file. --- The .LCTOC1 label is defined to point 32768 bytes into the table, --- to make the most of the PPC's 16-bit displacements. --- Only needed for PIC. pprGotDeclaration dflags arch os | osElfTarget os , arch /= ArchPPC_64 @@ -482,6 +478,7 @@ pprGotDeclaration dflags arch os | osElfTarget os , arch /= ArchPPC_64 = vcat [ + -- See Note [.LCTOC1 in PPC PIC code] ptext (sLit ".section \".got2\",\"aw\""), ptext (sLit ".LCTOC1 = .+32768") ] @@ -688,12 +685,7 @@ pprImportedSymbol _ _ _ -- Get a pointer to our own fake GOT, which is defined on a per-module basis. --- This is exactly how GCC does it, and it's quite horrible: --- We first fetch the address of a local label (mkPicBaseLabel). --- Then we add a 16-bit offset to that to get the address of a .long that we --- define in .text space right next to the proc. This .long literal contains --- the (32-bit) offset from our local label to our global offset table --- (.LCTOC1 aka gotOffLabel). +-- This is exactly how GCC does it in linux. initializePicBase_ppc :: Arch -> OS -> Reg @@ -704,18 +696,9 @@ initializePicBase_ppc ArchPPC os picReg (CmmProc info lab live (ListGraph blocks) : statics) | osElfTarget os = do - dflags <- getDynFlags - gotOffLabel <- getNewLabelNat - tmp <- getNewRegNat $ intSize (wordWidth dflags) let - gotOffset = CmmData Text $ Statics gotOffLabel [ - CmmStaticLit (CmmLabelDiffOff gotLabel - mkPicBaseLabel - 0) - ] - offsetToOffset - = PPC.ImmConstantDiff - (PPC.ImmCLbl gotOffLabel) + gotOffset = PPC.ImmConstantDiff + (PPC.ImmCLbl gotLabel) (PPC.ImmCLbl mkPicBaseLabel) blocks' = case blocks of @@ -726,15 +709,23 @@ initializePicBase_ppc ArchPPC os picReg | bID `mapMember` info = fetchPC b | otherwise = b + -- GCC does PIC prologs thusly: + -- bcl 20,31,.L1 + -- .L1: + -- mflr 30 + -- addis 30,30,.LCTOC1-.L1@ha + -- addi 30,30,.LCTOC1-.L1@l + -- TODO: below we use it over temporary register, + -- it can and should be optimised by picking + -- correct PIC reg. fetchPC (BasicBlock bID insns) = BasicBlock bID (PPC.FETCHPC picReg - : PPC.ADDIS tmp picReg (PPC.HI offsetToOffset) - : PPC.LD PPC.archWordSize tmp - (PPC.AddrRegImm tmp (PPC.LO offsetToOffset)) - : PPC.ADD picReg picReg (PPC.RIReg picReg) + : PPC.ADDIS picReg picReg (PPC.HA gotOffset) + : PPC.ADDI picReg picReg (PPC.LO gotOffset) + : PPC.MR PPC.r30 picReg : insns) - return (CmmProc info lab live (ListGraph blocks') : gotOffset : statics) + return (CmmProc info lab live (ListGraph blocks') : statics) initializePicBase_ppc ArchPPC OSDarwin picReg Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs =================================================================== --- ghc-7.8.3.orig/compiler/nativeGen/PPC/CodeGen.hs +++ ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs @@ -54,7 +54,7 @@ import Outputable import Unique import DynFlags -import Control.Monad ( mapAndUnzipM ) +import Control.Monad ( mapAndUnzipM, when ) import Data.Bits import Data.Word @@ -355,6 +355,19 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2] ADDE rhi r1hi r2hi ] return (ChildCode64 code rlo) +iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + ChildCode64 code2 r2lo <- iselExpr64 e2 + (rlo,rhi) <- getNewRegPairNat II32 + let + r1hi = getHiVRegFromLo r1lo + r2hi = getHiVRegFromLo r2lo + code = code1 `appOL` + code2 `appOL` + toOL [ SUBFC rlo r2lo r1lo, + SUBFE rhi r2hi r1hi ] + return (ChildCode64 code rlo) + iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do (expr_reg,expr_code) <- getSomeReg expr (rlo, rhi) <- getNewRegPairNat II32 @@ -927,8 +940,12 @@ genCCall' dflags gcp target dest_regs ar (toOL []) [] (labelOrExpr, reduceToFF32) <- case target of - ForeignTarget (CmmLit (CmmLabel lbl)) _ -> return (Left lbl, False) - ForeignTarget expr _ -> return (Right expr, False) + ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do + uses_pic_base_implicitly + return (Left lbl, False) + ForeignTarget expr _ -> do + uses_pic_base_implicitly + return (Right expr, False) PrimTarget mop -> outOfLineMachOp mop let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode @@ -949,6 +966,13 @@ genCCall' dflags gcp target dest_regs ar where platform = targetPlatform dflags + uses_pic_base_implicitly = do + -- See Note [implicit register in PPC PIC code] + -- on why we claim to use PIC register here + when (gopt Opt_PIC dflags) $ do + _ <- getPicBaseNat archWordSize + return () + initialStackOffset = case gcp of GCPDarwin -> 24 GCPLinux -> 8 @@ -1432,3 +1456,21 @@ coerceFP2Int _ toRep x = do -- read low word of value (high word is undefined) LD II32 dst (spRel dflags 3)] return (Any (intSize toRep) code') + +-- Note [.LCTOC1 in PPC PIC code] +-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table +-- to make the most of the PPC's 16-bit displacements. +-- As 16-bit signed offset is used (usually via addi/lwz instructions) +-- first element will have '-32768' offset against .LCTOC1. + +-- Note [implicit register in PPC PIC code] +-- PPC generates calls by labels in assembly +-- in form of: +-- bl puts+32768@plt +-- in this form it's not seen directly (by GHC NCG) +-- that r30 (PicBaseReg) is used, +-- but r30 is a required part of PLT code setup: +-- puts+32768@plt: +-- lwz r11,-30484(r30) ; offset in .LCTOC1 +-- mtctr r11 +-- bctr Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs =================================================================== --- ghc-7.8.3.orig/compiler/nativeGen/PPC/Instr.hs +++ ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs @@ -203,8 +203,11 @@ data Instr | ADD Reg Reg RI -- dst, src1, src2 | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 | ADDE Reg Reg Reg -- (extend) dst, src1, src2 + | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2 | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 + | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1 + | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1 | MULLW Reg Reg RI | DIVW Reg Reg Reg | DIVWU Reg Reg Reg @@ -282,8 +285,11 @@ ppc_regUsageOfInstr platform instr ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + ADDI reg1 reg2 _ -> usage ([reg2], [reg1]) ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) @@ -356,8 +362,11 @@ ppc_patchRegsOfInstr instr env ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3) ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3) + ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3) + SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3) + SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3) MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3) DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3) Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs =================================================================== --- ghc-7.8.3.orig/compiler/nativeGen/PPC/Ppr.hs +++ ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs @@ -530,6 +530,16 @@ pprInstr (BCTRL _) = hcat [ ptext (sLit "bctrl") ] pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri +pprInstr (ADDI reg1 reg2 imm) = hcat [ + char '\t', + ptext (sLit "addi"), + char '\t', + pprReg reg1, + ptext (sLit ", "), + pprReg reg2, + ptext (sLit ", "), + pprImm imm + ] pprInstr (ADDIS reg1 reg2 imm) = hcat [ char '\t', ptext (sLit "addis"), @@ -544,6 +554,8 @@ pprInstr (ADDIS reg1 reg2 imm) = hcat [ pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3) pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3) pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) +pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3) +pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3) Index: ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs =================================================================== --- ghc-7.8.3.orig/compiler/nativeGen/PPC/Regs.hs +++ ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs @@ -35,7 +35,8 @@ module PPC.Regs ( fits16Bits, makeImmediate, fReg, - sp, r3, r4, r27, r28, f1, f20, f21, + sp, r3, r4, r27, r28, r30, + f1, f20, f21, allocatableRegs @@ -293,12 +294,13 @@ point registers. fReg :: Int -> RegNo fReg x = (32 + x) -sp, r3, r4, r27, r28, f1, f20, f21 :: Reg +sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg sp = regSingle 1 r3 = regSingle 3 r4 = regSingle 4 r27 = regSingle 27 r28 = regSingle 28 +r30 = regSingle 30 f1 = regSingle $ fReg 1 f20 = regSingle $ fReg 20 f21 = regSingle $ fReg 21 Index: ghc-7.8.3/includes/CodeGen.Platform.hs =================================================================== --- ghc-7.8.3.orig/includes/CodeGen.Platform.hs +++ ghc-7.8.3/includes/CodeGen.Platform.hs @@ -804,6 +804,8 @@ freeReg 1 = fastBool False -- The Stack # if !MACHREGS_darwin -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that freeReg 2 = fastBool False +-- at least linux in -fPIC relies on r30 in PLT stubs +freeReg 30 = fastBool False # endif # ifdef REG_Base freeReg REG_Base = fastBool False Index: ghc-7.8.3/mk/config.mk.in =================================================================== --- ghc-7.8.3.orig/mk/config.mk.in +++ ghc-7.8.3/mk/config.mk.in @@ -95,7 +95,7 @@ TargetElf = YES endif # Some platforms don't support shared libraries -NoSharedLibsPlatformList = powerpc-unknown-linux \ +NoSharedLibsPlatformList = \ x86_64-unknown-mingw32 \ i386-unknown-mingw32 \ sparc-sun-solaris2 \