diff --git a/D560.patch b/D560.patch new file mode 100644 index 0000000..5f7a86c --- /dev/null +++ b/D560.patch @@ -0,0 +1,320 @@ +Index: ghc-7.8.3/compiler/cmm/CLabel.hs +=================================================================== +--- ghc-7.8.3.orig/compiler/cmm/CLabel.hs ++++ ghc-7.8.3/compiler/cmm/CLabel.hs +@@ -1158,7 +1158,8 @@ pprDynamicLinkerAsmLabel platform dllInf + else if osElfTarget (platformOS platform) + then if platformArch platform == ArchPPC + then case dllInfo of +- CodeStub -> ppr lbl <> text "@plt" ++ CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] ++ ppr lbl <> text "+32768@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + else if platformArch platform == ArchX86_64 +Index: ghc-7.8.3/compiler/nativeGen/PIC.hs +=================================================================== +--- ghc-7.8.3.orig/compiler/nativeGen/PIC.hs ++++ ghc-7.8.3/compiler/nativeGen/PIC.hs +@@ -54,7 +54,6 @@ import qualified X86.Instr as X86 + + import Platform + import Instruction +-import Size + import Reg + import NCGMonad + +@@ -468,11 +467,8 @@ pprGotDeclaration dflags ArchX86 OSDarwi + pprGotDeclaration _ _ OSDarwin + = empty + +--- pprGotDeclaration ++-- Emit GOT declaration + -- Output whatever needs to be output once per .s file. +--- The .LCTOC1 label is defined to point 32768 bytes into the table, +--- to make the most of the PPC's 16-bit displacements. +--- Only needed for PIC. + pprGotDeclaration dflags arch os + | osElfTarget os + , arch /= ArchPPC_64 +@@ -482,6 +478,7 @@ pprGotDeclaration dflags arch os + | osElfTarget os + , arch /= ArchPPC_64 + = vcat [ ++ -- See Note [.LCTOC1 in PPC PIC code] + ptext (sLit ".section \".got2\",\"aw\""), + ptext (sLit ".LCTOC1 = .+32768") ] + +@@ -688,12 +685,7 @@ pprImportedSymbol _ _ _ + + + -- Get a pointer to our own fake GOT, which is defined on a per-module basis. +--- This is exactly how GCC does it, and it's quite horrible: +--- We first fetch the address of a local label (mkPicBaseLabel). +--- Then we add a 16-bit offset to that to get the address of a .long that we +--- define in .text space right next to the proc. This .long literal contains +--- the (32-bit) offset from our local label to our global offset table +--- (.LCTOC1 aka gotOffLabel). ++-- This is exactly how GCC does it in linux. + + initializePicBase_ppc + :: Arch -> OS -> Reg +@@ -704,18 +696,9 @@ initializePicBase_ppc ArchPPC os picReg + (CmmProc info lab live (ListGraph blocks) : statics) + | osElfTarget os + = do +- dflags <- getDynFlags +- gotOffLabel <- getNewLabelNat +- tmp <- getNewRegNat $ intSize (wordWidth dflags) + let +- gotOffset = CmmData Text $ Statics gotOffLabel [ +- CmmStaticLit (CmmLabelDiffOff gotLabel +- mkPicBaseLabel +- 0) +- ] +- offsetToOffset +- = PPC.ImmConstantDiff +- (PPC.ImmCLbl gotOffLabel) ++ gotOffset = PPC.ImmConstantDiff ++ (PPC.ImmCLbl gotLabel) + (PPC.ImmCLbl mkPicBaseLabel) + + blocks' = case blocks of +@@ -726,15 +709,23 @@ initializePicBase_ppc ArchPPC os picReg + | bID `mapMember` info = fetchPC b + | otherwise = b + ++ -- GCC does PIC prologs thusly: ++ -- bcl 20,31,.L1 ++ -- .L1: ++ -- mflr 30 ++ -- addis 30,30,.LCTOC1-.L1@ha ++ -- addi 30,30,.LCTOC1-.L1@l ++ -- TODO: below we use it over temporary register, ++ -- it can and should be optimised by picking ++ -- correct PIC reg. + fetchPC (BasicBlock bID insns) = + BasicBlock bID (PPC.FETCHPC picReg +- : PPC.ADDIS tmp picReg (PPC.HI offsetToOffset) +- : PPC.LD PPC.archWordSize tmp +- (PPC.AddrRegImm tmp (PPC.LO offsetToOffset)) +- : PPC.ADD picReg picReg (PPC.RIReg picReg) ++ : PPC.ADDIS picReg picReg (PPC.HA gotOffset) ++ : PPC.ADDI picReg picReg (PPC.LO gotOffset) ++ : PPC.MR PPC.r30 picReg + : insns) + +- return (CmmProc info lab live (ListGraph blocks') : gotOffset : statics) ++ return (CmmProc info lab live (ListGraph blocks') : statics) + + + initializePicBase_ppc ArchPPC OSDarwin picReg +Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs +=================================================================== +--- ghc-7.8.3.orig/compiler/nativeGen/PPC/CodeGen.hs ++++ ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs +@@ -54,7 +54,7 @@ import Outputable + import Unique + import DynFlags + +-import Control.Monad ( mapAndUnzipM ) ++import Control.Monad ( mapAndUnzipM, when ) + import Data.Bits + import Data.Word + +@@ -355,6 +355,19 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2] + ADDE rhi r1hi r2hi ] + return (ChildCode64 code rlo) + ++iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do ++ ChildCode64 code1 r1lo <- iselExpr64 e1 ++ ChildCode64 code2 r2lo <- iselExpr64 e2 ++ (rlo,rhi) <- getNewRegPairNat II32 ++ let ++ r1hi = getHiVRegFromLo r1lo ++ r2hi = getHiVRegFromLo r2lo ++ code = code1 `appOL` ++ code2 `appOL` ++ toOL [ SUBFC rlo r2lo r1lo, ++ SUBFE rhi r2hi r1hi ] ++ return (ChildCode64 code rlo) ++ + iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do + (expr_reg,expr_code) <- getSomeReg expr + (rlo, rhi) <- getNewRegPairNat II32 +@@ -927,8 +940,12 @@ genCCall' dflags gcp target dest_regs ar + (toOL []) [] + + (labelOrExpr, reduceToFF32) <- case target of +- ForeignTarget (CmmLit (CmmLabel lbl)) _ -> return (Left lbl, False) +- ForeignTarget expr _ -> return (Right expr, False) ++ ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do ++ uses_pic_base_implicitly ++ return (Left lbl, False) ++ ForeignTarget expr _ -> do ++ uses_pic_base_implicitly ++ return (Right expr, False) + PrimTarget mop -> outOfLineMachOp mop + + let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode +@@ -949,6 +966,13 @@ genCCall' dflags gcp target dest_regs ar + where + platform = targetPlatform dflags + ++ uses_pic_base_implicitly = do ++ -- See Note [implicit register in PPC PIC code] ++ -- on why we claim to use PIC register here ++ when (gopt Opt_PIC dflags) $ do ++ _ <- getPicBaseNat archWordSize ++ return () ++ + initialStackOffset = case gcp of + GCPDarwin -> 24 + GCPLinux -> 8 +@@ -1432,3 +1456,21 @@ coerceFP2Int _ toRep x = do + -- read low word of value (high word is undefined) + LD II32 dst (spRel dflags 3)] + return (Any (intSize toRep) code') ++ ++-- Note [.LCTOC1 in PPC PIC code] ++-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table ++-- to make the most of the PPC's 16-bit displacements. ++-- As 16-bit signed offset is used (usually via addi/lwz instructions) ++-- first element will have '-32768' offset against .LCTOC1. ++ ++-- Note [implicit register in PPC PIC code] ++-- PPC generates calls by labels in assembly ++-- in form of: ++-- bl puts+32768@plt ++-- in this form it's not seen directly (by GHC NCG) ++-- that r30 (PicBaseReg) is used, ++-- but r30 is a required part of PLT code setup: ++-- puts+32768@plt: ++-- lwz r11,-30484(r30) ; offset in .LCTOC1 ++-- mtctr r11 ++-- bctr +Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs +=================================================================== +--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Instr.hs ++++ ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs +@@ -203,8 +203,11 @@ data Instr + | ADD Reg Reg RI -- dst, src1, src2 + | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 + | ADDE Reg Reg Reg -- (extend) dst, src1, src2 ++ | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2 + | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 + | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 ++ | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1 ++ | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1 + | MULLW Reg Reg RI + | DIVW Reg Reg Reg + | DIVWU Reg Reg Reg +@@ -282,8 +285,11 @@ ppc_regUsageOfInstr platform instr + ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) ++ ADDI reg1 reg2 _ -> usage ([reg2], [reg1]) + ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) + SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) ++ SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) ++ SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) +@@ -356,8 +362,11 @@ ppc_patchRegsOfInstr instr env + ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) + ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3) + ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3) ++ ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm + ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm + SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3) ++ SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3) ++ SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3) + MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) + DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3) + DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3) +Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs +=================================================================== +--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Ppr.hs ++++ ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs +@@ -530,6 +530,16 @@ pprInstr (BCTRL _) = hcat [ + ptext (sLit "bctrl") + ] + pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri ++pprInstr (ADDI reg1 reg2 imm) = hcat [ ++ char '\t', ++ ptext (sLit "addi"), ++ char '\t', ++ pprReg reg1, ++ ptext (sLit ", "), ++ pprReg reg2, ++ ptext (sLit ", "), ++ pprImm imm ++ ] + pprInstr (ADDIS reg1 reg2 imm) = hcat [ + char '\t', + ptext (sLit "addis"), +@@ -544,6 +554,8 @@ pprInstr (ADDIS reg1 reg2 imm) = hcat [ + pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3) + pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3) + pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) ++pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3) ++pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) + pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri + pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri + pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3) +Index: ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs +=================================================================== +--- ghc-7.8.3.orig/compiler/nativeGen/PPC/Regs.hs ++++ ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs +@@ -35,7 +35,8 @@ module PPC.Regs ( + fits16Bits, + makeImmediate, + fReg, +- sp, r3, r4, r27, r28, f1, f20, f21, ++ sp, r3, r4, r27, r28, r30, ++ f1, f20, f21, + + allocatableRegs + +@@ -293,12 +294,13 @@ point registers. + fReg :: Int -> RegNo + fReg x = (32 + x) + +-sp, r3, r4, r27, r28, f1, f20, f21 :: Reg ++sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg + sp = regSingle 1 + r3 = regSingle 3 + r4 = regSingle 4 + r27 = regSingle 27 + r28 = regSingle 28 ++r30 = regSingle 30 + f1 = regSingle $ fReg 1 + f20 = regSingle $ fReg 20 + f21 = regSingle $ fReg 21 +Index: ghc-7.8.3/includes/CodeGen.Platform.hs +=================================================================== +--- ghc-7.8.3.orig/includes/CodeGen.Platform.hs ++++ ghc-7.8.3/includes/CodeGen.Platform.hs +@@ -804,6 +804,8 @@ freeReg 1 = fastBool False -- The Stack + # if !MACHREGS_darwin + -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that + freeReg 2 = fastBool False ++-- at least linux in -fPIC relies on r30 in PLT stubs ++freeReg 30 = fastBool False + # endif + # ifdef REG_Base + freeReg REG_Base = fastBool False +Index: ghc-7.8.3/mk/config.mk.in +=================================================================== +--- ghc-7.8.3.orig/mk/config.mk.in ++++ ghc-7.8.3/mk/config.mk.in +@@ -95,7 +95,7 @@ TargetElf = YES + endif + + # Some platforms don't support shared libraries +-NoSharedLibsPlatformList = powerpc-unknown-linux \ ++NoSharedLibsPlatformList = \ + x86_64-unknown-mingw32 \ + i386-unknown-mingw32 \ + sparc-sun-solaris2 \ diff --git a/ghc.changes b/ghc.changes index 585a1b7..e9bbb8b 100644 --- a/ghc.changes +++ b/ghc.changes @@ -1,3 +1,11 @@ +------------------------------------------------------------------- +Sat Dec 13 09:36:11 UTC 2014 - peter.trommler@ohm-hochschule.de + +- add patch D560.patch +* fixes dynamic linking on ppc +* see https://phabricator.haskell.org/D560 +* this is a back port of the upstream patch + ------------------------------------------------------------------- Wed Nov 5 17:10:14 UTC 2014 - peter.trommler@ohm-hochschule.de diff --git a/ghc.spec b/ghc.spec index d52ee2b..77e1b03 100644 --- a/ghc.spec +++ b/ghc.spec @@ -67,8 +67,10 @@ Patch14: ghc-7.8.2-cgen-constify.patch Patch15: D173.patch # PATCH-FIX-UPSTREAM D177.patch peter.trommler@ohm-hochschule.de -- Pass PIC flags to assembler. See https://phabricator.haskell.org/D177. Patch16: D177.patch -# PATCH-FIX-UPSTREAM ghc.git-e18525f.patch peter.trommler@ohm-hochscule.de -- Declare extern cmm primitives as functions not data. Backport of upstream fix for 7.10. See https://git.haskell.org/ghc.git/commitdiff_plain/e18525fae273f4c1ad8d6cbe1dea4fc074cac721. +# PATCH-FIX-UPSTREAM ghc.git-e18525f.patch peter.trommler@ohm-hochschule.de -- Declare extern cmm primitives as functions not data. Backport of upstream fix for 7.10. See https://git.haskell.org/ghc.git/commitdiff_plain/e18525fae273f4c1ad8d6cbe1dea4fc074cac721. Patch17: ghc.git-e18525f.patch +# PATCH-FIX-UPSTREAM D560.patch peter.trommler@ohm-hochschule.de -- Fix loading of PIC register. See https://phabricator.haskell.org/D560. +Patch18: D560.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -167,6 +169,7 @@ except the ghc library, which is installed by the toplevel ghc metapackage. %patch15 -p1 %patch16 -p1 %patch17 -p1 +%patch18 -p1 %build # Check if bootstrap is required, i.e. version is different from ghc's version