diff --git a/0001-implement-native-code-generator-for-ppc64.patch b/0001-implement-native-code-generator-for-ppc64.patch index dadf933..a873855 100644 --- a/0001-implement-native-code-generator-for-ppc64.patch +++ b/0001-implement-native-code-generator-for-ppc64.patch @@ -1,66 +1,121 @@ -From 465e2fc248ea0689a9665eb6b40360051c7985d1 Mon Sep 17 00:00:00 2001 -From: Peter Trommler -Date: Mon, 12 Jan 2015 12:20:31 +0100 -Subject: [PATCH] implement native code generator for ppc64 - -The implementation follows the ELF specification and hence -is neither optimized for intra-module calls nor does it require -nasty linker tricks. ---- - compiler/cmm/CLabel.hs | 6 + - compiler/nativeGen/AsmCodeGen.lhs | 3 +- - compiler/nativeGen/PIC.hs | 64 ++- - compiler/nativeGen/PPC/CodeGen.hs | 614 ++++++++++++++++++++----- - compiler/nativeGen/PPC/Instr.hs | 87 +++- - compiler/nativeGen/PPC/Ppr.hs | 149 ++++-- - compiler/nativeGen/PPC/Regs.hs | 21 +- - compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs | 3 +- - compiler/nativeGen/RegAlloc/Linear/Main.hs | 3 +- - compiler/nativeGen/TargetReg.hs | 11 +- - configure.ac | 2 +- - includes/CodeGen.Platform.hs | 4 + - mk/config.mk.in | 4 +- - 13 files changed, 784 insertions(+), 187 deletions(-) - -Index: ghc-7.8.3/compiler/cmm/CLabel.hs +Index: ghc-7.8.4/aclocal.m4 =================================================================== ---- ghc-7.8.3.orig/compiler/cmm/CLabel.hs -+++ ghc-7.8.3/compiler/cmm/CLabel.hs -@@ -1168,6 +1168,12 @@ pprDynamicLinkerAsmLabel platform dllInf +--- ghc-7.8.4.orig/aclocal.m4 ++++ ghc-7.8.4/aclocal.m4 +@@ -188,7 +188,10 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_V + test -z "[$]2" || eval "[$]2=ArchPPC" + ;; + powerpc64) +- test -z "[$]2" || eval "[$]2=ArchPPC_64" ++ test -z "[$]2" || eval "[$]2=\"ArchPPC_64 {ppc_64ABI = ELF_V1}\"" ++ ;; ++ powerpc64le) ++ test -z "[$]2" || eval "[$]2=\"ArchPPC_64 {ppc_64ABI = ELF_V2}\"" + ;; + sparc) + test -z "[$]2" || eval "[$]2=ArchSPARC" +@@ -206,7 +209,7 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_V + mipsel) + test -z "[$]2" || eval "[$]2=ArchMipsel" + ;; +- hppa|hppa1_1|ia64|m68k|powerpc64le|rs6000|s390|s390x|sparc64|vax) ++ hppa|hppa1_1|ia64|m68k|rs6000|s390|s390x|sparc64|vax) + test -z "[$]2" || eval "[$]2=ArchUnknown" + ;; + *) +Index: ghc-7.8.4/compiler/cmm/CLabel.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/cmm/CLabel.hs ++++ ghc-7.8.4/compiler/cmm/CLabel.hs +@@ -1158,16 +1158,24 @@ pprDynamicLinkerAsmLabel platform dllInf + else if osElfTarget (platformOS platform) + then if platformArch platform == ArchPPC + then case dllInfo of +- CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] +- ppr lbl <> text "+32768@plt" +- SymbolPtr -> text ".LC_" <> ppr lbl +- _ -> panic "pprDynamicLinkerAsmLabel" ++ CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] ++ ppr lbl <> text "+32768@plt" ++ SymbolPtr -> text ".LC_" <> ppr lbl ++ _ -> panic "pprDynamicLinkerAsmLabel" + else if platformArch platform == ArchX86_64 + then case dllInfo of + CodeStub -> ppr lbl <> text "@plt" GotSymbolPtr -> ppr lbl <> text "@gotpcrel" GotSymbolOffset -> ppr lbl SymbolPtr -> text ".LC_" <> ppr lbl -+ else if platformArch platform == ArchPPC_64 ++ else if platformArch platform == ArchPPC_64 ELF_V1 ++ || platformArch platform == ArchPPC_64 ELF_V2 + then case dllInfo of -+ CodeStub -> text ".Lstub." <> ppr lbl -+ GotSymbolPtr -> text ".LC_" <> ppr lbl <> text "@toc" ++ GotSymbolPtr -> text ".LC_" <> ppr lbl ++ <> text "@toc" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl ++ _ -> panic "pprDynamicLinkerAsmLabel" else case dllInfo of CodeStub -> ppr lbl <> text "@plt" SymbolPtr -> text ".LC_" <> ppr lbl -Index: ghc-7.8.3/compiler/nativeGen/AsmCodeGen.lhs +Index: ghc-7.8.4/compiler/codeGen/CodeGen/Platform.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/AsmCodeGen.lhs -+++ ghc-7.8.3/compiler/nativeGen/AsmCodeGen.lhs -@@ -166,7 +166,7 @@ nativeCodeGen dflags this_mod h us cmms - ArchPPC -> nCG' (ppcNcgImpl dflags) - ArchSPARC -> nCG' (sparcNcgImpl dflags) - ArchARM {} -> panic "nativeCodeGen: No NCG for ARM" -- ArchPPC_64 -> panic "nativeCodeGen: No NCG for PPC 64" -+ ArchPPC_64 -> nCG' (ppcNcgImpl dflags) - ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha" - ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb" - ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel" -Index: ghc-7.8.3/compiler/nativeGen/PIC.hs +--- ghc-7.8.4.orig/compiler/codeGen/CodeGen/Platform.hs ++++ ghc-7.8.4/compiler/codeGen/CodeGen/Platform.hs +@@ -29,7 +29,7 @@ callerSaves platform + ArchSPARC -> SPARC.callerSaves + ArchARM {} -> ARM.callerSaves + arch +- | arch `elem` [ArchPPC, ArchPPC_64] -> ++ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> + case platformOS platform of + OSDarwin -> PPC_Darwin.callerSaves + _ -> PPC.callerSaves +@@ -51,7 +51,7 @@ activeStgRegs platform + ArchSPARC -> SPARC.activeStgRegs + ArchARM {} -> ARM.activeStgRegs + arch +- | arch `elem` [ArchPPC, ArchPPC_64] -> ++ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> + case platformOS platform of + OSDarwin -> PPC_Darwin.activeStgRegs + _ -> PPC.activeStgRegs +@@ -68,7 +68,7 @@ haveRegBase platform + ArchSPARC -> SPARC.haveRegBase + ArchARM {} -> ARM.haveRegBase + arch +- | arch `elem` [ArchPPC, ArchPPC_64] -> ++ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> + case platformOS platform of + OSDarwin -> PPC_Darwin.haveRegBase + _ -> PPC.haveRegBase +@@ -85,7 +85,7 @@ globalRegMaybe platform + ArchSPARC -> SPARC.globalRegMaybe + ArchARM {} -> ARM.globalRegMaybe + arch +- | arch `elem` [ArchPPC, ArchPPC_64] -> ++ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> + case platformOS platform of + OSDarwin -> PPC_Darwin.globalRegMaybe + _ -> PPC.globalRegMaybe +@@ -102,7 +102,7 @@ freeReg platform + ArchSPARC -> SPARC.freeReg + ArchARM {} -> ARM.freeReg + arch +- | arch `elem` [ArchPPC, ArchPPC_64] -> ++ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> + case platformOS platform of + OSDarwin -> PPC_Darwin.freeReg + _ -> PPC.freeReg +Index: ghc-7.8.4/compiler/nativeGen/PIC.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/PIC.hs -+++ ghc-7.8.3/compiler/nativeGen/PIC.hs -@@ -159,6 +159,13 @@ cmmMakePicReference dflags lbl +--- ghc-7.8.4.orig/compiler/nativeGen/PIC.hs ++++ ghc-7.8.4/compiler/nativeGen/PIC.hs +@@ -158,7 +158,14 @@ cmmMakePicReference dflags lbl + -- everything gets relocated at runtime | OSMinGW32 <- platformOS $ targetPlatform dflags = CmmLit $ CmmLabel lbl - -+ | ArchPPC_64 <- platformArch $ targetPlatform dflags +- ++ -- both ABI versions default to medium code model ++ | ArchPPC_64 _ <- platformArch $ targetPlatform dflags + = CmmMachOp (MO_Add W32) -- code model medium + [ CmmReg (CmmGlobal PicBaseReg) + , CmmLit $ picRelative @@ -70,16 +125,19 @@ Index: ghc-7.8.3/compiler/nativeGen/PIC.hs | (gopt Opt_PIC dflags || not (gopt Opt_Static dflags)) && absoluteLabel lbl = CmmMachOp (MO_Add (wordWidth dflags)) -@@ -295,11 +302,15 @@ howToAccessLabel dflags arch OSDarwin th +@@ -293,13 +300,17 @@ howToAccessLabel dflags arch OSDarwin th + -- from position independent code. It is also required from the main program + -- when dynamic libraries containing Haskell code are used. - howToAccessLabel _ ArchPPC_64 os _ kind _ +-howToAccessLabel _ ArchPPC_64 os _ kind _ ++howToAccessLabel _ (ArchPPC_64 _) os _ kind _ | osElfTarget os - = if kind == DataReference - -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC - then AccessViaSymbolPtr - -- actually, .label instead of label - else AccessDirectly -+ = case kind of ++ = case kind of + -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC + DataReference -> AccessViaSymbolPtr + -- RTLD does not generate stubs for function descriptors @@ -91,35 +149,61 @@ Index: ghc-7.8.3/compiler/nativeGen/PIC.hs howToAccessLabel dflags _ os _ _ _ -- no PIC -> the dynamic linker does everything for us; -@@ -430,6 +441,11 @@ needImportedSymbols dflags arch os +@@ -430,9 +441,14 @@ needImportedSymbols dflags arch os , arch == ArchPPC = gopt Opt_PIC dflags || not (gopt Opt_Static dflags) + -- PowerPC 64 Linux: always + | osElfTarget os -+ , arch == ArchPPC_64 ++ , arch == ArchPPC_64 ELF_V1 || arch == ArchPPC_64 ELF_V2 + = True + -- i386 (and others?): -dynamic but not -fPIC | osElfTarget os - , arch /= ArchPPC_64 -@@ -467,6 +483,10 @@ pprGotDeclaration dflags ArchX86 OSDarwi +- , arch /= ArchPPC_64 ++ , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 + = not (gopt Opt_Static dflags) && not (gopt Opt_PIC dflags) + + | otherwise +@@ -467,16 +483,30 @@ pprGotDeclaration dflags ArchX86 OSDarwi pprGotDeclaration _ _ OSDarwin = empty -+-- PPC 64 needs a Table Of Contents (TOC) -+pprGotDeclaration _ ArchPPC_64 OSLinux ++-- PPC 64 ELF v1needs a Table Of Contents (TOC) on Linux ++pprGotDeclaration _ (ArchPPC_64 ELF_V1) OSLinux + = ptext (sLit ".section \".toc\",\"aw\"") ++-- In ELF v2 we also need to tell the assembler that we want ABI ++-- version 2. This would normally be done at the top of the file ++-- right after a file directive, but I could not figure out how ++-- to do that. ++pprGotDeclaration _ (ArchPPC_64 ELF_V2) OSLinux ++ = vcat [ ptext (sLit ".abiversion 2"), ++ ptext (sLit ".section \".toc\",\"aw\"") ++ ] ++pprGotDeclaration _ (ArchPPC_64 _) _ ++ = panic "pprGotDeclaration: ArchPPC_64 only Linux supported" + -- Emit GOT declaration -- Output whatever needs to be output once per .s file. pprGotDeclaration dflags arch os -@@ -635,9 +655,36 @@ pprImportedSymbol _ (Platform { platform + | osElfTarget os +- , arch /= ArchPPC_64 ++ , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 + , not (gopt Opt_PIC dflags) + = empty + + | osElfTarget os +- , arch /= ArchPPC_64 ++ , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 + = vcat [ + -- See Note [.LCTOC1 in PPC PIC code] + ptext (sLit ".section \".got2\",\"aw\""), +@@ -635,9 +665,16 @@ pprImportedSymbol _ (Platform { platform -- the NCG will keep track of all DynamicLinkerLabels it uses -- and output each of them using pprImportedSymbol. -pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 }) _ -+pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 }) ++pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 _ }) + importedLbl | osElfTarget (platformOS platform) - = empty @@ -129,43 +213,44 @@ Index: ghc-7.8.3/compiler/nativeGen/PIC.hs + ptext (sLit ".section \".toc\", \"aw\""), + ptext (sLit ".LC_") <> pprCLabel platform lbl <> char ':', + ptext (sLit "\t.quad") <+> pprCLabel platform lbl ] -+ -- generate code stubs for tail calls -+ Just (CodeStub, lbl) -+ -> vcat [ -+ ptext (sLit ".section \".toc\", \"aw\""), -+ ptext (sLit ".LC_") <> pprCLabel platform lbl <> char ':', -+ ptext (sLit "\t.quad") <+> pprCLabel platform lbl, -+ ptext (sLit ".text"), -+ ptext (sLit ".Lstub.") <> ppr lbl <> char ':', -+ hcat [ ptext (sLit "\taddis\t12,2,.LC_"), -+ ppr lbl, ptext(sLit"@toc@ha") ], -+ hcat [ ptext (sLit "\taddi\t12,12,.LC_"), -+ ppr lbl, ptext(sLit"@toc@l") ], -+ ptext (sLit "\tld\t12,0(12)"), -+ ptext (sLit "\tld\t11,0(12)"), -+ ptext (sLit "\tld\t2,8(12)"), -+ ptext (sLit "\tmtctr\t11"), -+ ptext (sLit "\tld\t11,16(12)"), -+ ptext (sLit "\tbctr") -+ ] -+ + _ -> empty pprImportedSymbol dflags platform importedLbl | osElfTarget (platformOS platform) -@@ -735,7 +782,6 @@ initializePicBase_ppc ArchPPC OSDarwin p +@@ -735,6 +772,28 @@ initializePicBase_ppc ArchPPC OSDarwin p where BasicBlock bID insns = entry b' = BasicBlock bID (PPC.FETCHPC picReg : insns) -- ++------------------------------------------------------------------------- ++-- Load TOC into register 2 ++-- PowerPC 64-bit ELF ABI 2.0 requires the address of the callee ++-- in register 12. ++-- We pass the label to FETCHTOC and create a .localentry too. ++-- TODO: Explain this better and refer to ABI spec! ++{- ++We would like to do approximately this, but spill slot allocation ++might be added before the first BasicBlock. That violates the ABI. ++ ++For now we will emit the prologue code in the pretty printer, ++which is also what we do for ELF v1. ++initializePicBase_ppc (ArchPPC_64 ELF_V2) OSLinux picReg ++ (CmmProc info lab live (ListGraph (entry:blocks)) : statics) ++ = do ++ bID <-getUniqueM ++ return (CmmProc info lab live (ListGraph (b':entry:blocks)) ++ : statics) ++ where BasicBlock entryID _ = entry ++ b' = BasicBlock bID [PPC.FETCHTOC picReg lab, ++ PPC.BCC PPC.ALWAYS entryID] ++-} + initializePicBase_ppc _ _ _ _ = panic "initializePicBase_ppc: not needed" - -Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs +Index: ghc-7.8.4/compiler/nativeGen/PPC/CodeGen.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/PPC/CodeGen.hs -+++ ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs -@@ -77,14 +77,20 @@ cmmTopCodeGen +--- ghc-7.8.4.orig/compiler/nativeGen/PPC/CodeGen.hs ++++ ghc-7.8.4/compiler/nativeGen/PPC/CodeGen.hs +@@ -77,14 +77,24 @@ cmmTopCodeGen cmmTopCodeGen (CmmProc info lab live graph) = do let blocks = toBlockListEntryFirst graph (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks @@ -184,13 +269,17 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + case picBaseMb of + Just picBase -> initializePicBase_ppc arch os picBase tops + Nothing -> return tops -+ ArchPPC_64 -> return tops -- generating function descriptor handled in -+ -- pretty printer -+ _ -> panic "PPC.cmmTopCodeGen: unknown arch" ++ ArchPPC_64 ELF_V1 -> return tops ++ -- generating function descriptor is handled in ++ -- pretty printer ++ ArchPPC_64 ELF_V2 -> return tops ++ -- generating function prologue is handled in ++ -- pretty printer ++ _ -> panic "PPC.cmmTopCodeGen: unknown arch" cmmTopCodeGen (CmmData sec dat) = do return [CmmData sec dat] -- no translation, we just use CmmStatic -@@ -194,26 +200,6 @@ getRegisterReg platform (CmmGlobal mid) +@@ -194,26 +204,6 @@ getRegisterReg platform (CmmGlobal mid) -- ones which map to a real machine register on this -- platform. Hence ... @@ -217,7 +306,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs -- | Convert a BlockId to some CmmStatic data jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) -@@ -261,7 +247,7 @@ data ChildCode64 -- a.k.a "Regist +@@ -261,7 +251,7 @@ data ChildCode64 -- a.k.a "Regist -- Reg may be modified @@ -226,90 +315,112 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs -- we don't mind which one it is. getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) getSomeReg expr = do -@@ -386,10 +372,14 @@ getRegister e = do dflags <- getDynFlags +@@ -275,7 +265,7 @@ getSomeReg expr = do + + getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) + getI64Amodes addrTree = do +- Amode hi_addr addr_code <- getAmode addrTree ++ Amode hi_addr addr_code <- getAmode D addrTree + case addrOffset hi_addr 4 of + Just lo_addr -> return (hi_addr, lo_addr, addr_code) + Nothing -> do (hi_ptr, code) <- getSomeReg addrTree +@@ -386,10 +376,12 @@ getRegister e = do dflags <- getDynFlags getRegister' :: DynFlags -> CmmExpr -> NatM Register -getRegister' _ (CmmReg (CmmGlobal PicBaseReg)) -+getRegister' dflags (CmmReg (CmmGlobal PicBaseReg)) -+ | ArchPPC_64 == (platformArch $ targetPlatform dflags) -+ = return (Fixed II64 toc nilOL) -+ -+getRegister' dflags (CmmReg (CmmGlobal PicBaseReg)) - = do +- = do - reg <- getPicBaseNat archWordSize - return (Fixed archWordSize reg nilOL) ++getRegister' dflags (CmmReg (CmmGlobal PicBaseReg)) ++ | target32Bit (targetPlatform dflags) = do + reg <- getPicBaseNat $ archWordSize (target32Bit (targetPlatform dflags)) -+ return (Fixed (archWordSize (target32Bit (targetPlatform dflags))) reg nilOL) ++ return (Fixed (archWordSize (target32Bit (targetPlatform dflags))) ++ reg nilOL) ++ | otherwise = return (Fixed II64 toc nilOL) getRegister' dflags (CmmReg reg) = return (Fixed (cmmTypeSize (cmmRegType dflags reg)) -@@ -433,11 +423,23 @@ getRegister' dflags (CmmLoad mem pk) +@@ -424,30 +416,54 @@ getRegister' dflags (CmmMachOp (MO_SS_Co + return $ Fixed II32 rlo code + + getRegister' dflags (CmmLoad mem pk) +- | not (isWord64 pk) +- = do ++ | not (isWord64 pk) = do + let platform = targetPlatform dflags +- Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem + let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk) + addr_code `snocOL` LD size dst addr return (Any size code) ++ | not (target32Bit (targetPlatform dflags)) = do ++ Amode addr addr_code <- getAmode DS mem ++ let code dst = addr_code `snocOL` LD II64 dst addr ++ return (Any II64 code) ++ where size = cmmTypeSize pk -+getRegister' dflags (CmmLoad mem pk) -+ | isWord64 pk && not (target32Bit (targetPlatform dflags)) -+ = do -+ Amode addr addr_code <- getAmodeDS mem -+ let code dst = addr_code `snocOL` LD size dst addr -+ return (Any size code) -+ where size = cmmTypeSize pk -+ -- catch simple cases of zero- or sign-extended load getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode mem +- Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) +getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do -+ Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem + return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) + -- Note: there is no Load Byte Arithmetic instruction, so no signed case here getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do -@@ -448,6 +450,22 @@ getRegister' _ (CmmMachOp (MO_SS_Conv W1 - Amode addr addr_code <- getAmode mem +- Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem + return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) + + getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do +- Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) +getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do -+ Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem + return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr)) + +getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do -+ Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem + return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr)) + +getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do -+ Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem + return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr)) + +getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do -+ Amode addr addr_code <- getAmode mem ++ Amode addr addr_code <- getAmode D mem + return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr)) + getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps = case mop of MO_Not rep -> triv_ucode_int rep NOT -@@ -465,7 +483,16 @@ getRegister' dflags (CmmMachOp mop [x]) +@@ -465,7 +481,16 @@ getRegister' dflags (CmmMachOp mop [x]) | from == to -> conversionNop (intSize to) x -- narrowing is a nop: we treat the high bits as undefined - MO_SS_Conv W32 to -> conversionNop (intSize to) x -+ MO_SS_Conv W64 to ++ MO_SS_Conv W64 to + | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register" + | otherwise -> conversionNop (intSize to) x -+ MO_SS_Conv W32 to ++ MO_SS_Conv W32 to + | arch32 -> conversionNop (intSize to) x + | otherwise -> case to of + W64 -> triv_ucode_int to (EXTS II32) + W16 -> conversionNop II16 x + W8 -> conversionNop II8 x -+ _ -> panic "PPC.CodeGen.getRegister: no match" ++ _ -> panic "PPC.CodeGen.getRegister: no match" MO_SS_Conv W16 W8 -> conversionNop II8 x MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8) MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16) -@@ -473,7 +500,16 @@ getRegister' dflags (CmmMachOp mop [x]) +@@ -473,7 +498,17 @@ getRegister' dflags (CmmMachOp mop [x]) MO_UU_Conv from to | from == to -> conversionNop (intSize to) x -- narrowing is a nop: we treat the high bits as undefined @@ -318,16 +429,17 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target" + | otherwise -> conversionNop (intSize to) x + MO_UU_Conv W32 to -+ | arch32 -> conversionNop (intSize to) x -+ | otherwise -> case to of -+ W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64)) -+ W16 -> conversionNop II16 x -+ W8 -> conversionNop II8 x -+ _ -> panic "PPC.CodeGen.getRegister: no match" ++ | arch32 -> conversionNop (intSize to) x ++ | otherwise -> ++ case to of ++ W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64)) ++ W16 -> conversionNop II16 x ++ W8 -> conversionNop II8 x ++ _ -> panic "PPC.CodeGen.getRegister: no match" MO_UU_Conv W16 W8 -> conversionNop II8 x MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32)) MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) -@@ -486,8 +522,9 @@ getRegister' dflags (CmmMachOp mop [x]) +@@ -486,8 +521,9 @@ getRegister' dflags (CmmMachOp mop [x]) conversionNop new_size expr = do e_code <- getRegister' dflags expr return (swizzleRegisterRep e_code new_size) @@ -338,7 +450,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs = case mop of MO_F_Eq _ -> condFltReg EQQ x y MO_F_Ne _ -> condFltReg NE x y -@@ -496,18 +533,28 @@ getRegister' _ (CmmMachOp mop [x, y]) -- +@@ -496,18 +532,28 @@ getRegister' _ (CmmMachOp mop [x, y]) -- MO_F_Lt _ -> condFltReg LTT x y MO_F_Le _ -> condFltReg LE x y @@ -379,19 +491,20 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs MO_F_Add w -> triv_float w FADD MO_F_Sub w -> triv_float w FSUB -@@ -538,32 +585,53 @@ getRegister' _ (CmmMachOp mop [x, y]) -- +@@ -538,32 +584,53 @@ getRegister' _ (CmmMachOp mop [x, y]) -- -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) _ -> trivialCodeNoImm' (intSize rep) SUBF y x - MO_Mul rep -> trivialCode rep True MULLW x y -+ MO_Mul rep ++ MO_Mul rep + | arch32 -> trivialCode rep True MULLW x y + | otherwise -> trivialCode rep True MULLD x y MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y + MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y - MO_S_MulMayOflo _ -> panic "S_MulMayOflo (rep /= II32): not implemented" +- MO_S_MulMayOflo _ -> panic "S_MulMayOflo (rep /= II32): not implemented" ++ MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented" MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented" - MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y) @@ -404,7 +517,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + (extendSExpr dflags rep x) (extendSExpr dflags rep y) + | otherwise -> trivialCodeNoImm' (intSize rep) DIVD + (extendSExpr dflags rep x) (extendSExpr dflags rep y) -+ MO_U_Quot rep ++ MO_U_Quot rep + | arch32 -> trivialCodeNoImm' (intSize rep) DIVWU + (extendUExpr dflags rep x) (extendUExpr dflags rep y) + | otherwise -> trivialCodeNoImm' (intSize rep) DIVDU @@ -442,15 +555,24 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs getRegister' _ (CmmLit (CmmInt i rep)) | Just imm <- makeImmediate rep True i = let -@@ -584,6 +652,7 @@ getRegister' _ (CmmLit (CmmFloat f frep) +@@ -575,7 +642,7 @@ getRegister' _ (CmmLit (CmmFloat f frep) + lbl <- getNewLabelNat + dflags <- getDynFlags + dynRef <- cmmMakeDynamicReference dflags DataReference lbl +- Amode addr addr_code <- getAmode dynRef ++ Amode addr addr_code <- getAmode D dynRef + let size = floatSize frep + code dst = + LDATA ReadOnlyData (Statics lbl +@@ -584,6 +651,7 @@ getRegister' _ (CmmLit (CmmFloat f frep) return (Any size code) getRegister' dflags (CmmLit lit) -+ | ArchPPC == (platformArch $ targetPlatform dflags) ++ | target32Bit (targetPlatform dflags) = let rep = cmmLitType dflags lit imm = litToImm lit code dst = toOL [ -@@ -591,18 +660,46 @@ getRegister' dflags (CmmLit lit) +@@ -591,18 +659,46 @@ getRegister' dflags (CmmLit lit) ADD dst dst (RIImm (LO imm)) ] in return (Any (cmmTypeSize rep) code) @@ -458,7 +580,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + = do lbl <- getNewLabelNat + dflags <- getDynFlags + dynRef <- cmmMakeDynamicReference dflags DataReference lbl -+ Amode addr addr_code <- getAmode dynRef ++ Amode addr addr_code <- getAmode D dynRef + let rep = cmmLitType dflags lit + size = cmmTypeSize rep + code dst = @@ -481,68 +603,116 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + -- in a conversion to II32 or II64 resp. +extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr +extendSExpr dflags W32 x -+ | (platformArch $ targetPlatform dflags) == ArchPPC = x ++ | target32Bit (targetPlatform dflags) = x + +extendSExpr dflags W64 x -+ | (platformArch $ targetPlatform dflags) == ArchPPC_64 = x ++ | not (target32Bit (targetPlatform dflags)) = x + -+extendSExpr dflags rep x = -+ let size = case platformArch $ targetPlatform dflags of -+ ArchPPC -> W32 -+ _ -> W64 ++extendSExpr dflags rep x = ++ let size = if target32Bit $ targetPlatform dflags ++ then W32 ++ else W64 + in CmmMachOp (MO_SS_Conv rep size) [x] + +extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr +extendUExpr dflags W32 x -+ | (platformArch $ targetPlatform dflags) == ArchPPC = x ++ | target32Bit (targetPlatform dflags) = x +extendUExpr dflags W64 x -+ | (platformArch $ targetPlatform dflags) == ArchPPC_64 = x ++ | not (target32Bit (targetPlatform dflags)) = x +extendUExpr dflags rep x = -+ let size = case platformArch $ targetPlatform dflags of -+ ArchPPC -> W32 -+ _ -> W64 ++ let size = if target32Bit $ targetPlatform dflags ++ then W32 ++ else W64 + in CmmMachOp (MO_UU_Conv rep size) [x] -- ----------------------------------------------------------------------------- -- The 'Amode' type: Memory addressing modes passed up the tree. -@@ -633,7 +730,7 @@ getAmode tree@(CmmRegOff _ _) = do dflag - getAmode (mangleIndexTree dflags tree) +@@ -628,26 +724,68 @@ temporary, then do the other computation + ... (tmp) ... + -} - getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) -- | Just off <- makeImmediate W32 True (-i) -+ | Just off <- makeImmediate W32 True (-i) +-getAmode :: CmmExpr -> NatM Amode +-getAmode tree@(CmmRegOff _ _) = do dflags <- getDynFlags +- getAmode (mangleIndexTree dflags tree) ++data InstrForm = D | DS + +-getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) ++getAmode :: InstrForm -> CmmExpr -> NatM Amode ++getAmode inf tree@(CmmRegOff _ _) ++ = do dflags <- getDynFlags ++ getAmode inf (mangleIndexTree dflags tree) ++ ++getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate W32 True (-i) = do (reg, code) <- getSomeReg x return (Amode (AddrRegImm reg off) code) -@@ -645,6 +742,19 @@ getAmode (CmmMachOp (MO_Add W32) [x, Cmm + + +-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) ++getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate W32 True i + = do (reg, code) <- getSomeReg x return (Amode (AddrRegImm reg off) code) -+getAmode (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) -+ | Just off <- makeImmediate W64 True (-i) ++getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) ++ | Just off <- makeImmediate W64 True (-i) + = do + (reg, code) <- getSomeReg x + return (Amode (AddrRegImm reg off) code) + + -+getAmode (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) ++getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate W64 True i + = do + (reg, code) <- getSomeReg x + return (Amode (AddrRegImm reg off) code) ++ ++getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) ++ | Just off <- makeImmediate W64 True (-i) ++ = do ++ (reg, code) <- getSomeReg x ++ (reg', off', code') <- ++ if i `mod` 4 == 0 ++ then do return (reg, off, code) ++ else do ++ tmp <- getNewRegNat II64 ++ return (tmp, ImmInt 0, ++ code `snocOL` ADD tmp reg (RIImm off)) ++ return (Amode (AddrRegImm reg' off') code') ++ ++getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) ++ | Just off <- makeImmediate W64 True i ++ = do ++ (reg, code) <- getSomeReg x ++ (reg', off', code') <- ++ if i `mod` 4 == 0 ++ then do return (reg, off, code) ++ else do ++ tmp <- getNewRegNat II64 ++ return (tmp, ImmInt 0, ++ code `snocOL` ADD tmp reg (RIImm off)) ++ return (Amode (AddrRegImm reg' off') code') + -- optimize addition with 32-bit immediate -- (needed for PIC) - getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit]) -@@ -657,17 +767,37 @@ getAmode (CmmMachOp (MO_Add W32) [x, Cmm +-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit]) ++getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit]) + = do + tmp <- getNewRegNat II32 + (src, srcCode) <- getSomeReg x +@@ -655,20 +793,40 @@ getAmode (CmmMachOp (MO_Add W32) [x, Cmm + code = srcCode `snocOL` ADDIS tmp src (HA imm) + return (Amode (AddrRegImm tmp (LO imm)) code) - getAmode (CmmLit lit) +-getAmode (CmmLit lit) ++getAmode _ (CmmLit lit) = do - tmp <- getNewRegNat II32 - let imm = litToImm lit - code = unitOL (LIS tmp (HA imm)) - return (Amode (AddrRegImm tmp (LO imm)) code) -- + dflags <- getDynFlags + case platformArch $ targetPlatform dflags of + ArchPPC -> do @@ -561,94 +731,34 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + ORIS tmp tmp (HA imm) + ] + return (Amode (AddrRegImm tmp (LO imm)) code) -+ - getAmode (CmmMachOp (MO_Add W32) [x, y]) ++ ++getAmode _ (CmmMachOp (MO_Add W32) [x, y]) ++ = do ++ (regX, codeX) <- getSomeReg x ++ (regY, codeY) <- getSomeReg y ++ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) + +-getAmode (CmmMachOp (MO_Add W32) [x, y]) ++getAmode _ (CmmMachOp (MO_Add W64) [x, y]) = do (regX, codeX) <- getSomeReg x (regY, codeY) <- getSomeReg y return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) -+getAmode (CmmMachOp (MO_Add W64) [x, y]) -+ = do -+ (regX, codeX) <- getSomeReg x -+ (regY, codeY) <- getSomeReg y -+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) -+ - getAmode other +-getAmode other ++getAmode _ other = do (reg, code) <- getSomeReg other -@@ -675,7 +805,70 @@ getAmode other - off = ImmInt 0 + let +@@ -676,7 +834,6 @@ getAmode other return (Amode (AddrRegImm reg off) code) -+-- 64 bit load and store operations require offsets be a multiple of 4 -+ -+getAmodeDS :: CmmExpr -> NatM Amode -+getAmodeDS tree@(CmmRegOff _ _) = do dflags <- getDynFlags -+ getAmodeDS (mangleIndexTree -+ dflags tree) -+ -+getAmodeDS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) -+ | Just off <- makeImmediate W64 True (-i) , i `mod` 4 == 0 -+ = do -+ (reg, code) <- getSomeReg x -+ return (Amode (AddrRegImm reg off) code) -+ - -+getAmodeDS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) -+ | Just off <- makeImmediate W64 True i , i `mod` 4 == 0 -+ = do -+ (reg, code) <- getSomeReg x -+ return (Amode (AddrRegImm reg off) code) -+ -+ -- optimize addition with 32-bit immediate -+ -- (needed for PIC) -+getAmodeDS (CmmMachOp (MO_Add W32) [x, CmmLit lit]) -+ = do -+ tmp <- getNewRegNat II64 -+ (src, srcCode) <- getSomeReg x -+ let imm = litToImm lit -+ code = srcCode `snocOL` ADDIS tmp src (HA imm) -+ return (Amode (AddrRegImm tmp (LO imm)) code) -+ -+getAmodeDS (CmmLit lit) -+ = do -+ dflags <- getDynFlags -+ case platformArch $ targetPlatform dflags of -+ ArchPPC -> do -+ tmp <- getNewRegNat II32 -+ let imm = litToImm lit -+ code = unitOL (LIS tmp (HA imm)) -+ return (Amode (AddrRegImm tmp (LO imm)) code) -+ _ -> do -- TODO: Load from TOC, -+ -- see getRegister' _ (CmmLit lit) -+ tmp <- getNewRegNat II64 -+ let imm = litToImm lit -+ code = toOL [ -+ LIS tmp (HIGHESTA imm), -+ OR tmp tmp (RIImm (HIGHERA imm)), -+ SL II64 tmp tmp (RIImm (ImmInt 32)), -+ ORIS tmp tmp (HA imm) -+ ] -+ return (Amode (AddrRegImm tmp (LO imm)) code) -+ -+getAmodeDS (CmmMachOp (MO_Add _) [x, y]) -+ = do -+ (regX, codeX) <- getSomeReg x -+ (regY, codeY) <- getSomeReg y -+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) -+ -+getAmodeDS other -+ = do -+ (reg, code) <- getSomeReg other -+ let -+ off = ImmInt 0 -+ return (Amode (AddrRegImm reg off) code) -+ +- -- The 'CondCode' type: Condition codes passed up the tree. data CondCode -@@ -686,10 +879,12 @@ data CondCode + = CondCode Bool Cond InstrBlock +@@ -686,10 +843,12 @@ data CondCode getCondCode :: CmmExpr -> NatM CondCode -- almost the same as everywhere else - but we need to @@ -663,7 +773,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs MO_F_Eq W32 -> condFltCode EQQ x y MO_F_Ne W32 -> condFltCode NE x y MO_F_Gt W32 -> condFltCode GTT x y -@@ -704,18 +899,28 @@ getCondCode (CmmMachOp mop [x, y]) +@@ -704,18 +863,28 @@ getCondCode (CmmMachOp mop [x, y]) MO_F_Lt W64 -> condFltCode LTT x y MO_F_Le W64 -> condFltCode LE x y @@ -704,7 +814,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) -@@ -729,21 +934,31 @@ getCondCode _ = panic "getCondCode(2)(po +@@ -729,21 +898,24 @@ getCondCode _ = panic "getCondCode(2)(po condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode -- ###FIXME: I16 and I8! @@ -713,69 +823,72 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y = do (src1, code) <- getSomeReg x +- let + dflags <- getDynFlags -+ let size = case platformArch $ targetPlatform dflags of -+ ArchPPC -> II32 -+ _ -> II64 - let ++ let size = archWordSize $ target32Bit $ targetPlatform dflags code' = code `snocOL` - (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2) -+ (if condUnsigned cond then CMPL else CMP) size src1 (RIImm src2) ++ (if condUnsigned cond then CMPL else CMP) size src1 (RIImm src2) return (CondCode False cond code') condIntCode cond x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y +- let + dflags <- getDynFlags -+ let size = case platformArch $ targetPlatform dflags of -+ ArchPPC -> II32 -+ _ -> II64 - let ++ let size = archWordSize $ target32Bit $ targetPlatform dflags code' = code1 `appOL` code2 `snocOL` - (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2) -+ (if condUnsigned cond then CMPL else CMP) size src1 -+ (RIReg src2) ++ (if condUnsigned cond then CMPL else CMP) size src1 (RIReg src2) return (CondCode False cond code') condFltCode cond x y = do -@@ -781,7 +996,9 @@ assignReg_FltCode :: Size -> CmmReg -> +@@ -781,7 +953,9 @@ assignReg_FltCode :: Size -> CmmReg -> assignMem_IntCode pk addr src = do (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- getAmode addr + Amode dstAddr addr_code <- case pk of -+ II64 -> getAmodeDS addr -+ _ -> getAmode addr ++ II64 -> getAmode DS addr ++ _ -> getAmode D addr return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr -- dst is a reg, but src could be anything -@@ -809,9 +1026,34 @@ genJump (CmmLit (CmmLabel lbl)) +@@ -809,9 +983,42 @@ genJump (CmmLit (CmmLabel lbl)) genJump tree = do + dflags <- getDynFlags + let platform = targetPlatform dflags + case platformOS platform of -+ OSLinux -> case platformArch platform of -+ ArchPPC -> genJump' tree GCPLinux -+ ArchPPC_64 -> genJump' tree GCPLinux64ELF1 -+ _ -> panic "PPC.CodeGen.genJump: Unknown Linux" -+ OSDarwin -> genJump' tree GCPDarwin ++ OSLinux -> case platformArch platform of ++ ArchPPC -> genJump' tree GCPLinux ++ ArchPPC_64 ELF_V1 -> genJump' tree (GCPLinux64ELF 1) ++ ArchPPC_64 ELF_V2 -> genJump' tree (GCPLinux64ELF 2) ++ _ -> panic "PPC.CodeGen.genJump: Unknown Linux" ++ OSDarwin -> genJump' tree GCPDarwin + _ -> panic "PPC.CodeGen.genJump: not defined for this os" -+ ++ + +genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock + -+genJump' tree GCPLinux64ELF1 ++genJump' tree (GCPLinux64ELF 1) + = do (target,code) <- getSomeReg tree - return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing) -+ return (code -- TODO load function descriptor into r12 directly -+ `snocOL` MR r12 target -+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 0)) -+ `snocOL` LD II64 toc (AddrRegImm r12 (ImmInt 8)) ++ return (code ++ `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0)) ++ `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8)) + `snocOL` MTCTR r11 -+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 16)) ++ `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16)) ++ `snocOL` BCTR [] Nothing) ++ ++genJump' tree (GCPLinux64ELF 2) ++ = do ++ (target,code) <- getSomeReg tree ++ return (code ++ `snocOL` MR r12 target ++ `snocOL` MTCTR r12 + `snocOL` BCTR [] Nothing) +genJump' tree _ @@ -785,26 +898,30 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs -- ----------------------------------------------------------------------------- -- Unconditional branches -@@ -867,11 +1109,16 @@ genCCall target dest_regs argsAndHints +@@ -867,11 +1074,18 @@ genCCall target dest_regs argsAndHints = do dflags <- getDynFlags let platform = targetPlatform dflags case platformOS platform of - OSLinux -> genCCall' dflags GCPLinux target dest_regs argsAndHints -+ OSLinux -> case platformArch platform of -+ ArchPPC -> genCCall' dflags GCPLinux -+ target dest_regs argsAndHints -+ ArchPPC_64 -> genCCall' dflags GCPLinux64ELF1 -+ target dest_regs argsAndHints -+ _ -> panic "PPC.CodeGen.genCCall: Unknown Linux" - OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints - _ -> panic "PPC.CodeGen.genCCall: not defined for this os" +- OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints +- _ -> panic "PPC.CodeGen.genCCall: not defined for this os" ++ OSLinux -> case platformArch platform of ++ ArchPPC -> genCCall' dflags GCPLinux ++ target dest_regs argsAndHints ++ ArchPPC_64 ELF_V1 -> genCCall' dflags (GCPLinux64ELF 1) ++ target dest_regs argsAndHints ++ ArchPPC_64 ELF_V2 -> genCCall' dflags (GCPLinux64ELF 2) ++ target dest_regs argsAndHints ++ _ -> panic "PPC.CodeGen.genCCall: Unknown Linux" ++ OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints ++ _ -> panic "PPC.CodeGen.genCCall: not defined for this os" -data GenCCallPlatform = GCPLinux | GCPDarwin -+data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF1 ++data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF Int genCCall' :: DynFlags -@@ -910,7 +1157,11 @@ genCCall' +@@ -910,7 +1124,11 @@ genCCall' * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on PowerPC Linux does not agree, so neither do we. @@ -812,12 +929,12 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + PowerPC 64 Linux uses the System V Release 4 Calling Convention for + 64-bit PowerPC. It is specified in + "64-bit PowerPC ELF Application Binary Interface Supplement 1.9". -+ -+ According to both conventions, the parameter area should be part of the ++ ++ According to all conventions, the parameter area should be part of the caller's stack frame, allocated in the caller's prologue code (large enough to hold the parameter lists for all called routines). The NCG already uses the stack for register spilling, leaving 64 bytes free at the top. -@@ -949,7 +1200,9 @@ genCCall' dflags gcp target dest_regs ar +@@ -949,39 +1167,66 @@ genCCall' dflags gcp target dest_regs ar PrimTarget mop -> outOfLineMachOp mop let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode @@ -827,10 +944,13 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + `appOL` moveResult reduceToFF32 case labelOrExpr of - Left lbl -> do -@@ -958,30 +1211,45 @@ genCCall' dflags gcp target dest_regs ar +- Left lbl -> do ++ Left lbl -> do -- the linker does all the work for us + return ( codeBefore + `snocOL` BL lbl usedRegs `appOL` codeAfter) - Right dyn -> do +- Right dyn -> do ++ Right dyn -> do -- implement call through function pointer (dynReg, dynCode) <- getSomeReg dyn - return ( dynCode - `snocOL` MTCTR dynReg @@ -838,21 +958,25 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs - `snocOL` BCTRL usedRegs - `appOL` codeAfter) + case gcp of -+ GCPLinux64ELF1 -> return ( dynCode -+ `appOL` codeBefore -+ `snocOL` MR r12 dynReg -+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 0)) -+ `snocOL` LD II64 toc (AddrRegImm r12 (ImmInt 8)) -+ `snocOL` MTCTR r11 -+ `snocOL` LD II64 r11 (AddrRegImm r12 (ImmInt 16)) -+ `snocOL` BCTRL usedRegs -+ `appOL` codeAfter) -+ ++ GCPLinux64ELF 1 -> return ( dynCode ++ `appOL` codeBefore ++ `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0)) ++ `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8)) ++ `snocOL` MTCTR r11 ++ `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16)) ++ `snocOL` BCTRL usedRegs ++ `appOL` codeAfter) ++ GCPLinux64ELF 2 -> return ( dynCode ++ `appOL` codeBefore ++ `snocOL` MR r12 dynReg ++ `snocOL` MTCTR r12 ++ `snocOL` BCTRL usedRegs ++ `appOL` codeAfter) + _ -> return ( dynCode -+ `snocOL` MTCTR dynReg -+ `appOL` codeBefore -+ `snocOL` BCTRL usedRegs -+ `appOL` codeAfter) ++ `snocOL` MTCTR dynReg ++ `appOL` codeBefore ++ `snocOL` BCTRL usedRegs ++ `appOL` codeAfter) where platform = targetPlatform dflags @@ -862,31 +986,39 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs - when (gopt Opt_PIC dflags) $ do - _ <- getPicBaseNat archWordSize + when (gopt Opt_PIC dflags && target32Bit platform) $ do -+ _ <- getPicBaseNat $ archWordSize (target32Bit (targetPlatform dflags)) ++ _ <- getPicBaseNat $ archWordSize True return () initialStackOffset = case gcp of - GCPDarwin -> 24 - GCPLinux -> 8 -+ GCPLinux64ELF1 -> 48 +- GCPDarwin -> 24 +- GCPLinux -> 8 ++ GCPDarwin -> 24 ++ GCPLinux -> 8 ++ GCPLinux64ELF 1 -> 48 ++ GCPLinux64ELF 2 -> 32 ++ _ -> panic "genCall': unknown calling convention" -- size of linkage area + size of arguments, in bytes stackDelta finalStack = case gcp of GCPDarwin -> roundTo 16 $ (24 +) $ max 32 $ sum $ map (widthInBytes . typeWidth) argReps GCPLinux -> roundTo 16 finalStack -+ GCPLinux64ELF1 -> ++ GCPLinux64ELF 1 -> + roundTo 16 $ (48 +) $ max 64 $ sum $ + map (widthInBytes . typeWidth) argReps ++ GCPLinux64ELF 2 -> ++ roundTo 16 $ (32 +) $ max 64 $ sum $ ++ map (widthInBytes . typeWidth) argReps ++ _ -> panic "genCall': unknown calling conv." -- need to remove alignment information args | PrimTarget mop <- target, -@@ -998,12 +1266,25 @@ genCCall' dflags gcp target dest_regs ar +@@ -998,14 +1243,34 @@ genCCall' dflags gcp target dest_regs ar roundTo a x | x `mod` a == 0 = x | otherwise = x + a - (x `mod` a) + spSize = if target32Bit platform then II32 else II64 -+ ++ move_sp_down finalStack | delta > 64 = - toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))), @@ -895,20 +1027,30 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs | otherwise = nilOL where delta = stackDelta finalStack + toc_before = case gcp of -+ GCPLinux64ELF1 -> unitOL $ ST spSize toc (AddrRegImm sp (ImmInt 40)) -+ _ -> nilOL ++ GCPLinux64ELF 1 -> unitOL $ ST spSize toc (AddrRegImm sp (ImmInt 40)) ++ GCPLinux64ELF 2 -> unitOL $ ST spSize toc (AddrRegImm sp (ImmInt 24)) ++ _ -> nilOL + toc_after labelOrExpr = case gcp of -+ GCPLinux64ELF1 -> case labelOrExpr of -+ Left _ -> toOL [ NOP ] -+ Right _ -> toOL [ LD spSize toc -+ (AddrRegImm sp -+ (ImmInt 40)) -+ ] -+ _ -> nilOL ++ GCPLinux64ELF 1 -> case labelOrExpr of ++ Left _ -> toOL [ NOP ] ++ Right _ -> toOL [ LD spSize toc ++ (AddrRegImm sp ++ (ImmInt 40)) ++ ] ++ GCPLinux64ELF 2 -> case labelOrExpr of ++ Left _ -> toOL [ NOP ] ++ Right _ -> toOL [ LD spSize toc ++ (AddrRegImm sp ++ (ImmInt 24)) ++ ] ++ _ -> nilOL move_sp_up finalStack - | delta > 64 = +- | delta > 64 = ++ | delta > 64 = -- TODO: fix-up stack back-chain toOL [ADD sp sp (RIImm (ImmInt delta)), -@@ -1014,7 +1295,8 @@ genCCall' dflags gcp target dest_regs ar + DELTA 0] + | otherwise = nilOL +@@ -1014,7 +1279,8 @@ genCCall' dflags gcp target dest_regs ar passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) passArguments ((arg,arg_ty):args) gprs fprs stackOffset @@ -918,39 +1060,43 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs do ChildCode64 code vr_lo <- iselExpr64 arg let vr_hi = getHiVRegFromLo vr_lo -@@ -1052,6 +1334,7 @@ genCCall' dflags gcp target dest_regs ar +@@ -1052,6 +1318,7 @@ genCCall' dflags gcp target dest_regs ar _ -> -- only one or no regs left passArguments args [] fprs (stackOffset'+8) stackCode accumUsed -+ GCPLinux64ELF1 -> panic "passArguments: 32 bit code" ++ GCPLinux64ELF _ -> panic "passArguments: 32 bit code" passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed | reg : _ <- regs = do -@@ -1065,6 +1348,8 @@ genCCall' dflags gcp target dest_regs ar +@@ -1063,8 +1330,10 @@ genCCall' dflags gcp target dest_regs ar + -- The Darwin ABI requires that we reserve + -- stack slots for register parameters GCPDarwin -> stackOffset + stackBytes - -- ... the SysV ABI doesn't. +- -- ... the SysV ABI doesn't. ++ -- ... the SysV ABI 32-bit doesn't. GCPLinux -> stackOffset -+ -- ... but ELFv1 does. -+ GCPLinux64ELF1 -> stackOffset + stackBytes ++ -- ... but SysV ABI 64-bit does. ++ GCPLinux64ELF _ -> stackOffset + stackBytes passArguments args (drop nGprs gprs) (drop nFprs fprs) -@@ -1092,6 +1377,10 @@ genCCall' dflags gcp target dest_regs ar +@@ -1092,6 +1361,11 @@ genCCall' dflags gcp target dest_regs ar roundTo 8 stackOffset | otherwise -> stackOffset -+ GCPLinux64ELF1 -> -+ -- everything on the stack is 8-byte aligned -+ -- on a 64 bit system (vector status excepted) ++ GCPLinux64ELF _ -> ++ -- everything on the stack is 8-byte ++ -- aligned on a 64 bit system ++ -- (except vector status, not used now) + stackOffset stackSlot = AddrRegImm sp (ImmInt stackOffset') (nGprs, nFprs, stackBytes, regs) = case gcp of -@@ -1117,6 +1406,18 @@ genCCall' dflags gcp target dest_regs ar +@@ -1117,6 +1391,18 @@ genCCall' dflags gcp target dest_regs ar FF64 -> (0, 1, 8, fprs) II64 -> panic "genCCall' passArguments II64" FF80 -> panic "genCCall' passArguments FF80" -+ GCPLinux64ELF1 -> ++ GCPLinux64ELF _ -> + case cmmTypeSize rep of + II8 -> (1, 0, 8, gprs) + II16 -> (1, 0, 8, gprs) @@ -965,7 +1111,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs moveResult reduceToFF32 = case dest_regs of -@@ -1124,8 +1425,9 @@ genCCall' dflags gcp target dest_regs ar +@@ -1124,8 +1410,9 @@ genCCall' dflags gcp target dest_regs ar [dest] | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) @@ -977,7 +1123,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs | otherwise -> unitOL (MR r_dest r3) where rep = cmmRegType dflags (CmmLocal dest) r_dest = getRegisterReg platform (CmmLocal dest) -@@ -1201,17 +1503,18 @@ genCCall' dflags gcp target dest_regs ar +@@ -1201,17 +1488,19 @@ genCCall' dflags gcp target dest_regs ar genSwitch :: DynFlags -> CmmExpr -> [Maybe BlockId] -> NatM InstrBlock genSwitch dflags expr ids @@ -990,7 +1136,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + sha = if target32Bit $ targetPlatform dflags then 2 else 3 + tmp <- getNewRegNat sz lbl <- getNewLabelNat -- dflags <- getDynFlags + dflags <- getDynFlags dynRef <- cmmMakeDynamicReference dflags DataReference lbl (tableReg,t_code) <- getSomeReg $ dynRef let code = e_code `appOL` t_code `appOL` toOL [ @@ -1001,7 +1147,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs ADD tmp tmp (RIReg tableReg), MTCTR tmp, BCTR ids (Just lbl) -@@ -1220,12 +1523,14 @@ genSwitch dflags expr ids +@@ -1220,12 +1509,14 @@ genSwitch dflags expr ids | otherwise = do (reg,e_code) <- getSomeReg expr @@ -1012,14 +1158,14 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs lbl <- getNewLabelNat let code = e_code `appOL` toOL [ - SLW tmp reg (RIImm (ImmInt 2)), -+ SL sz tmp reg (RIImm (ImmInt sha)), ++ SL sz tmp reg (RIImm (ImmInt 2)), ADDIS tmp tmp (HA (ImmCLbl lbl)), - LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), + LD sz tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), MTCTR tmp, BCTR ids (Just lbl) ] -@@ -1235,7 +1540,9 @@ generateJumpTableForInstr :: DynFlags -> +@@ -1235,7 +1526,9 @@ generateJumpTableForInstr :: DynFlags -> -> Maybe (NatCmmDecl CmmStatics Instr) generateJumpTableForInstr dflags (BCTR ids (Just lbl)) = let jumpTable @@ -1030,15 +1176,34 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs | otherwise = map (jumpTableEntry dflags) ids where jumpTableEntryRel Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) -@@ -1259,6 +1566,7 @@ condIntReg, condFltReg :: Cond -> CmmExp +@@ -1250,25 +1543,14 @@ generateJumpTableForInstr _ _ = Nothing + + -- Turn those condition codes into integers now (when they appear on + -- the right hand side of an assignment). +--- +--- (If applicable) Do not fill the delay slots here; you will confuse the +--- register allocator. + + condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register + condReg :: NatM CondCode -> NatM Register condReg getCond = do CondCode _ cond cond_code <- getCond + dflags <- getDynFlags let - {- code dst = cond_code `appOL` toOL [ - BCC cond lbl1, -@@ -1294,7 +1602,8 @@ condReg getCond = do +-{- code dst = cond_code `appOL` toOL [ +- BCC cond lbl1, +- LI dst (ImmInt 0), +- BCC ALWAYS lbl2, +- NEWBLOCK lbl1, +- LI dst (ImmInt 1), +- BCC ALWAYS lbl2, +- NEWBLOCK lbl2 +- ]-} + code dst = cond_code + `appOL` negate_code + `appOL` toOL [ +@@ -1294,7 +1576,8 @@ condReg getCond = do GU -> (1, False) _ -> panic "PPC.CodeGen.codeReg: no match" @@ -1048,7 +1213,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs condIntReg cond x y = condReg (condIntCode cond x y) condFltReg cond x y = condReg (condFltCode cond x y) -@@ -1363,6 +1672,27 @@ trivialCode rep _ instr x y = do +@@ -1363,6 +1646,27 @@ trivialCode rep _ instr x y = do let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) return (Any (intSize rep) code) @@ -1076,7 +1241,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr) -> CmmExpr -> CmmExpr -> NatM Register trivialCodeNoImm' size instr x y = do -@@ -1393,18 +1723,26 @@ trivialUCode rep instr x = do +@@ -1393,25 +1697,33 @@ trivialUCode rep instr x = do remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr) -> CmmExpr -> CmmExpr -> NatM Register remainderCode rep div x y = do @@ -1105,17 +1270,29 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs (src, code) <- getSomeReg x lbl <- getNewLabelNat itmp <- getNewRegNat II32 -@@ -1441,8 +1779,42 @@ coerceInt2FP fromRep toRep x = do + ftmp <- getNewRegNat FF64 + dflags <- getDynFlags + dynRef <- cmmMakeDynamicReference dflags DataReference lbl +- Amode addr addr_code <- getAmode dynRef ++ Amode addr addr_code <- getAmode D dynRef + let + code' dst = code `appOL` maybe_exts `appOL` toOL [ + LDATA ReadOnlyData $ Statics lbl +@@ -1441,8 +1753,46 @@ coerceInt2FP fromRep toRep x = do return (Any (floatSize toRep) code') -+coerceInt2FP' ArchPPC_64 fromRep toRep x = do ++-- On an ELF v1 Linux we use the compiler doubleword in the stack frame ++-- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only ++-- set right before a call and restored right after return from the call. ++-- So it is fine. ++coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do + (src, code) <- getSomeReg x + dflags <- getDynFlags + let + code' dst = code `appOL` maybe_exts `appOL` toOL [ -+ ST II64 src (spRel dflags 2), -+ LD FF64 dst (spRel dflags 2), ++ ST II64 src (spRel dflags 3), ++ LD FF64 dst (spRel dflags 3), + FCFID dst dst + ] `appOL` maybe_frsp dst + @@ -1133,7 +1310,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + _ -> panic "PPC.CodeGen.coerceInt2FP: no match" + + return (Any (floatSize toRep) code') -+ ++ +coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch" + + @@ -1149,11 +1326,11 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs dflags <- getDynFlags -- the reps don't really matter: F*->FF64 and II32->I* are no-ops (src, code) <- getSomeReg x -@@ -1457,6 +1829,23 @@ coerceFP2Int _ toRep x = do +@@ -1457,6 +1807,22 @@ coerceFP2Int _ toRep x = do LD II32 dst (spRel dflags 3)] return (Any (intSize toRep) code') -+coerceFP2Int' ArchPPC_64 _ toRep x = do ++coerceFP2Int' (ArchPPC_64 _) _ toRep x = do + dflags <- getDynFlags + -- the reps don't really matter: F*->FF64 and II64->I* are no-ops + (src, code) <- getSomeReg x @@ -1162,10 +1339,9 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs + code' dst = code `appOL` toOL [ + -- convert to int in FP reg + FCTIDZ tmp src, -+ -- store value (64bit) from FP to stack -+ -- TODO: verify that we can really use 16(r1) as temp -+ ST FF64 tmp (spRel dflags 2), -+ LD II64 dst (spRel dflags 2)] ++ -- store value (64bit) from FP to compiler word on stack ++ ST FF64 tmp (spRel dflags 3), ++ LD II64 dst (spRel dflags 3)] + return (Any (intSize toRep) code') + +coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch" @@ -1173,10 +1349,10 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/CodeGen.hs -- Note [.LCTOC1 in PPC PIC code] -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table -- to make the most of the PPC's 16-bit displacements. -Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs +Index: ghc-7.8.4/compiler/nativeGen/PPC/Instr.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/PPC/Instr.hs -+++ ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs +--- ghc-7.8.4.orig/compiler/nativeGen/PPC/Instr.hs ++++ ghc-7.8.4/compiler/nativeGen/PPC/Instr.hs @@ -47,8 +47,10 @@ import Data.Maybe (fromMaybe) -------------------------------------------------------------------------------- -- Size of a PPC memory address, in bytes. @@ -1199,7 +1375,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs - arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch + ArchPPC -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp) + ADD sp sp (RIImm (ImmInt (-amount))) -+ ArchPPC_64 -> STU II64 sp (AddrRegImm sp (ImmInt (-amount))) ++ ArchPPC_64 _ -> STU II64 sp (AddrRegImm sp (ImmInt (-amount))) + arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch ppc_mkStackDeallocInstr :: Platform -> Int -> Instr @@ -1210,7 +1386,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs - arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch + ArchPPC -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp) + ADD sp sp (RIImm (ImmInt amount)) -+ ArchPPC_64 -> ADD sp sp (RIImm (ImmInt amount)) ++ ArchPPC_64 _ -> ADD sp sp (RIImm (ImmInt amount)) + arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch -- @@ -1267,11 +1443,14 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs | FRSP Reg Reg -- reduce to single precision -- (but destination is a FP register) -@@ -253,9 +269,10 @@ data Instr +@@ -253,9 +269,13 @@ data Instr | MFLR Reg -- move from link register | FETCHPC Reg -- pseudo-instruction: -- bcl to next insn, mflr reg - ++ | FETCHTOC Reg CLabel -- pseudo-instruction ++ -- add TOC offset to address in r12 ++ -- print .localentry for label | LWSYNC -- memory barrier - + | NOP -- no operation, PowerPC 64 bit @@ -1280,7 +1459,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs -- | Get the registers that are being used by this instruction. -- regUsage doesn't need to do any trickery for jumps and such. -@@ -290,22 +307,28 @@ ppc_regUsageOfInstr platform instr +@@ -290,22 +310,28 @@ ppc_regUsageOfInstr platform instr SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) @@ -1312,7 +1491,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1]) FADD _ r1 r2 r3 -> usage ([r2,r3], [r1]) -@@ -315,6 +338,8 @@ ppc_regUsageOfInstr platform instr +@@ -315,10 +341,13 @@ ppc_regUsageOfInstr platform instr FNEG r1 r2 -> usage ([r2], [r1]) FCMP r1 r2 -> usage ([r1,r2], []) FCTIWZ r1 r2 -> usage ([r2], [r1]) @@ -1321,7 +1500,12 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs FRSP r1 r2 -> usage ([r2], [r1]) MFCR reg -> usage ([], [reg]) MFLR reg -> usage ([], [reg]) -@@ -367,21 +392,27 @@ ppc_patchRegsOfInstr instr env + FETCHPC reg -> usage ([], [reg]) ++ FETCHTOC reg _ -> usage ([], [reg]) + _ -> noUsage + where + usage (src, dst) = RU (filter (interesting platform) src) +@@ -367,21 +396,27 @@ ppc_patchRegsOfInstr instr env SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3) SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3) SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3) @@ -1352,7 +1536,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs RLWINM reg1 reg2 sh mb me -> RLWINM (env reg1) (env reg2) sh mb me FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3) -@@ -391,6 +422,8 @@ ppc_patchRegsOfInstr instr env +@@ -391,10 +426,13 @@ ppc_patchRegsOfInstr instr env FNEG r1 r2 -> FNEG (env r1) (env r2) FCMP r1 r2 -> FCMP (env r1) (env r2) FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2) @@ -1361,7 +1545,12 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs FRSP r1 r2 -> FRSP (env r1) (env r2) MFCR reg -> MFCR (env reg) MFLR reg -> MFLR (env reg) -@@ -457,11 +490,14 @@ ppc_mkSpillInstr + FETCHPC reg -> FETCHPC (env reg) ++ FETCHTOC reg lab -> FETCHTOC (env reg) lab + _ -> instr + where + fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) +@@ -457,11 +495,14 @@ ppc_mkSpillInstr ppc_mkSpillInstr dflags reg delta slot = let platform = targetPlatform dflags off = spillSlotToOffset slot @@ -1378,7 +1567,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs in ST sz reg (AddrRegImm sp (ImmInt (off-delta))) -@@ -475,9 +511,12 @@ ppc_mkLoadInstr +@@ -475,9 +516,12 @@ ppc_mkLoadInstr ppc_mkLoadInstr dflags reg delta slot = let platform = targetPlatform dflags off = spillSlotToOffset slot @@ -1392,7 +1581,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs RcDouble -> FF64 _ -> panic "PPC.Instr.mkLoadInstr: no match" in LD sz reg (AddrRegImm sp (ImmInt (off-delta))) -@@ -498,8 +537,8 @@ maxSpillSlots dflags +@@ -498,8 +542,8 @@ maxSpillSlots dflags -- = 0 -- useful for testing allocMoreStack -- | The number of bytes that the stack pointer should be aligned @@ -1403,11 +1592,11 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Instr.hs stackAlign :: Int stackAlign = 16 -Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs +Index: ghc-7.8.4/compiler/nativeGen/PPC/Ppr.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/PPC/Ppr.hs -+++ ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs -@@ -39,11 +39,13 @@ import Unique ( pprUnique +--- ghc-7.8.4.orig/compiler/nativeGen/PPC/Ppr.hs ++++ ghc-7.8.4/compiler/nativeGen/PPC/Ppr.hs +@@ -39,11 +39,11 @@ import Unique ( pprUnique import Platform import FastString import Outputable @@ -1417,12 +1606,10 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs import Data.Bits - -+-- temporary import to help debug -+import PprCmm ( pprLit ) -- ----------------------------------------------------------------------------- -- Printing this stuff out -@@ -54,12 +56,15 @@ pprNatCmmDecl (CmmData section dats) = +@@ -54,12 +54,17 @@ pprNatCmmDecl (CmmData section dats) = pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = case topInfoTable proc of Nothing -> @@ -1433,13 +1620,15 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs blocks -> -- special case for code without info table: pprSectionHeader Text $$ - pprLabel lbl $$ -- blocks guaranteed not null, so label needed -+ (if platformArch platform == ArchPPC_64 -+ then pprFunctionDescriptor lbl -+ else pprLabel lbl) $$ -- blocks guaranteed not null, so label needed ++ (case platformArch platform of ++ ArchPPC_64 ELF_V1 -> pprFunctionDescriptor lbl ++ ArchPPC_64 ELF_V2 -> pprFunctionPrologue lbl ++ _ -> pprLabel lbl) $$ -- blocks guaranteed not null, ++ -- so label needed vcat (map (pprBasicBlock top_info) blocks) Just (Statics info_lbl _) -> -@@ -86,6 +91,22 @@ pprNatCmmDecl proc@(CmmProc top_info lbl +@@ -86,6 +91,35 @@ pprNatCmmDecl proc@(CmmProc top_info lbl else empty) @@ -1448,7 +1637,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs + $$ text ".section \".opd\",\"aw\"" + $$ text ".align 3" + $$ ppr lab <> char ':' -+ $$ text ".quad ." ++ $$ text ".quad ." + <> ppr lab + <> text ",.TOC.@tocbase,0" + $$ text ".previous" @@ -1458,11 +1647,24 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs + $$ char '.' + <> ppr lab + <> char ':' ++ ++pprFunctionPrologue :: CLabel ->SDoc ++pprFunctionPrologue lab = pprGloblDecl lab ++ $$ text ".type " ++ <> ppr lab ++ <> text ", @function" ++ $$ ppr lab <> char ':' ++ $$ text "0:\taddis\t" <> pprReg toc ++ <> text ",12,.TOC.-0b@ha" ++ $$ text "\taddi\t" <> pprReg toc ++ <> char ',' <> pprReg toc <> text ",.TOC.-0b@l" ++ $$ text "\t.localentry\t" <> ppr lab ++ <> text ",.-" <> ppr lab + pprBasicBlock :: BlockEnv CmmStatics -> NatBasicBlock Instr -> SDoc pprBasicBlock info_env (BasicBlock blockid instrs) = maybe_infotable $$ -@@ -213,6 +234,7 @@ pprSize x +@@ -213,6 +247,7 @@ pprSize x II8 -> sLit "b" II16 -> sLit "h" II32 -> sLit "w" @@ -1470,7 +1672,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs FF32 -> sLit "fs" FF64 -> sLit "fd" _ -> panic "PPC.Ppr.pprSize: no match") -@@ -262,6 +284,18 @@ pprImm (HA i) +@@ -262,6 +297,18 @@ pprImm (HA i) then hcat [ text "ha16(", pprImm i, rparen ] else pprImm i <> text "@ha" @@ -1489,17 +1691,17 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs pprAddr :: AddrMode -> SDoc pprAddr (AddrRegReg r1 r2) -@@ -276,17 +310,23 @@ pprSectionHeader :: Section -> SDoc +@@ -276,17 +323,23 @@ pprSectionHeader :: Section -> SDoc pprSectionHeader seg = sdocWithPlatform $ \platform -> let osDarwin = platformOS platform == OSDarwin -+ ppc64 = platformArch platform == ArchPPC_64 ++ ppc64 = not $ target32Bit platform in case seg of Text -> ptext (sLit ".text\n.align 2") - Data -> ptext (sLit ".data\n.align 2") + Data -+ | ppc64 -> ptext (sLit ".data\n.align 3") -+ | otherwise -> ptext (sLit ".data\n.align 2") ++ | ppc64 -> ptext (sLit ".data\n.align 3") ++ | otherwise -> ptext (sLit ".data\n.align 2") ReadOnlyData | osDarwin -> ptext (sLit ".const\n.align 2") + | ppc64 -> ptext (sLit ".section .rodata\n\t.align 3") @@ -1514,7 +1716,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs | otherwise -> ptext (sLit ".section .bss\n\t.align 2") ReadOnlyData16 | osDarwin -> ptext (sLit ".const\n.align 4") -@@ -298,33 +338,39 @@ pprSectionHeader seg +@@ -298,32 +351,38 @@ pprSectionHeader seg pprDataItem :: CmmLit -> SDoc pprDataItem lit = sdocWithDynFlags $ \dflags -> @@ -1522,18 +1724,18 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs + vcat (ppr_item (cmmTypeSize $ cmmLitType dflags lit) lit dflags) where imm = litToImm lit -+ archPPC_64 dflags = (platformArch $ targetPlatform dflags) == ArchPPC_64 -+ -+ ppr_item II8 _ _ = [ptext (sLit "\t.byte\t") <> pprImm imm] ++ archPPC_64 dflags = not $ target32Bit $ targetPlatform dflags - ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm] -+ ppr_item II32 _ _ = [ptext (sLit "\t.long\t") <> pprImm imm] ++ ppr_item II8 _ _ = [ptext (sLit "\t.byte\t") <> pprImm imm] - ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm] -+ ppr_item II64 _ dflags -+ | archPPC_64 dflags = [ptext (sLit "\t.quad\t") <> pprImm imm] ++ ppr_item II32 _ _ = [ptext (sLit "\t.long\t") <> pprImm imm] - ppr_item FF32 (CmmFloat r _) ++ ppr_item II64 _ dflags ++ | archPPC_64 dflags = [ptext (sLit "\t.quad\t") <> pprImm imm] ++ + + ppr_item FF32 (CmmFloat r _) _ = let bs = floatToBytes (fromRational r) @@ -1557,13 +1759,11 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs <> int (fromIntegral (fromIntegral x :: Word32))] - ppr_item _ _ -- = panic "PPC.Ppr.pprDataItem: no match" -+ ppr_item _ expr _ -+ = pprPanic "PPC.Ppr.pprDataItem: no match" (pprLit expr) ++ ppr_item _ _ _ + = panic "PPC.Ppr.pprDataItem: no match" - pprInstr :: Instr -> SDoc -@@ -370,6 +416,7 @@ pprInstr (LD sz reg addr) = hcat [ +@@ -370,6 +429,7 @@ pprInstr (LD sz reg addr) = hcat [ II8 -> sLit "bz" II16 -> sLit "hz" II32 -> sLit "wz" @@ -1571,7 +1771,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs FF32 -> sLit "fs" FF64 -> sLit "fd" _ -> panic "PPC.Ppr.pprInstr: no match" -@@ -388,6 +435,7 @@ pprInstr (LA sz reg addr) = hcat [ +@@ -388,6 +448,7 @@ pprInstr (LA sz reg addr) = hcat [ II8 -> sLit "ba" II16 -> sLit "ha" II32 -> sLit "wa" @@ -1579,26 +1779,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs FF32 -> sLit "fs" FF64 -> sLit "fd" _ -> panic "PPC.Ppr.pprInstr: no match" -@@ -505,12 +553,12 @@ pprInstr (BCCFAR cond blockid) = vcat [ - where lbl = mkAsmTempLabel (getUnique blockid) - - pprInstr (JMP lbl) = hcat [ -- an alias for b that takes a CLabel -- char '\t', -- ptext (sLit "b"), -- char '\t', -- ppr lbl -- ] -- -+ char '\t', -+ ptext (sLit "b"), -+ char '\t', -+ ppr lbl -+ ] -+ - pprInstr (MTCTR reg) = hcat [ - char '\t', - ptext (sLit "mtctr"), -@@ -556,10 +604,14 @@ pprInstr (ADDE reg1 reg2 reg3) = pprLogi +@@ -556,10 +617,14 @@ pprInstr (ADDE reg1 reg2 reg3) = pprLogi pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3) pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) @@ -1613,7 +1794,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs pprInstr (MULLW_MayOflo reg1 reg2 reg3) = vcat [ hcat [ ptext (sLit "\tmullwo\t"), pprReg reg1, ptext (sLit ", "), -@@ -570,8 +622,17 @@ pprInstr (MULLW_MayOflo reg1 reg2 reg3) +@@ -570,8 +635,17 @@ pprInstr (MULLW_MayOflo reg1 reg2 reg3) pprReg reg1, ptext (sLit ", "), ptext (sLit "2, 31, 31") ] ] @@ -1632,7 +1813,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs -- we'll use "andi." instead. pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [ char '\t', -@@ -588,6 +649,17 @@ pprInstr (AND reg1 reg2 ri) = pprLogic ( +@@ -588,6 +662,17 @@ pprInstr (AND reg1 reg2 ri) = pprLogic ( pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri @@ -1650,15 +1831,15 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs pprInstr (XORIS reg1 reg2 imm) = hcat [ char '\t', ptext (sLit "xoris"), -@@ -612,17 +684,33 @@ pprInstr (EXTS sz reg1 reg2) = hcat [ +@@ -612,17 +697,33 @@ pprInstr (EXTS sz reg1 reg2) = hcat [ pprInstr (NEG reg1 reg2) = pprUnary (sLit "neg") reg1 reg2 pprInstr (NOT reg1 reg2) = pprUnary (sLit "not") reg1 reg2 -pprInstr (SLW reg1 reg2 ri) = pprLogic (sLit "slw") reg1 reg2 (limitShiftRI ri) -+pprInstr (SL sz reg1 reg2 ri) = ++pprInstr (SL sz reg1 reg2 ri) = + let op = case sz of + II32 -> "slw" -+ II64 -> "sld" ++ II64 -> "sld" + _ -> panic "PPC.Ppr.pprInstr: shift illegal size" + in pprLogic (sLit op) reg1 reg2 (limitShiftRI sz ri) @@ -1670,7 +1851,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs -- Fixes ticket http://ghc.haskell.org/trac/ghc/ticket/5900 pprInstr (XOR reg1 reg2 (RIReg reg2)) -pprInstr (SRW reg1 reg2 ri) = pprLogic (sLit "srw") reg1 reg2 (limitShiftRI ri) -+pprInstr (SR sz reg1 reg2 ri) = ++pprInstr (SR sz reg1 reg2 ri) = + let op = case sz of + II32 -> "srw" + II64 -> "srd" @@ -1688,7 +1869,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [ ptext (sLit "\trlwinm\t"), pprReg reg1, -@@ -654,6 +742,8 @@ pprInstr (FCMP reg1 reg2) = hcat [ +@@ -654,6 +755,8 @@ pprInstr (FCMP reg1 reg2) = hcat [ ] pprInstr (FCTIWZ reg1 reg2) = pprUnary (sLit "fctiwz") reg1 reg2 @@ -1697,8 +1878,22 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs pprInstr (FRSP reg1 reg2) = pprUnary (sLit "frsp") reg1 reg2 pprInstr (CRNOR dst src1 src2) = hcat [ -@@ -686,6 +776,8 @@ pprInstr (FETCHPC reg) = vcat [ +@@ -684,8 +787,22 @@ pprInstr (FETCHPC reg) = vcat [ + hcat [ ptext (sLit "1:\tmflr\t"), pprReg reg ] + ] ++pprInstr (FETCHTOC reg lab) = vcat [ ++ hcat [ ptext (sLit "0:\taddis\t"), pprReg reg, ++ ptext (sLit ",12,.TOC.-0b@ha") ], ++ hcat [ ptext (sLit "\taddi\t"), pprReg reg, ++ char ',', pprReg reg, ++ ptext (sLit ",.TOC.-0b@l") ], ++ hcat [ ptext (sLit "\t.localentry\t"), ++ ppr lab, ++ ptext (sLit ",.-"), ++ ppr lab] ++ ] ++ pprInstr LWSYNC = ptext (sLit "\tlwsync") +pprInstr NOP = ptext (sLit "\tnop") @@ -1706,7 +1901,7 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs -- pprInstr _ = panic "pprInstr (ppc)" -@@ -739,9 +831,12 @@ pprFSize FF64 = empty +@@ -739,9 +856,12 @@ pprFSize FF64 = empty pprFSize FF32 = char 's' pprFSize _ = panic "PPC.Ppr.pprFSize: no match" @@ -1723,10 +1918,10 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Ppr.hs + panic $ "PPC.Ppr: 32 bit: Shift by " ++ show i ++ " bits is not allowed." +limitShiftRI _ x = x -Index: ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs +Index: ghc-7.8.4/compiler/nativeGen/PPC/Regs.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/PPC/Regs.hs -+++ ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs +--- ghc-7.8.4.orig/compiler/nativeGen/PPC/Regs.hs ++++ ghc-7.8.4/compiler/nativeGen/PPC/Regs.hs @@ -35,7 +35,7 @@ module PPC.Regs ( fits16Bits, makeImmediate, @@ -1796,16 +1991,56 @@ Index: ghc-7.8.3/compiler/nativeGen/PPC/Regs.hs r27 = regSingle 27 r28 = regSingle 28 r30 = regSingle 30 -Index: ghc-7.8.3/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs -+++ ghc-7.8.3/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs ++++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +@@ -111,7 +111,7 @@ trivColorable platform virtualRegSqueeze + ArchX86_64 -> 5 + ArchPPC -> 16 + ArchSPARC -> 14 +- ArchPPC_64 -> panic "trivColorable ArchPPC_64" ++ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" + ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchAlpha -> panic "trivColorable ArchAlpha" + ArchMipseb -> panic "trivColorable ArchMipseb" +@@ -135,7 +135,7 @@ trivColorable platform virtualRegSqueeze + ArchX86_64 -> 0 + ArchPPC -> 0 + ArchSPARC -> 22 +- ArchPPC_64 -> panic "trivColorable ArchPPC_64" ++ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" + ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchAlpha -> panic "trivColorable ArchAlpha" + ArchMipseb -> panic "trivColorable ArchMipseb" +@@ -159,7 +159,7 @@ trivColorable platform virtualRegSqueeze + ArchX86_64 -> 0 + ArchPPC -> 26 + ArchSPARC -> 11 +- ArchPPC_64 -> panic "trivColorable ArchPPC_64" ++ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" + ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchAlpha -> panic "trivColorable ArchAlpha" + ArchMipseb -> panic "trivColorable ArchMipseb" +@@ -183,7 +183,7 @@ trivColorable platform virtualRegSqueeze + ArchX86_64 -> 10 + ArchPPC -> 0 + ArchSPARC -> 0 +- ArchPPC_64 -> panic "trivColorable ArchPPC_64" ++ ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" + ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchAlpha -> panic "trivColorable ArchAlpha" + ArchMipseb -> panic "trivColorable ArchMipseb" +Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs ++++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs @@ -74,7 +74,7 @@ maxSpillSlots dflags ArchPPC -> PPC.Instr.maxSpillSlots dflags ArchSPARC -> SPARC.Instr.maxSpillSlots dflags ArchARM _ _ _ -> panic "maxSpillSlots ArchARM" - ArchPPC_64 -> panic "maxSpillSlots ArchPPC_64" -+ ArchPPC_64 -> PPC.Instr.maxSpillSlots dflags ++ ArchPPC_64 _ -> PPC.Instr.maxSpillSlots dflags ArchAlpha -> panic "maxSpillSlots ArchAlpha" ArchMipseb -> panic "maxSpillSlots ArchMipseb" ArchMipsel -> panic "maxSpillSlots ArchMipsel" @@ -1813,25 +2048,25 @@ Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/Main.hs =================================================================== --- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/Main.hs -@@ -207,7 +207,7 @@ linearRegAlloc dflags entry_ids block_liv +@@ -207,7 +207,7 @@ linearRegAlloc dflags entry_ids block_li ArchSPARC -> linearRegAlloc' dflags (frInitFreeRegs platform :: SPARC.FreeRegs) entry_ids block_live sccs ArchPPC -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs ArchARM _ _ _ -> panic "linearRegAlloc ArchARM" - ArchPPC_64 -> panic "linearRegAlloc ArchPPC_64" -+ ArchPPC_64 -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs ++ ArchPPC_64 _ -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs ArchAlpha -> panic "linearRegAlloc ArchAlpha" ArchMipseb -> panic "linearRegAlloc ArchMipseb" ArchMipsel -> panic "linearRegAlloc ArchMipsel" -Index: ghc-7.8.3/compiler/nativeGen/TargetReg.hs +Index: ghc-7.8.4/compiler/nativeGen/TargetReg.hs =================================================================== ---- ghc-7.8.3.orig/compiler/nativeGen/TargetReg.hs -+++ ghc-7.8.3/compiler/nativeGen/TargetReg.hs +--- ghc-7.8.4.orig/compiler/nativeGen/TargetReg.hs ++++ ghc-7.8.4/compiler/nativeGen/TargetReg.hs @@ -52,7 +52,7 @@ targetVirtualRegSqueeze platform ArchX86_64 -> X86.virtualRegSqueeze ArchPPC -> PPC.virtualRegSqueeze ArchSPARC -> SPARC.virtualRegSqueeze - ArchPPC_64 -> panic "targetVirtualRegSqueeze ArchPPC_64" -+ ArchPPC_64 -> PPC.virtualRegSqueeze ++ ArchPPC_64 _ -> PPC.virtualRegSqueeze ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM" ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" @@ -1840,7 +2075,7 @@ Index: ghc-7.8.3/compiler/nativeGen/TargetReg.hs ArchPPC -> PPC.realRegSqueeze ArchSPARC -> SPARC.realRegSqueeze - ArchPPC_64 -> panic "targetRealRegSqueeze ArchPPC_64" -+ ArchPPC_64 -> PPC.realRegSqueeze ++ ArchPPC_64 _ -> PPC.realRegSqueeze ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM" ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" @@ -1849,7 +2084,7 @@ Index: ghc-7.8.3/compiler/nativeGen/TargetReg.hs ArchPPC -> PPC.classOfRealReg ArchSPARC -> SPARC.classOfRealReg - ArchPPC_64 -> panic "targetClassOfRealReg ArchPPC_64" -+ ArchPPC_64 -> PPC.classOfRealReg ++ ArchPPC_64 _ -> PPC.classOfRealReg ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM" ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" @@ -1858,7 +2093,7 @@ Index: ghc-7.8.3/compiler/nativeGen/TargetReg.hs ArchPPC -> PPC.mkVirtualReg ArchSPARC -> SPARC.mkVirtualReg - ArchPPC_64 -> panic "targetMkVirtualReg ArchPPC_64" -+ ArchPPC_64 -> PPC.mkVirtualReg ++ ArchPPC_64 _ -> PPC.mkVirtualReg ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM" ArchAlpha -> panic "targetMkVirtualReg ArchAlpha" ArchMipseb -> panic "targetMkVirtualReg ArchMipseb" @@ -1867,42 +2102,177 @@ Index: ghc-7.8.3/compiler/nativeGen/TargetReg.hs ArchPPC -> PPC.regDotColor ArchSPARC -> SPARC.regDotColor - ArchPPC_64 -> panic "targetRegDotColor ArchPPC_64" -+ ArchPPC_64 -> PPC.regDotColor ++ ArchPPC_64 _ -> PPC.regDotColor ArchARM _ _ _ -> panic "targetRegDotColor ArchARM" ArchAlpha -> panic "targetRegDotColor ArchAlpha" ArchMipseb -> panic "targetRegDotColor ArchMipseb" -Index: ghc-7.8.3/configure.ac +Index: ghc-7.8.4/compiler/utils/Platform.hs =================================================================== ---- ghc-7.8.3.orig/configure.ac -+++ ghc-7.8.3/configure.ac -@@ -238,7 +238,7 @@ AC_SUBST(SOLARIS_BROKEN_SHLD) +--- ghc-7.8.4.orig/compiler/utils/Platform.hs ++++ ghc-7.8.4/compiler/utils/Platform.hs +@@ -8,6 +8,7 @@ module Platform ( + ArmISA(..), + ArmISAExt(..), + ArmABI(..), ++ PPC_64ABI(..), + + target32Bit, + isARM, +@@ -46,6 +47,8 @@ data Arch + | ArchX86_64 + | ArchPPC + | ArchPPC_64 ++ { ppc_64ABI :: PPC_64ABI ++ } + | ArchSPARC + | ArchARM + { armISA :: ArmISA +@@ -104,10 +107,18 @@ data ArmABI + | HARD + deriving (Read, Show, Eq) + ++-- | PowerPC 64-bit ABI ++-- ++data PPC_64ABI ++ = ELF_V1 ++ | ELF_V2 ++ deriving (Read, Show, Eq) ++ ++-- | This predicate tells us whether the platform is 32-bit. + target32Bit :: Platform -> Bool + target32Bit p = platformWordSize p == 4 + +--- | This predicates tells us whether the OS supports ELF-like shared libraries. ++-- | This predicate tells us whether the OS supports ELF-like shared libraries. + osElfTarget :: OS -> Bool + osElfTarget OSLinux = True + osElfTarget OSFreeBSD = True +Index: ghc-7.8.4/configure.ac +=================================================================== +--- ghc-7.8.4.orig/configure.ac ++++ ghc-7.8.4/configure.ac +@@ -288,7 +288,7 @@ AC_SUBST(SOLARIS_BROKEN_SHLD) dnl ** Do an unregisterised build? dnl -------------------------------------------------------------- case "$HostArch" in - i386|x86_64|powerpc|arm) -+ i386|x86_64|powerpc|powerpc64|arm) ++ i386|x86_64|powerpc|powerpc64|powerpc64le|arm) UnregisterisedDefault=NO ;; *) -Index: ghc-7.8.3/includes/CodeGen.Platform.hs +Index: ghc-7.8.4/includes/CodeGen.Platform.hs =================================================================== ---- ghc-7.8.3.orig/includes/CodeGen.Platform.hs -+++ ghc-7.8.3/includes/CodeGen.Platform.hs -@@ -801,6 +801,10 @@ freeRegBase _ = fastBool True - freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns, - -- but it's actually free - freeReg 1 = fastBool False -- The Stack Pointer -+ -+-- TODO: make this conditonal for ppc64 ELF -+freeReg 13 = fastBool False -- reserved for system thread ID -+ +--- ghc-7.8.4.orig/includes/CodeGen.Platform.hs ++++ ghc-7.8.4/includes/CodeGen.Platform.hs +@@ -804,6 +804,9 @@ freeReg 1 = fastBool False -- The Stack # if !MACHREGS_darwin -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that freeReg 2 = fastBool False -Index: ghc-7.8.3/mk/config.mk.in ++-- TODO: make this conditonal for ppc64 ELF ++freeReg 13 = fastBool False -- reserved for system thread ID ++-- TODO: do not reserve r30 in ppc64 ELF + -- at least linux in -fPIC relies on r30 in PLT stubs + freeReg 30 = fastBool False + # endif +Index: ghc-7.8.4/includes/stg/HaskellMachRegs.h =================================================================== ---- ghc-7.8.3.orig/mk/config.mk.in -+++ ghc-7.8.3/mk/config.mk.in +--- ghc-7.8.4.orig/includes/stg/HaskellMachRegs.h ++++ ghc-7.8.4/includes/stg/HaskellMachRegs.h +@@ -35,7 +35,8 @@ + + #define MACHREGS_i386 i386_TARGET_ARCH + #define MACHREGS_x86_64 x86_64_TARGET_ARCH +-#define MACHREGS_powerpc (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH) ++#define MACHREGS_powerpc (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH \ ++ || powerpc64le_TARGET_ARCH || rs6000_TARGET_ARCH) + #define MACHREGS_sparc sparc_TARGET_ARCH + #define MACHREGS_arm arm_TARGET_ARCH + #define MACHREGS_darwin darwin_TARGET_OS +Index: ghc-7.8.4/includes/stg/RtsMachRegs.h +=================================================================== +--- ghc-7.8.4.orig/includes/stg/RtsMachRegs.h ++++ ghc-7.8.4/includes/stg/RtsMachRegs.h +@@ -41,7 +41,8 @@ + + #define MACHREGS_i386 i386_HOST_ARCH + #define MACHREGS_x86_64 x86_64_HOST_ARCH +-#define MACHREGS_powerpc (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH) ++#define MACHREGS_powerpc (powerpc_HOST_ARCH || powerpc64_HOST_ARCH \ ++ || powerpc64le_HOST_ARCH || rs6000_HOST_ARCH) + #define MACHREGS_sparc sparc_HOST_ARCH + #define MACHREGS_arm arm_HOST_ARCH + #define MACHREGS_darwin darwin_HOST_OS +Index: ghc-7.8.4/includes/stg/SMP.h +=================================================================== +--- ghc-7.8.4.orig/includes/stg/SMP.h ++++ ghc-7.8.4/includes/stg/SMP.h +@@ -124,6 +124,14 @@ xchg(StgPtr p, StgWord w) + :"=&r" (result) + :"r" (w), "r" (p) + ); ++#elif powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH ++ __asm__ __volatile__ ( ++ "1: ldarx %0, 0, %2\n" ++ " stdcx. %1, 0, %2\n" ++ " bne- 1b" ++ :"=&r" (result) ++ :"r" (w), "r" (p) ++ ); + #elif sparc_HOST_ARCH + result = w; + __asm__ __volatile__ ( +@@ -190,6 +198,20 @@ cas(StgVolatilePtr p, StgWord o, StgWord + :"cc", "memory" + ); + return result; ++#elif powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH ++ StgWord result; ++ __asm__ __volatile__ ( ++ "1: ldarx %0, 0, %3\n" ++ " cmpd %0, %1\n" ++ " bne 2f\n" ++ " stdcx. %2, 0, %3\n" ++ " bne- 1b\n" ++ "2:" ++ :"=&r" (result) ++ :"r" (o), "r" (n), "r" (p) ++ :"cc", "memory" ++ ); ++ return result; + #elif sparc_HOST_ARCH + __asm__ __volatile__ ( + "cas [%1], %2, %0" +@@ -304,7 +326,7 @@ EXTERN_INLINE void + write_barrier(void) { + #if i386_HOST_ARCH || x86_64_HOST_ARCH + __asm__ __volatile__ ("" : : : "memory"); +-#elif powerpc_HOST_ARCH ++#elif powerpc_HOST_ARCH || powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH + __asm__ __volatile__ ("lwsync" : : : "memory"); + #elif sparc_HOST_ARCH + /* Sparc in TSO mode does not require store/store barriers. */ +@@ -326,7 +348,7 @@ store_load_barrier(void) { + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory"); + #elif x86_64_HOST_ARCH + __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory"); +-#elif powerpc_HOST_ARCH ++#elif powerpc_HOST_ARCH || powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH + __asm__ __volatile__ ("sync" : : : "memory"); + #elif sparc_HOST_ARCH + __asm__ __volatile__ ("membar #StoreLoad" : : : "memory"); +@@ -345,7 +367,7 @@ load_load_barrier(void) { + __asm__ __volatile__ ("" : : : "memory"); + #elif x86_64_HOST_ARCH + __asm__ __volatile__ ("" : : : "memory"); +-#elif powerpc_HOST_ARCH ++#elif powerpc_HOST_ARCH || powerpc64_HOST_ARCH || powerpc64le_HOST_ARCH + __asm__ __volatile__ ("lwsync" : : : "memory"); + #elif sparc_HOST_ARCH + /* Sparc in TSO mode does not require load/load barriers. */ +Index: ghc-7.8.4/mk/config.mk.in +=================================================================== +--- ghc-7.8.4.orig/mk/config.mk.in ++++ ghc-7.8.4/mk/config.mk.in @@ -161,9 +161,9 @@ GhcUnregisterised=@Unregisterised@ # (as well as a C backend) # @@ -1911,7 +2281,218 @@ Index: ghc-7.8.3/mk/config.mk.in +# i386, powerpc, powerpc64, sparc # IOS and AIX are not supported -ArchSupportsNCG=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc sparc))) -+ArchSupportsNCG=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 sparc))) ++ArchSupportsNCG=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc))) OsSupportsNCG=$(strip $(patsubst $(TargetOS_CPP), YES, $(patsubst ios,,$(patsubst aix,,$(TargetOS_CPP))))) GhcWithNativeCodeGen := $(strip\ +@@ -174,7 +174,7 @@ HaveLibDL = @HaveLibDL@ + + # ArchSupportsSMP should be set iff there is support for that arch in + # includes/stg/SMP.h +-ArchSupportsSMP=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 sparc powerpc arm))) ++ArchSupportsSMP=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 sparc powerpc powerpc64 powerpc64le arm))) + + GhcWithSMP := $(strip $(if $(filter YESNO, $(ArchSupportsSMP)$(GhcUnregisterised)),YES,NO)) + +@@ -182,7 +182,7 @@ GhcWithSMP := $(strip $(if $(filter YESN + # has support for this OS/ARCH combination. + + OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 cygwin32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu))) +-ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc sparc sparc64 arm))) ++ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm))) + + ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES" + GhcWithInterpreter=YES +@@ -195,7 +195,7 @@ endif + # (see TABLES_NEXT_TO_CODE in the RTS). Whether we actually compile for + # TABLES_NEXT_TO_CODE depends on whether we're building unregisterised + # code or not, which may be decided by options to the compiler later. +-ifneq "$(findstring $(TargetArch_CPP)X, ia64X powerpc64X)" "" ++ifneq "$(findstring $(TargetArch_CPP)X, ia64X powerpc64X powerpc64leX)" "" + GhcEnableTablesNextToCode=NO + else + GhcEnableTablesNextToCode=YES +Index: ghc-7.8.4/rts/StgCRun.c +=================================================================== +--- ghc-7.8.4.orig/rts/StgCRun.c ++++ ghc-7.8.4/rts/StgCRun.c +@@ -662,11 +662,19 @@ StgRunIsImplementedInAssembler(void) + } + + #else // linux_HOST_OS +-#error Only linux support for power64 right now. ++#error Only Linux support for power64 right now. + #endif + + #endif + ++#ifdef powerpc64le_HOST_ARCH ++/* ----------------------------------------------------------------------------- ++ PowerPC 64 little endian architecture ++ ++ Really everything is in assembler, so we don't have to deal with GCC... ++ -------------------------------------------------------------------------- */ ++#endif ++ + /* ----------------------------------------------------------------------------- + ARM architecture + -------------------------------------------------------------------------- */ +Index: ghc-7.8.4/rts/StgCRunAsm.S +=================================================================== +--- /dev/null ++++ ghc-7.8.4/rts/StgCRunAsm.S +@@ -0,0 +1,114 @@ ++#include "ghcconfig.h" ++#include "rts/Constants.h" ++#ifdef powerpc64le_HOST_ARCH ++#ifdef linux_HOST_OS ++#define STACK_FRAME_SIZE RESERVED_C_STACK_BYTES+304 ++ .file "StgCRun.c" ++ .abiversion 2 ++ .section ".toc","aw" ++ .section ".text" ++ .align 2 ++.globl StgRun ++.hidden StgRun ++.type StgRun,@function ++StgRun: ++.localentry StgRun,.-StgRun ++ mflr 0 ++ mr 5, 1 ++ std 0, 16(1) ++ stdu 1, -(STACK_FRAME_SIZE)(1) ++ std 2, -296(5) ++ std 14, -288(5) ++ std 15, -280(5) ++ std 16, -272(5) ++ std 17, -264(5) ++ std 18, -256(5) ++ std 19, -248(5) ++ std 20, -240(5) ++ std 21, -232(5) ++ std 22, -224(5) ++ std 23, -216(5) ++ std 24, -208(5) ++ std 25, -200(5) ++ std 26, -192(5) ++ std 27, -184(5) ++ std 28, -176(5) ++ std 29, -168(5) ++ std 30, -160(5) ++ std 31, -152(5) ++ stfd 14, -144(5) ++ stfd 15, -136(5) ++ stfd 16, -128(5) ++ stfd 17, -120(5) ++ stfd 18, -112(5) ++ stfd 19, -104(5) ++ stfd 20, -96(5) ++ stfd 21, -88(5) ++ stfd 22, -80(5) ++ stfd 23, -72(5) ++ stfd 24, -64(5) ++ stfd 25, -56(5) ++ stfd 26, -48(5) ++ stfd 27, -40(5) ++ stfd 28, -32(5) ++ stfd 29, -24(5) ++ stfd 30, -16(5) ++ stfd 31, -8(5) ++ mr 27, 4 ++ mtctr 3 ++ mr 12, 3 ++ bctr ++.globl StgReturn ++.type StgReturn,@function ++StgReturn: ++.localentry StgReturn,.-StgReturn ++ mr 3,14 ++ la 5, STACK_FRAME_SIZE(1) ++ ld 2, -296(5) ++ ld 14, -288(5) ++ ld 15, -280(5) ++ ld 16, -272(5) ++ ld 17, -264(5) ++ ld 18, -256(5) ++ ld 19, -248(5) ++ ld 20, -240(5) ++ ld 21, -232(5) ++ ld 22, -224(5) ++ ld 23, -216(5) ++ ld 24, -208(5) ++ ld 25, -200(5) ++ ld 26, -192(5) ++ ld 27, -184(5) ++ ld 28, -176(5) ++ ld 29, -168(5) ++ ld 30, -160(5) ++ ld 31, -152(5) ++ lfd 14, -144(5) ++ lfd 15, -136(5) ++ lfd 16, -128(5) ++ lfd 17, -120(5) ++ lfd 18, -112(5) ++ lfd 19, -104(5) ++ lfd 20, -96(5) ++ lfd 21, -88(5) ++ lfd 22, -80(5) ++ lfd 23, -72(5) ++ lfd 24, -64(5) ++ lfd 25, -56(5) ++ lfd 26, -48(5) ++ lfd 27, -40(5) ++ lfd 28, -32(5) ++ lfd 29, -24(5) ++ lfd 30, -16(5) ++ lfd 31, -8(5) ++ mr 1, 5 ++ ld 0, 16(1) ++ mtlr 0 ++ blr ++ ++ .section .note.GNU-stack,"",@progbits ++#else // linux_HOST_OS ++#error Only Linux support for power64 little endian right now. ++#endif ++ ++#endif +Index: ghc-7.8.4/rts/ghc.mk +=================================================================== +--- ghc-7.8.4.orig/rts/ghc.mk ++++ ghc-7.8.4/rts/ghc.mk +@@ -45,6 +45,9 @@ ifneq "$(PORTING_HOST)" "YES" + ifneq "$(findstring $(TargetArch_CPP), i386 powerpc powerpc64)" "" + rts_S_SRCS += rts/AdjustorAsm.S + endif ++ifneq "$(findstring $(TargetArch_CPP), powerpc64le)" "" ++rts_S_SRCS += rts/StgCRunAsm.S ++endif + endif + + ifeq "$(GhcUnregisterised)" "YES" +Index: ghc-7.8.4/compiler/nativeGen/AsmCodeGen.lhs +=================================================================== +--- ghc-7.8.4.orig/compiler/nativeGen/AsmCodeGen.lhs ++++ ghc-7.8.4/compiler/nativeGen/AsmCodeGen.lhs +@@ -166,7 +166,7 @@ nativeCodeGen dflags this_mod h us cmms + ArchPPC -> nCG' (ppcNcgImpl dflags) + ArchSPARC -> nCG' (sparcNcgImpl dflags) + ArchARM {} -> panic "nativeCodeGen: No NCG for ARM" +- ArchPPC_64 -> panic "nativeCodeGen: No NCG for PPC 64" ++ ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags) + ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha" + ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb" + ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel" +Index: ghc-7.8.4/compiler/llvmGen/LlvmCodeGen/Ppr.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/llvmGen/LlvmCodeGen/Ppr.hs ++++ ghc-7.8.4/compiler/llvmGen/LlvmCodeGen/Ppr.hs +@@ -64,7 +64,7 @@ moduleLayout = sdocWithPlatform $ \platf + Platform { platformArch = ArchX86, platformOS = OSiOS } -> + text "target datalayout = \"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32\"" + $+$ text "target triple = \"i386-apple-darwin11\"" +- Platform { platformArch = ArchPPC_64 , platformOS = OSLinux } -> ++ Platform { platformArch = ArchPPC_64 ELF_V1, platformOS = OSLinux } -> + text "target datalayout = \"E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64\"" + $+$ text "target triple = \"powerpc64-unknown-linux-gnu\"" + _ -> diff --git a/D349.patch b/D349.patch index 46c22c6..8361426 100644 --- a/D349.patch +++ b/D349.patch @@ -1,7 +1,7 @@ -Index: ghc-7.8.3/compiler/ghci/Linker.lhs +Index: ghc-7.8.4/compiler/ghci/Linker.lhs =================================================================== ---- ghc-7.8.3.orig/compiler/ghci/Linker.lhs -+++ ghc-7.8.3/compiler/ghci/Linker.lhs +--- ghc-7.8.4.orig/compiler/ghci/Linker.lhs ++++ ghc-7.8.4/compiler/ghci/Linker.lhs @@ -123,7 +123,10 @@ data PersistentLinkerState -- The currently-loaded packages; always object code -- Held, as usual, in dependency order; though I am not sure if @@ -172,11 +172,11 @@ Index: ghc-7.8.3/compiler/ghci/Linker.lhs Just err -> panic ("Loading temp shared object failed: " ++ err) rmDupLinkables :: [Linkable] -- Already loaded -Index: ghc-7.8.3/compiler/main/SysTools.lhs +Index: ghc-7.8.4/compiler/main/SysTools.lhs =================================================================== ---- ghc-7.8.3.orig/compiler/main/SysTools.lhs -+++ ghc-7.8.3/compiler/main/SysTools.lhs -@@ -1365,6 +1365,7 @@ linkDynLib dflags0 o_files dep_packages +--- ghc-7.8.4.orig/compiler/main/SysTools.lhs ++++ ghc-7.8.4/compiler/main/SysTools.lhs +@@ -1416,6 +1416,7 @@ linkDynLib dflags0 o_files dep_packages in package_hs_libs ++ extra_libs ++ other_flags -- probably _stub.o files @@ -184,7 +184,7 @@ Index: ghc-7.8.3/compiler/main/SysTools.lhs let extra_ld_inputs = ldInputs dflags case os of -@@ -1482,8 +1483,8 @@ linkDynLib dflags0 o_files dep_packages +@@ -1533,8 +1534,8 @@ linkDynLib dflags0 o_files dep_packages -- Set the library soname. We use -h rather than -soname as -- Solaris 10 doesn't support the latter: ++ [ Option ("-Wl,-h," ++ takeFileName output_fn) ] @@ -194,11 +194,11 @@ Index: ghc-7.8.3/compiler/main/SysTools.lhs ++ map Option pkg_lib_path_opts ++ map Option pkg_link_opts ) -Index: ghc-7.8.3/rts/Linker.c +Index: ghc-7.8.4/rts/Linker.c =================================================================== ---- ghc-7.8.3.orig/rts/Linker.c -+++ ghc-7.8.3/rts/Linker.c -@@ -1776,7 +1776,7 @@ internal_dlopen(const char *dll_name) +--- ghc-7.8.4.orig/rts/Linker.c ++++ ghc-7.8.4/rts/Linker.c +@@ -1777,7 +1777,7 @@ internal_dlopen(const char *dll_name) // (see POSIX also) ACQUIRE_LOCK(&dl_mutex); @@ -207,7 +207,7 @@ Index: ghc-7.8.3/rts/Linker.c errmsg = NULL; if (hdl == NULL) { -@@ -1786,11 +1786,12 @@ internal_dlopen(const char *dll_name) +@@ -1787,11 +1787,12 @@ internal_dlopen(const char *dll_name) errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL"); strcpy(errmsg_copy, errmsg); errmsg = errmsg_copy; @@ -224,7 +224,7 @@ Index: ghc-7.8.3/rts/Linker.c RELEASE_LOCK(&dl_mutex); //--------------- End critical section ------------------- -@@ -1798,14 +1799,39 @@ internal_dlopen(const char *dll_name) +@@ -1799,14 +1800,39 @@ internal_dlopen(const char *dll_name) return errmsg; } @@ -265,7 +265,7 @@ Index: ghc-7.8.3/rts/Linker.c for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) { v = dlsym(o_so->handle, symbol); if (dlerror() == NULL) { -@@ -1813,7 +1839,6 @@ internal_dlsym(void *hdl, const char *sy +@@ -1814,7 +1840,6 @@ internal_dlsym(void *hdl, const char *sy return v; } } @@ -273,7 +273,7 @@ Index: ghc-7.8.3/rts/Linker.c RELEASE_LOCK(&dl_mutex); return v; } -@@ -1981,7 +2006,7 @@ lookupSymbol( char *lbl ) +@@ -1982,7 +2007,7 @@ lookupSymbol( char *lbl ) if (!ghciLookupSymbolTable(symhash, lbl, &val)) { IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n")); # if defined(OBJFORMAT_ELF) @@ -282,7 +282,7 @@ Index: ghc-7.8.3/rts/Linker.c # elif defined(OBJFORMAT_MACHO) # if HAVE_DLFCN_H /* On OS X 10.3 and later, we use dlsym instead of the old legacy -@@ -1995,7 +2020,7 @@ lookupSymbol( char *lbl ) +@@ -1996,7 +2021,7 @@ lookupSymbol( char *lbl ) */ IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl)); ASSERT(lbl[0] == '_'); diff --git a/ghc-arm64.patch b/ghc-arm64.patch index 7550cd7..99188ee 100644 --- a/ghc-arm64.patch +++ b/ghc-arm64.patch @@ -7,11 +7,11 @@ Bug: https://ghc.haskell.org/trac/ghc/ticket/7942 Signed-off-by: Austin Seipp -Index: ghc-7.8.3/aclocal.m4 -================================================================================ ---- ghc-7.8.4/aclocal.m4 +Index: ghc-7.8.4/aclocal.m4 +=================================================================== +--- ghc-7.8.4.orig/aclocal.m4 +++ ghc-7.8.4/aclocal.m4 -@@ -197,6 +197,9 @@ +@@ -200,6 +200,9 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_V GET_ARM_ISA() test -z "[$]2" || eval "[$]2=\"ArchARM {armISA = \$ARM_ISA, armISAExt = \$ARM_ISA_EXT, armABI = \$ARM_ABI}\"" ;; @@ -21,7 +21,7 @@ Index: ghc-7.8.3/aclocal.m4 alpha) test -z "[$]2" || eval "[$]2=ArchAlpha" ;; -@@ -1862,6 +1865,9 @@ +@@ -1865,6 +1868,9 @@ AC_MSG_CHECKING(for path to top of build # converts cpu from gnu to ghc naming, and assigns the result to $target_var AC_DEFUN([GHC_CONVERT_CPU],[ case "$1" in @@ -31,115 +31,127 @@ Index: ghc-7.8.3/aclocal.m4 alpha*) $2="alpha" ;; ---- ghc-7.8.4/compiler/nativeGen/AsmCodeGen.lhs +Index: ghc-7.8.4/compiler/nativeGen/AsmCodeGen.lhs +=================================================================== +--- ghc-7.8.4.orig/compiler/nativeGen/AsmCodeGen.lhs +++ ghc-7.8.4/compiler/nativeGen/AsmCodeGen.lhs -@@ -166,6 +166,7 @@ +@@ -166,6 +166,7 @@ nativeCodeGen dflags this_mod h us cmms ArchPPC -> nCG' (ppcNcgImpl dflags) ArchSPARC -> nCG' (sparcNcgImpl dflags) ArchARM {} -> panic "nativeCodeGen: No NCG for ARM" + ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64" - ArchPPC_64 -> nCG' (ppcNcgImpl dflags) + ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags) ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha" ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb" ---- ghc-7.8.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs -@@ -113,6 +113,7 @@ +@@ -113,6 +113,7 @@ trivColorable platform virtualRegSqueeze ArchSPARC -> 14 - ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchARM64 -> panic "trivColorable ArchARM64" ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" -@@ -137,6 +138,7 @@ +@@ -137,6 +138,7 @@ trivColorable platform virtualRegSqueeze ArchSPARC -> 22 - ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchARM64 -> panic "trivColorable ArchARM64" ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" -@@ -161,6 +163,7 @@ +@@ -161,6 +163,7 @@ trivColorable platform virtualRegSqueeze ArchSPARC -> 11 - ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchARM64 -> panic "trivColorable ArchARM64" ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" -@@ -185,6 +188,7 @@ +@@ -185,6 +188,7 @@ trivColorable platform virtualRegSqueeze ArchSPARC -> 0 - ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchPPC_64 _ -> panic "trivColorable ArchPPC_64" ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchARM64 -> panic "trivColorable ArchARM64" ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" ---- ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs -@@ -74,6 +74,7 @@ +@@ -74,6 +74,7 @@ maxSpillSlots dflags ArchPPC -> PPC.Instr.maxSpillSlots dflags ArchSPARC -> SPARC.Instr.maxSpillSlots dflags ArchARM _ _ _ -> panic "maxSpillSlots ArchARM" + ArchARM64 -> panic "maxSpillSlots ArchARM64" - ArchPPC_64 -> PPC.Instr.maxSpillSlots dflags + ArchPPC_64 _ -> PPC.Instr.maxSpillSlots dflags ArchAlpha -> panic "maxSpillSlots ArchAlpha" ArchMipseb -> panic "maxSpillSlots ArchMipseb" ---- ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/Main.hs +Index: ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/Main.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ ghc-7.8.4/compiler/nativeGen/RegAlloc/Linear/Main.hs -@@ -207,6 +207,7 @@ +@@ -207,6 +207,7 @@ linearRegAlloc dflags entry_ids block_li ArchSPARC -> linearRegAlloc' dflags (frInitFreeRegs platform :: SPARC.FreeRegs) entry_ids block_live sccs ArchPPC -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs ArchARM _ _ _ -> panic "linearRegAlloc ArchARM" + ArchARM64 -> panic "linearRegAlloc ArchARM64" - ArchPPC_64 -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs + ArchPPC_64 _ -> linearRegAlloc' dflags (frInitFreeRegs platform :: PPC.FreeRegs) entry_ids block_live sccs ArchAlpha -> panic "linearRegAlloc ArchAlpha" ArchMipseb -> panic "linearRegAlloc ArchMipseb" ---- ghc-7.8.4/compiler/nativeGen/TargetReg.hs +Index: ghc-7.8.4/compiler/nativeGen/TargetReg.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/nativeGen/TargetReg.hs +++ ghc-7.8.4/compiler/nativeGen/TargetReg.hs -@@ -54,6 +54,7 @@ +@@ -54,6 +54,7 @@ targetVirtualRegSqueeze platform ArchSPARC -> SPARC.virtualRegSqueeze - ArchPPC_64 -> PPC.virtualRegSqueeze + ArchPPC_64 _ -> PPC.virtualRegSqueeze ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM" + ArchARM64 -> panic "targetVirtualRegSqueeze ArchARM64" ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel" -@@ -70,6 +71,7 @@ +@@ -70,6 +71,7 @@ targetRealRegSqueeze platform ArchSPARC -> SPARC.realRegSqueeze - ArchPPC_64 -> PPC.realRegSqueeze + ArchPPC_64 _ -> PPC.realRegSqueeze ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM" + ArchARM64 -> panic "targetRealRegSqueeze ArchARM64" ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel" -@@ -85,6 +87,7 @@ +@@ -85,6 +87,7 @@ targetClassOfRealReg platform ArchSPARC -> SPARC.classOfRealReg - ArchPPC_64 -> PPC.classOfRealReg + ArchPPC_64 _ -> PPC.classOfRealReg ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM" + ArchARM64 -> panic "targetClassOfRealReg ArchARM64" ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" ArchMipsel -> panic "targetClassOfRealReg ArchMipsel" -@@ -100,6 +103,7 @@ +@@ -100,6 +103,7 @@ targetMkVirtualReg platform ArchSPARC -> SPARC.mkVirtualReg - ArchPPC_64 -> PPC.mkVirtualReg + ArchPPC_64 _ -> PPC.mkVirtualReg ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM" + ArchARM64 -> panic "targetMkVirtualReg ArchARM64" ArchAlpha -> panic "targetMkVirtualReg ArchAlpha" ArchMipseb -> panic "targetMkVirtualReg ArchMipseb" ArchMipsel -> panic "targetMkVirtualReg ArchMipsel" -@@ -115,6 +119,7 @@ +@@ -115,6 +119,7 @@ targetRegDotColor platform ArchSPARC -> SPARC.regDotColor - ArchPPC_64 -> PPC.regDotColor + ArchPPC_64 _ -> PPC.regDotColor ArchARM _ _ _ -> panic "targetRegDotColor ArchARM" + ArchARM64 -> panic "targetRegDotColor ArchARM64" ArchAlpha -> panic "targetRegDotColor ArchAlpha" ArchMipseb -> panic "targetRegDotColor ArchMipseb" ArchMipsel -> panic "targetRegDotColor ArchMipsel" ---- ghc-7.8.4/compiler/utils/Platform.hs +Index: ghc-7.8.4/compiler/utils/Platform.hs +=================================================================== +--- ghc-7.8.4.orig/compiler/utils/Platform.hs +++ ghc-7.8.4/compiler/utils/Platform.hs -@@ -52,6 +52,7 @@ +@@ -55,6 +55,7 @@ data Arch , armISAExt :: [ArmISAExt] , armABI :: ArmABI } @@ -147,17 +159,21 @@ Index: ghc-7.8.3/aclocal.m4 | ArchAlpha | ArchMipseb | ArchMipsel ---- ghc-7.8.4/includes/stg/HaskellMachRegs.h +Index: ghc-7.8.4/includes/stg/HaskellMachRegs.h +=================================================================== +--- ghc-7.8.4.orig/includes/stg/HaskellMachRegs.h +++ ghc-7.8.4/includes/stg/HaskellMachRegs.h -@@ -38,6 +38,7 @@ - #define MACHREGS_powerpc (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH) +@@ -39,6 +39,7 @@ + || powerpc64le_TARGET_ARCH || rs6000_TARGET_ARCH) #define MACHREGS_sparc sparc_TARGET_ARCH #define MACHREGS_arm arm_TARGET_ARCH +#define MACHREGS_aarch64 aarch64_TARGET_ARCH #define MACHREGS_darwin darwin_TARGET_OS #endif ---- ghc-7.8.4/includes/stg/MachRegs.h +Index: ghc-7.8.4/includes/stg/MachRegs.h +=================================================================== +--- ghc-7.8.4.orig/includes/stg/MachRegs.h +++ ghc-7.8.4/includes/stg/MachRegs.h @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- @@ -229,9 +245,11 @@ Index: ghc-7.8.3/aclocal.m4 #else #error Cannot find platform to give register info for ---- ghc-7.8.4/rts/StgCRun.c +Index: ghc-7.8.4/rts/StgCRun.c +=================================================================== +--- ghc-7.8.4.orig/rts/StgCRun.c +++ ghc-7.8.4/rts/StgCRun.c -@@ -748,4 +748,70 @@ +@@ -756,4 +756,70 @@ StgRun(StgFunPtr f, StgRegTable *basereg } #endif diff --git a/ghc.changes b/ghc.changes index dd3be8d..4c8c876 100644 --- a/ghc.changes +++ b/ghc.changes @@ -1,3 +1,16 @@ +------------------------------------------------------------------- +Fri May 29 05:47:07 UTC 2015 - peter.trommler@ohm-hochschule.de + +- refresh ghc-arm64.patch +- unconditionally apply ghc-arm64.patch + +------------------------------------------------------------------- +Thu May 28 15:14:31 UTC 2015 - peter.trommler@ohm-hochschule.de + +- add backport of powerpc64le native code generator to + 0001-implement-native-code-generator-for-ppc64.patch +- refresh D349.patch + ------------------------------------------------------------------- Thu May 28 13:15:52 CEST 2015 - ro@suse.de diff --git a/ghc.spec b/ghc.spec index 3c65cc2..1979fe1 100644 --- a/ghc.spec +++ b/ghc.spec @@ -16,7 +16,7 @@ # -%global unregisterised_archs aarch64 ppc64le s390 s390x +%global unregisterised_archs aarch64 s390 s390x Name: ghc Version: 7.8.4 @@ -168,9 +168,7 @@ except the ghc library, which is installed by the toplevel ghc metapackage. %patch18 -p1 %patch19 -p1 %patch20 -p1 -%ifarch aarch64 %patch21 -p1 -%endif %build # Patch 19 modifies build system