From b29f20edb1ca7f1763ceb001e2bb2d5f2f11bec3 Mon Sep 17 00:00:00 2001 From: Peter Trommler Date: Fri, 2 Oct 2015 15:48:30 +0200 Subject: [PATCH] nativeGen PPC: fix > 16 bit offsets in stack handling Implement access to spill slots at offsets larger than 16 bits. Also allocation and deallocation of spill slots was restricted to 16 bit offsets. Now 32 bit offsets are supported on all PowerPC platforms. The implementation of 32 bit offsets requires more than one instruction but the native code generator wants one instruction. So we implement pseudo-instructions that are pretty printed into multiple assembly instructions. With pseudo-instructions for spill slot allocation and deallocation we can also implement handling of the back chain pointer according to the ELF ABIs. Test Plan: validate (especially on powerpc (32 bit)) Reviewers: bgamari, austin, erikd Reviewed By: erikd Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D1296 GHC Trac Issues: #7830 --- compiler/nativeGen/PPC/Instr.hs | 39 ++++++++++++++++++++++++++++----------- compiler/nativeGen/PPC/Ppr.hs | 33 +++++++++++++++++++++++++++++++++ compiler/nativeGen/PPC/Regs.hs | 14 ++++++++++++-- includes/CodeGen.Platform.hs | 18 +++++++++++++----- 4 files changed, 86 insertions(+), 18 deletions(-) Index: ghc-7.10.2/compiler/nativeGen/PPC/Instr.hs =================================================================== --- ghc-7.10.2.orig/compiler/nativeGen/PPC/Instr.hs +++ ghc-7.10.2/compiler/nativeGen/PPC/Instr.hs @@ -75,19 +75,19 @@ instance Instruction Instr where ppc_mkStackAllocInstr :: Platform -> Int -> Instr ppc_mkStackAllocInstr platform amount - = case platformArch platform of - ArchPPC -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp) - ADD sp sp (RIImm (ImmInt (-amount))) - ArchPPC_64 _ -> STU II64 sp (AddrRegImm sp (ImmInt (-amount))) - arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch + = ppc_mkStackAllocInstr' platform (-amount) ppc_mkStackDeallocInstr :: Platform -> Int -> 
Instr ppc_mkStackDeallocInstr platform amount + = ppc_mkStackAllocInstr' platform amount + +ppc_mkStackAllocInstr' :: Platform -> Int -> Instr +ppc_mkStackAllocInstr' platform amount = case platformArch platform of - ArchPPC -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp) - ADD sp sp (RIImm (ImmInt amount)) - ArchPPC_64 _ -> ADD sp sp (RIImm (ImmInt amount)) - arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch + ArchPPC -> UPDATE_SP II32 (ImmInt amount) + ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount) + _ -> panic $ "ppc_mkStackAllocInstr' " + ++ show (platformArch platform) -- -- See note [extra spill slots] in X86/Instr.hs @@ -187,8 +187,10 @@ data Instr -- Loads and stores. | LD Size Reg AddrMode -- Load size, dst, src + | LDFAR Size Reg AddrMode -- Load format, dst, src 32 bit offset | LA Size Reg AddrMode -- Load arithmetic size, dst, src | ST Size Reg AddrMode -- Store size, src, dst + | STFAR Size Reg AddrMode -- Store format, src, dst 32 bit offset | STU Size Reg AddrMode -- Store with Update size, src, dst | LIS Reg Imm -- Load Immediate Shifted dst, src | LI Reg Imm -- Load Immediate dst, src @@ -278,6 +280,8 @@ data Instr | NOP -- no operation, PowerPC 64 bit -- needs this as place holder to -- reload TOC pointer + | UPDATE_SP Size Imm -- expand/shrink spill area on C stack + -- pseudo-instruction -- | Get the registers that are being used by this instruction. -- regUsage doesn't need to do any trickery for jumps and such. 
@@ -289,8 +293,10 @@ ppc_regUsageOfInstr :: Platform -> Instr ppc_regUsageOfInstr platform instr = case instr of LD _ reg addr -> usage (regAddr addr, [reg]) + LDFAR _ reg addr -> usage (regAddr addr, [reg]) LA _ reg addr -> usage (regAddr addr, [reg]) ST _ reg addr -> usage (reg : regAddr addr, []) + STFAR _ reg addr -> usage (reg : regAddr addr, []) STU _ reg addr -> usage (reg : regAddr addr, []) LIS reg _ -> usage ([], [reg]) LI reg _ -> usage ([], [reg]) @@ -350,6 +356,7 @@ ppc_regUsageOfInstr platform instr MFLR reg -> usage ([], [reg]) FETCHPC reg -> usage ([], [reg]) FETCHTOC reg _ -> usage ([], [reg]) + UPDATE_SP _ _ -> usage ([], [sp]) _ -> noUsage where usage (src, dst) = RU (filter (interesting platform) src) @@ -376,8 +383,10 @@ ppc_patchRegsOfInstr :: Instr -> (Reg -> ppc_patchRegsOfInstr instr env = case instr of LD sz reg addr -> LD sz (env reg) (fixAddr addr) + LDFAR sz reg addr -> LDFAR sz (env reg) (fixAddr addr) LA sz reg addr -> LA sz (env reg) (fixAddr addr) ST sz reg addr -> ST sz (env reg) (fixAddr addr) + STFAR sz reg addr -> STFAR sz (env reg) (fixAddr addr) STU sz reg addr -> STU sz (env reg) (fixAddr addr) LIS reg imm -> LIS (env reg) imm LI reg imm -> LI (env reg) imm @@ -505,7 +514,11 @@ ppc_mkSpillInstr dflags reg delta slot _ -> II64 RcDouble -> FF64 _ -> panic "PPC.Instr.mkSpillInstr: no match" - in ST sz reg (AddrRegImm sp (ImmInt (off-delta))) + instr = case makeImmediate W32 True (off-delta) of + Just _ -> ST + Nothing -> STFAR -- pseudo instruction: 32 bit offsets + + in instr sz reg (AddrRegImm sp (ImmInt (off-delta))) ppc_mkLoadInstr @@ -526,7 +539,11 @@ ppc_mkLoadInstr dflags reg delta slot _ -> II64 RcDouble -> FF64 _ -> panic "PPC.Instr.mkLoadInstr: no match" - in LD sz reg (AddrRegImm sp (ImmInt (off-delta))) + instr = case makeImmediate W32 True (off-delta) of + Just _ -> LD + Nothing -> LDFAR -- pseudo instruction: 32 bit offsets + + in instr sz reg (AddrRegImm sp (ImmInt (off-delta))) -- | The maximum number of bytes 
required to spill a register. PPC32 Index: ghc-7.10.2/compiler/nativeGen/PPC/Ppr.hs =================================================================== --- ghc-7.10.2.orig/compiler/nativeGen/PPC/Ppr.hs +++ ghc-7.10.2/compiler/nativeGen/PPC/Ppr.hs @@ -437,6 +437,14 @@ pprInstr (LD sz reg addr) = hcat [ ptext (sLit ", "), pprAddr addr ] +pprInstr (LDFAR fmt reg (AddrRegImm source off)) = + sdocWithPlatform $ \platform -> vcat [ + pprInstr (ADDIS (tmpReg platform) source (HA off)), + pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off))) + ] + +pprInstr (LDFAR _ _ _) = + panic "PPC.Ppr.pprInstr LDFAR: no match" pprInstr (LA sz reg addr) = hcat [ char '\t', ptext (sLit "l"), @@ -467,6 +475,14 @@ pprInstr (ST sz reg addr) = hcat [ ptext (sLit ", "), pprAddr addr ] +pprInstr (STFAR fmt reg (AddrRegImm source off)) = + sdocWithPlatform $ \platform -> vcat [ + pprInstr (ADDIS (tmpReg platform) source (HA off)), + pprInstr (ST fmt reg (AddrRegImm (tmpReg platform) (LO off))) + ] + +pprInstr (STFAR _ _ _) = + panic "PPC.Ppr.pprInstr STFAR: no match" pprInstr (STU sz reg addr) = hcat [ char '\t', ptext (sLit "st"), @@ -799,6 +815,22 @@ pprInstr LWSYNC = ptext (sLit "\tlwsync" pprInstr NOP = ptext (sLit "\tnop") +pprInstr (UPDATE_SP fmt amount@(ImmInt offset)) + | fits16Bits offset = vcat [ + pprInstr (LD fmt r0 (AddrRegImm sp (ImmInt 0))), + pprInstr (STU fmt r0 (AddrRegImm sp amount)) + ] + +pprInstr (UPDATE_SP fmt amount) + = sdocWithPlatform $ \platform -> + let tmp = tmpReg platform in + vcat [ + pprInstr (LD fmt r0 (AddrRegImm sp (ImmInt 0))), + pprInstr (ADDIS tmp sp (HA amount)), + pprInstr (ADD tmp tmp (RIImm (LO amount))), + pprInstr (STU fmt r0 (AddrRegReg sp tmp)) + ] + -- pprInstr _ = panic "pprInstr (ppc)" Index: ghc-7.10.2/compiler/nativeGen/PPC/Regs.hs =================================================================== --- ghc-7.10.2.orig/compiler/nativeGen/PPC/Regs.hs +++ ghc-7.10.2/compiler/nativeGen/PPC/Regs.hs @@ -37,7 +37,8 @@ module PPC.Regs ( 
fits16Bits, makeImmediate, fReg, - sp, toc, r3, r4, r11, r12, r27, r28, r30, + r0, sp, toc, r3, r4, r11, r12, r27, r28, r30, + tmpReg, f1, f20, f21, allocatableRegs @@ -306,7 +307,8 @@ point registers. fReg :: Int -> RegNo fReg x = (32 + x) -sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg +r0, sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg +r0 = regSingle 0 sp = regSingle 1 toc = regSingle 2 r3 = regSingle 3 @@ -327,3 +329,11 @@ allocatableRegs :: Platform -> [RealReg] allocatableRegs platform = let isFree i = isFastTrue (freeReg platform i) in map RealRegSingle $ filter isFree allMachRegNos + +-- temporary register for compiler use +tmpReg :: Platform -> Reg +tmpReg platform = + case platformArch platform of + ArchPPC -> regSingle 13 + ArchPPC_64 _ -> regSingle 30 + _ -> panic "PPC.Regs.tmpReg: unknowm arch" Index: ghc-7.10.2/includes/CodeGen.Platform.hs =================================================================== --- ghc-7.10.2.orig/includes/CodeGen.Platform.hs +++ ghc-7.10.2/includes/CodeGen.Platform.hs @@ -876,16 +876,26 @@ freeRegBase _ = fastBool True #elif MACHREGS_powerpc freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns, - -- but it's actually free + -- and it is used by the code setting the back chain + -- pointer in stack reallocations on Linux, so it is + -- reserved (on Darwin as well). freeReg 1 = fastBool False -- The Stack Pointer # if !MACHREGS_darwin -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that freeReg 2 = fastBool False -- TODO: make this conditonal for ppc64 ELF -freeReg 13 = fastBool False -- reserved for system thread ID +freeReg 13 = fastBool False -- reserved for system thread ID on 64 bit -- TODO: do not reserve r30 in ppc64 ELF -- at least linux in -fPIC relies on r30 in PLT stubs freeReg 30 = fastBool False +{- TODO: reserve r13 on 64 bit systems only and r30 on 32 bit respectively. + For now we use r30 on 64 bit and r13 on 32 bit as a temporary register + in stack handling code. 
See compiler/nativeGen/PPC/Ppr.hs. + + Later we might want to reserve r13 and r30 only where it is required. + Then use r12 as temporary register, which is also what the C ABI does. +-} + # endif # ifdef REG_Base freeReg REG_Base = fastBool False