Fix PowerPC assembler issue with large stack frames. OBS-URL: https://build.opensuse.org/request/show/337227 OBS-URL: https://build.opensuse.org/package/show/devel:languages:haskell/ghc?expand=0&rev=194
273 lines
11 KiB
Diff
273 lines
11 KiB
Diff
From b29f20edb1ca7f1763ceb001e2bb2d5f2f11bec3 Mon Sep 17 00:00:00 2001
|
|
From: Peter Trommler <ptrommler@acm.org>
|
|
Date: Fri, 2 Oct 2015 15:48:30 +0200
|
|
Subject: [PATCH] nativeGen PPC: fix > 16 bit offsets in stack handling
|
|
|
|
Implement access to spill slots at offsets larger than 16 bits.
|
|
Also, allocation and deallocation of spill slots were restricted to
|
|
16 bit offsets. Now 32 bit offsets are supported on all PowerPC
|
|
platforms.
|
|
|
|
The implementation of 32 bit offsets requires more than one instruction,
|
|
but the native code generator wants one instruction. So we implement
|
|
pseudo-instructions that are pretty printed into multiple assembly
|
|
instructions.
|
|
|
|
With pseudo-instructions for spill slot allocation and deallocation
|
|
we can also implement handling of the back chain pointer according
|
|
to the ELF ABIs.
|
|
|
|
Test Plan: validate (especially on powerpc (32 bit))
|
|
|
|
Reviewers: bgamari, austin, erikd
|
|
|
|
Reviewed By: erikd
|
|
|
|
Subscribers: thomie
|
|
|
|
Differential Revision: https://phabricator.haskell.org/D1296
|
|
|
|
GHC Trac Issues: #7830
|
|
---
|
|
compiler/nativeGen/PPC/Instr.hs | 39 ++++++++++++++++++++++++++++-----------
|
|
compiler/nativeGen/PPC/Ppr.hs | 33 +++++++++++++++++++++++++++++++++
|
|
compiler/nativeGen/PPC/Regs.hs | 14 ++++++++++++--
|
|
includes/CodeGen.Platform.hs | 18 +++++++++++++-----
|
|
4 files changed, 86 insertions(+), 18 deletions(-)
|
|
|
|
Index: ghc-7.10.2/compiler/nativeGen/PPC/Instr.hs
|
|
===================================================================
|
|
--- ghc-7.10.2.orig/compiler/nativeGen/PPC/Instr.hs
|
|
+++ ghc-7.10.2/compiler/nativeGen/PPC/Instr.hs
|
|
@@ -75,19 +75,19 @@ instance Instruction Instr where
|
|
|
|
ppc_mkStackAllocInstr :: Platform -> Int -> Instr
|
|
ppc_mkStackAllocInstr platform amount
|
|
- = case platformArch platform of
|
|
- ArchPPC -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
- ADD sp sp (RIImm (ImmInt (-amount)))
|
|
- ArchPPC_64 _ -> STU II64 sp (AddrRegImm sp (ImmInt (-amount)))
|
|
- arch -> panic $ "ppc_mkStackAllocInstr " ++ show arch
|
|
+ = ppc_mkStackAllocInstr' platform (-amount)
|
|
|
|
ppc_mkStackDeallocInstr :: Platform -> Int -> Instr
|
|
ppc_mkStackDeallocInstr platform amount
|
|
+ = ppc_mkStackAllocInstr' platform amount
|
|
+
|
|
+ppc_mkStackAllocInstr' :: Platform -> Int -> Instr
|
|
+ppc_mkStackAllocInstr' platform amount
|
|
= case platformArch platform of
|
|
- ArchPPC -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
|
|
- ADD sp sp (RIImm (ImmInt amount))
|
|
- ArchPPC_64 _ -> ADD sp sp (RIImm (ImmInt amount))
|
|
- arch -> panic $ "ppc_mkStackDeallocInstr " ++ show arch
|
|
+ ArchPPC -> UPDATE_SP II32 (ImmInt amount)
|
|
+ ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount)
|
|
+ _ -> panic $ "ppc_mkStackAllocInstr' "
|
|
+ ++ show (platformArch platform)
|
|
|
|
--
|
|
-- See note [extra spill slots] in X86/Instr.hs
|
|
@@ -187,8 +187,10 @@ data Instr
|
|
|
|
-- Loads and stores.
|
|
| LD Size Reg AddrMode -- Load size, dst, src
|
|
+ | LDFAR Size Reg AddrMode -- Load format, dst, src 32 bit offset
|
|
| LA Size Reg AddrMode -- Load arithmetic size, dst, src
|
|
| ST Size Reg AddrMode -- Store size, src, dst
|
|
+ | STFAR Size Reg AddrMode -- Store format, src, dst 32 bit offset
|
|
| STU Size Reg AddrMode -- Store with Update size, src, dst
|
|
| LIS Reg Imm -- Load Immediate Shifted dst, src
|
|
| LI Reg Imm -- Load Immediate dst, src
|
|
@@ -278,6 +280,8 @@ data Instr
|
|
| NOP -- no operation, PowerPC 64 bit
|
|
-- needs this as place holder to
|
|
-- reload TOC pointer
|
|
+ | UPDATE_SP Size Imm -- expand/shrink spill area on C stack
|
|
+ -- pseudo-instruction
|
|
|
|
-- | Get the registers that are being used by this instruction.
|
|
-- regUsage doesn't need to do any trickery for jumps and such.
|
|
@@ -289,8 +293,10 @@ ppc_regUsageOfInstr :: Platform -> Instr
|
|
ppc_regUsageOfInstr platform instr
|
|
= case instr of
|
|
LD _ reg addr -> usage (regAddr addr, [reg])
|
|
+ LDFAR _ reg addr -> usage (regAddr addr, [reg])
|
|
LA _ reg addr -> usage (regAddr addr, [reg])
|
|
ST _ reg addr -> usage (reg : regAddr addr, [])
|
|
+ STFAR _ reg addr -> usage (reg : regAddr addr, [])
|
|
STU _ reg addr -> usage (reg : regAddr addr, [])
|
|
LIS reg _ -> usage ([], [reg])
|
|
LI reg _ -> usage ([], [reg])
|
|
@@ -350,6 +356,7 @@ ppc_regUsageOfInstr platform instr
|
|
MFLR reg -> usage ([], [reg])
|
|
FETCHPC reg -> usage ([], [reg])
|
|
FETCHTOC reg _ -> usage ([], [reg])
|
|
+ UPDATE_SP _ _ -> usage ([], [sp])
|
|
_ -> noUsage
|
|
where
|
|
usage (src, dst) = RU (filter (interesting platform) src)
|
|
@@ -376,8 +383,10 @@ ppc_patchRegsOfInstr :: Instr -> (Reg ->
|
|
ppc_patchRegsOfInstr instr env
|
|
= case instr of
|
|
LD sz reg addr -> LD sz (env reg) (fixAddr addr)
|
|
+ LDFAR sz reg addr -> LDFAR sz (env reg) (fixAddr addr)
|
|
LA sz reg addr -> LA sz (env reg) (fixAddr addr)
|
|
ST sz reg addr -> ST sz (env reg) (fixAddr addr)
|
|
+ STFAR sz reg addr -> STFAR sz (env reg) (fixAddr addr)
|
|
STU sz reg addr -> STU sz (env reg) (fixAddr addr)
|
|
LIS reg imm -> LIS (env reg) imm
|
|
LI reg imm -> LI (env reg) imm
|
|
@@ -505,7 +514,11 @@ ppc_mkSpillInstr dflags reg delta slot
|
|
_ -> II64
|
|
RcDouble -> FF64
|
|
_ -> panic "PPC.Instr.mkSpillInstr: no match"
|
|
- in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
+ instr = case makeImmediate W32 True (off-delta) of
|
|
+ Just _ -> ST
|
|
+ Nothing -> STFAR -- pseudo instruction: 32 bit offsets
|
|
+
|
|
+ in instr sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
|
|
|
|
ppc_mkLoadInstr
|
|
@@ -526,7 +539,11 @@ ppc_mkLoadInstr dflags reg delta slot
|
|
_ -> II64
|
|
RcDouble -> FF64
|
|
_ -> panic "PPC.Instr.mkLoadInstr: no match"
|
|
- in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
+ instr = case makeImmediate W32 True (off-delta) of
|
|
+ Just _ -> LD
|
|
+ Nothing -> LDFAR -- pseudo instruction: 32 bit offsets
|
|
+
|
|
+ in instr sz reg (AddrRegImm sp (ImmInt (off-delta)))
|
|
|
|
|
|
-- | The maximum number of bytes required to spill a register. PPC32
|
|
Index: ghc-7.10.2/compiler/nativeGen/PPC/Ppr.hs
|
|
===================================================================
|
|
--- ghc-7.10.2.orig/compiler/nativeGen/PPC/Ppr.hs
|
|
+++ ghc-7.10.2/compiler/nativeGen/PPC/Ppr.hs
|
|
@@ -437,6 +437,14 @@ pprInstr (LD sz reg addr) = hcat [
|
|
ptext (sLit ", "),
|
|
pprAddr addr
|
|
]
|
|
+pprInstr (LDFAR fmt reg (AddrRegImm source off)) =
|
|
+ sdocWithPlatform $ \platform -> vcat [
|
|
+ pprInstr (ADDIS (tmpReg platform) source (HA off)),
|
|
+ pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off)))
|
|
+ ]
|
|
+
|
|
+pprInstr (LDFAR _ _ _) =
|
|
+ panic "PPC.Ppr.pprInstr LDFAR: no match"
|
|
pprInstr (LA sz reg addr) = hcat [
|
|
char '\t',
|
|
ptext (sLit "l"),
|
|
@@ -467,6 +475,14 @@ pprInstr (ST sz reg addr) = hcat [
|
|
ptext (sLit ", "),
|
|
pprAddr addr
|
|
]
|
|
+pprInstr (STFAR fmt reg (AddrRegImm source off)) =
|
|
+ sdocWithPlatform $ \platform -> vcat [
|
|
+ pprInstr (ADDIS (tmpReg platform) source (HA off)),
|
|
+ pprInstr (ST fmt reg (AddrRegImm (tmpReg platform) (LO off)))
|
|
+ ]
|
|
+
|
|
+pprInstr (STFAR _ _ _) =
|
|
+ panic "PPC.Ppr.pprInstr STFAR: no match"
|
|
pprInstr (STU sz reg addr) = hcat [
|
|
char '\t',
|
|
ptext (sLit "st"),
|
|
@@ -799,6 +815,22 @@ pprInstr LWSYNC = ptext (sLit "\tlwsync"
|
|
|
|
pprInstr NOP = ptext (sLit "\tnop")
|
|
|
|
+pprInstr (UPDATE_SP fmt amount@(ImmInt offset))
|
|
+ | fits16Bits offset = vcat [
|
|
+ pprInstr (LD fmt r0 (AddrRegImm sp (ImmInt 0))),
|
|
+ pprInstr (STU fmt r0 (AddrRegImm sp amount))
|
|
+ ]
|
|
+
|
|
+pprInstr (UPDATE_SP fmt amount)
|
|
+ = sdocWithPlatform $ \platform ->
|
|
+ let tmp = tmpReg platform in
|
|
+ vcat [
|
|
+ pprInstr (LD fmt r0 (AddrRegImm sp (ImmInt 0))),
|
|
+ pprInstr (ADDIS tmp sp (HA amount)),
|
|
+ pprInstr (ADD tmp tmp (RIImm (LO amount))),
|
|
+ pprInstr (STU fmt r0 (AddrRegReg sp tmp))
|
|
+ ]
|
|
+
|
|
-- pprInstr _ = panic "pprInstr (ppc)"
|
|
|
|
|
|
Index: ghc-7.10.2/compiler/nativeGen/PPC/Regs.hs
|
|
===================================================================
|
|
--- ghc-7.10.2.orig/compiler/nativeGen/PPC/Regs.hs
|
|
+++ ghc-7.10.2/compiler/nativeGen/PPC/Regs.hs
|
|
@@ -37,7 +37,8 @@ module PPC.Regs (
|
|
fits16Bits,
|
|
makeImmediate,
|
|
fReg,
|
|
- sp, toc, r3, r4, r11, r12, r27, r28, r30,
|
|
+ r0, sp, toc, r3, r4, r11, r12, r27, r28, r30,
|
|
+ tmpReg,
|
|
f1, f20, f21,
|
|
|
|
allocatableRegs
|
|
@@ -306,7 +307,8 @@ point registers.
|
|
fReg :: Int -> RegNo
|
|
fReg x = (32 + x)
|
|
|
|
-sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
|
|
+r0, sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
|
|
+r0 = regSingle 0
|
|
sp = regSingle 1
|
|
toc = regSingle 2
|
|
r3 = regSingle 3
|
|
@@ -327,3 +329,11 @@ allocatableRegs :: Platform -> [RealReg]
|
|
allocatableRegs platform
|
|
= let isFree i = isFastTrue (freeReg platform i)
|
|
in map RealRegSingle $ filter isFree allMachRegNos
|
|
+
|
|
+-- temporary register for compiler use
|
|
+tmpReg :: Platform -> Reg
|
|
+tmpReg platform =
|
|
+ case platformArch platform of
|
|
+ ArchPPC -> regSingle 13
|
|
+ ArchPPC_64 _ -> regSingle 30
|
|
+ _ -> panic "PPC.Regs.tmpReg: unknowm arch"
|
|
Index: ghc-7.10.2/includes/CodeGen.Platform.hs
|
|
===================================================================
|
|
--- ghc-7.10.2.orig/includes/CodeGen.Platform.hs
|
|
+++ ghc-7.10.2/includes/CodeGen.Platform.hs
|
|
@@ -876,16 +876,26 @@ freeRegBase _ = fastBool True
|
|
#elif MACHREGS_powerpc
|
|
|
|
freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns,
|
|
- -- but it's actually free
|
|
+ -- in stack reallocations on Linux
|
|
+ -- r0 is not usable in all insns so also reserved
|
|
+ -- on Darwin.
|
|
freeReg 1 = fastBool False -- The Stack Pointer
|
|
# if !MACHREGS_darwin
|
|
-- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
|
|
freeReg 2 = fastBool False
|
|
-- TODO: make this conditonal for ppc64 ELF
|
|
-freeReg 13 = fastBool False -- reserved for system thread ID
|
|
+freeReg 13 = fastBool False -- reserved for system thread ID on 64 bit
|
|
-- TODO: do not reserve r30 in ppc64 ELF
|
|
-- at least linux in -fPIC relies on r30 in PLT stubs
|
|
freeReg 30 = fastBool False
|
|
+{- TODO: reserve r13 on 64 bit systems only and r30 on 32 bit respectively.
|
|
+ For now we use r30 on 64 bit and r13 on 32 bit as a temporary register
|
|
+ in stack handling code. See compiler/nativeGen/PPC/Ppr.hs.
|
|
+
|
|
+ Later we might want to reserve r13 and r30 only where it is required.
|
|
+ Then use r12 as temporary register, which is also what the C ABI does.
|
|
+-}
|
|
+
|
|
# endif
|
|
# ifdef REG_Base
|
|
freeReg REG_Base = fastBool False
|