valgrind/r14239.diff
Dirk Mueller 0ed966f036 - update for ppc64le support (bnc#880334):
- drop: valgrind-3.9.0-merge.patches.from.Paul.McKenney.patch,
  valgrind-3.9.0-ppc64le-abiv2.patch
- add: VEX-r2803.diff, VEX-r2808.diff, VEX-r2816.diff,
  VEX-r2904.diff, VEX-r2910.diff, VEX-r2914.diff, VEX-r2915.diff,
  VEX-r2916.diff, r13767.diff, r13770.diff, r14184.diff, r14238.diff,
  r14239.diff, r14240.diff, r14246.diff

OBS-URL: https://build.opensuse.org/package/show/devel:tools/valgrind?expand=0&rev=114
2014-08-17 21:49:48 +00:00

------------------------------------------------------------------------
r14239 | carll | 2014-08-08 01:35:54 +0200 (Fri, 08 Aug 2014) | 23 lines
This commit is for Bugzilla 334834. The Bugzilla contains patch 2 of 3
to add PPC64 LE support. The other two patches can be found in Bugzillas
334384 and 334836.
PowerPC: add the functional Little Endian support, patch 2
The IBM POWER processor now supports both Big Endian and Little Endian.
Little Endian also uses a new ABI (ELFv2): function descriptors are not
used, the stack frame size changed, and TOC access changed. Functions
now have a local and a global entry point. Register r2 holds the TOC
pointer for local calls; for global calls, register r12 holds the
function's entry address, from which the global entry code derives the
TOC. This patch makes the functional changes to the Valgrind tool, and
makes the changes needed in the none/tests/ppc32 and none/tests/ppc64
Makefile.am files. A number of the ppc-specific tests have endianness
dependencies that are not fixed in this patch; they are fixed in the
next patch.
Per Julian's comments, coregrind/m_dispatch/dispatch-ppc64-linux.S was
renamed to coregrind/m_dispatch/dispatch-ppc64be-linux.S, and a new file
coregrind/m_dispatch/dispatch-ppc64le-linux.S was created for LE. The
same was done for coregrind/m_syswrap/syscall-ppc64-linux.S.
Signed-off-by: Carl Love <carll@us.ibm.com>
------------------------------------------------------------------------
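
For orientation, the ELFv2 dual-entry convention described in the log
message looks as follows; this is a minimal sketch in the inline-asm
style the patch itself uses, with an illustrative (not Valgrind)
function name:

    asm(
    "   .align 2\n"
    "   .globl my_func\n"                 // 'my_func' is a made-up name
    "   .type my_func,@function\n"
    "my_func:\n"
    "0: addis 2,12,.TOC.-0b@ha\n"         // global entry: r12 = &my_func
    "   addi  2,2,.TOC.-0b@l\n"           // derive the TOC pointer (r2)
    "   .localentry my_func, .-my_func\n" // local entry starts here
    "   blr\n"
    );

Cross-module and indirect calls enter at the global entry point with the
target address in r12; direct calls within a module enter at the local
entry point with r2 already valid. This is the pattern behind the
.localentry/addis/addi sequences added throughout the patch.
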
Index: coregrind/Makefile.am
===================================================================
--- coregrind/Makefile.am.orig
+++ coregrind/Makefile.am
@@ -313,7 +313,8 @@ COREGRIND_SOURCES_COMMON = \
m_dispatch/dispatch-x86-linux.S \
m_dispatch/dispatch-amd64-linux.S \
m_dispatch/dispatch-ppc32-linux.S \
- m_dispatch/dispatch-ppc64-linux.S \
+ m_dispatch/dispatch-ppc64be-linux.S \
+ m_dispatch/dispatch-ppc64le-linux.S \
m_dispatch/dispatch-arm-linux.S \
m_dispatch/dispatch-arm64-linux.S \
m_dispatch/dispatch-s390x-linux.S \
@@ -365,7 +366,8 @@ COREGRIND_SOURCES_COMMON = \
m_syswrap/syscall-x86-linux.S \
m_syswrap/syscall-amd64-linux.S \
m_syswrap/syscall-ppc32-linux.S \
- m_syswrap/syscall-ppc64-linux.S \
+ m_syswrap/syscall-ppc64be-linux.S \
+ m_syswrap/syscall-ppc64le-linux.S \
m_syswrap/syscall-arm-linux.S \
m_syswrap/syscall-arm64-linux.S \
m_syswrap/syscall-s390x-linux.S \
Index: coregrind/launcher-linux.c
===================================================================
--- coregrind/launcher-linux.c.orig
+++ coregrind/launcher-linux.c
@@ -228,6 +228,10 @@ static const char *select_platform(const
(ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
platform = "arm64-linux";
+ } else if (ehdr->e_machine == EM_PPC64 &&
+ (ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
+ ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
+ platform = "ppc64le-linux";
}
} else if (header[EI_DATA] == ELFDATA2MSB) {
# if !defined(VGPV_arm_linux_android) \
@@ -321,6 +325,7 @@ int main(int argc, char** argv, char** e
(0==strcmp(VG_PLATFORM,"amd64-linux")) ||
(0==strcmp(VG_PLATFORM,"ppc32-linux")) ||
(0==strcmp(VG_PLATFORM,"ppc64be-linux")) ||
+ (0==strcmp(VG_PLATFORM,"ppc64le-linux")) ||
(0==strcmp(VG_PLATFORM,"arm-linux")) ||
(0==strcmp(VG_PLATFORM,"arm64-linux")) ||
(0==strcmp(VG_PLATFORM,"s390x-linux")) ||
Index: coregrind/m_translate.c
===================================================================
--- coregrind/m_translate.c.orig
+++ coregrind/m_translate.c
@@ -1118,6 +1118,10 @@ static IRTemp gen_POP ( IRSB* bb )
return res;
}
+#endif
+
+#if defined(VG_PLAT_USES_PPCTOC)
+
/* Generate code to push LR and R2 onto this thread's redir stack,
then set R2 to the new value (which is the TOC pointer to be used
for the duration of the replacement function, as determined by
@@ -1140,6 +1144,9 @@ static void gen_push_and_set_LR_R2 ( IRS
# error Platform is not TOC-afflicted, fortunately
# endif
}
+#endif
+
+#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
static void gen_pop_R2_LR_then_bLR ( IRSB* bb )
{
@@ -1166,6 +1173,9 @@ static void gen_pop_R2_LR_then_bLR ( IRS
# error Platform is not TOC-afflicted, fortunately
# endif
}
+#endif
+
+#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
static
Bool mk_preamble__ppctoc_magic_return_stub ( void* closureV, IRSB* bb )
@@ -1187,6 +1197,30 @@ Bool mk_preamble__ppctoc_magic_return_st
}
#endif
+#if defined(VGP_ppc64le_linux)
+/* Generate code to push LR and R2 onto this thread's redir stack.
+ Need to save R2 in case we redirect to a global entry point. The
+ value of R2 is not preserved when entering the global entry point.
+ Need to make sure R2 gets restored on return. Set LR to the magic
+ return stub, so we get to intercept the return and restore R2 and
+ LR to the values saved here.
+
+ The existing infrastructure for the TOC-enabled architectures is
+ being reused here. So, we need to enable a number of the
+ code sections used by VG_PLAT_USES_PPCTOC.
+*/
+
+static void gen_push_R2_and_set_LR ( IRSB* bb )
+{
+ Addr64 bogus_RA = (Addr64)&VG_(ppctoc_magic_redirect_return_stub);
+ Int offB_GPR2 = offsetof(VexGuestPPC64State,guest_GPR2);
+ Int offB_LR = offsetof(VexGuestPPC64State,guest_LR);
+ gen_PUSH( bb, IRExpr_Get(offB_LR, Ity_I64) );
+ gen_PUSH( bb, IRExpr_Get(offB_GPR2, Ity_I64) );
+ addStmtToIRSB( bb, IRStmt_Put( offB_LR, mkU64( bogus_RA )) );
+}
+# endif
+
/* --------------- END helpers for with-TOC platforms --------------- */
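
The save/restore protocol implemented by gen_push_R2_and_set_LR and
gen_pop_R2_LR_then_bLR above can be modelled in a few lines of plain C;
an illustrative model only, with assumed names (the real redir stack
lives in the guest state):

    typedef struct { unsigned long lr, r2; } Guest;
    static unsigned long redir_stack[16];
    static int redir_sp = 0;

    static void push(unsigned long v) { redir_stack[redir_sp++] = v; }
    static unsigned long pop(void)    { return redir_stack[--redir_sp]; }

    /* before jumping to the replacement's global entry point */
    static void redirect_entry(Guest *g, unsigned long magic_stub)
    {
       push(g->lr);        /* the original return address */
       push(g->r2);        /* caller's TOC; the global entry clobbers r2 */
       g->lr = magic_stub; /* the replacement "returns" into the stub */
    }

    /* what the magic return stub does */
    static void redirect_return(Guest *g)
    {
       g->r2 = pop();      /* caller's TOC restored */
       g->lr = pop();      /* then branch to g->lr, back to the caller */
    }
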
@@ -1244,6 +1278,19 @@ Bool mk_preamble__set_NRADDR_to_zero ( v
gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( closure->readdr ) );
}
# endif
+
+#if defined(VGP_ppc64le_linux)
+ VgCallbackClosure* closure = (VgCallbackClosure*)closureV;
+ Int offB_GPR12 = offsetof(VexGuestArchState, guest_GPR12);
+ addStmtToIRSB(bb, IRStmt_Put(offB_GPR12, mkU64(closure->readdr)));
+ addStmtToIRSB(bb,
+ IRStmt_Put(
+ offsetof(VexGuestArchState,guest_NRADDR_GPR2),
+ VG_WORDSIZE==8 ? mkU64(0) : mkU32(0)
+ )
+ );
+ gen_push_R2_and_set_LR ( bb );
+#endif
return False;
}
@@ -1277,7 +1324,7 @@ Bool mk_preamble__set_NRADDR_to_nraddr (
Int offB_GPR25 = offsetof(VexGuestMIPS64State, guest_r25);
addStmtToIRSB(bb, IRStmt_Put(offB_GPR25, mkU64(closure->readdr)));
# endif
-# if defined(VGP_ppc64be_linux)
+# if defined(VG_PLAT_USES_PPCTOC) && !defined(VGP_ppc64le_linux)
addStmtToIRSB(
bb,
IRStmt_Put(
@@ -1288,6 +1335,22 @@ Bool mk_preamble__set_NRADDR_to_nraddr (
);
gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( closure->readdr ) );
# endif
+#if defined(VGP_ppc64le_linux)
+ /* This saves r2 before entering the redirected function. We need to
+ * move guest_NRADDR_GPR2 back to R2 on return.
+ */
+ Int offB_GPR12 = offsetof(VexGuestArchState, guest_GPR12);
+ addStmtToIRSB(
+ bb,
+ IRStmt_Put(
+ offsetof(VexGuestArchState,guest_NRADDR_GPR2),
+ IRExpr_Get(offsetof(VexGuestArchState,guest_GPR2),
+ VG_WORDSIZE==8 ? Ity_I64 : Ity_I32)
+ )
+ );
+ addStmtToIRSB(bb, IRStmt_Put(offB_GPR12, mkU64(closure->readdr)));
+ gen_push_R2_and_set_LR ( bb );
+#endif
return False;
}
@@ -1475,7 +1538,8 @@ Bool VG_(translate) ( ThreadId tid,
if (kind == T_Redir_Wrap)
preamble_fn = mk_preamble__set_NRADDR_to_nraddr;
-# if defined(VG_PLAT_USES_PPCTOC)
+ /* On LE we set up the LR */
+# if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
if (ULong_to_Ptr(nraddr)
== (void*)&VG_(ppctoc_magic_redirect_return_stub)) {
/* If entering the special return stub, this means a wrapped or
@@ -1518,6 +1582,11 @@ Bool VG_(translate) ( ThreadId tid,
vex_abiinfo.guest_ppc_zap_RZ_at_bl = const_True;
vex_abiinfo.host_ppc_calls_use_fndescrs = True;
# endif
+# if defined(VGP_ppc64le_linux)
+ vex_abiinfo.guest_ppc_zap_RZ_at_blr = True;
+ vex_abiinfo.guest_ppc_zap_RZ_at_bl = const_True;
+ vex_abiinfo.host_ppc_calls_use_fndescrs = False;
+# endif
/* Set up closure args. */
closure.tid = tid;
Index: coregrind/launcher-darwin.c
===================================================================
--- coregrind/launcher-darwin.c.orig
+++ coregrind/launcher-darwin.c
@@ -64,6 +64,7 @@ static struct {
{ CPU_TYPE_ARM, "arm", "arm" },
{ CPU_TYPE_POWERPC, "ppc", "ppc32" },
{ CPU_TYPE_POWERPC64BE, "ppc64be", "ppc64be" },
+ { CPU_TYPE_POWERPC64LE, "ppc64le", "ppc64le" },
};
static int valid_archs_count = sizeof(valid_archs)/sizeof(valid_archs[0]);
Index: coregrind/m_sigframe/sigframe-ppc64-linux.c
===================================================================
--- coregrind/m_sigframe/sigframe-ppc64-linux.c.orig
+++ coregrind/m_sigframe/sigframe-ppc64-linux.c
@@ -252,7 +252,11 @@ void VG_(sigframe_create)( ThreadId tid,
# undef DO
frame->uc.uc_mcontext.gp_regs[VKI_PT_NIP] = tst->arch.vex.guest_CIA;
- frame->uc.uc_mcontext.gp_regs[VKI_PT_MSR] = 0xf032; /* pretty arbitrary */
+#ifdef VGP_ppc64le_linux
+ frame->uc.uc_mcontext.gp_regs[VKI_PT_MSR] = 0xf033; /* pretty arbitrary */
+#else
+ frame->uc.uc_mcontext.gp_regs[VKI_PT_MSR] = 0xf032; /* pretty arbitrary */
+#endif
frame->uc.uc_mcontext.gp_regs[VKI_PT_ORIG_R3] = tst->arch.vex.guest_GPR3;
frame->uc.uc_mcontext.gp_regs[VKI_PT_CTR] = tst->arch.vex.guest_CTR;
frame->uc.uc_mcontext.gp_regs[VKI_PT_LNK] = tst->arch.vex.guest_LR;
@@ -302,9 +306,13 @@ void VG_(sigframe_create)( ThreadId tid,
/* Handler is in fact a standard ppc64-linux function descriptor,
so extract the function entry point and also the toc ptr to use. */
+#if defined(VGP_ppc64be_linux)
SET_SIGNAL_GPR(tid, 2, (Addr) ((ULong*)handler)[1]);
tst->arch.vex.guest_CIA = (Addr) ((ULong*)handler)[0];
-
+#else
+ SET_SIGNAL_GPR(tid, 12, (Addr) handler);
+ tst->arch.vex.guest_CIA = (Addr) handler;
+#endif
priv = &frame->priv;
priv->magicPI = 0x31415927;
priv->sigNo_private = sigNo;
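
The BE/LE split above reflects the two function-pointer representations:
under ELFv1 (BE) a function pointer addresses a three-doubleword
descriptor in .opd, under ELFv2 (LE) it addresses the code itself. A
sketch of the distinction, with an assumed descriptor type and helper
name:

    /* ELFv1 .opd entry: entry address, TOC value, environment pointer */
    typedef struct { unsigned long entry, toc, env; } OpdDescriptor;

    static unsigned long handler_entry(void *handler, unsigned long *gpr)
    {
    #if defined(VGP_ppc64be_linux)
       const OpdDescriptor *d = (const OpdDescriptor *)handler;
       *gpr = d->toc;                 /* r2 must hold the handler's TOC */
       return d->entry;
    #else
       *gpr = (unsigned long)handler; /* r12 must hold the entry address */
       return (unsigned long)handler; /* the global entry point itself */
    #endif
    }
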
Index: coregrind/m_syswrap/syscall-ppc64-linux.S
===================================================================
--- coregrind/m_syswrap/syscall-ppc64-linux.S
+++ /dev/null
@@ -1,172 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- Support for doing system calls. syscall-ppc64-linux.S ---*/
-/*--------------------------------------------------------------------*/
-
-/*
- This file is part of Valgrind, a dynamic binary instrumentation
- framework.
-
- Copyright (C) 2005-2013 Paul Mackerras <paulus@samba.org>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307, USA.
-
- The GNU General Public License is contained in the file COPYING.
-*/
-
-#if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
-
-#include "pub_core_basics_asm.h"
-#include "pub_core_vkiscnums_asm.h"
-#include "libvex_guest_offsets.h"
-
-
-/*----------------------------------------------------------------*/
-/*
- Perform a syscall for the client. This will run a syscall
- with the client's specific per-thread signal mask.
-
- The structure of this function is such that, if the syscall is
- interrupted by a signal, we can determine exactly what
- execution state we were in with respect to the execution of
- the syscall by examining the value of NIP in the signal
- handler. This means that we can always do the appropriate
- thing to precisely emulate the kernel's signal/syscall
- interactions.
-
- The syscall number is taken from the argument, even though it
- should also be in regs->m_gpr[0]. The syscall result is written
- back to regs->m_gpr[3]/m_xer/m_result on completion.
-
- Returns 0 if the syscall was successfully called (even if the
- syscall itself failed), or a nonzero error code in the lowest
- 8 bits if one of the sigprocmasks failed (there's no way to
- determine which one failed). And there's no obvious way to
- recover from that either, but nevertheless we want to know.
-
- VG_(fixup_guest_state_after_syscall_interrupted) does the
- thread state fixup in the case where we were interrupted by a
- signal.
-
- Prototype:
-
- UWord ML_(do_syscall_for_client_WRK)(
- Int syscallno, // r3
- void* guest_state, // r4
- const vki_sigset_t *sysmask, // r5
- const vki_sigset_t *postmask, // r6
- Int sigsetSzB) // r7
-*/
-/* from vki_arch.h */
-#define VKI_SIG_SETMASK 2
-
-.align 2
-.globl ML_(do_syscall_for_client_WRK)
-.section ".opd","aw"
-.align 3
-ML_(do_syscall_for_client_WRK):
-.quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
-.previous
-.type .ML_(do_syscall_for_client_WRK),@function
-.globl .ML_(do_syscall_for_client_WRK)
-.ML_(do_syscall_for_client_WRK):
- /* make a stack frame */
- stdu 1,-80(1)
- std 31,72(1)
- std 30,64(1)
- std 29,56(1)
- std 28,48(1)
- mr 31,3 /* syscall number */
- mr 30,4 /* guest_state */
- mr 29,6 /* postmask */
- mr 28,7 /* sigsetSzB */
-
- /* set the signal mask for doing the system call */
- /* set up for sigprocmask(SIG_SETMASK, sysmask, postmask) */
-1: li 0,__NR_rt_sigprocmask
- li 3,VKI_SIG_SETMASK
- mr 4,5
- mr 5,6
- mr 6,7
- sc /* set the mask */
- bso 7f /* if the sigprocmask fails */
-
- /* load up syscall args from the threadstate */
- ld 3,OFFSET_ppc64_GPR3(30)
- ld 4,OFFSET_ppc64_GPR4(30)
- ld 5,OFFSET_ppc64_GPR5(30)
- ld 6,OFFSET_ppc64_GPR6(30)
- ld 7,OFFSET_ppc64_GPR7(30)
- ld 8,OFFSET_ppc64_GPR8(30)
- mr 0,31 /* syscall number */
-2: sc /* do the syscall */
-
- /* put the result back in the threadstate */
-3: std 3,OFFSET_ppc64_GPR3(30) /* gst->GPR3 = sc result */
- /* copy cr0.so back to simulated state */
- mfcr 5 /* r5 = CR */
- rlwinm 5,5,4,31,31 /* r5 = (CR >> 28) & 1 */
- stb 5,OFFSET_ppc64_CR0_0(30) /* gst->CR0.SO = cr0.so */
-
- /* block signals again */
- /* set up for sigprocmask(SIG_SETMASK, postmask, NULL) */
-4: li 0,__NR_rt_sigprocmask
- li 3,VKI_SIG_SETMASK
- mr 4,29
- li 5,0
- mr 6,28
- sc /* set the mask */
- bso 7f /* if the sigprocmask fails */
- /* now safe from signals */
- li 3,0 /* SUCCESS */
-
- /* pop off stack frame */
-5: ld 28,48(1)
- ld 29,56(1)
- ld 30,64(1)
- ld 31,72(1)
- addi 1,1,80
- blr
-
- /* failure: return 0x8000 | error code */
-7: ori 3,3,0x8000 /* FAILURE -- ensure return value is nonzero */
- b 5b
-
-.section .rodata
-/* export the ranges so that
- VG_(fixup_guest_state_after_syscall_interrupted) can do the
- right thing */
-
-.globl ML_(blksys_setup)
-.globl ML_(blksys_restart)
-.globl ML_(blksys_complete)
-.globl ML_(blksys_committed)
-.globl ML_(blksys_finished)
-ML_(blksys_setup): .quad 1b
-ML_(blksys_restart): .quad 2b
-ML_(blksys_complete): .quad 3b
-ML_(blksys_committed): .quad 4b
-ML_(blksys_finished): .quad 5b
-
-
-/* Let the linker know we don't need an executable stack */
-.section .note.GNU-stack,"",@progbits
-
-#endif // defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
-
-/*--------------------------------------------------------------------*/
-/*--- end ---*/
-/*--------------------------------------------------------------------*/
Index: coregrind/m_syswrap/syscall-ppc64be-linux.S
===================================================================
--- /dev/null
+++ coregrind/m_syswrap/syscall-ppc64be-linux.S
@@ -0,0 +1,172 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Support for doing system calls. syscall-ppc64be-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2005-2013 Paul Mackerras <paulus@samba.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_ppc64be_linux)
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_vkiscnums_asm.h"
+#include "libvex_guest_offsets.h"
+
+
+/*----------------------------------------------------------------*/
+/*
+ Perform a syscall for the client. This will run a syscall
+ with the client's specific per-thread signal mask.
+
+ The structure of this function is such that, if the syscall is
+ interrupted by a signal, we can determine exactly what
+ execution state we were in with respect to the execution of
+ the syscall by examining the value of NIP in the signal
+ handler. This means that we can always do the appropriate
+ thing to precisely emulate the kernel's signal/syscall
+ interactions.
+
+ The syscall number is taken from the argument, even though it
+ should also be in regs->m_gpr[0]. The syscall result is written
+ back to regs->m_gpr[3]/m_xer/m_result on completion.
+
+ Returns 0 if the syscall was successfully called (even if the
+ syscall itself failed), or a nonzero error code in the lowest
+ 8 bits if one of the sigprocmasks failed (there's no way to
+ determine which one failed). And there's no obvious way to
+ recover from that either, but nevertheless we want to know.
+
+ VG_(fixup_guest_state_after_syscall_interrupted) does the
+ thread state fixup in the case where we were interrupted by a
+ signal.
+
+ Prototype:
+
+ UWord ML_(do_syscall_for_client_WRK)(
+ Int syscallno, // r3
+ void* guest_state, // r4
+ const vki_sigset_t *sysmask, // r5
+ const vki_sigset_t *postmask, // r6
+ Int sigsetSzB) // r7
+*/
+/* from vki_arch.h */
+#define VKI_SIG_SETMASK 2
+
+.align 2
+.globl ML_(do_syscall_for_client_WRK)
+.section ".opd","aw"
+.align 3
+ML_(do_syscall_for_client_WRK):
+.quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
+.previous
+.type .ML_(do_syscall_for_client_WRK),@function
+.globl .ML_(do_syscall_for_client_WRK)
+.ML_(do_syscall_for_client_WRK):
+ /* make a stack frame */
+ stdu 1,-80(1)
+ std 31,72(1)
+ std 30,64(1)
+ std 29,56(1)
+ std 28,48(1)
+ mr 31,3 /* syscall number */
+ mr 30,4 /* guest_state */
+ mr 29,6 /* postmask */
+ mr 28,7 /* sigsetSzB */
+
+ /* set the signal mask for doing the system call */
+ /* set up for sigprocmask(SIG_SETMASK, sysmask, postmask) */
+1: li 0,__NR_rt_sigprocmask
+ li 3,VKI_SIG_SETMASK
+ mr 4,5
+ mr 5,6
+ mr 6,7
+ sc /* set the mask */
+ bso 7f /* if the sigprocmask fails */
+
+ /* load up syscall args from the threadstate */
+ ld 3,OFFSET_ppc64_GPR3(30)
+ ld 4,OFFSET_ppc64_GPR4(30)
+ ld 5,OFFSET_ppc64_GPR5(30)
+ ld 6,OFFSET_ppc64_GPR6(30)
+ ld 7,OFFSET_ppc64_GPR7(30)
+ ld 8,OFFSET_ppc64_GPR8(30)
+ mr 0,31 /* syscall number */
+2: sc /* do the syscall */
+
+ /* put the result back in the threadstate */
+3: std 3,OFFSET_ppc64_GPR3(30) /* gst->GPR3 = sc result */
+ /* copy cr0.so back to simulated state */
+ mfcr 5 /* r5 = CR */
+ rlwinm 5,5,4,31,31 /* r5 = (CR >> 28) & 1 */
+ stb 5,OFFSET_ppc64_CR0_0(30) /* gst->CR0.SO = cr0.so */
+
+ /* block signals again */
+ /* set up for sigprocmask(SIG_SETMASK, postmask, NULL) */
+4: li 0,__NR_rt_sigprocmask
+ li 3,VKI_SIG_SETMASK
+ mr 4,29
+ li 5,0
+ mr 6,28
+ sc /* set the mask */
+ bso 7f /* if the sigprocmask fails */
+ /* now safe from signals */
+ li 3,0 /* SUCCESS */
+
+ /* pop off stack frame */
+5: ld 28,48(1)
+ ld 29,56(1)
+ ld 30,64(1)
+ ld 31,72(1)
+ addi 1,1,80
+ blr
+
+ /* failure: return 0x8000 | error code */
+7: ori 3,3,0x8000 /* FAILURE -- ensure return value is nonzero */
+ b 5b
+
+.section .rodata
+/* export the ranges so that
+ VG_(fixup_guest_state_after_syscall_interrupted) can do the
+ right thing */
+
+.globl ML_(blksys_setup)
+.globl ML_(blksys_restart)
+.globl ML_(blksys_complete)
+.globl ML_(blksys_committed)
+.globl ML_(blksys_finished)
+ML_(blksys_setup): .quad 1b
+ML_(blksys_restart): .quad 2b
+ML_(blksys_complete): .quad 3b
+ML_(blksys_committed): .quad 4b
+ML_(blksys_finished): .quad 5b
+
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+#endif // defined(VGP_ppc64be_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
Index: coregrind/m_syswrap/syswrap-main.c
===================================================================
--- coregrind/m_syswrap/syswrap-main.c.orig
+++ coregrind/m_syswrap/syswrap-main.c
@@ -2006,6 +2006,25 @@ void ML_(fixup_guest_state_to_restart_sy
vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
}
+#elif defined(VGP_ppc64le_linux)
+ arch->vex.guest_CIA -= 4; // sizeof(ppc32 instr)
+
+ /* Make sure our caller is actually sane, and we're really backing
+ back over a syscall.
+
+ sc == 44 00 00 02
+ */
+ {
+ UChar *p = (UChar *)arch->vex.guest_CIA;
+
+ if (p[3] != 0x44 || p[2] != 0x0 || p[1] != 0x0 || p[0] != 0x02)
+ VG_(message)(Vg_DebugMsg,
+ "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
+ arch->vex.guest_CIA + 0ULL, p[3], p[2], p[1], p[0]);
+
+ vg_assert(p[3] == 0x44 && p[2] == 0x0 && p[1] == 0x0 && p[0] == 0x2);
+ }
+
#elif defined(VGP_arm_linux)
if (arch->vex.guest_R15T & 1) {
// Thumb mode. SVC is a encoded as
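
The LE hunk above checks the same instruction with the byte indices
reversed, because the bytes of the 32-bit "sc" opcode (0x44000002)
appear in the opposite order in memory. A sketch of an endian-neutral
alternative (not what the patch does), assuming an aligned, readable
guest address:

    #include <stdint.h>

    static int is_sc_insn(uintptr_t cia)
    {
       /* a native-endian 32-bit load yields 0x44000002 on BE and LE */
       uint32_t insn = *(const uint32_t *)cia;
       return insn == 0x44000002u;    /* "sc" */
    }
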
Index: coregrind/m_syswrap/syswrap-ppc64-linux.c
===================================================================
--- coregrind/m_syswrap/syswrap-ppc64-linux.c.orig
+++ coregrind/m_syswrap/syswrap-ppc64-linux.c
@@ -78,6 +78,7 @@ void ML_(call_on_new_stack_0_1) ( Addr s
address, the second word is the TOC ptr (r2), and the third word is
the static chain value. */
asm(
+#if defined(VGP_ppc64be_linux)
" .align 2\n"
" .globl vgModuleLocal_call_on_new_stack_0_1\n"
" .section \".opd\",\"aw\"\n"
@@ -126,6 +127,55 @@ asm(
" mtcr 0\n\t" // CAB: Need this?
" bctr\n\t" // jump to dst
" trap\n" // should never get here
+#else
+// ppc64le_linux
+" .align 2\n"
+" .globl vgModuleLocal_call_on_new_stack_0_1\n"
+"vgModuleLocal_call_on_new_stack_0_1:\n"
+" .type .vgModuleLocal_call_on_new_stack_0_1,@function\n"
+"#if _CALL_ELF == 2 \n"
+"0: addis 2,12,.TOC.-0b@ha\n"
+" addi 2,2,.TOC.-0b@l\n"
+"#endif\n"
+".localentry vgModuleLocal_call_on_new_stack_0_1, .-vgModuleLocal_call_on_new_stack_0_1\n"
+" mr %r1,%r3\n\t" // stack to %sp
+" mtlr %r4\n\t" // retaddr to %lr
+" mtctr %r5\n\t" // f_ptr to count reg
+" mr %r3,%r6\n\t" // arg1 to %r3
+" li 0,0\n\t" // zero all GP regs
+" li 4,0\n\t"
+" li 5,0\n\t"
+" li 6,0\n\t"
+" li 7,0\n\t"
+" li 8,0\n\t"
+" li 9,0\n\t"
+" li 10,0\n\t"
+" li 11,0\n\t"
+" li 12,0\n\t"
+" li 13,0\n\t"
+" li 14,0\n\t"
+" li 15,0\n\t"
+" li 16,0\n\t"
+" li 17,0\n\t"
+" li 18,0\n\t"
+" li 19,0\n\t"
+" li 20,0\n\t"
+" li 21,0\n\t"
+" li 22,0\n\t"
+" li 23,0\n\t"
+" li 24,0\n\t"
+" li 25,0\n\t"
+" li 26,0\n\t"
+" li 27,0\n\t"
+" li 28,0\n\t"
+" li 29,0\n\t"
+" li 30,0\n\t"
+" li 31,0\n\t"
+" mtxer 0\n\t" // CAB: Need this?
+" mtcr 0\n\t" // CAB: Need this?
+" bctr\n\t" // jump to dst
+" trap\n" // should never get here
+#endif
);
@@ -170,6 +220,7 @@ ULong do_syscall_clone_ppc64_linux ( Wor
Int* parent_tid,
void/*vki_modify_ldt_t*/ * );
asm(
+#if defined(VGP_ppc64be_linux)
" .align 2\n"
" .globl do_syscall_clone_ppc64_linux\n"
" .section \".opd\",\"aw\"\n"
@@ -240,6 +291,78 @@ asm(
" ld 31,56(1)\n"
" addi 1,1,64\n"
" blr\n"
+#else
+" .align 2\n"
+" .globl do_syscall_clone_ppc64_linux\n"
+" .type do_syscall_clone_ppc64_linux,@function\n"
+"do_syscall_clone_ppc64_linux:\n"
+" .globl .do_syscall_clone_ppc64_linux\n"
+".do_syscall_clone_ppc64_linux:\n"
+"#if _CALL_ELF == 2 \n"
+"0: addis 2,12,.TOC.-0b@ha \n"
+" addi 2,2,.TOC.-0b@l \n"
+"#endif \n"
+" .localentry do_syscall_clone_ppc64_linux, .-do_syscall_clone_ppc64_linux \n"
+" stdu 1,-64(1)\n"
+" std 29,40(1)\n"
+" std 30,48(1)\n"
+" std 31,56(1)\n"
+" mr 30,3\n" // preserve fn
+" mr 31,6\n" // preserve arg
+
+ // setup child stack
+" rldicr 4,4, 0,59\n" // trim sp to multiple of 16 bytes
+ // (r4 &= ~0xF)
+" li 0,0\n"
+" stdu 0,-32(4)\n" // make initial stack frame
+" mr 29,4\n" // preserve sp
+
+ // setup syscall
+" li 0,"__NR_CLONE"\n" // syscall number
+" mr 3,5\n" // syscall arg1: flags
+ // r4 already setup // syscall arg2: child_stack
+" mr 5,8\n" // syscall arg3: parent_tid
+" mr 6,13\n" // syscall arg4: REAL THREAD tls
+" mr 7,7\n" // syscall arg5: child_tid
+" mr 8,8\n" // syscall arg6: ????
+" mr 9,9\n" // syscall arg7: ????
+
+" sc\n" // clone()
+
+" mfcr 4\n" // CR now in low half r4
+" sldi 4,4,32\n" // CR now in hi half r4
+
+" sldi 3,3,32\n"
+" srdi 3,3,32\n" // zero out hi half r3
+
+" or 3,3,4\n" // r3 = CR : syscall-retval
+" cmpwi 3,0\n" // child if retval == 0 (note, cmpw)
+" bne 1f\n" // jump if !child
+
+ /* CHILD - call thread function */
+ /* Note: 2.4 kernel doesn't set the child stack pointer,
+ so we do it here.
+ That does leave a small window for a signal to be delivered
+ on the wrong stack, unfortunately. */
+" mr 1,29\n"
+" mtctr 30\n" // ctr reg = fn
+" mr 3,31\n" // r3 = arg
+" bctrl\n" // call fn()
+
+ // exit with result
+" li 0,"__NR_EXIT"\n"
+" sc\n"
+
+ // Exit returned?!
+" .long 0\n"
+
+ // PARENT or ERROR - return
+"1: ld 29,40(1)\n"
+" ld 30,48(1)\n"
+" ld 31,56(1)\n"
+" addi 1,1,64\n"
+" blr\n"
+#endif
);
#undef __NR_CLONE
Index: coregrind/m_syswrap/syscall-ppc64le-linux.S
===================================================================
--- /dev/null
+++ coregrind/m_syswrap/syscall-ppc64le-linux.S
@@ -0,0 +1,189 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Support for doing system calls. syscall-ppc64le-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2005-2013 Paul Mackerras <paulus@samba.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_ppc64le_linux)
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_vkiscnums_asm.h"
+#include "libvex_guest_offsets.h"
+
+
+/*----------------------------------------------------------------*/
+/*
+ Perform a syscall for the client. This will run a syscall
+ with the client's specific per-thread signal mask.
+
+ The structure of this function is such that, if the syscall is
+ interrupted by a signal, we can determine exactly what
+ execution state we were in with respect to the execution of
+ the syscall by examining the value of NIP in the signal
+ handler. This means that we can always do the appropriate
+ thing to precisely emulate the kernel's signal/syscall
+ interactions.
+
+ The syscall number is taken from the argument, even though it
+ should also be in regs->m_gpr[0]. The syscall result is written
+ back to regs->m_gpr[3]/m_xer/m_result on completion.
+
+ Returns 0 if the syscall was successfully called (even if the
+ syscall itself failed), or a nonzero error code in the lowest
+ 8 bits if one of the sigprocmasks failed (there's no way to
+ determine which one failed). And there's no obvious way to
+ recover from that either, but nevertheless we want to know.
+
+ VG_(fixup_guest_state_after_syscall_interrupted) does the
+ thread state fixup in the case where we were interrupted by a
+ signal.
+
+ Prototype:
+
+ UWord ML_(do_syscall_for_client_WRK)(
+ Int syscallno, // r3
+ void* guest_state, // r4
+ const vki_sigset_t *sysmask, // r5
+ const vki_sigset_t *postmask, // r6
+ Int sigsetSzB) // r7
+*/
+/* from vki_arch.h */
+#define VKI_SIG_SETMASK 2
+
+.align 2
+.globl ML_(do_syscall_for_client_WRK)
+#if _CALL_ELF == 2
+.type .ML_(do_syscall_for_client_WRK),@function
+ML_(do_syscall_for_client_WRK):
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ML_(do_syscall_for_client_WRK), .-ML_(do_syscall_for_client_WRK)
+#else
+.section ".opd","aw"
+.align 3
+ML_(do_syscall_for_client_WRK):
+.quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
+.previous
+#endif
+.type .ML_(do_syscall_for_client_WRK),@function
+.globl .ML_(do_syscall_for_client_WRK)
+.ML_(do_syscall_for_client_WRK):
+ /* make a stack frame */
+ stdu 1,-80(1)
+ std 31,72(1)
+ std 30,64(1)
+ std 29,56(1)
+ std 28,48(1)
+ mr 31,3 /* syscall number */
+ mr 30,4 /* guest_state */
+ mr 29,6 /* postmask */
+ mr 28,7 /* sigsetSzB */
+
+ /* set the signal mask for doing the system call */
+ /* set up for sigprocmask(SIG_SETMASK, sysmask, postmask) */
+1: li 0,__NR_rt_sigprocmask
+ li 3,VKI_SIG_SETMASK
+ mr 4,5
+ mr 5,6
+ mr 6,7
+ sc /* set the mask */
+ bso 7f /* if the sigprocmask fails */
+
+ /* load up syscall args from the threadstate */
+ ld 3,OFFSET_ppc64_GPR3(30)
+ ld 4,OFFSET_ppc64_GPR4(30)
+ ld 5,OFFSET_ppc64_GPR5(30)
+ ld 6,OFFSET_ppc64_GPR6(30)
+ ld 7,OFFSET_ppc64_GPR7(30)
+ ld 8,OFFSET_ppc64_GPR8(30)
+ mr 0,31 /* syscall number */
+2: sc /* do the syscall */
+
+ /* put the result back in the threadstate */
+3: std 3,OFFSET_ppc64_GPR3(30) /* gst->GPR3 = sc result */
+ /* copy cr0.so back to simulated state */
+ mfcr 5 /* r5 = CR */
+ rlwinm 5,5,4,31,31 /* r5 = (CR >> 28) & 1 */
+ stb 5,OFFSET_ppc64_CR0_0(30) /* gst->CR0.SO = cr0.so */
+
+ /* block signals again */
+ /* set up for sigprocmask(SIG_SETMASK, postmask, NULL) */
+4: li 0,__NR_rt_sigprocmask
+ li 3,VKI_SIG_SETMASK
+ mr 4,29
+ li 5,0
+ mr 6,28
+ sc /* set the mask */
+ bso 7f /* if the sigprocmask fails */
+ /* now safe from signals */
+ li 3,0 /* SUCCESS */
+
+ /* pop off stack frame */
+5: ld 28,48(1)
+ ld 29,56(1)
+ ld 30,64(1)
+ ld 31,72(1)
+ addi 1,1,80
+ blr
+
+ /* failure: return 0x8000 | error code */
+7: ori 3,3,0x8000 /* FAILURE -- ensure return value is nonzero */
+ b 5b
+ .size .ML_(do_syscall_for_client_WRK),.-.ML_(do_syscall_for_client_WRK)
+.section .rodata
+/* export the ranges so that
+ VG_(fixup_guest_state_after_syscall_interrupted) can do the
+ right thing */
+
+.globl ML_(blksys_setup)
+.globl ML_(blksys_restart)
+.globl ML_(blksys_complete)
+.globl ML_(blksys_committed)
+.globl ML_(blksys_finished)
+ML_(blksys_setup): .quad 1b
+ML_(blksys_restart): .quad 2b
+ML_(blksys_complete): .quad 3b
+ML_(blksys_committed): .quad 4b
+ML_(blksys_finished): .quad 5b
+
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+#endif // defined(VGP_ppc64le_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
Index: coregrind/pub_core_machine.h
===================================================================
--- coregrind/pub_core_machine.h.orig
+++ coregrind/pub_core_machine.h
@@ -61,6 +61,11 @@
# define VG_ELF_MACHINE EM_PPC64
# define VG_ELF_CLASS ELFCLASS64
# define VG_PLAT_USES_PPCTOC 1
+#elif defined(VGP_ppc64le_linux)
+# define VG_ELF_DATA2XXX ELFDATA2LSB
+# define VG_ELF_MACHINE EM_PPC64
+# define VG_ELF_CLASS ELFCLASS64
+# undef VG_PLAT_USES_PPCTOC
#elif defined(VGP_arm_linux)
# define VG_ELF_DATA2XXX ELFDATA2LSB
# define VG_ELF_MACHINE EM_ARM
Index: coregrind/m_debuginfo/priv_storage.h
===================================================================
--- coregrind/m_debuginfo/priv_storage.h.orig
+++ coregrind/m_debuginfo/priv_storage.h
@@ -70,7 +70,8 @@
typedef
struct {
Addr addr; /* lowest address of entity */
- Addr tocptr; /* ppc64-linux only: value that R2 should have */
+ Addr tocptr; /* ppc64be-linux only: value that R2 should have */
+ Addr local_ep; /* address for local entry point, ppc64le */
HChar* pri_name; /* primary name, never NULL */
HChar** sec_names; /* NULL, or a NULL term'd array of other names */
// XXX: this could be shrunk (on 32-bit platforms) by using 30
@@ -745,7 +746,7 @@ struct _DebugInfo {
Bool gotplt_present;
Addr gotplt_avma;
SizeT gotplt_size;
- /* .opd -- needed on ppc64-linux for finding symbols */
+ /* .opd -- needed on ppc64be-linux for finding symbols */
Bool opd_present;
Addr opd_avma;
SizeT opd_size;
Index: coregrind/m_debuginfo/readmacho.c
===================================================================
--- coregrind/m_debuginfo/readmacho.c.orig
+++ coregrind/m_debuginfo/readmacho.c
@@ -199,6 +199,8 @@ static DiSlice map_image_aboard ( DebugI
Int cputype = CPU_TYPE_POWERPC;
# elif defined(VGA_ppc64be)
Int cputype = CPU_TYPE_POWERPC64BE;
+# elif defined(VGA_ppc64le)
+ Int cputype = CPU_TYPE_POWERPC64LE;
# elif defined(VGA_x86)
Int cputype = CPU_TYPE_X86;
# elif defined(VGA_amd64)
Index: coregrind/m_debuginfo/readelf.c
===================================================================
--- coregrind/m_debuginfo/readelf.c.orig
+++ coregrind/m_debuginfo/readelf.c
@@ -240,7 +240,10 @@ Bool get_elf_symbol_info (
Bool* from_opd_out, /* ppc64be-linux only: did we deref an
.opd entry? */
Bool* is_text_out, /* is this a text symbol? */
- Bool* is_ifunc /* is this a STT_GNU_IFUNC function ?*/
+ Bool* is_ifunc, /* is this a STT_GNU_IFUNC function ?*/
+ Addr* sym_local_ep /* addr of local entry point. PPC64 LE
+ supports local and global entry points.
+ Used to return the local entry point. */
)
{
Bool plausible;
@@ -258,6 +261,8 @@ Bool get_elf_symbol_info (
*sym_tocptr_out = 0; /* unknown/inapplicable */
*from_opd_out = False;
*is_ifunc = False;
+ *sym_local_ep = 0; /* unknown/inapplicable */
+
/* Get the symbol size, but restrict it to fit in a signed 32 bit
int. Also, deal with the stupid case of negative size by making
the size be 1. Note that sym->st_size has type UWord,
@@ -670,14 +675,57 @@ Bool get_elf_symbol_info (
}
# if defined(VGP_ppc64be_linux)
- /* It's crucial that we never add symbol addresses in the .opd
- section. This would completely mess up function redirection and
- intercepting. This assert ensures that any symbols that make it
- into the symbol table on ppc64-linux don't point into .opd. */
if (di->opd_present && di->opd_size > 0) {
vg_assert(*sym_avma_out + *sym_size_out <= di->opd_avma
|| *sym_avma_out >= di->opd_avma + di->opd_size);
}
+#endif
+
+# if defined(VGP_ppc64le_linux)
+ /* PPC64 LE ABI uses three bits in the st_other field to indicate the number
+ * of instructions between the function's global and local entry points. An
+ * offset of 0 indicates that there is one entry point. The value must be:
+ *
+ * 0 - one entry point, local and global are the same
+ * 1 - reserved
+ * 2 - local entry point is one instruction after the global entry point
+ * 3 - local entry point is two instructions after the global entry point
+ * 4 - local entry point is four instructions after the global entry point
+ * 5 - local entry point is eight instructions after the global entry point
+ * 6 - local entry point is sixteen instructions after the global entry point
+ * 7 - reserved
+ *
+ * To extract the three-bit field from the st_other field:
+ * (other_field & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT
+ *
+ * where the #define values are given in the include/elf/powerpc.h file
+ * of the PPC binutils.
+ *
+ * Conversion of the three-bit field to bytes is given by
+ *
+ * ((1 << bit_field) >> 2) << 2
+ */
+
+ #define STO_PPC64_LOCAL_BIT 5
+ #define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT)
+ {
+ unsigned int bit_field, dist_to_local_entry;
+ /* extract the bit field from st_other */
+ bit_field = (sym->st_other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
+
+ if ((bit_field > 0) && (bit_field < 7)) {
+ /* store the local entry point address */
+ dist_to_local_entry = ((1 << bit_field) >> 2) << 2;
+ *sym_local_ep = *sym_avma_out + dist_to_local_entry;
+
+ if (TRACE_SYMTAB_ENABLED) {
+ HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
+ "di.gesi.5", sym_name_ioff);
+ VG_(printf)("Local entry point: %s at %#010x\n",
+ sym_name, (unsigned int)*sym_local_ep);
+ }
+ }
+ }
# endif
/* Acquire! */
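
The decoding rule described in the comment above can be exercised in
isolation; a standalone sketch using the binutils macro values quoted in
the patch (the helper name is illustrative):

    #define STO_PPC64_LOCAL_BIT  5
    #define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT)

    /* byte distance from the global to the local entry point */
    static unsigned ppc64_local_entry_offset(unsigned char st_other)
    {
       unsigned f = (st_other & STO_PPC64_LOCAL_MASK)
                    >> STO_PPC64_LOCAL_BIT;
       return ((1u << f) >> 2) << 2; /* f=0..6 -> 0,0,4,8,16,32,64 bytes */
    }

For the common prologue (addis + addi) f is 3, giving 8 bytes, so
sym_local_ep ends up as sym_avma_out + 8.
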
@@ -686,7 +734,7 @@ Bool get_elf_symbol_info (
/* Read an ELF symbol table (normal or dynamic). This one is for the
- "normal" case ({x86,amd64,ppc32,arm,mips32,mips64}-linux). */
+ "normal" case ({x86,amd64,ppc32,arm,mips32,mips64, ppc64le}-linux). */
static
__attribute__((unused)) /* not referred to on all targets */
void read_elf_symtab__normal(
@@ -725,6 +773,7 @@ void read_elf_symtab__normal(
Addr sym_avma_really = 0;
Int sym_size = 0;
Addr sym_tocptr = 0;
+ Addr local_ep = 0;
Bool from_opd = False, is_text = False, is_ifunc = False;
DiOffT sym_name_really = DiOffT_INVALID;
if (get_elf_symbol_info(di, &sym, sym_name, escn_strtab,
@@ -734,7 +783,8 @@ void read_elf_symtab__normal(
&sym_avma_really,
&sym_size,
&sym_tocptr,
- &from_opd, &is_text, &is_ifunc)) {
+ &from_opd, &is_text, &is_ifunc,
+ &local_ep)) {
DiSym disym;
VG_(memset)(&disym, 0, sizeof(disym));
@@ -742,6 +792,7 @@ void read_elf_symtab__normal(
"di.res__n.1", sym_name_really);
disym.addr = sym_avma_really;
disym.tocptr = sym_tocptr;
+ disym.local_ep = local_ep;
disym.pri_name = ML_(addStr) ( di, cstr, -1 );
disym.sec_names = NULL;
disym.size = sym_size;
@@ -749,7 +800,7 @@ void read_elf_symtab__normal(
disym.isIFunc = is_ifunc;
if (cstr) { ML_(dinfo_free)(cstr); cstr = NULL; }
vg_assert(disym.pri_name);
- vg_assert(disym.tocptr == 0); /* has no role except on ppc64-linux */
+ vg_assert(disym.tocptr == 0); /* has no role except on ppc64be-linux */
ML_(addSym) ( di, &disym );
if (TRACE_SYMTAB_ENABLED) {
@@ -761,6 +812,10 @@ void read_elf_symtab__normal(
(Int)disym.size,
(HChar*)disym.pri_name
);
+ if (local_ep != 0) {
+ TRACE_SYMTAB(" local entry point %#010lx\n",
+ local_ep)
+ }
}
}
@@ -856,6 +911,7 @@ void read_elf_symtab__ppc64be_linux(
Addr sym_avma_really = 0;
Int sym_size = 0;
Addr sym_tocptr = 0;
+ Addr sym_local_ep = 0;
Bool from_opd = False, is_text = False, is_ifunc = False;
DiOffT sym_name_really = DiOffT_INVALID;
DiSym disym;
@@ -867,7 +923,8 @@ void read_elf_symtab__ppc64be_linux(
&sym_avma_really,
&sym_size,
&sym_tocptr,
- &from_opd, &is_text, &is_ifunc)) {
+ &from_opd, &is_text, &is_ifunc,
+ &sym_local_ep)) {
/* Check if we've seen this (name,addr) key before. */
key.addr = sym_avma_really;
@@ -2774,6 +2831,7 @@ Bool ML_(read_elf_debug_info) ( struct _
# if !defined(VGP_amd64_linux) \
&& !defined(VGP_s390x_linux) \
&& !defined(VGP_ppc64be_linux) \
+ && !defined(VGP_ppc64le_linux) \
&& !defined(VGPV_arm_linux_android) \
&& !defined(VGPV_x86_linux_android) \
&& !defined(VGP_mips64_linux)
Index: coregrind/m_debuginfo/debuginfo.c
===================================================================
--- coregrind/m_debuginfo/debuginfo.c.orig
+++ coregrind/m_debuginfo/debuginfo.c
@@ -1,4 +1,5 @@
+
/*--------------------------------------------------------------------*/
/*--- Top level management of symbols and debugging information. ---*/
/*--- debuginfo.c ---*/
@@ -3983,6 +3984,7 @@ void VG_(DebugInfo_syms_getidx) ( const
Int idx,
/*OUT*/Addr* avma,
/*OUT*/Addr* tocptr,
+ /*OUT*/Addr* local_ep,
/*OUT*/UInt* size,
/*OUT*/HChar** pri_name,
/*OUT*/HChar*** sec_names,
@@ -3992,6 +3994,7 @@ void VG_(DebugInfo_syms_getidx) ( const
vg_assert(idx >= 0 && idx < si->symtab_used);
if (avma) *avma = si->symtab[idx].addr;
if (tocptr) *tocptr = si->symtab[idx].tocptr;
+ if (local_ep) *local_ep = si->symtab[idx].local_ep;
if (size) *size = si->symtab[idx].size;
if (pri_name) *pri_name = si->symtab[idx].pri_name;
if (sec_names) *sec_names = (HChar **)si->symtab[idx].sec_names; // FIXME
Index: coregrind/m_signals.c
===================================================================
--- coregrind/m_signals.c.orig
+++ coregrind/m_signals.c
@@ -866,6 +866,23 @@ extern void my_sigreturn(void);
" li 0, " #name "\n" \
" sc\n"
+#elif defined(VGP_ppc64le_linux)
+/* Little Endian supports ELF version 2. In the future, it may
+ * support other versions.
+ */
+# define _MY_SIGRETURN(name) \
+ ".align 2\n" \
+ ".globl my_sigreturn\n" \
+ ".type .my_sigreturn,@function\n" \
+ "my_sigreturn:\n" \
+ "#if _CALL_ELF == 2 \n" \
+ "0: addis 2,12,.TOC.-0b@ha\n" \
+ " addi 2,2,.TOC.-0b@l\n" \
+ " .localentry my_sigreturn,.-my_sigreturn\n" \
+ "#endif \n" \
+ " sc\n" \
+ " .size my_sigreturn,.-my_sigreturn\n"
+
#elif defined(VGP_arm_linux)
# define _MY_SIGRETURN(name) \
".text\n" \
Index: coregrind/m_redir.c
===================================================================
--- coregrind/m_redir.c.orig
+++ coregrind/m_redir.c
@@ -397,7 +397,7 @@ void VG_(redir_notify_new_DebugInfo)( De
TopSpec* newts;
HChar* sym_name_pri;
HChar** sym_names_sec;
- Addr sym_addr, sym_toc;
+ Addr sym_addr, sym_toc, local_ep;
HChar demangled_sopatt[N_DEMANGLED];
HChar demangled_fnpatt[N_DEMANGLED];
Bool check_ppcTOCs = False;
@@ -499,7 +499,7 @@ void VG_(redir_notify_new_DebugInfo)( De
nsyms = VG_(DebugInfo_syms_howmany)( newdi );
for (i = 0; i < nsyms; i++) {
- VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc,
+ VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc, &local_ep,
NULL, &sym_name_pri, &sym_names_sec,
&isText, NULL );
/* Set up to conveniently iterate over all names for this symbol. */
@@ -592,7 +592,7 @@ void VG_(redir_notify_new_DebugInfo)( De
if (check_ppcTOCs) {
for (i = 0; i < nsyms; i++) {
- VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc,
+ VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc, &local_ep,
NULL, &sym_name_pri, &sym_names_sec,
&isText, NULL );
HChar* twoslots[2];
@@ -755,7 +755,9 @@ void generate_and_add_actives (
of trashing the caches less. */
nsyms = VG_(DebugInfo_syms_howmany)( di );
for (i = 0; i < nsyms; i++) {
- VG_(DebugInfo_syms_getidx)( di, i, &sym_addr, NULL,
+ Addr local_ep = 0;
+
+ VG_(DebugInfo_syms_getidx)( di, i, &sym_addr, NULL, &local_ep,
NULL, &sym_name_pri, &sym_names_sec,
&isText, &isIFunc );
HChar* twoslots[2];
@@ -783,6 +785,18 @@ void generate_and_add_actives (
act.isIFunc = isIFunc;
sp->done = True;
maybe_add_active( act );
+
+ /* If the function being wrapped has a local entry point,
+ * redirect it to the global entry point. The redirection
+ * must save and set up r2, then set up r12 for the new function.
+ * On return, r2 must be restored. Local entry points are
+ * used in PPC64 Little Endian.
+ */
+ if (local_ep != 0) {
+ act.from_addr = local_ep;
+ maybe_add_active( act );
+ }
+
}
} /* for (sp = specs; sp; sp = sp->next) */
@@ -1298,6 +1312,27 @@ void VG_(redir_initialise) ( void )
);
}
+# elif defined(VGP_ppc64le_linux)
+ /* If we're using memcheck, use these intercepts right from
+ * the start, otherwise ld.so makes a lot of noise.
+ */
+ if (0==VG_(strcmp)("Memcheck", VG_(details).name)) {
+
+ /* this is mandatory - can't sanely continue without it */
+ add_hardwired_spec(
+ "ld64.so.2", "strlen",
+ (Addr)&VG_(ppc64_linux_REDIR_FOR_strlen),
+ complain_about_stripped_glibc_ldso
+ );
+
+ add_hardwired_spec(
+ "ld64.so.2", "index",
+ (Addr)&VG_(ppc64_linux_REDIR_FOR_strchr),
+ NULL /* not mandatory - so why bother at all? */
+ /* glibc-2.5 (FC6, ppc64) seems fine without it */
+ );
+ }
+
# elif defined(VGP_arm_linux)
/* If we're using memcheck, use these intercepts right from
the start, otherwise ld.so makes a lot of noise. */
@@ -1551,7 +1586,7 @@ static void handle_require_text_symbols
Bool isText = False;
HChar* sym_name_pri = NULL;
HChar** sym_names_sec = NULL;
- VG_(DebugInfo_syms_getidx)( di, j, NULL, NULL,
+ VG_(DebugInfo_syms_getidx)( di, j, NULL, NULL, NULL,
NULL, &sym_name_pri, &sym_names_sec,
&isText, NULL );
HChar* twoslots[2];
Index: coregrind/m_ume/elf.c
===================================================================
--- coregrind/m_ume/elf.c.orig
+++ coregrind/m_ume/elf.c
@@ -520,6 +520,10 @@ Int VG_(load_ELF)(Int fd, const HChar* n
info->init_toc = ((ULong*)entry)[1];
info->init_ip += interp_offset;
info->init_toc += interp_offset;
+#elif defined(VGP_ppc64le_linux)
+ /* On PPC64LE, the ELFv2 ABI doesn't use a function descriptor */
+ info->init_ip = (Addr)entry;
+ info->init_toc = 0; /* meaningless on this platform */
#else
info->init_ip = (Addr)entry;
info->init_toc = 0; /* meaningless on this platform */
Index: coregrind/m_ume/macho.c
===================================================================
--- coregrind/m_ume/macho.c.orig
+++ coregrind/m_ume/macho.c
@@ -701,6 +701,8 @@ load_fat_file(int fd, vki_off_t offset,
good_arch = CPU_TYPE_POWERPC;
#elif defined(VGA_ppc64be)
good_arch = CPU_TYPE_POWERPC64BE;
+#elif defined(VGA_ppc64le)
+ good_arch = CPU_TYPE_POWERPC64LE;
#elif defined(VGA_x86)
good_arch = CPU_TYPE_I386;
#elif defined(VGA_amd64)
Index: coregrind/m_dispatch/dispatch-ppc64-linux.S
===================================================================
--- coregrind/m_dispatch/dispatch-ppc64-linux.S
+++ /dev/null
@@ -1,537 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- The core dispatch loop, for jumping to a code address. ---*/
-/*--- dispatch-ppc64-linux.S ---*/
-/*--------------------------------------------------------------------*/
-
-/*
- This file is part of Valgrind, a dynamic binary instrumentation
- framework.
-
- Copyright (C) 2005-2013 Cerion Armour-Brown <cerion@open-works.co.uk>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307, USA.
-
- The GNU General Public License is contained in the file COPYING.
-*/
-
-#if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
-
-#include "pub_core_basics_asm.h"
-#include "pub_core_dispatch_asm.h"
-#include "pub_core_transtab_asm.h"
-#include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */
-
-
-/* References to globals via the TOC */
-
-/*
- .globl vgPlain_tt_fast
- .lcomm vgPlain_tt_fast,4,4
- .type vgPlain_tt_fast, @object
-*/
-.section ".toc","aw"
-.tocent__vgPlain_tt_fast:
- .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
-.tocent__vgPlain_stats__n_xindirs_32:
- .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
-.tocent__vgPlain_stats__n_xindir_misses_32:
- .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
-.tocent__vgPlain_machine_ppc64_has_VMX:
- .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
-
-/*------------------------------------------------------------*/
-/*--- ---*/
-/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
-/*--- used to run all translations, ---*/
-/*--- including no-redir ones. ---*/
-/*--- ---*/
-/*------------------------------------------------------------*/
-
-/*----------------------------------------------------*/
-/*--- Entry and preamble (set everything up) ---*/
-/*----------------------------------------------------*/
-
-/* signature:
-void VG_(disp_run_translations)( UWord* two_words,
- void* guest_state,
- Addr host_addr );
-*/
-
-.section ".text"
-.align 2
-.globl VG_(disp_run_translations)
-.section ".opd","aw"
-.align 3
-VG_(disp_run_translations):
-.quad .VG_(disp_run_translations),.TOC.@tocbase,0
-.previous
-.type .VG_(disp_run_translations),@function
-.globl .VG_(disp_run_translations)
-.VG_(disp_run_translations):
- /* r3 holds two_words */
- /* r4 holds guest_state */
- /* r5 holds host_addr */
-
- /* ----- entry point to VG_(disp_run_translations) ----- */
- /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */
-
- /* Save lr, cr */
- mflr 6
- std 6,16(1)
- mfcr 6
- std 6,8(1)
-
- /* New stack frame */
- stdu 1,-624(1) /* sp should maintain 16-byte alignment */
-
- /* General reg save area : 152 bytes */
- std 31,472(1)
- std 30,464(1)
- std 29,456(1)
- std 28,448(1)
- std 27,440(1)
- std 26,432(1)
- std 25,424(1)
- std 24,416(1)
- std 23,408(1)
- std 22,400(1)
- std 21,392(1)
- std 20,384(1)
- std 19,376(1)
- std 18,368(1)
- std 17,360(1)
- std 16,352(1)
- std 15,344(1)
- std 14,336(1)
- std 13,328(1)
- std 3,104(1) /* save two_words for later */
-
- /* Save callee-saved registers... */
- /* Floating-point reg save area : 144 bytes */
- stfd 31,616(1)
- stfd 30,608(1)
- stfd 29,600(1)
- stfd 28,592(1)
- stfd 27,584(1)
- stfd 26,576(1)
- stfd 25,568(1)
- stfd 24,560(1)
- stfd 23,552(1)
- stfd 22,544(1)
- stfd 21,536(1)
- stfd 20,528(1)
- stfd 19,520(1)
- stfd 18,512(1)
- stfd 17,504(1)
- stfd 16,496(1)
- stfd 15,488(1)
- stfd 14,480(1)
-
- /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
- The Linux kernel might not actually use VRSAVE for its intended
- purpose, but it should be harmless to preserve anyway. */
- /* r3, r4, r5 are live here, so use r6 */
- ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
- ld 6,0(6)
- cmpldi 6,0
- beq .LafterVMX1
-
- /* VRSAVE save word : 32 bytes */
- mfspr 6,256 /* vrsave reg is spr number 256 */
- stw 6,324(1)
-
- /* Alignment padding : 4 bytes */
-
- /* Vector reg save area (quadword aligned) : 192 bytes */
- li 6,304
- stvx 31,6,1
- li 6,288
- stvx 30,6,1
- li 6,272
- stvx 29,6,1
- li 6,256
- stvx 28,6,1
- li 6,240
- stvx 27,6,1
- li 6,224
- stvx 26,6,1
- li 6,208
- stvx 25,6,1
- li 6,192
- stvx 24,6,1
- li 6,176
- stvx 23,6,1
- li 6,160
- stvx 22,6,1
- li 6,144
- stvx 21,6,1
- li 6,128
- stvx 20,6,1
-.LafterVMX1:
-
- /* Local variable space... */
-
- /* r3 holds two_words */
- /* r4 holds guest_state */
- /* r5 holds host_addr */
-
- /* 96(sp) used later to check FPSCR[RM] */
- /* 88(sp) used later to load fpscr with zero */
- /* 48:87(sp) free */
-
- /* Linkage Area (reserved) BE ABI
- 40(sp) : TOC
- 32(sp) : link editor doubleword
- 24(sp) : compiler doubleword
- 16(sp) : LR
- 8(sp) : CR
- 0(sp) : back-chain
- */
-
- /* set host FPU control word to the default mode expected
- by VEX-generated code. See comments in libvex.h for
- more info. */
- /* => get zero into f3 (tedious)
- fsub 3,3,3 is not a reliable way to do this, since if
- f3 holds a NaN or similar then we don't necessarily
- wind up with zero. */
- li 6,0
- stw 6,88(1)
- lfs 3,88(1)
- mtfsf 0xFF,3 /* fpscr = lo32 of f3 */
-
- /* set host AltiVec control word to the default mode expected
- by VEX-generated code. */
- ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
- ld 6,0(6)
- cmpldi 6,0
- beq .LafterVMX2
-
- vspltisw 3,0x0 /* generate zero */
- mtvscr 3
-.LafterVMX2:
-
- /* make a stack frame for the code we are calling */
- stdu 1,-48(1)
-
- /* Set up the guest state ptr */
- mr 31,4 /* r31 (generated code gsp) = r4 */
-
- /* and jump into the code cache. Chained translations in
- the code cache run, until for whatever reason, they can't
- continue. When that happens, the translation in question
- will jump (or call) to one of the continuation points
- VG_(cp_...) below. */
- mtctr 5
- bctr
- /*NOTREACHED*/
-
-/*----------------------------------------------------*/
-/*--- Postamble and exit. ---*/
-/*----------------------------------------------------*/
-
-.postamble:
- /* At this point, r6 and r7 contain two
- words to be returned to the caller. r6
- holds a TRC value, and r7 optionally may
- hold another word (for CHAIN_ME exits, the
- address of the place to patch.) */
-
- /* undo the "make a stack frame for the code we are calling" */
- addi 1,1,48
-
- /* We're leaving. Check that nobody messed with
- VSCR or FPSCR in ways we don't expect. */
- /* Using r11 - value used again further on, so don't trash! */
- ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
- ld 11,0(11)
-
- /* Set fpscr back to a known state, since vex-generated code
- may have messed with fpscr[rm]. */
- li 5,0
- addi 1,1,-16
- stw 5,0(1)
- lfs 3,0(1)
- addi 1,1,16
- mtfsf 0xFF,3 /* fpscr = f3 */
-
- cmpldi 11,0 /* Do we have altivec? */
- beq .LafterVMX8
-
- /* Check VSCR[NJ] == 1 */
- /* first generate 4x 0x00010000 */
- vspltisw 4,0x1 /* 4x 0x00000001 */
- vspltisw 5,0x0 /* zero */
- vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
- /* retrieve VSCR and mask wanted bits */
- mfvscr 7
- vand 7,7,6 /* gives NJ flag */
- vspltw 7,7,0x3 /* flags-word to all lanes */
- vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
- bt 24,.invariant_violation /* branch if all_equal */
-
-.LafterVMX8:
- /* otherwise we're OK */
- b .remove_frame
-
-.invariant_violation:
- li 6,VG_TRC_INVARIANT_FAILED
- li 7,0
- /* fall through */
-
-.remove_frame:
- /* r11 already holds VG_(machine_ppc32_has_VMX) value */
- cmplwi 11,0
- beq .LafterVMX9
-
- /* Restore Altivec regs.
- Use r5 as scratch since r6/r7 are live. */
- /* VRSAVE */
- lwz 5,324(1)
- mfspr 5,256 /* VRSAVE reg is spr number 256 */
-
- /* Vector regs */
- li 5,304
- lvx 31,5,1
- li 5,288
- lvx 30,5,1
- li 5,272
- lvx 29,5,1
- li 5,256
- lvx 28,5,1
- li 5,240
- lvx 27,5,1
- li 5,224
- lvx 26,5,1
- li 5,208
- lvx 25,5,1
- li 5,192
- lvx 24,5,1
- li 5,176
- lvx 23,5,1
- li 5,160
- lvx 22,5,1
- li 5,144
- lvx 21,5,1
- li 5,128
- lvx 20,5,1
-.LafterVMX9:
-
- /* Restore FP regs */
- /* Floating-point regs */
- lfd 31,616(1)
- lfd 30,608(1)
- lfd 29,600(1)
- lfd 28,592(1)
- lfd 27,584(1)
- lfd 26,576(1)
- lfd 25,568(1)
- lfd 24,560(1)
- lfd 23,552(1)
- lfd 22,544(1)
- lfd 21,536(1)
- lfd 20,528(1)
- lfd 19,520(1)
- lfd 18,512(1)
- lfd 17,504(1)
- lfd 16,496(1)
- lfd 15,488(1)
- lfd 14,480(1)
-
- /* restore int regs, including importantly r3 (two_words) */
- ld 31,472(1)
- ld 30,464(1)
- ld 29,456(1)
- ld 28,448(1)
- ld 27,440(1)
- ld 26,432(1)
- ld 25,424(1)
- ld 24,416(1)
- ld 23,408(1)
- ld 22,400(1)
- ld 21,392(1)
- ld 20,384(1)
- ld 19,376(1)
- ld 18,368(1)
- ld 17,360(1)
- ld 16,352(1)
- ld 15,344(1)
- ld 14,336(1)
- ld 13,328(1)
- ld 3,104(1)
- /* Stash return values */
- std 6,0(3)
- std 7,8(3)
-
- /* restore lr & sp, and leave */
- ld 0,632(1) /* stack_size + 8 */
- mtcr 0
- ld 0,640(1) /* stack_size + 16 */
- mtlr 0
- addi 1,1,624 /* stack_size */
- blr
-
-
-/*----------------------------------------------------*/
-/*--- Continuation points ---*/
-/*----------------------------------------------------*/
-
-/* ------ Chain me to slow entry point ------ */
- .section ".text"
- .align 2
- .globl VG_(disp_cp_chain_me_to_slowEP)
- .section ".opd","aw"
- .align 3
-VG_(disp_cp_chain_me_to_slowEP):
- .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
- .previous
- .type .VG_(disp_cp_chain_me_to_slowEP),@function
- .globl .VG_(disp_cp_chain_me_to_slowEP)
-.VG_(disp_cp_chain_me_to_slowEP):
- /* We got called. The return address indicates
- where the patching needs to happen. Collect
- the return address and, exit back to C land,
- handing the caller the pair (Chain_me_S, RA) */
- li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
- mflr 7
- /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
- 4 = mtctr r30
- 4 = btctr
- */
- subi 7,7,20+4+4
- b .postamble
-
-/* ------ Chain me to fast entry point ------ */
- .section ".text"
- .align 2
- .globl VG_(disp_cp_chain_me_to_fastEP)
- .section ".opd","aw"
- .align 3
-VG_(disp_cp_chain_me_to_fastEP):
- .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
- .previous
- .type .VG_(disp_cp_chain_me_to_fastEP),@function
- .globl .VG_(disp_cp_chain_me_to_fastEP)
-.VG_(disp_cp_chain_me_to_fastEP):
- /* We got called. The return address indicates
- where the patching needs to happen. Collect
- the return address and, exit back to C land,
- handing the caller the pair (Chain_me_S, RA) */
- li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
- mflr 7
- /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
- 4 = mtctr r30
- 4 = btctr
- */
- subi 7,7,20+4+4
- b .postamble
-
-/* ------ Indirect but boring jump ------ */
- .section ".text"
- .align 2
- .globl VG_(disp_cp_xindir)
- .section ".opd","aw"
- .align 3
-VG_(disp_cp_xindir):
- .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0
- .previous
- .type .VG_(disp_cp_xindir),@function
- .globl .VG_(disp_cp_xindir)
-.VG_(disp_cp_xindir):
- /* Where are we going? */
- ld 3,OFFSET_ppc64_CIA(31)
-
- /* stats only */
- ld 5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
- lwz 6,0(5)
- addi 6,6,1
- stw 6,0(5)
-
- /* r5 = &VG_(tt_fast) */
- ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
-
- /* try a fast lookup in the translation cache */
- /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
- = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
- rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
- sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
- add 5,5,4 /* & VG_(tt_fast)[entry#] */
- ld 6,0(5) /* .guest */
- ld 7,8(5) /* .host */
- cmpd 3,6
- bne .fast_lookup_failed
-
- /* Found a match. Jump to .host. */
- mtctr 7
- bctr
-
-.fast_lookup_failed:
- /* stats only */
- ld 5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
- lwz 6,0(5)
- addi 6,6,1
- stw 6,0(5)
-
- li 6,VG_TRC_INNER_FASTMISS
- li 7,0
- b .postamble
- /*NOTREACHED*/
-
-/* ------ Assisted jump ------ */
-.section ".text"
- .align 2
- .globl VG_(disp_cp_xassisted)
- .section ".opd","aw"
- .align 3
-VG_(disp_cp_xassisted):
- .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0
- .previous
- .type .VG_(disp_cp_xassisted),@function
- .globl .VG_(disp_cp_xassisted)
-.VG_(disp_cp_xassisted):
- /* r31 contains the TRC */
- mr 6,31
- li 7,0
- b .postamble
-
-/* ------ Event check failed ------ */
- .section ".text"
- .align 2
- .globl VG_(disp_cp_evcheck_fail)
- .section ".opd","aw"
- .align 3
-VG_(disp_cp_evcheck_fail):
- .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
- .previous
- .type .VG_(disp_cp_evcheck_fail),@function
- .globl .VG_(disp_cp_evcheck_fail)
-.VG_(disp_cp_evcheck_fail):
- li 6,VG_TRC_INNER_COUNTERZERO
- li 7,0
- b .postamble
-
-
-.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
-
-/* Let the linker know we don't need an executable stack */
-.section .note.GNU-stack,"",@progbits
-
-#endif // defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
-
-/*--------------------------------------------------------------------*/
-/*--- end ---*/
-/*--------------------------------------------------------------------*/
Index: coregrind/m_dispatch/dispatch-ppc64be-linux.S
===================================================================
--- /dev/null
+++ coregrind/m_dispatch/dispatch-ppc64be-linux.S
@@ -0,0 +1,537 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address. ---*/
+/*---                                     dispatch-ppc64be-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2005-2013 Cerion Armour-Brown <cerion@open-works.co.uk>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_ppc64be_linux)
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_dispatch_asm.h"
+#include "pub_core_transtab_asm.h"
+#include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */
+
+
+/* References to globals via the TOC */
+
+/*
+ .globl vgPlain_tt_fast
+ .lcomm vgPlain_tt_fast,4,4
+ .type vgPlain_tt_fast, @object
+*/
+.section ".toc","aw"
+.tocent__vgPlain_tt_fast:
+ .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
+.tocent__vgPlain_stats__n_xindirs_32:
+ .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
+.tocent__vgPlain_stats__n_xindir_misses_32:
+ .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
+.tocent__vgPlain_machine_ppc64_has_VMX:
+ .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/*----------------------------------------------------*/
+/*--- Entry and preamble (set everything up) ---*/
+/*----------------------------------------------------*/
+
+/* signature:
+void VG_(disp_run_translations)( UWord* two_words,
+ void* guest_state,
+ Addr host_addr );
+*/
+
+.section ".text"
+.align 2
+.globl VG_(disp_run_translations)
+.section ".opd","aw"
+.align 3
+VG_(disp_run_translations):
+.quad .VG_(disp_run_translations),.TOC.@tocbase,0
+.previous
+.type .VG_(disp_run_translations),@function
+.globl .VG_(disp_run_translations)
+.VG_(disp_run_translations):
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
+
+ /* ----- entry point to VG_(disp_run_translations) ----- */
+        /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp) */
+
+ /* Save lr, cr */
+ mflr 6
+ std 6,16(1)
+ mfcr 6
+ std 6,8(1)
+
+ /* New stack frame */
+ stdu 1,-624(1) /* sp should maintain 16-byte alignment */
+
+ /* General reg save area : 152 bytes */
+ std 31,472(1)
+ std 30,464(1)
+ std 29,456(1)
+ std 28,448(1)
+ std 27,440(1)
+ std 26,432(1)
+ std 25,424(1)
+ std 24,416(1)
+ std 23,408(1)
+ std 22,400(1)
+ std 21,392(1)
+ std 20,384(1)
+ std 19,376(1)
+ std 18,368(1)
+ std 17,360(1)
+ std 16,352(1)
+ std 15,344(1)
+ std 14,336(1)
+ std 13,328(1)
+ std 3,104(1) /* save two_words for later */
+
+ /* Save callee-saved registers... */
+ /* Floating-point reg save area : 144 bytes */
+ stfd 31,616(1)
+ stfd 30,608(1)
+ stfd 29,600(1)
+ stfd 28,592(1)
+ stfd 27,584(1)
+ stfd 26,576(1)
+ stfd 25,568(1)
+ stfd 24,560(1)
+ stfd 23,552(1)
+ stfd 22,544(1)
+ stfd 21,536(1)
+ stfd 20,528(1)
+ stfd 19,520(1)
+ stfd 18,512(1)
+ stfd 17,504(1)
+ stfd 16,496(1)
+ stfd 15,488(1)
+ stfd 14,480(1)
+
+ /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
+ The Linux kernel might not actually use VRSAVE for its intended
+ purpose, but it should be harmless to preserve anyway. */
+ /* r3, r4, r5 are live here, so use r6 */
+ ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 6,0(6)
+ cmpldi 6,0
+ beq .LafterVMX1
+
+ /* VRSAVE save word : 32 bytes */
+ mfspr 6,256 /* vrsave reg is spr number 256 */
+ stw 6,324(1)
+
+ /* Alignment padding : 4 bytes */
+
+ /* Vector reg save area (quadword aligned) : 192 bytes */
+ li 6,304
+ stvx 31,6,1
+ li 6,288
+ stvx 30,6,1
+ li 6,272
+ stvx 29,6,1
+ li 6,256
+ stvx 28,6,1
+ li 6,240
+ stvx 27,6,1
+ li 6,224
+ stvx 26,6,1
+ li 6,208
+ stvx 25,6,1
+ li 6,192
+ stvx 24,6,1
+ li 6,176
+ stvx 23,6,1
+ li 6,160
+ stvx 22,6,1
+ li 6,144
+ stvx 21,6,1
+ li 6,128
+ stvx 20,6,1
+.LafterVMX1:
+
+ /* Local variable space... */
+
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
+
+ /* 96(sp) used later to check FPSCR[RM] */
+ /* 88(sp) used later to load fpscr with zero */
+ /* 48:87(sp) free */
+
+ /* Linkage Area (reserved) BE ABI
+ 40(sp) : TOC
+ 32(sp) : link editor doubleword
+ 24(sp) : compiler doubleword
+ 16(sp) : LR
+ 8(sp) : CR
+ 0(sp) : back-chain
+ */
+
+ /* set host FPU control word to the default mode expected
+ by VEX-generated code. See comments in libvex.h for
+ more info. */
+ /* => get zero into f3 (tedious)
+ fsub 3,3,3 is not a reliable way to do this, since if
+ f3 holds a NaN or similar then we don't necessarily
+ wind up with zero. */
+ li 6,0
+ stw 6,88(1)
+ lfs 3,88(1)
+ mtfsf 0xFF,3 /* fpscr = lo32 of f3 */
+
+ /* set host AltiVec control word to the default mode expected
+ by VEX-generated code. */
+ ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 6,0(6)
+ cmpldi 6,0
+ beq .LafterVMX2
+
+ vspltisw 3,0x0 /* generate zero */
+ mtvscr 3
+.LafterVMX2:
+
+ /* make a stack frame for the code we are calling */
+ stdu 1,-48(1)
+
+ /* Set up the guest state ptr */
+ mr 31,4 /* r31 (generated code gsp) = r4 */
+
+ /* and jump into the code cache. Chained translations in
+ the code cache run, until for whatever reason, they can't
+ continue. When that happens, the translation in question
+ will jump (or call) to one of the continuation points
+ VG_(cp_...) below. */
+ mtctr 5
+ bctr
+ /*NOTREACHED*/
+
+/*----------------------------------------------------*/
+/*--- Postamble and exit. ---*/
+/*----------------------------------------------------*/
+
+.postamble:
+ /* At this point, r6 and r7 contain two
+ words to be returned to the caller. r6
+ holds a TRC value, and r7 optionally may
+ hold another word (for CHAIN_ME exits, the
+ address of the place to patch.) */
+
+ /* undo the "make a stack frame for the code we are calling" */
+ addi 1,1,48
+
+ /* We're leaving. Check that nobody messed with
+ VSCR or FPSCR in ways we don't expect. */
+ /* Using r11 - value used again further on, so don't trash! */
+ ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 11,0(11)
+
+ /* Set fpscr back to a known state, since vex-generated code
+ may have messed with fpscr[rm]. */
+ li 5,0
+ addi 1,1,-16
+ stw 5,0(1)
+ lfs 3,0(1)
+ addi 1,1,16
+ mtfsf 0xFF,3 /* fpscr = f3 */
+
+ cmpldi 11,0 /* Do we have altivec? */
+ beq .LafterVMX8
+
+ /* Check VSCR[NJ] == 1 */
+ /* first generate 4x 0x00010000 */
+ vspltisw 4,0x1 /* 4x 0x00000001 */
+ vspltisw 5,0x0 /* zero */
+ vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
+ /* retrieve VSCR and mask wanted bits */
+ mfvscr 7
+ vand 7,7,6 /* gives NJ flag */
+ vspltw 7,7,0x3 /* flags-word to all lanes */
+ vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
+ bt 24,.invariant_violation /* branch if all_equal */
+
+.LafterVMX8:
+ /* otherwise we're OK */
+ b .remove_frame
+
+.invariant_violation:
+ li 6,VG_TRC_INVARIANT_FAILED
+ li 7,0
+ /* fall through */
+
+.remove_frame:
+ /* r11 already holds VG_(machine_ppc32_has_VMX) value */
+ cmplwi 11,0
+ beq .LafterVMX9
+
+ /* Restore Altivec regs.
+ Use r5 as scratch since r6/r7 are live. */
+ /* VRSAVE */
+ lwz 5,324(1)
+ mfspr 5,256 /* VRSAVE reg is spr number 256 */
+
+ /* Vector regs */
+ li 5,304
+ lvx 31,5,1
+ li 5,288
+ lvx 30,5,1
+ li 5,272
+ lvx 29,5,1
+ li 5,256
+ lvx 28,5,1
+ li 5,240
+ lvx 27,5,1
+ li 5,224
+ lvx 26,5,1
+ li 5,208
+ lvx 25,5,1
+ li 5,192
+ lvx 24,5,1
+ li 5,176
+ lvx 23,5,1
+ li 5,160
+ lvx 22,5,1
+ li 5,144
+ lvx 21,5,1
+ li 5,128
+ lvx 20,5,1
+.LafterVMX9:
+
+ /* Restore FP regs */
+ /* Floating-point regs */
+ lfd 31,616(1)
+ lfd 30,608(1)
+ lfd 29,600(1)
+ lfd 28,592(1)
+ lfd 27,584(1)
+ lfd 26,576(1)
+ lfd 25,568(1)
+ lfd 24,560(1)
+ lfd 23,552(1)
+ lfd 22,544(1)
+ lfd 21,536(1)
+ lfd 20,528(1)
+ lfd 19,520(1)
+ lfd 18,512(1)
+ lfd 17,504(1)
+ lfd 16,496(1)
+ lfd 15,488(1)
+ lfd 14,480(1)
+
+ /* restore int regs, including importantly r3 (two_words) */
+ ld 31,472(1)
+ ld 30,464(1)
+ ld 29,456(1)
+ ld 28,448(1)
+ ld 27,440(1)
+ ld 26,432(1)
+ ld 25,424(1)
+ ld 24,416(1)
+ ld 23,408(1)
+ ld 22,400(1)
+ ld 21,392(1)
+ ld 20,384(1)
+ ld 19,376(1)
+ ld 18,368(1)
+ ld 17,360(1)
+ ld 16,352(1)
+ ld 15,344(1)
+ ld 14,336(1)
+ ld 13,328(1)
+ ld 3,104(1)
+ /* Stash return values */
+ std 6,0(3)
+ std 7,8(3)
+
+ /* restore lr & sp, and leave */
+ ld 0,632(1) /* stack_size + 8 */
+ mtcr 0
+ ld 0,640(1) /* stack_size + 16 */
+ mtlr 0
+ addi 1,1,624 /* stack_size */
+ blr
+
+
+/*----------------------------------------------------*/
+/*--- Continuation points ---*/
+/*----------------------------------------------------*/
+
+/* ------ Chain me to slow entry point ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_chain_me_to_slowEP)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_chain_me_to_slowEP):
+ .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_chain_me_to_slowEP),@function
+ .globl .VG_(disp_cp_chain_me_to_slowEP)
+.VG_(disp_cp_chain_me_to_slowEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and, exit back to C land,
+ handing the caller the pair (Chain_me_S, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
+ mflr 7
+ /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
+ 4 = mtctr r30
+ 4 = btctr
+ */
+ subi 7,7,20+4+4
+ b .postamble
+
+/* ------ Chain me to fast entry point ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_chain_me_to_fastEP)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_chain_me_to_fastEP):
+ .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_chain_me_to_fastEP),@function
+ .globl .VG_(disp_cp_chain_me_to_fastEP)
+.VG_(disp_cp_chain_me_to_fastEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and, exit back to C land,
+ handing the caller the pair (Chain_me_S, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
+ mflr 7
+ /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
+ 4 = mtctr r30
+ 4 = btctr
+ */
+ subi 7,7,20+4+4
+ b .postamble
+
+/* ------ Indirect but boring jump ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_xindir)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_xindir):
+ .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_xindir),@function
+ .globl .VG_(disp_cp_xindir)
+.VG_(disp_cp_xindir):
+ /* Where are we going? */
+ ld 3,OFFSET_ppc64_CIA(31)
+
+ /* stats only */
+ ld 5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
+ lwz 6,0(5)
+ addi 6,6,1
+ stw 6,0(5)
+
+ /* r5 = &VG_(tt_fast) */
+ ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
+
+ /* try a fast lookup in the translation cache */
+ /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+ = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
+ rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
+ sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
+ add 5,5,4 /* & VG_(tt_fast)[entry#] */
+ ld 6,0(5) /* .guest */
+ ld 7,8(5) /* .host */
+ cmpd 3,6
+ bne .fast_lookup_failed
+
+ /* Found a match. Jump to .host. */
+ mtctr 7
+ bctr
+
+.fast_lookup_failed:
+ /* stats only */
+ ld 5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
+ lwz 6,0(5)
+ addi 6,6,1
+ stw 6,0(5)
+
+ li 6,VG_TRC_INNER_FASTMISS
+ li 7,0
+ b .postamble
+ /*NOTREACHED*/
+
+/* ------ Assisted jump ------ */
+.section ".text"
+ .align 2
+ .globl VG_(disp_cp_xassisted)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_xassisted):
+ .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_xassisted),@function
+ .globl .VG_(disp_cp_xassisted)
+.VG_(disp_cp_xassisted):
+ /* r31 contains the TRC */
+ mr 6,31
+ li 7,0
+ b .postamble
+
+/* ------ Event check failed ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_evcheck_fail)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_evcheck_fail):
+ .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_evcheck_fail),@function
+ .globl .VG_(disp_cp_evcheck_fail)
+.VG_(disp_cp_evcheck_fail):
+ li 6,VG_TRC_INNER_COUNTERZERO
+ li 7,0
+ b .postamble
+
+
+.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+#endif // defined(VGP_ppc64be_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
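
The ".opd" entries that recur through the BE file above are ELF v1
function descriptors: the visible symbol names a three-doubleword
record, while the code itself carries the dot-prefixed name. A minimal
C sketch of that record, under the standard ELF v1 conventions
("OpdEntry" is an illustrative name, not a Valgrind type):

   #include <stdint.h>

   /* Layout emitted by ".quad .sym,.TOC.@tocbase,0" in ".opd".
      A caller holding only a function pointer loads 'entry' into
      CTR and 'toc' into r2 before branching. */
   typedef struct {
      uint64_t entry;   /* address of the dot-symbol (the code)  */
      uint64_t toc;     /* TOC base the callee expects in r2     */
      uint64_t env;     /* environment/static chain; zero above  */
   } OpdEntry;
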
Index: coregrind/m_dispatch/dispatch-ppc64le-linux.S
===================================================================
--- /dev/null
+++ coregrind/m_dispatch/dispatch-ppc64le-linux.S
@@ -0,0 +1,630 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address. ---*/
+/*---                                     dispatch-ppc64le-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2005-2013 Cerion Armour-Brown <cerion@open-works.co.uk>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_ppc64le_linux)
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_dispatch_asm.h"
+#include "pub_core_transtab_asm.h"
+#include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */
+
+/* NOTE: PPC64 supports Big Endian and Little Endian. It also supports the
+   ELF version 1 and ELF version 2 ABIs.
+
+   Currently LE uses ELF version 2 and BE uses ELF version 1. However,
+   BE and LE may support the other ELF version in the future. So, the
+   _CALL_ELF macro is used in the assembly functions to enable code for
+   a specific ELF version independently of the endianness of the
+   machine. The test "#if _CALL_ELF == 2" checks whether ELF version 2
+   is being used.
+*/
+
+/* References to globals via the TOC */
+
+/*
+ .globl vgPlain_tt_fast
+ .lcomm vgPlain_tt_fast,4,4
+ .type vgPlain_tt_fast, @object
+*/
+.section ".toc","aw"
+.tocent__vgPlain_tt_fast:
+ .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
+.tocent__vgPlain_stats__n_xindirs_32:
+ .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
+.tocent__vgPlain_stats__n_xindir_misses_32:
+ .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
+.tocent__vgPlain_machine_ppc64_has_VMX:
+ .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/*----------------------------------------------------*/
+/*--- Entry and preamble (set everything up) ---*/
+/*----------------------------------------------------*/
+
+/* signature:
+void VG_(disp_run_translations)( UWord* two_words,
+ void* guest_state,
+ Addr host_addr );
+*/
+
+.section ".text"
+.align 2
+.globl VG_(disp_run_translations)
+#if _CALL_ELF == 2
+.type VG_(disp_run_translations),@function
+VG_(disp_run_translations):
+.type .VG_(disp_run_translations),@function
+#else
+.section ".opd","aw"
+.align 3
+VG_(disp_run_translations):
+.quad .VG_(disp_run_translations),.TOC.@tocbase,0
+.previous
+.type .VG_(disp_run_translations),@function
+#endif
+.globl .VG_(disp_run_translations)
+.VG_(disp_run_translations):
+#if _CALL_ELF == 2
+0: addis 2, 12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(disp_run_translations), .-VG_(disp_run_translations)
+#endif
+
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
+
+ /* ----- entry point to VG_(disp_run_translations) ----- */
+        /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp) */
+
+ /* Save lr, cr */
+ mflr 6
+ std 6,16(1)
+ mfcr 6
+ std 6,8(1)
+
+ /* New stack frame */
+ stdu 1,-624(1) /* sp should maintain 16-byte alignment */
+
+ /* General reg save area : 152 bytes */
+ std 31,472(1)
+ std 30,464(1)
+ std 29,456(1)
+ std 28,448(1)
+ std 27,440(1)
+ std 26,432(1)
+ std 25,424(1)
+ std 24,416(1)
+ std 23,408(1)
+ std 22,400(1)
+ std 21,392(1)
+ std 20,384(1)
+ std 19,376(1)
+ std 18,368(1)
+ std 17,360(1)
+ std 16,352(1)
+ std 15,344(1)
+ std 14,336(1)
+ std 13,328(1)
+ std 3,104(1) /* save two_words for later */
+
+ /* Save callee-saved registers... */
+ /* Floating-point reg save area : 144 bytes */
+ stfd 31,616(1)
+ stfd 30,608(1)
+ stfd 29,600(1)
+ stfd 28,592(1)
+ stfd 27,584(1)
+ stfd 26,576(1)
+ stfd 25,568(1)
+ stfd 24,560(1)
+ stfd 23,552(1)
+ stfd 22,544(1)
+ stfd 21,536(1)
+ stfd 20,528(1)
+ stfd 19,520(1)
+ stfd 18,512(1)
+ stfd 17,504(1)
+ stfd 16,496(1)
+ stfd 15,488(1)
+ stfd 14,480(1)
+
+ /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
+ The Linux kernel might not actually use VRSAVE for its intended
+ purpose, but it should be harmless to preserve anyway. */
+ /* r3, r4, r5 are live here, so use r6 */
+ ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 6,0(6)
+ cmpldi 6,0
+ beq .LafterVMX1
+
+ /* VRSAVE save word : 32 bytes */
+ mfspr 6,256 /* vrsave reg is spr number 256 */
+ stw 6,324(1)
+
+ /* Alignment padding : 4 bytes */
+
+ /* Vector reg save area (quadword aligned) : 192 bytes */
+ li 6,304
+ stvx 31,6,1
+ li 6,288
+ stvx 30,6,1
+ li 6,272
+ stvx 29,6,1
+ li 6,256
+ stvx 28,6,1
+ li 6,240
+ stvx 27,6,1
+ li 6,224
+ stvx 26,6,1
+ li 6,208
+ stvx 25,6,1
+ li 6,192
+ stvx 24,6,1
+ li 6,176
+ stvx 23,6,1
+ li 6,160
+ stvx 22,6,1
+ li 6,144
+ stvx 21,6,1
+ li 6,128
+ stvx 20,6,1
+.LafterVMX1:
+
+ /* Local variable space... */
+
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
+
+ /* 96(sp) used later to check FPSCR[RM] */
+ /* 88(sp) used later to load fpscr with zero */
+ /* 48:87(sp) free */
+
+        /* Linkage Area (reserved).  The BE (ELF v1) layout is retained here:
+ 40(sp) : TOC
+ 32(sp) : link editor doubleword
+ 24(sp) : compiler doubleword
+ 16(sp) : LR
+ 8(sp) : CR
+ 0(sp) : back-chain
+ */
+
+ /* set host FPU control word to the default mode expected
+ by VEX-generated code. See comments in libvex.h for
+ more info. */
+ /* => get zero into f3 (tedious)
+ fsub 3,3,3 is not a reliable way to do this, since if
+ f3 holds a NaN or similar then we don't necessarily
+ wind up with zero. */
+ li 6,0
+ stw 6,88(1)
+ lfs 3,88(1)
+ mtfsf 0xFF,3 /* fpscr = lo32 of f3 */
+
+ /* set host AltiVec control word to the default mode expected
+ by VEX-generated code. */
+ ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 6,0(6)
+ cmpldi 6,0
+ beq .LafterVMX2
+
+ vspltisw 3,0x0 /* generate zero */
+ mtvscr 3
+.LafterVMX2:
+
+ /* make a stack frame for the code we are calling */
+ stdu 1,-48(1)
+
+ /* Set up the guest state ptr */
+ mr 31,4 /* r31 (generated code gsp) = r4 */
+#if _CALL_ELF == 2
+/* For the LE ABI we need to set up r2 and r12 */
+0: addis 2, 12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+#endif
+
+ /* and jump into the code cache. Chained translations in
+ the code cache run, until for whatever reason, they can't
+ continue. When that happens, the translation in question
+ will jump (or call) to one of the continuation points
+ VG_(cp_...) below. */
+ mtctr 5
+ bctr
+ /*NOTREACHED*/
+
+/*----------------------------------------------------*/
+/*--- Postamble and exit. ---*/
+/*----------------------------------------------------*/
+
+.postamble:
+ /* At this point, r6 and r7 contain two
+ words to be returned to the caller. r6
+ holds a TRC value, and r7 optionally may
+ hold another word (for CHAIN_ME exits, the
+ address of the place to patch.) */
+
+ /* undo the "make a stack frame for the code we are calling" */
+ addi 1,1,48
+
+ /* We're leaving. Check that nobody messed with
+ VSCR or FPSCR in ways we don't expect. */
+ /* Using r11 - value used again further on, so don't trash! */
+ ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 11,0(11)
+
+ /* Set fpscr back to a known state, since vex-generated code
+ may have messed with fpscr[rm]. */
+ li 5,0
+ addi 1,1,-16
+ stw 5,0(1)
+ lfs 3,0(1)
+ addi 1,1,16
+ mtfsf 0xFF,3 /* fpscr = f3 */
+
+ cmpldi 11,0 /* Do we have altivec? */
+ beq .LafterVMX8
+
+ /* Check VSCR[NJ] == 1 */
+ /* first generate 4x 0x00010000 */
+ vspltisw 4,0x1 /* 4x 0x00000001 */
+ vspltisw 5,0x0 /* zero */
+ vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
+ /* retrieve VSCR and mask wanted bits */
+ mfvscr 7
+ vand 7,7,6 /* gives NJ flag */
+ vspltw 7,7,0x3 /* flags-word to all lanes */
+ vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
+ bt 24,.invariant_violation /* branch if all_equal */
+
+.LafterVMX8:
+ /* otherwise we're OK */
+ b .remove_frame
+
+.invariant_violation:
+ li 6,VG_TRC_INVARIANT_FAILED
+ li 7,0
+ /* fall through */
+
+.remove_frame:
+ /* r11 already holds VG_(machine_ppc32_has_VMX) value */
+ cmplwi 11,0
+ beq .LafterVMX9
+
+ /* Restore Altivec regs.
+ Use r5 as scratch since r6/r7 are live. */
+ /* VRSAVE */
+ lwz 5,324(1)
+ mfspr 5,256 /* VRSAVE reg is spr number 256 */
+
+ /* Vector regs */
+ li 5,304
+ lvx 31,5,1
+ li 5,288
+ lvx 30,5,1
+ li 5,272
+ lvx 29,5,1
+ li 5,256
+ lvx 28,5,1
+ li 5,240
+ lvx 27,5,1
+ li 5,224
+ lvx 26,5,1
+ li 5,208
+ lvx 25,5,1
+ li 5,192
+ lvx 24,5,1
+ li 5,176
+ lvx 23,5,1
+ li 5,160
+ lvx 22,5,1
+ li 5,144
+ lvx 21,5,1
+ li 5,128
+ lvx 20,5,1
+.LafterVMX9:
+
+ /* Restore FP regs */
+ /* Floating-point regs */
+ lfd 31,616(1)
+ lfd 30,608(1)
+ lfd 29,600(1)
+ lfd 28,592(1)
+ lfd 27,584(1)
+ lfd 26,576(1)
+ lfd 25,568(1)
+ lfd 24,560(1)
+ lfd 23,552(1)
+ lfd 22,544(1)
+ lfd 21,536(1)
+ lfd 20,528(1)
+ lfd 19,520(1)
+ lfd 18,512(1)
+ lfd 17,504(1)
+ lfd 16,496(1)
+ lfd 15,488(1)
+ lfd 14,480(1)
+
+ /* restore int regs, including importantly r3 (two_words) */
+ ld 31,472(1)
+ ld 30,464(1)
+ ld 29,456(1)
+ ld 28,448(1)
+ ld 27,440(1)
+ ld 26,432(1)
+ ld 25,424(1)
+ ld 24,416(1)
+ ld 23,408(1)
+ ld 22,400(1)
+ ld 21,392(1)
+ ld 20,384(1)
+ ld 19,376(1)
+ ld 18,368(1)
+ ld 17,360(1)
+ ld 16,352(1)
+ ld 15,344(1)
+ ld 14,336(1)
+ ld 13,328(1)
+ ld 3,104(1)
+ /* Stash return values */
+ std 6,0(3)
+ std 7,8(3)
+
+ /* restore lr & sp, and leave */
+ ld 0,632(1) /* stack_size + 8 */
+ mtcr 0
+ ld 0,640(1) /* stack_size + 16 */
+ mtlr 0
+ addi 1,1,624 /* stack_size */
+ blr
+#if _CALL_ELF == 2
+ .size VG_(disp_run_translations),.-VG_(disp_run_translations)
+#endif
+
+
+/*----------------------------------------------------*/
+/*--- Continuation points ---*/
+/*----------------------------------------------------*/
+
+/* ------ Chain me to slow entry point ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_chain_me_to_slowEP)
+#if _CALL_ELF == 2
+ .type VG_(disp_cp_chain_me_to_slowEP),@function
+VG_(disp_cp_chain_me_to_slowEP):
+#else
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_chain_me_to_slowEP):
+ .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
+ .previous
+#endif
+ .type .VG_(disp_cp_chain_me_to_slowEP),@function
+ .globl .VG_(disp_cp_chain_me_to_slowEP)
+.VG_(disp_cp_chain_me_to_slowEP):
+#if _CALL_ELF == 2
+0: addis 2, 12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(disp_cp_chain_me_to_slowEP), .-VG_(disp_cp_chain_me_to_slowEP)
+#endif
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and, exit back to C land,
+ handing the caller the pair (Chain_me_S, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
+ mflr 7
+ /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
+ 4 = mtctr r30
+ 4 = btctr
+ */
+ subi 7,7,20+4+4
+ b .postamble
+#if _CALL_ELF == 2
+ .size VG_(disp_cp_chain_me_to_slowEP),.-VG_(disp_cp_chain_me_to_slowEP)
+#endif
+
+/* ------ Chain me to fast entry point ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_chain_me_to_fastEP)
+#if _CALL_ELF == 2
+ .type VG_(disp_cp_chain_me_to_fastEP),@function
+VG_(disp_cp_chain_me_to_fastEP):
+#else
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_chain_me_to_fastEP):
+ .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
+ .previous
+#endif
+ .type .VG_(disp_cp_chain_me_to_fastEP),@function
+ .globl .VG_(disp_cp_chain_me_to_fastEP)
+.VG_(disp_cp_chain_me_to_fastEP):
+#if _CALL_ELF == 2
+0: addis 2, 12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(disp_cp_chain_me_to_fastEP), .-VG_(disp_cp_chain_me_to_fastEP)
+#endif
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and, exit back to C land,
+ handing the caller the pair (Chain_me_S, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
+ mflr 7
+ /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
+ 4 = mtctr r30
+ 4 = btctr
+ */
+ subi 7,7,20+4+4
+ b .postamble
+#if _CALL_ELF == 2
+ .size VG_(disp_cp_chain_me_to_fastEP),.-VG_(disp_cp_chain_me_to_fastEP)
+#endif
+
+/* ------ Indirect but boring jump ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_xindir)
+#if _CALL_ELF == 2
+ .type VG_(disp_cp_xindir),@function
+VG_(disp_cp_xindir):
+#else
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_xindir):
+ .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0
+ .previous
+#endif
+ .type .VG_(disp_cp_xindir),@function
+ .globl .VG_(disp_cp_xindir)
+.VG_(disp_cp_xindir):
+#if _CALL_ELF == 2
+0: addis 2, 12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(disp_cp_xindir), .-VG_(disp_cp_xindir)
+#endif
+ /* Where are we going? */
+ ld 3,OFFSET_ppc64_CIA(31)
+
+ /* stats only */
+ ld 5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
+ lwz 6,0(5)
+ addi 6,6,1
+ stw 6,0(5)
+
+ /* r5 = &VG_(tt_fast) */
+ ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
+
+ /* try a fast lookup in the translation cache */
+ /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+ = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
+ rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
+ sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
+ add 5,5,4 /* & VG_(tt_fast)[entry#] */
+ ld 6,0(5) /* .guest */
+ ld 7,8(5) /* .host */
+ cmpd 3,6
+ bne .fast_lookup_failed
+
+ /* Found a match. Jump to .host. */
+ mtctr 7
+ bctr
+#if _CALL_ELF == 2
+ .size VG_(disp_cp_xindir),.-VG_(disp_cp_xindir)
+#endif
+
+.fast_lookup_failed:
+ /* stats only */
+ ld 5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
+ lwz 6,0(5)
+ addi 6,6,1
+ stw 6,0(5)
+
+ li 6,VG_TRC_INNER_FASTMISS
+ li 7,0
+ b .postamble
+ /*NOTREACHED*/
+
+/* ------ Assisted jump ------ */
+.section ".text"
+ .align 2
+ .globl VG_(disp_cp_xassisted)
+#if _CALL_ELF == 2
+ .type VG_(disp_cp_xassisted),@function
+VG_(disp_cp_xassisted):
+#else
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_xassisted):
+ .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0
+ .previous
+#endif
+#if _CALL_ELF == 2
+0: addis 2, 12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(disp_cp_xassisted), .-VG_(disp_cp_xassisted)
+#endif
+ .type .VG_(disp_cp_xassisted),@function
+ .globl .VG_(disp_cp_xassisted)
+.VG_(disp_cp_xassisted):
+ /* r31 contains the TRC */
+ mr 6,31
+ li 7,0
+ b .postamble
+#if _CALL_ELF == 2
+ .size VG_(disp_cp_xassisted),.-VG_(disp_cp_xassisted)
+#endif
+
+/* ------ Event check failed ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_evcheck_fail)
+#if _CALL_ELF == 2
+ .type VG_(disp_cp_evcheck_fail),@function
+VG_(disp_cp_evcheck_fail):
+#else
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_evcheck_fail):
+ .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
+ .previous
+#endif
+#if _CALL_ELF == 2
+0: addis 2, 12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(disp_cp_evcheck_fail), .-VG_(disp_cp_evcheck_fail)
+#endif
+ .type .VG_(disp_cp_evcheck_fail),@function
+ .globl .VG_(disp_cp_evcheck_fail)
+.VG_(disp_cp_evcheck_fail):
+ li 6,VG_TRC_INNER_COUNTERZERO
+ li 7,0
+ b .postamble
+#if _CALL_ELF == 2
+ .size VG_(disp_cp_evcheck_fail),.-VG_(disp_cp_evcheck_fail)
+#endif
+
+.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+#endif // defined(VGP_ppc64le_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
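
The "0:" addis/addi pairs plus ".localentry" scattered through the LE
file above implement the ELF v2 dual-entry convention: the global
entry assumes r12 holds the function's own address and derives r2
(the TOC pointer) from it; the local entry, eight bytes in, assumes
r2 is already valid. A stand-alone sketch of the idiom, in the same
inline-asm style coregrind uses elsewhere ("my_func" is illustrative):

   __asm__(
   ".text"                              "\n"
   ".globl my_func"                     "\n"
   ".type  my_func,@function"           "\n"
   "my_func:"                           "\n" /* global entry: r12 = &my_func */
   "0: addis 2,12,.TOC.-0b@ha"          "\n" /* r2 = r12 + hi(TOC disp)      */
   "   addi  2,2,.TOC.-0b@l"            "\n" /* r2 = TOC base                */
   "   .localentry my_func, .-my_func"  "\n" /* local entry starts here      */
   "   blr"                             "\n"
   ".size my_func, .-my_func"           "\n"
   );
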
Index: coregrind/m_libcsetjmp.c
===================================================================
--- coregrind/m_libcsetjmp.c.orig
+++ coregrind/m_libcsetjmp.c
@@ -158,7 +158,6 @@ __asm__(
".align 2" "\n"
".p2align 4,,15" "\n"
".globl VG_MINIMAL_SETJMP" "\n"
-
".section \".opd\",\"aw\"" "\n"
".align 3" "\n"
"VG_MINIMAL_SETJMP:" "\n"
@@ -267,9 +266,114 @@ __asm__(
"" "\n"
".previous" "\n"
-".previous" "\n"
);
+#elif defined(VGP_ppc64le_linux)
+__asm__(
+".section \".toc\",\"aw\"" "\n"
+
+".section \".text\"" "\n"
+".align 2" "\n"
+".p2align 4,,15" "\n"
+".globl VG_MINIMAL_SETJMP" "\n"
+".type VG_MINIMAL_SETJMP,@function" "\n"
+"VG_MINIMAL_SETJMP:" "\n"
+" .localentry VG_MINIMAL_SETJMP, .-VG_MINIMAL_SETJMP" "\n"
+" std 0, 0(3)" "\n"
+" std 1, 8(3)" "\n"
+" std 2, 16(3)" "\n"
+" std 3, 24(3)" "\n"
+" std 4, 32(3)" "\n"
+" std 5, 40(3)" "\n"
+" std 6, 48(3)" "\n"
+" std 7, 56(3)" "\n"
+" std 8, 64(3)" "\n"
+" std 9, 72(3)" "\n"
+" std 10, 80(3)" "\n"
+" std 11, 88(3)" "\n"
+" std 12, 96(3)" "\n"
+" std 13, 104(3)" "\n"
+" std 14, 112(3)" "\n"
+" std 15, 120(3)" "\n"
+" std 16, 128(3)" "\n"
+" std 17, 136(3)" "\n"
+" std 18, 144(3)" "\n"
+" std 19, 152(3)" "\n"
+" std 20, 160(3)" "\n"
+" std 21, 168(3)" "\n"
+" std 22, 176(3)" "\n"
+" std 23, 184(3)" "\n"
+" std 24, 192(3)" "\n"
+" std 25, 200(3)" "\n"
+" std 26, 208(3)" "\n"
+" std 27, 216(3)" "\n"
+" std 28, 224(3)" "\n"
+" std 29, 232(3)" "\n"
+" std 30, 240(3)" "\n"
+" std 31, 248(3)" "\n"
+// must use a caller-save register here as scratch, hence r4
+" mflr 4" "\n"
+" std 4, 256(3)" "\n"
+" mfcr 4" "\n"
+" std 4, 264(3)" "\n"
+" li 3, 0" "\n"
+" blr" "\n"
+"" "\n"
+
+
+".globl VG_MINIMAL_LONGJMP" "\n"
+".type VG_MINIMAL_LONGJMP, @function" "\n"
+"VG_MINIMAL_LONGJMP:" "\n"
+" .localentry VG_MINIMAL_LONGJMP, .-VG_MINIMAL_LONGJMP" "\n"
+ // do r4 = 1
+ // and park it in the restore slot for r3 (the ret reg)
+" li 4, 1" "\n"
+" std 4, 24(3)" "\n"
+ // restore everything except r3
+ // then r3 last of all
+ // then blr
+" ld 0, 256(3)" "\n"
+" mtlr 0" "\n"
+" ld 0, 264(3)" "\n"
+" mtcr 0" "\n"
+" ld 0, 0(3)" "\n"
+" ld 1, 8(3)" "\n"
+" ld 2, 16(3)" "\n"
+ // r3 is done at the end
+" ld 4, 32(3)" "\n"
+" ld 5, 40(3)" "\n"
+" ld 6, 48(3)" "\n"
+" ld 7, 56(3)" "\n"
+" ld 8, 64(3)" "\n"
+" ld 9, 72(3)" "\n"
+" ld 10, 80(3)" "\n"
+" ld 11, 88(3)" "\n"
+" ld 12, 96(3)" "\n"
+" ld 13, 104(3)" "\n"
+" ld 14, 112(3)" "\n"
+" ld 15, 120(3)" "\n"
+" ld 16, 128(3)" "\n"
+" ld 17, 136(3)" "\n"
+" ld 18, 144(3)" "\n"
+" ld 19, 152(3)" "\n"
+" ld 20, 160(3)" "\n"
+" ld 21, 168(3)" "\n"
+" ld 22, 176(3)" "\n"
+" ld 23, 184(3)" "\n"
+" ld 24, 192(3)" "\n"
+" ld 25, 200(3)" "\n"
+" ld 26, 208(3)" "\n"
+" ld 27, 216(3)" "\n"
+" ld 28, 224(3)" "\n"
+" ld 29, 232(3)" "\n"
+" ld 30, 240(3)" "\n"
+" ld 31, 248(3)" "\n"
+" ld 3, 24(3)" "\n"
+" blr" "\n"
+"" "\n"
+
+".previous" "\n"
+);
#endif /* VGP_ppc64be_linux */
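
For reference, the pair above is consumed like this elsewhere in
coregrind (a sketch; VG_MINIMAL_JMP_BUF is assumed to be the
buffer-declaring macro from pub_core_libcsetjmp.h, and the buffer must
cover the 34 doublewords stored above: r0-r31, LR and CR):

   static VG_MINIMAL_JMP_BUF(jmpbuf);

   static void demo(void)
   {
      if (VG_MINIMAL_SETJMP(jmpbuf) == 0) {
         /* Direct path: the registers were stashed, r3 returned as 0. */
         VG_MINIMAL_LONGJMP(jmpbuf);   /* control re-enters below */
      } else {
         /* longjmp path: r3 came back as 1, via the saved-r3 slot
            that VG_MINIMAL_LONGJMP overwrote before restoring. */
      }
   }
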
Index: coregrind/m_machine.c
===================================================================
--- coregrind/m_machine.c.orig
+++ coregrind/m_machine.c
@@ -1154,9 +1154,13 @@ Bool VG_(machine_get_hwcaps)( void )
VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
va = VexArchPPC64;
- // CARLL fixme: when the time comes, copy .endness setting code
- // from the VGA_mips32 case
+# if defined(VKI_LITTLE_ENDIAN)
+ vai.endness = VexEndnessLE;
+# elif defined(VKI_BIG_ENDIAN)
vai.endness = VexEndnessBE;
+# else
+ vai.endness = VexEndness_INVALID;
+# endif
vai.hwcaps = 0;
if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V;
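
The VKI_LITTLE_ENDIAN / VKI_BIG_ENDIAN macros are fixed at build time
by the vki headers; a runtime analogue of the choice being made here,
for illustration:

   #include <stdint.h>

   static int host_is_little_endian(void)
   {
      uint32_t probe = 1;
      return *(uint8_t*)&probe == 1;   /* LSB stored first => LE */
   }
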
Index: coregrind/m_syscall.c
===================================================================
--- coregrind/m_syscall.c.orig
+++ coregrind/m_syscall.c
@@ -422,6 +422,45 @@ asm(
" blr\n"
);
+#elif defined(VGP_ppc64le_linux)
+/* Due to the need to return 65 bits of result, this is completely
+ different from the ppc32 case. The single arg register points to a
+ 7-word block containing the syscall # and the 6 args. The syscall
+ result proper is put in [0] of the block, and %cr0.so is in the
+ bottom bit of [1]. */
+extern void do_syscall_WRK ( ULong* argblock );
+/* Little Endian supports ELF version 2. In the future, it may support
+ * other versions as well.
+ */
+asm(
+".align 2\n"
+".globl do_syscall_WRK\n"
+".type do_syscall_WRK,@function\n"
+"do_syscall_WRK:\n"
+"#if _CALL_ELF == 2" "\n"
+"0: addis 2,12,.TOC.-0b@ha\n"
+" addi 2,2,.TOC.-0b@l\n"
+" .localentry do_syscall_WRK, .-do_syscall_WRK\n"
+"#endif" "\n"
+" std 3,-16(1)\n" /* stash arg */
+" ld 8, 48(3)\n" /* sc arg 6 */
+" ld 7, 40(3)\n" /* sc arg 5 */
+" ld 6, 32(3)\n" /* sc arg 4 */
+" ld 5, 24(3)\n" /* sc arg 3 */
+" ld 4, 16(3)\n" /* sc arg 2 */
+" ld 0, 0(3)\n" /* sc number */
+" ld 3, 8(3)\n" /* sc arg 1 */
+" sc\n" /* result in r3 and cr0.so */
+" ld 5,-16(1)\n" /* reacquire argblock ptr (r5 is caller-save) */
+" std 3,0(5)\n" /* argblock[0] = r3 */
+" mfcr 3\n"
+" srwi 3,3,28\n"
+" andi. 3,3,1\n"
+" std 3,8(5)\n" /* argblock[1] = cr0.s0 & 1 */
+" blr\n"
+" .size do_syscall_WRK, .-do_syscall_WRK\n"
+);
+
#elif defined(VGP_arm_linux)
/* I think the conventions are:
args in r0 r1 r2 r3 r4 r5
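
The argblock protocol of the new ppc64le do_syscall_WRK above is
driven from C roughly as follows (a sketch mirroring the existing
ppc64 branch of VG_(do_syscall) in this file; "demo_syscall" is an
illustrative name):

   static SysRes demo_syscall ( UWord sysno, UWord a1, UWord a2,
                                UWord a3, UWord a4, UWord a5, UWord a6 )
   {
      ULong argblock[7];
      argblock[0] = sysno;
      argblock[1] = a1;  argblock[2] = a2;  argblock[3] = a3;
      argblock[4] = a4;  argblock[5] = a5;  argblock[6] = a6;
      do_syscall_WRK( &argblock[0] );
      /* argblock[0] = r3 result; bit 0 of argblock[1] = cr0.so,
         set exactly when the kernel reported an error. */
      return VG_(mk_SysRes_ppc64_linux)( argblock[0], argblock[1] );
   }
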
Index: coregrind/m_initimg/initimg-linux.c
===================================================================
--- coregrind/m_initimg/initimg-linux.c.orig
+++ coregrind/m_initimg/initimg-linux.c
@@ -1055,6 +1055,9 @@ void VG_(ii_finalise_image)( IIFinaliseI
arch->vex.guest_GPR1 = iifii.initial_client_SP;
arch->vex.guest_GPR2 = iifii.initial_client_TOC;
arch->vex.guest_CIA = iifii.initial_client_IP;
+#if defined(VGP_ppc64le_linux)
+ arch->vex.guest_GPR12 = iifii.initial_client_IP;
+#endif
# elif defined(VGP_arm_linux)
/* Zero out the initial state, and set up the simulated FPU in a
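
Seeding guest_GPR12 matters because the ELF v2 global entry prologue
derives the TOC pointer from r12; restated as C (a sketch, names
illustrative):

   #include <stdint.h>

   /* What "addis 2,12,.TOC.-0b@ha; addi 2,2,.TOC.-0b@l" computes.
      If r12 is not the entry address, r2 ends up wrong and every
      TOC-relative load after it goes wild. */
   static uint64_t derive_toc(uint64_t r12_entry, int64_t toc_disp)
   {
      return r12_entry + toc_disp;   /* r2 = r12 + (.TOC. - entry) */
   }
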
Index: coregrind/m_trampoline.S
===================================================================
--- coregrind/m_trampoline.S.orig
+++ coregrind/m_trampoline.S
@@ -444,6 +444,12 @@ VG_(ppctoc_magic_redirect_return_stub):
/* this function is written using the "dotless" ABI convention */
.align 2
.globl VG_(ppc64_linux_REDIR_FOR_strlen)
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+ /* Little Endian uses ELF version 2 */
+ .type VG_(ppc64_linux_REDIR_FOR_strlen),@function
+VG_(ppc64_linux_REDIR_FOR_strlen):
+#else
+ /* Big Endian uses ELF version 1 */
.section ".opd","aw"
.align 3
VG_(ppc64_linux_REDIR_FOR_strlen):
@@ -454,6 +460,12 @@ VG_(ppc64_linux_REDIR_FOR_strlen):
.type VG_(ppc64_linux_REDIR_FOR_strlen), @function
.L.VG_(ppc64_linux_REDIR_FOR_strlen):
+#endif
+#if _CALL_ELF == 2
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(ppc64_linux_REDIR_FOR_strlen), .-VG_(ppc64_linux_REDIR_FOR_strlen)
+#endif
mr 9,3
lbz 0,0(3)
li 3,0
@@ -467,6 +479,12 @@ VG_(ppc64_linux_REDIR_FOR_strlen):
cmpwi 7,0,0
bne 7,.L01
blr
+
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+ .size VG_(ppc64_linux_REDIR_FOR_strlen),.-VG_(ppc64_linux_REDIR_FOR_strlen)
+#else
+ .size VG_(ppc64_linux_REDIR_FOR_strlen),.-.L.VG_(ppc64_linux_REDIR_FOR_strlen)
+#endif
.long 0
.byte 0,0,0,0,0,0,0,0
.L0end:
@@ -474,6 +492,10 @@ VG_(ppc64_linux_REDIR_FOR_strlen):
/* this function is written using the "dotless" ABI convention */
.align 2
.globl VG_(ppc64_linux_REDIR_FOR_strchr)
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+ .type VG_(ppc64_linux_REDIR_FOR_strchr),@function
+VG_(ppc64_linux_REDIR_FOR_strchr):
+#else
.section ".opd","aw"
.align 3
VG_(ppc64_linux_REDIR_FOR_strchr):
@@ -482,8 +504,14 @@ VG_(ppc64_linux_REDIR_FOR_strchr):
.size VG_(ppc64_linux_REDIR_FOR_strchr), \
.L1end-.L.VG_(ppc64_linux_REDIR_FOR_strchr)
.type VG_(ppc64_linux_REDIR_FOR_strchr),@function
-
+
.L.VG_(ppc64_linux_REDIR_FOR_strchr):
+#endif
+#if _CALL_ELF == 2
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry VG_(ppc64_linux_REDIR_FOR_strchr), .-VG_(ppc64_linux_REDIR_FOR_strchr)
+#endif
lbz 0,0(3)
rldicl 4,4,0,56
cmpw 7,4,0
@@ -491,6 +519,11 @@ VG_(ppc64_linux_REDIR_FOR_strchr):
cmpdi 7,0,0
bne 7,.L18
b .L14
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+ .size VG_(ppc64_linux_REDIR_FOR_strchr),.-VG_(ppc64_linux_REDIR_FOR_strchr)
+#else
+ .size VG_(ppc64_linux_REDIR_FOR_strchr),.-.L.VG_(ppc64_linux_REDIR_FOR_strchr)
+#endif
.L19:
beq 6,.L14
.L18:
Index: coregrind/m_coredump/coredump-elf.c
===================================================================
--- coregrind/m_coredump/coredump-elf.c.orig
+++ coregrind/m_coredump/coredump-elf.c
@@ -343,6 +343,27 @@ static void fill_prstatus(const ThreadSt
regs->dsisr = 0;
regs->result = 0;
+#elif defined(VGP_ppc64le_linux)
+# define DO(n) regs->gpr[n] = arch->vex.guest_GPR##n
+ DO(0); DO(1); DO(2); DO(3); DO(4); DO(5); DO(6); DO(7);
+ DO(8); DO(9); DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
+ DO(16); DO(17); DO(18); DO(19); DO(20); DO(21); DO(22); DO(23);
+ DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31);
+# undef DO
+
+ regs->nip = arch->vex.guest_CIA;
+ regs->msr = 0xf033; /* pretty arbitrary */
+ regs->orig_gpr3 = arch->vex.guest_GPR3;
+ regs->ctr = arch->vex.guest_CTR;
+ regs->link = arch->vex.guest_LR;
+ regs->xer = LibVEX_GuestPPC64_get_XER( &((ThreadArchState*)arch)->vex );
+ regs->ccr = LibVEX_GuestPPC64_get_CR( &((ThreadArchState*)arch)->vex );
+ /* regs->mq = 0; */
+ regs->trap = 0;
+ regs->dar = 0; /* should be fault address? */
+ regs->dsisr = 0;
+ regs->result = 0;
+
#elif defined(VGP_arm_linux)
regs->ARM_r0 = arch->vex.guest_R0;
regs->ARM_r1 = arch->vex.guest_R1;
Index: coregrind/m_main.c
===================================================================
--- coregrind/m_main.c.orig
+++ coregrind/m_main.c
@@ -2608,6 +2608,10 @@ static void final_tidyup(ThreadId tid)
VG_(set_IP)(tid, __libc_freeres_wrapper);
# if defined(VGP_ppc64be_linux)
VG_(threads)[tid].arch.vex.guest_GPR2 = r2;
+# elif defined(VGP_ppc64le_linux)
+   /* GPR2 is set for completeness, but GPR12 is what is actually needed */
+ VG_(threads)[tid].arch.vex.guest_GPR2 = __libc_freeres_wrapper;
+ VG_(threads)[tid].arch.vex.guest_GPR12 = __libc_freeres_wrapper;
# endif
/* mips-linux note: we need to set t9 */
# if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
@@ -2847,6 +2851,51 @@ asm("\n"
"\tmtctr 14\n"
"\tbctrl\n"
"\tnop\n"
+ "\ttrap\n"
+);
+#elif defined(VGP_ppc64le_linux)
+/* Little Endian uses ELF version 2 but in the future may also
+ * support other ELF versions.
+ */
+asm("\n"
+ "\t.align 2\n"
+ "\t.global _start\n"
+ "\t.type _start,@function\n"
+ "_start:\n"
+ "#if _CALL_ELF == 2 \n"
+ "0: addis 2,12,.TOC.-0b@ha\n"
+ " addi 2,2,.TOC.-0b@l\n"
+ " .localentry _start, .-_start\n"
+ "#endif \n"
+ /* set up the new stack in r16 */
+ "\tlis 16, vgPlain_interim_stack@highest\n"
+ "\tori 16,16,vgPlain_interim_stack@higher\n"
+ "\tsldi 16,16,32\n"
+ "\toris 16,16,vgPlain_interim_stack@h\n"
+ "\tori 16,16,vgPlain_interim_stack@l\n"
+ "\txor 17,17,17\n"
+ "\tlis 17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" >> 16)\n"
+ "\tori 17,17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" & 0xFFFF)\n"
+ "\txor 18,18,18\n"
+ "\tlis 18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" >> 16)\n"
+ "\tori 18,18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
+ "\tadd 16,17,16\n"
+ "\tadd 16,18,16\n"
+ "\trldicr 16,16,0,59\n"
+ /* now r16 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
+ VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
+ boundary. And r1 is the original SP. Set the SP to r16 and
+ call _start_in_C_linux, passing it the initial SP. */
+ "\tmr 3,1\n"
+ "\tmr 1,16\n"
+ "\tlis 14, _start_in_C_linux@highest\n"
+ "\tori 14,14,_start_in_C_linux@higher\n"
+ "\tsldi 14,14,32\n"
+ "\toris 14,14,_start_in_C_linux@h\n"
+ "\tori 14,14,_start_in_C_linux@l\n"
+ "\tmtctr 14\n"
+ "\tbctrl\n"
+ "\tnop\n"
"\ttrap\n"
);
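
The register arithmetic in the stub above, restated as C (a sketch;
vgPlain_interim_stack and the two size constants are the symbols the
asm references):

   #include <stdint.h>

   extern char vgPlain_interim_stack[];   /* provided by coregrind */

   static uintptr_t compute_initial_sp(uintptr_t guard_szb,
                                       uintptr_t active_szb)
   {
      uintptr_t sp = (uintptr_t)vgPlain_interim_stack
                     + guard_szb + active_szb;
      return sp & ~(uintptr_t)0xF;   /* rldicr 16,16,0,59: 16-byte align */
   }
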
#elif defined(VGP_s390x_linux)
Index: tests/arch_test.c
===================================================================
--- tests/arch_test.c.orig
+++ tests/arch_test.c
@@ -27,6 +27,7 @@ char* all_archs[] = {
"amd64",
"ppc32",
"ppc64",
+ "ppc64le",
"arm",
"s390x",
"mips32",
@@ -54,6 +55,9 @@ static Bool go(char* arch)
if ( 0 == strcmp( arch, "ppc32" ) ) return True;
#endif
+#elif defined(VGP_ppc64le_linux)
+ if ( 0 == strcmp( arch, "ppc64" ) ) return True;
+
#elif defined(VGP_s390x_linux)
if ( 0 == strcmp( arch, "s390x" ) ) return True;
Index: memcheck/mc_machine.c
===================================================================
--- memcheck/mc_machine.c.orig
+++ memcheck/mc_machine.c
@@ -167,7 +167,11 @@ static Int get_otrack_shadow_offset_wrk
return GOF(GPRn);
by testing ox instead of o, and setting ox back 4 bytes when sz == 4.
*/
+#if defined(VGA_ppc64le)
+ Int ox = o;
+#else
Int ox = sz == 8 ? o : (o - 4);
+#endif
if (ox == GOF(GPR0)) return ox;
if (ox == GOF(GPR1)) return ox;
if (ox == GOF(GPR2)) return ox;
@@ -367,7 +371,11 @@ static Int get_otrack_shadow_offset_wrk
Int o = offset;
Int sz = szB;
tl_assert(sz > 0);
+#if defined(VGA_ppc64)
tl_assert(host_is_big_endian());
+#elif defined(VGA_ppc64le)
+ tl_assert(host_is_little_endian());
+#endif
if (o == GOF(GPR0) && sz == 4) return o;
if (o == GOF(GPR1) && sz == 4) return o;
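
The differing "ox" computation reflects where the low 32 bits of an
8-byte guest register slot live; a self-contained check (sketch):

   #include <stdint.h>
   #include <string.h>

   /* Offset of the low word within an 8-byte GPR slot: 0 on LE
      (hence ox == o), 4 on BE (hence ox == o - 4). */
   static unsigned low_word_offset(void)
   {
      uint64_t gpr = 0xdeadbeefULL;   /* low word is 0xdeadbeef */
      uint32_t w;
      memcpy(&w, &gpr, 4);            /* read the first 4 bytes */
      return w == 0xdeadbeefu ? 0 : 4;
   }
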
Index: helgrind/hg_main.c
===================================================================
--- helgrind/hg_main.c.orig
+++ helgrind/hg_main.c
@@ -4425,6 +4425,7 @@ static Bool is_in_dynamic_linker_shared_
if (VG_STREQ(soname, VG_U_LD_LINUX_SO_2)) return True;
if (VG_STREQ(soname, VG_U_LD_LINUX_X86_64_SO_2)) return True;
if (VG_STREQ(soname, VG_U_LD64_SO_1)) return True;
+ if (VG_STREQ(soname, VG_U_LD64_SO_2)) return True;
if (VG_STREQ(soname, VG_U_LD_SO_1)) return True;
# elif defined(VGO_darwin)
if (VG_STREQ(soname, VG_U_DYLD)) return True;
Index: include/pub_tool_debuginfo.h
===================================================================
--- include/pub_tool_debuginfo.h.orig
+++ include/pub_tool_debuginfo.h
@@ -219,6 +219,7 @@ void VG_(DebugInfo_syms_getidx) ( const
Int idx,
/*OUT*/Addr* avma,
/*OUT*/Addr* tocptr,
+ /*OUT*/Addr* local_ep,
/*OUT*/UInt* size,
/*OUT*/HChar** pri_name,
/*OUT*/HChar*** sec_names,
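
The new local_ep out-parameter exists because an ELF v2 symbol
carries its local entry point as an offset encoded in the top three
bits of st_other (glibc's elf.h wraps the decoding as
PPC64_LOCAL_ENTRY_OFFSET). A sketch of that decoding:

   #include <stdint.h>

   static uint64_t local_entry(uint64_t global_avma,
                               unsigned char st_other)
   {
      unsigned enc = (st_other & 0xe0) >> 5;    /* 3-bit field        */
      unsigned off = ((1u << enc) >> 2) << 2;   /* 0,0,4,8,16,32,...  */
      return global_avma + off;                 /* usually +8 (enc==2) */
   }
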
Index: include/valgrind.h
===================================================================
--- include/valgrind.h.orig
+++ include/valgrind.h
@@ -116,6 +116,7 @@
#undef PLAT_amd64_linux
#undef PLAT_ppc32_linux
#undef PLAT_ppc64be_linux
+#undef PLAT_ppc64le_linux
#undef PLAT_arm_linux
#undef PLAT_arm64_linux
#undef PLAT_s390x_linux
@@ -141,6 +142,9 @@
#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF != 2
/* Big Endian uses ELF version 1 */
# define PLAT_ppc64be_linux 1
+#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF == 2
+/* Little Endian uses ELF version 2 */
+# define PLAT_ppc64le_linux 1
#elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
# define PLAT_arm_linux 1
#elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__)
@@ -588,6 +592,82 @@ typedef
#endif /* PLAT_ppc64be_linux */
+#if defined(PLAT_ppc64le_linux)
+
+typedef
+ struct {
+ unsigned long long int nraddr; /* where's the code? */
+ unsigned long long int r2; /* what tocptr do we need? */
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
+ "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \
+ _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ __extension__ \
+ ({ unsigned long long int _zzq_args[6]; \
+ unsigned long long int _zzq_result; \
+ unsigned long long int* _zzq_ptr; \
+ _zzq_args[0] = (unsigned long long int)(_zzq_request); \
+ _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 3,%1\n\t" /*default*/ \
+ "mr 4,%2\n\t" /*ptr*/ \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" /*result*/ \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_default), "b" (_zzq_ptr) \
+ : "cc", "memory", "r3", "r4"); \
+ _zzq_result; \
+ })
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ unsigned long long int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "cc", "memory", "r3" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "cc", "memory", "r3" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R12 */ \
+ "or 3,3,3\n\t"
+
+#define VALGRIND_VEX_INJECT_IR() \
+ do { \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ "or 5,5,5\n\t" \
+ ); \
+ } while (0)
+
+#endif /* PLAT_ppc64le_linux */
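
The request machinery above is consumed the same way as on the other
platforms; the canonical example is the RUNNING_ON_VALGRIND probe
(a sketch; VG_USERREQ__RUNNING_ON_VALGRIND is the standard request
code defined further down this header):

   /* Returns 0 when run natively, >= 1 when under Valgrind. */
   static unsigned running_on_valgrind(void)
   {
      return (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(
                0 /* default when not on Valgrind */,
                VG_USERREQ__RUNNING_ON_VALGRIND, 0, 0, 0, 0, 0);
   }
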
/* ------------------------- arm-linux ------------------------- */
@@ -3082,6 +3162,562 @@ typedef
#endif /* PLAT_ppc64be_linux */
+/* ------------------------- ppc64le-linux ----------------------- */
+#if defined(PLAT_ppc64le_linux)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* Macros to save and align the stack before making a function
+ call and restore it afterwards as gcc may not keep the stack
+ pointer aligned if it doesn't realise calls are being made
+ to other functions. */
+
+#define VALGRIND_ALIGN_STACK \
+ "mr 28,1\n\t" \
+ "rldicr 1,1,0,59\n\t"
+#define VALGRIND_RESTORE_STACK \
+ "mr 1,28\n\t"
+
+/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
+ long) == 8. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+0]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+1]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+2]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+3]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+4]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+5]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+6]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(12)\n\t" /* arg6->r8 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+7]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(12)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(12)\n\t" /* arg7->r9 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+8]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(12)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(12)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(12)\n\t" /* arg8->r10 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+9]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-128\n\t" /* expand stack frame */ \
+ /* arg9 */ \
+ "ld 3,72(12)\n\t" \
+ "std 3,96(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(12)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(12)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(12)\n\t" /* arg8->r10 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+10]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-128\n\t" /* expand stack frame */ \
+ /* arg10 */ \
+ "ld 3,80(12)\n\t" \
+ "std 3,104(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(12)\n\t" \
+ "std 3,96(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(12)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(12)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(12)\n\t" /* arg8->r10 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+11]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-144\n\t" /* expand stack frame */ \
+ /* arg11 */ \
+ "ld 3,88(12)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* arg10 */ \
+ "ld 3,80(12)\n\t" \
+ "std 3,104(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(12)\n\t" \
+ "std 3,96(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(12)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(12)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(12)\n\t" /* arg8->r10 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+12]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ _argvec[2+12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ VALGRIND_ALIGN_STACK \
+ "mr 12,%1\n\t" \
+ "std 2,-16(12)\n\t" /* save tocptr */ \
+ "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-144\n\t" /* expand stack frame */ \
+ /* arg12 */ \
+ "ld 3,96(12)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg11 */ \
+ "ld 3,88(12)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* arg10 */ \
+ "ld 3,80(12)\n\t" \
+ "std 3,104(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(12)\n\t" \
+ "std 3,96(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(12)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(12)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(12)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(12)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(12)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(12)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(12)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(12)\n\t" /* arg8->r10 */ \
+ "ld 12, 0(12)\n\t" /* target->r12 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \
+ "mr 12,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(12)\n\t" /* restore tocptr */ \
+ VALGRIND_RESTORE_STACK \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_ppc64le_linux */
+
/* ------------------------- arm-linux ------------------------- */
#if defined(PLAT_arm_linux)
@@ -5630,6 +6266,7 @@ VALGRIND_PRINTF_BACKTRACE(const char *fo
#undef PLAT_amd64_linux
#undef PLAT_ppc32_linux
#undef PLAT_ppc64be_linux
+#undef PLAT_ppc64le_linux
#undef PLAT_arm_linux
#undef PLAT_s390x_linux
#undef PLAT_mips32_linux
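
The CALL_FN_W_* macros added above are the ppc64le back end for Valgrind's
function-wrapping machinery: each one stashes the caller's TOC pointer (r2)
in _argvec[0], installs the wrapped function's own TOC from _argvec[1],
loads the target address into r12 as the ELFv2 global-entry convention
requires, and branches via VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12. A
minimal usage sketch, using the documented wrapping API from valgrind.h and
a purely hypothetical function foo:

   #include <valgrind.h>

   /* Wrap foo() from an object with an empty soname ("NONE").
      VALGRIND_GET_ORIG_FN fills an OrigFn with the real function's
      nraddr and r2 (TOC) -- exactly the two words the macros above
      place in _argvec[2] and _argvec[1]. */
   long I_WRAP_SONAME_FN_ZU(NONE, foo)(long a, long b, long c)
   {
      long   result;
      OrigFn fn;
      VALGRIND_GET_ORIG_FN(fn);
      CALL_FN_W_WWW(result, fn, a, b, c);  /* expands to the asm above */
      return result;
   }

For more than eight arguments the registers r3-r10 run out, which is why
CALL_FN_W_9W and up first grow the stack (addi 1,1,-128 or -144): under
ELFv2 the parameter save area begins 32 bytes above the stack pointer and
the slots for args 1-8 cover the next 64 bytes, so arg9 is spilled to
32 + 64 = 96(1), arg10 to 104(1), and so on, matching the std offsets in
the asm.
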
Index: include/pub_tool_redir.h
===================================================================
--- include/pub_tool_redir.h.orig
+++ include/pub_tool_redir.h
@@ -284,6 +284,7 @@
#define VG_Z_LD64_SO_1 ld64ZdsoZd1 // ld64.so.1
#define VG_U_LD64_SO_1 "ld64.so.1"
+#define VG_U_LD64_SO_2 "ld64.so.2" // PPC64 LE loader
#define VG_Z_LD_SO_1 ldZdsoZd1 // ld.so.1
#define VG_U_LD_SO_1 "ld.so.1"
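
For reference, the VG_Z_* names use the Z-encoding defined earlier in
pub_tool_redir.h, which escapes characters that cannot appear in C
identifiers (Zd for '.', Zu for '_', ZZ for a literal 'Z'); only the
unencoded VG_U_ string is needed for the new ld64.so.2 LE loader. A
standalone sketch of that decoding, covering just the escapes used here:

   #include <stdio.h>

   /* Illustrative decoder for the Zd/Zu/ZZ escapes used by the
      VG_Z_* names (a sketch only; the full escape set -- Za, Zh,
      Zs, ZA, ... -- lives in pub_tool_redir.h). */
   static void z_decode(const char *z, char *out)
   {
      while (*z) {
         if (*z == 'Z' && z[1]) {
            switch (*++z) {
               case 'd': *out++ = '.'; break;
               case 'u': *out++ = '_'; break;
               case 'Z': *out++ = 'Z'; break;
               default:  *out++ = '?'; break; /* other escapes elided */
            }
            z++;
         } else {
            *out++ = *z++;
         }
      }
      *out = '\0';
   }

   int main(void)
   {
      char buf[32];
      z_decode("ld64ZdsoZd1", buf);
      printf("%s\n", buf);   /* prints: ld64.so.1 */
      return 0;
   }
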
Index: include/vki/vki-ppc64-linux.h
===================================================================
--- include/vki/vki-ppc64-linux.h.orig
+++ include/vki/vki-ppc64-linux.h
@@ -31,9 +31,11 @@
#ifndef __VKI_PPC64_LINUX_H
#define __VKI_PPC64_LINUX_H
-// ppc64 is big-endian.
+#if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)
#define VKI_BIG_ENDIAN 1
-
+#elif defined(VGP_ppc64le_linux)
+#define VKI_LITTLE_ENDIAN 1
+#endif
//----------------------------------------------------------------------
// From linux-2.6.13/include/asm-ppc64/types.h
//----------------------------------------------------------------------
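
Finally, the vki-ppc64-linux.h hunk replaces the hard-coded big-endian
assumption with a compile-time choice keyed off the VGP_ platform macros,
so one header now serves both byte orders. A standalone C sketch (not
Valgrind code) of the runtime property the selected macro asserts:

   #include <stdio.h>

   /* Inspect the first byte of a known 32-bit value: on a
      little-endian target such as ppc64le the low-order byte
      comes first, matching what VKI_LITTLE_ENDIAN claims. */
   int main(void)
   {
      unsigned int   probe = 0x01020304;
      unsigned char *first = (unsigned char *)&probe;
      printf("%s-endian\n", *first == 0x04 ? "little" : "big");
      return 0;
   }
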