texlive/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch

3522 lines
107 KiB
Diff

From: "Rodrigo R. Galvao" <rosattig@br.ibm.com>
Date: Wed, 11 Oct 2017 08:41:47 +0000
Subject: New patch proposal for PPC64 support
Create a patch for PPC64 support based on
https://github.com/LuaJIT/LuaJIT/pull/140.
It replaces the old patch, since this new one is more likely to be merged
into upstream LuaJIT.
Author: Rodrigo R. Galvao <rosattig@br.ibm.com>
---
dynasm/dasm_ppc.lua | 5 +
src/Makefile | 11 +-
src/host/buildvm_asm.c | 16 +-
src/lj_arch.h | 18 +-
src/lj_ccall.c | 166 ++++++-
src/lj_ccall.h | 13 +
src/lj_ccallback.c | 68 ++-
src/lj_ctype.h | 2 +-
src/lj_def.h | 4 +
src/lj_frame.h | 9 +
src/lj_target_ppc.h | 14 +
src/vm_ppc.dasc | 1290 ++++++++++++++++++++++++++++++++----------------
12 files changed, 1162 insertions(+), 454 deletions(-)
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index f73974d..a4ad70b 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -257,9 +257,11 @@ map_op = {
addic_3 = "30000000RRI",
["addic._3"] = "34000000RRI",
addi_3 = "38000000RR0I",
+ addil_3 = "38000000RR0J",
li_2 = "38000000RI",
la_2 = "38000000RD",
addis_3 = "3c000000RR0I",
+ addisl_3 = "3c000000RR0J",
lis_2 = "3c000000RI",
lus_2 = "3c000000RU",
bc_3 = "40000000AAK",
@@ -842,6 +844,9 @@ map_op = {
srdi_3 = op_alias("rldicl_4", function(p)
p[4] = p[3]; p[3] = "64-("..p[3]..")"
end),
+ ["srdi._3"] = op_alias("rldicl._4", function(p)
+ p[4] = p[3]; p[3] = "64-("..p[3]..")"
+ end),
clrldi_3 = op_alias("rldicl_4", function(p)
p[4] = p[3]; p[3] = "0"
end),
diff --git a/src/Makefile b/src/Makefile
index 6b73a89..cc50bae 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -453,7 +453,16 @@ ifeq (ppc,$(TARGET_LJARCH))
DASM_AFLAGS+= -D GPR64
endif
ifeq (PS3,$(TARGET_SYS))
- DASM_AFLAGS+= -D PPE -D TOC
+ DASM_AFLAGS+= -D PPE
+ endif
+ ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D OPD
+ endif
+ ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D OPDENV
+ endif
+ ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D ELFV2
endif
ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
DASM_ARCH= ppc64
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index ffd1490..6bb995e 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -140,18 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
#else
#define TOCPREFIX ""
#endif
- if ((ins >> 26) == 16) {
+ if ((ins >> 26) == 14) {
+ fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+ } else if ((ins >> 26) == 15) {
+ fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+ } else if ((ins >> 26) == 16) {
fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
(ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
} else if ((ins >> 26) == 18) {
-#if LJ_ARCH_PPC64
- const char *suffix = strchr(sym, '@');
- if (suffix && suffix[1] == 'h') {
- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
- } else if (suffix && suffix[1] == 'l') {
- fprintf(ctx->fp, "\tld 12, %s\n", sym);
- } else
-#endif
fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
} else {
fprintf(stderr,
@@ -250,7 +246,7 @@ void emit_asm(BuildCtx *ctx)
int i, rel;
fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
-#if LJ_ARCH_PPC64
+#if LJ_ARCH_PPC_ELFV2
fprintf(ctx->fp, "\t.abiversion 2\n");
#endif
fprintf(ctx->fp, "\t.text\n");
diff --git a/src/lj_arch.h b/src/lj_arch.h
index d609b37..53bc651 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -269,10 +269,18 @@
#if LJ_TARGET_CONSOLE
#define LJ_ARCH_PPC32ON64 1
#define LJ_ARCH_NOFFI 1
+#if LJ_TARGET_PS3
+#define LJ_ARCH_PPC_OPD 1
+#endif
#elif LJ_ARCH_BITS == 64
-#define LJ_ARCH_PPC64 1
-#define LJ_TARGET_GC64 1
+#define LJ_ARCH_PPC32ON64 1
#define LJ_ARCH_NOJIT 1 /* NYI */
+#if _CALL_ELF == 2
+#define LJ_ARCH_PPC_ELFV2 1
+#else
+#define LJ_ARCH_PPC_OPD 1
+#define LJ_ARCH_PPC_OPDENV 1
+#endif
#endif
#if _ARCH_PWR7
@@ -423,12 +431,6 @@
#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
#error "No support for PowerPC CPUs without double-precision FPU"
#endif
-#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
-#error "No support for little-endian PPC32"
-#endif
-#if LJ_ARCH_PPC64
-#error "No support for PowerPC 64 bit mode (yet)"
-#endif
#ifdef __NO_FPRS__
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 5c252e5..b891591 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -369,21 +369,97 @@
#elif LJ_TARGET_PPC
/* -- PPC calling conventions --------------------------------------------- */
+#if LJ_ARCH_BITS == 64
+
+#if LJ_ARCH_PPC_ELFV2
+
+#define CCALL_HANDLE_STRUCTRET \
+ if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \
+ cc->retref = 1; /* Return by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp; \
+ }
+
+#define CCALL_HANDLE_STRUCTRET2 \
+ int isfp = ccall_classify_fp(cts, ctr); \
+ int i; \
+ if (isfp == FTYPE_FLOAT) { \
+ for (i = 0; i < ctr->size / 4; i++) \
+ ((float *)dp)[i] = cc->fpr[i]; \
+ } else if (isfp == FTYPE_DOUBLE) { \
+ for (i = 0; i < ctr->size / 8; i++) \
+ ((double *)dp)[i] = cc->fpr[i]; \
+ } else { \
+ if (ctr->size < 8 && LJ_BE) { \
+ sp += 8 - ctr->size; \
+ } \
+ memcpy(dp, sp, ctr->size); \
+ }
+
+#else
+
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
+#endif
+
#define CCALL_HANDLE_COMPLEXRET \
/* Complex values are returned in 2 or 4 GPRs. */ \
cc->retref = 0;
+#define CCALL_HANDLE_STRUCTARG
+
#define CCALL_HANDLE_COMPLEXRET2 \
- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
+ ((float *)dp)[0] = cc->fpr[0]; \
+ ((float *)dp)[1] = cc->fpr[1]; \
+ } else { /* Copy complex double from FPRs. */ \
+ ((double *)dp)[0] = cc->fpr[0]; \
+ ((double *)dp)[1] = cc->fpr[1]; \
+ }
+
+#define CCALL_HANDLE_COMPLEXARG \
+ isfp = 1; \
+ if (d->size == sizeof(float) * 2) { \
+ d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \
+ isf32 = 1; \
+ }
+
+#define CCALL_HANDLE_REGARG \
+ if (isfp && d->size == sizeof(float)) { \
+ d = ctype_get(cts, CTID_DOUBLE); \
+ isf32 = 1; \
+ } \
+ if (ngpr < maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ ngpr += n; \
+ if (ngpr > maxgpr) { \
+ nsp += ngpr - 8; \
+ ngpr = 8; \
+ if (nsp > CCALL_MAXSTACK) { \
+ goto err_nyi; \
+ } \
+ } \
+ goto done; \
+ }
+
+#else
+
+#define CCALL_HANDLE_STRUCTRET \
+ cc->retref = 1; /* Return all structs by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET \
+ /* Complex values are returned in 2 or 4 GPRs. */ \
+ cc->retref = 0;
#define CCALL_HANDLE_STRUCTARG \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; /* Pass all structs by reference. */
+#define CCALL_HANDLE_COMPLEXRET2 \
+ memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
+
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
@@ -410,6 +486,8 @@
} \
}
+#endif
+
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
@@ -801,6 +879,50 @@ noth: /* Not a homogeneous float/double aggregate. */
#endif
+/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */
+
+#if LJ_ARCH_PPC_ELFV2
+
+#define FTYPE_FLOAT 1
+#define FTYPE_DOUBLE 2
+
+static int ccall_classify_fp(CTState *cts, CType *ct) { /* Returns -1: no FP member seen, 0: not a homogeneous FP aggregate, else FTYPE_*. Signed so "<= 0" tests catch -1. */
+ if (ctype_isfp(ct->info)) { /* Scalar: classify by size. */
+ if (ct->size == sizeof(float))
+ return FTYPE_FLOAT;
+ else
+ return FTYPE_DOUBLE;
+ } else if (ctype_iscomplex(ct->info)) { /* Complex: two floats vs. anything wider. */
+ if (ct->size == sizeof(float) * 2)
+ return FTYPE_FLOAT;
+ else
+ return FTYPE_DOUBLE;
+ } else if (ctype_isstruct(ct->info)) {
+ int res = -1; /* Class shared by the fields seen so far; -1 = none yet. */
+ int sz = ct->size; /* Saved here because ct is reused to walk the sibling chain. */
+ while (ct->sib) {
+ ct = ctype_get(cts, ct->sib);
+ if (ctype_isfield(ct->info)) {
+ int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct));
+ if (res == -1)
+ res = sub;
+ if (sub != -1 && sub != res)
+ return 0; /* Fields disagree: not homogeneous. */
+ } else if (ctype_isbitfield(ct->info) ||
+ ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+ return 0; /* Bitfield or CTA_SUBTYPE attribute entry: classify as 0. */
+ }
+ }
+ if (res > 0 && sz > res * 4 * 8)
+ return 0; /* Larger than 8 elements of res*4 bytes each. */
+ return res;
+ } else {
+ return 0;
+ }
+}
+
+#endif
+
/* -- MIPS64 ABI struct classification ---------------------------- */
#if LJ_TARGET_MIPS64
@@ -974,6 +1096,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CTSize sz;
MSize n, isfp = 0, isva = 0;
void *dp, *rp = NULL;
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ int isf32 = 0;
+#endif
if (fid) { /* Get argument type from field. */
CType *ctf = ctype_get(cts, fid);
@@ -1030,7 +1155,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(void **)dp = rp;
dp = rp;
}
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE
+ if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) {
+ dp = (char *)dp + (CTSIZE_PTR - sz);
+ }
+#endif
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ if (isfp) {
+ int i;
+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
+ cc->fpr[nfpr++] = ((double *)dp)[i];
+ }
+ if (isf32) {
+ int i;
+ for (i = 0; i < d->size / 8; i++)
+ ((float *)dp)[i*2] = ((double *)dp)[i];
+ }
+#endif
+#if LJ_ARCH_PPC_ELFV2
+ if (ctype_isstruct(d->info)) {
+ isfp = ccall_classify_fp(cts, d);
+ int i;
+ if (isfp == FTYPE_FLOAT) {
+ for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++)
+ cc->fpr[nfpr++] = ((float *)dp)[i];
+ } else if (isfp == FTYPE_DOUBLE) {
+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
+ cc->fpr[nfpr++] = ((double *)dp)[i];
+ }
+ }
+#endif
/* Extend passed integers to 32 bits at least. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
if (d->info & CTF_UNSIGNED)
@@ -1044,6 +1199,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info))
+ && d->size <= 4) {
+ if (d->info & CTF_UNSIGNED)
+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+ else
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 59f6648..bbf309f 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -86,10 +86,23 @@ typedef union FPRArg {
#elif LJ_TARGET_PPC
#define CCALL_NARG_GPR 8
+#if LJ_ARCH_BITS == 64
+#define CCALL_NARG_FPR 13
+#if LJ_ARCH_PPC_ELFV2
+#define CCALL_NRET_GPR 2
+#define CCALL_NRET_FPR 8
+#define CCALL_SPS_EXTRA 14
+#else
+#define CCALL_NRET_GPR 1
+#define CCALL_NRET_FPR 2
+#define CCALL_SPS_EXTRA 16
+#endif
+#else
#define CCALL_NARG_FPR 8
#define CCALL_NRET_GPR 4 /* For complex double. */
#define CCALL_NRET_FPR 1
#define CCALL_SPS_EXTRA 4
+#endif
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 846827b..eb7f445 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -61,8 +61,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#elif LJ_TARGET_PPC
+#if LJ_ARCH_PPC_OPD
+
+#define CALLBACK_SLOT2OFS(slot) (24*(slot))
+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24)
+#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+
+#elif LJ_ARCH_PPC_ELFV2
+
+#define CALLBACK_SLOT2OFS(slot) (4*(slot))
+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4)
+#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10)
+
+#else
+
#define CALLBACK_MCODE_HEAD 24
+#endif
+
#elif LJ_TARGET_MIPS32
#define CALLBACK_MCODE_HEAD 20
@@ -188,24 +204,59 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
lua_assert(p - page <= CALLBACK_MCODE_SIZE);
}
#elif LJ_TARGET_PPC
+#if LJ_ARCH_PPC_OPD
+register void *vm_toc __asm__("r2"); /* Global register variable bound to r2; reading it yields the current r2 value. */
+static void callback_mcode_init(global_State *g, uint64_t *page) /* Fills page with one 3-doubleword (24 byte) entry per callback slot. */
+{
+ uint64_t *p = page;
+ void *target = (void *)lj_vm_ffi_callback;
+ MSize slot;
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+ *p++ = (uint64_t)target; /* Word 0: value of lj_vm_ffi_callback. */
+ *p++ = (uint64_t)vm_toc; /* Word 1: current r2 (TOCREG in vm_ppc.dasc). */
+ *p++ = (uint64_t)g | ((uint64_t)slot << 47); /* Word 2: g with the slot number OR-ed in from bit 47 upwards. */
+ }
+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 8); /* p counts 8-byte words; presumably CALLBACK_MCODE_SIZE is in bytes -- confirm. */
+}
+#else
static void callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
MSize slot;
+#if LJ_ARCH_PPC_ELFV2
+ // Needs to be in sync with lj_vm_ffi_callback.
+ lua_assert(CALLBACK_MCODE_SIZE == 4096);
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+ *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2);
+ p++;
+ }
+ *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff);
+ *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff);
+ *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
+ *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
+ *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff);
+ *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff);
+ *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff);
+ *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff);
+ *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1);
+ *p++ = PPCI_BCTR;
+#else
*p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
- *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16);
+ *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16);
*p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff);
- *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff);
+ *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff);
*p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
*p++ = PPCI_BCTR;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
- *p++ = PPCI_LI | PPCF_T(RID_R11) | slot;
+ *p++ = PPCI_LI | PPCF_T(RID_R12) | slot;
*p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
p++;
}
- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+#endif
+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 4);
}
+#endif
#elif LJ_TARGET_MIPS
static void callback_mcode_init(global_State *g, uint32_t *page)
{
@@ -641,6 +692,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ if (ctr->size <= 4 &&
+ (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) {
+ if (ctr->info & CTF_UNSIGNED)
+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+ else
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 0c220a8..105865b 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -153,7 +153,7 @@ typedef struct CType {
/* Simplify target-specific configuration. Checked in lj_ccall.h. */
#define CCALL_MAX_GPR 8
-#define CCALL_MAX_FPR 8
+#define CCALL_MAX_FPR 14
typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;
diff --git a/src/lj_def.h b/src/lj_def.h
index 2d8fff6..381d6f5 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -71,7 +71,11 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
+#if defined(__powerpc64__) && _CALL_ELF != 2
+#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */
+#else
#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
+#endif
/* Minimum table/buffer sizes. */
#define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 19c49a4..c666418 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_MULTRES 408
#define CFRAME_SIZE 384
#define CFRAME_SHIFT_MULTRES 3
+#elif LJ_ARCH_PPC_ELFV2
+#define CFRAME_OFS_ERRF 360
+#define CFRAME_OFS_NRES 356
+#define CFRAME_OFS_PREV 336
+#define CFRAME_OFS_L 352
+#define CFRAME_OFS_PC 348
+#define CFRAME_OFS_MULTRES 344
+#define CFRAME_SIZE 368
+#define CFRAME_SHIFT_MULTRES 3
#elif LJ_ARCH_PPC32ON64
#define CFRAME_OFS_ERRF 472
#define CFRAME_OFS_NRES 468
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index c5c991a..f0c8c94 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -30,8 +30,13 @@ enum {
/* Calling conventions. */
RID_RET = RID_R3,
+#if LJ_LE
+ RID_RETHI = RID_R4,
+ RID_RETLO = RID_R3,
+#else
RID_RETHI = RID_R3,
RID_RETLO = RID_R4,
+#endif
RID_FPRET = RID_F1,
/* These definitions must match with the *.dasc file(s): */
@@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
#define PPCF_C(r) ((r) << 6)
#define PPCF_MB(n) ((n) << 6)
#define PPCF_ME(n) ((n) << 1)
+#define PPCF_SH(n) ((((n) & 31) << 11) | (((n) & 32) >> 4)) /* MD-form 6 bit shift: low 5 bits at bit 11, bit 5 at bit 1. */
+#define PPCF_M6(n) ((((n) & 31) << 6) | ((n) & 32)) /* MD-form 6 bit mb/me: low 5 bits at bit 6, bit 5 stays at bit 5. */
#define PPCF_Y 0x00200000
#define PPCF_DOT 0x00000001
@@ -200,6 +207,13 @@ typedef enum PPCIns {
PPCI_RLWINM = 0x54000000,
PPCI_RLWIMI = 0x50000000,
+ PPCI_RLDICL = 0x78000000,
+ PPCI_RLDICR = 0x78000004,
+ PPCI_RLDIC = 0x78000008,
+ PPCI_RLDIMI = 0x7800000c,
+ PPCI_RLDCL = 0x78000010,
+ PPCI_RLDCR = 0x78000012,
+
PPCI_B = 0x48000000,
PPCI_BL = 0x48000001,
PPCI_BC = 0x40800000,
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index b4260eb..abb381e 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -22,35 +22,40 @@
|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
-|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3).
+|// OPD Need function descriptors (64 bit or 32 bit variant, e.g. PS3).
|// Function pointers are really a struct: code, TOC, env (optional).
-|// TOCENV Function pointers have an environment pointer, too (not on PS3).
+|// OPDENV Function pointers have an environment pointer, too (not on PS3).
+|// ELFV2 The 64-bit ELF V2 ABI is in use.
|// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
|// Must avoid (slow) micro-coded instructions.
|
|.if P64
-|.define TOC, 1
-|.define TOCENV, 1
|.macro lpx, a, b, c; ldx a, b, c; .endmacro
|.macro lp, a, b; ld a, b; .endmacro
|.macro stp, a, b; std a, b; .endmacro
+|.macro stpx, a, b, c; stdx a, b, c; .endmacro
|.define decode_OPP, decode_OP8
-|.if FFI
-|// Missing: Calling conventions, 64 bit regs, TOC.
-|.error lib_ffi not yet implemented for PPC64
-|.endif
+|.define PSIZE, 8
|.else
|.macro lpx, a, b, c; lwzx a, b, c; .endmacro
|.macro lp, a, b; lwz a, b; .endmacro
|.macro stp, a, b; stw a, b; .endmacro
+|.macro stpx, a, b, c; stwx a, b, c; .endmacro
|.define decode_OPP, decode_OP4
+|.define PSIZE, 4
|.endif
|
|// Convenience macros for TOC handling.
-|.if TOC
+|.if OPD or ELFV2
|// Linker needs a TOC patch area for every external call relocation.
-|.macro blex, target; bl extern target@plt; nop; .endmacro
+|.macro blex, target; bl extern target; nop; .endmacro
|.macro .toc, a, b; a, b; .endmacro
+|.else
+|.macro blex, target; bl extern target@plt; .endmacro
+|.macro .toc, a, b; .endmacro
+|.endif
+|.if OPD
+|.macro .opd, a, b; a, b; .endmacro
|.if P64
|.define TOC_OFS, 8
|.define ENV_OFS, 16
@@ -58,13 +63,13 @@
|.define TOC_OFS, 4
|.define ENV_OFS, 8
|.endif
-|.else // No TOC.
-|.macro blex, target; bl extern target@plt; .endmacro
-|.macro .toc, a, b; .endmacro
+|.else // No OPD.
+|.macro .opd, a, b; .endmacro
|.endif
-|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro
+|.macro .opdenv, a, b; .if OPDENV; a, b; .endif; .endmacro
|
|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro
+|.macro .elfv2, a, b; .if ELFV2; a, b; .endif; .endmacro
|
|.macro andix., y, a, i
|.if PPE
@@ -75,29 +80,6 @@
|.endif
|.endmacro
|
-|.macro clrso, reg
-|.if PPE
-| li reg, 0
-| mtxer reg
-|.else
-| mcrxr cr0
-|.endif
-|.endmacro
-|
-|.macro checkov, reg, noov
-|.if PPE
-| mfxer reg
-| add reg, reg, reg
-| cmpwi reg, 0
-| li reg, 0
-| mtxer reg
-| bgey noov
-|.else
-| mcrxr cr0
-| bley noov
-|.endif
-|.endmacro
-|
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter.
@@ -111,6 +93,7 @@
|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
|.define JGL, r31 // On-trace: global_State + 32768.
+|.define BASEP4, r25 // Equal to BASE + 4
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
|.define TISNUM, r22
@@ -143,12 +126,19 @@
|
|.define FARG1, f1
|.define FARG2, f2
+|.define FARG3, f3
+|.define FARG4, f4
+|.define FARG5, f5
+|.define FARG6, f6
+|.define FARG7, f7
+|.define FARG8, f8
|
|.define CRET1, r3
|.define CRET2, r4
|
|.define TOCREG, r2 // TOC register (only used by C code).
|.define ENVREG, r11 // Environment pointer (nested C functions).
+|.define FUNCREG, r12 // ELFv2 function pointer (overlaps RD)
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|.if GPR64
@@ -182,6 +172,49 @@
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
|
+|.elif ELFV2
+|
+|// 392(sp) // \ 32 bit C frame info.
+|.define SAVE_LR, 384(sp)
+|.define SAVE_CR, 376(sp) // 64 bit CR save.
+|.define CFRAME_SPACE, 368 // Delta for sp.
+|// Back chain for sp: 368(sp) <-- sp entering interpreter
+|.define SAVE_ERRF, 360(sp) // |
+|.define SAVE_NRES, 356(sp) // |
+|.define SAVE_L, 352(sp) // > Parameter save area.
+|.define SAVE_PC, 348(sp) // |
+|.define SAVE_MULTRES, 344(sp) // |
+|.define SAVE_CFRAME, 336(sp) // / 64 bit C frame chain.
+|.define SAVE_FPR_, 192 // .. 192+18*8: 64 bit FPR saves.
+|.define SAVE_GPR_, 48 // .. 48+18*8: 64 bit GPR saves.
+|.if ENDIAN_LE
+|.define TMPD_HI, 44(sp)
+|.define TMPD_LO, 40(sp)
+|.define TONUM_HI, 36(sp)
+|.define TONUM_LO, 32(sp)
+|.else
+|.define TMPD_LO, 44(sp)
+|.define TMPD_HI, 40(sp)
+|.define TONUM_LO, 36(sp)
+|.define TONUM_HI, 32(sp)
+|.endif
+|.define SAVE_TOC, 24(sp) // TOC save area.
+|// Next frame lr: 16(sp)
+|// Next frame cr: 8(sp)
+|// Back chain for sp: 0(sp) <-- sp while in interpreter
+|
+|.if ENDIAN_LE
+|.define TMPD_BLO, 40(sp) // Lowest-order byte of TMPD_LO (40(sp) in this layout).
+|.define TMPD, TMPD_LO
+|.define TONUM_D, TONUM_LO
+|.else
+|.define TMPD_BLO, 47(sp) // Lowest-order byte of TMPD_LO (44(sp) in this layout).
+|.define TMPD, TMPD_HI
+|.define TONUM_D, TONUM_HI
+|.endif
+|
+|.define EXIT_OFFSET, 32
+|
|.else
|
|// 508(sp) // \ 32 bit C frame info.
@@ -192,23 +225,39 @@
|.define SAVE_MULTRES, 456(sp) // |
|.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain.
|.define SAVE_LR, 416(sp)
+|.define SAVE_CR, 408(sp) // 64 bit CR save.
|.define CFRAME_SPACE, 400 // Delta for sp.
|// Back chain for sp: 400(sp) <-- sp entering interpreter
|.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves.
|.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves.
|// 48(sp) // Callee parameter save area (ABI mandated).
|.define SAVE_TOC, 40(sp) // TOC save area.
+|.if ENDIAN_LE
+|.define TMPD_HI, 36(sp) // \ Link editor temp (ABI mandated).
+|.define TMPD_LO, 32(sp) // /
+|.define TONUM_HI, 28(sp) // \ Compiler temp (ABI mandated).
+|.define TONUM_LO, 24(sp) // /
+|.else
|.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated).
|.define TMPD_HI, 32(sp) // /
|.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated).
|.define TONUM_HI, 24(sp) // /
+|.endif
|// Next frame lr: 16(sp)
-|.define SAVE_CR, 8(sp) // 64 bit CR save.
+|// Next frame cr: 8(sp)
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
+|.if ENDIAN_LE
+|.define TMPD_BLO, 32(sp)
+|.define TMPD, TMPD_LO
+|.define TONUM_D, TONUM_LO
+|.else
|.define TMPD_BLO, 39(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
+|.endif
+|
+|.define EXIT_OFFSET, 112
|
|.endif
|.else
@@ -226,16 +275,31 @@
|.define SAVE_PC, 32(sp)
|.define SAVE_MULTRES, 28(sp)
|.define UNUSED1, 24(sp)
+|.if ENDIAN_LE
+|.define TMPD_HI, 20(sp)
+|.define TMPD_LO, 16(sp)
+|.define TONUM_HI, 12(sp)
+|.define TONUM_LO, 8(sp)
+|.else
|.define TMPD_LO, 20(sp)
|.define TMPD_HI, 16(sp)
|.define TONUM_LO, 12(sp)
|.define TONUM_HI, 8(sp)
+|.endif
|// Next frame lr: 4(sp)
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
+|.if ENDIAN_LE
+|.define TMPD_BLO, 16(sp)
+|.define TMPD, TMPD_LO
+|.define TONUM_D, TONUM_LO
+|.else
|.define TMPD_BLO, 23(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
+|.endif
+|
+|.define EXIT_OFFSET, 16
|
|.endif
|
@@ -350,8 +414,35 @@
|//-----------------------------------------------------------------------
|
|// Access to frame relative to BASE.
+|.if ENDIAN_LE
+|.define FRAME_PC, -4
+|.define FRAME_FUNC, -8
+|.define FRAME_CONTPC, -12
+|.define FRAME_CONTRET, -16
+|.define WORD_LO, 0
+|.define WORD_HI, 4
+|.define WORD_BLO, 0
+|.define BASE_LO, BASE
+|.define BASE_HI, BASEP4
+|.macro lwzux2, hi, lo, base, idx
+| lwzux lo, base, idx
+| lwz hi, 4(base)
+|.endmacro
+|.else
|.define FRAME_PC, -8
|.define FRAME_FUNC, -4
+|.define FRAME_CONTPC, -16
+|.define FRAME_CONTRET, -12
+|.define WORD_LO, 4
+|.define WORD_HI, 0
+|.define WORD_BLO, 7
+|.define BASE_LO, BASEP4
+|.define BASE_HI, BASE
+|.macro lwzux2, hi, lo, base, idx
+| lwzux hi, base, idx
+| lwz lo, 4(base)
+|.endmacro
+|.endif
|
|// Instruction decode.
|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
@@ -412,6 +503,7 @@
|// Call decode and dispatch.
|.macro ins_callt
| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+| addi BASEP4, BASE, 4
| lwz PC, LFUNC:RB->pc
| lwz INS, 0(PC)
| addi PC, PC, 4
@@ -504,7 +596,12 @@ static void build_subroutines(BuildCtx *ctx)
| lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
| mr BASE, TMP2 // Restore caller base.
| // Prepending may overwrite the pcall frame, so do it at the end.
- | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
+ | .if ENDIAN_LE
+ | addi RA, RA, -8
+ | stw TMP1, WORD_HI(RA) // Prepend true to results.
+ | .else
+ | stwu TMP1, -8(RA) // Prepend true to results.
+ | .endif
|
|->vm_returnc:
| addi RD, RD, 8 // RD = (nresults+1)*8.
@@ -560,7 +657,7 @@ static void build_subroutines(BuildCtx *ctx)
| lwz TMP1, L->maxstack
| cmplw BASE, TMP1
| bge >8
- | stw TISNIL, 0(BASE)
+ | stw TISNIL, WORD_HI(BASE)
| addi RD, RD, 8
| addi BASE, BASE, 8
| b <2
@@ -611,7 +708,12 @@ static void build_subroutines(BuildCtx *ctx)
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| lwz L, SAVE_L
| .toc ld TOCREG, SAVE_TOC
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp BASE, L->base
| lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lwz DISPATCH, L->glref // Setup pointer to dispatch table.
@@ -626,7 +728,7 @@ static void build_subroutines(BuildCtx *ctx)
| la RA, -8(BASE) // Results start at BASE-8.
| stw TMP3, TMPD
| addi DISPATCH, DISPATCH, GG_G2DISP
- | stw TMP1, 0(RA) // Prepend false to error message.
+ | stw TMP1, WORD_HI(RA) // Prepend false to error message.
| li RD, 16 // 2 results: false + error message.
| st_vmstate
| lfs TONUM, TMPD
@@ -687,7 +789,12 @@ static void build_subroutines(BuildCtx *ctx)
| stw L, DISPATCH_GL(cur_L)(DISPATCH)
| mr RA, BASE
| lp BASE, L->base
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp TMP1, L->top
| lwz PC, FRAME_PC(BASE)
| lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
@@ -737,7 +844,12 @@ static void build_subroutines(BuildCtx *ctx)
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
| stw L, DISPATCH_GL(cur_L)(DISPATCH)
| lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp TMP1, L->top
| lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| add PC, PC, BASE
@@ -757,8 +869,8 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_call_dispatch:
| // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
- | lwz TMP0, FRAME_PC(BASE)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
+ | lwz TMP0, WORD_HI-8(BASE)
+ | lwz LFUNC:RB, WORD_LO-8(BASE)
| checkfunc TMP0; bne ->vmeta_call
|
|->vm_call_dispatch_f:
@@ -777,7 +889,9 @@ static void build_subroutines(BuildCtx *ctx)
| sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
| lp TMP1, L->cframe
| addi DISPATCH, DISPATCH, GG_G2DISP
- | .toc lp CARG4, 0(CARG4)
+ | .opd lp TOCREG, TOC_OFS(CARG4)
+ | .opdenv lp ENVREG, ENV_OFS(CARG4)
+ | .opd lp CARG4, 0(CARG4)
| li TMP2, 0
| stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
| stw TMP2, SAVE_ERRF // No error function.
@@ -785,7 +899,9 @@ static void build_subroutines(BuildCtx *ctx)
| stp sp, L->cframe // Add our C frame to cframe chain.
| stw L, DISPATCH_GL(cur_L)(DISPATCH)
| mtctr CARG4
+ | .elfv2 mr FUNCREG, CARG4
| bctrl // (lua_State *L, lua_CFunction func, void *ud)
+ | .toc lp TOCREG, SAVE_TOC
|.if PPE
| mr BASE, CRET1
| cmpwi CRET1, 0
@@ -807,20 +923,27 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_dispatch:
| // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
- | lwz TMP0, -12(BASE) // Continuation.
+ | lwz TMP0, FRAME_CONTRET(BASE) // Continuation.
| mr RB, BASE
| mr BASE, TMP2 // Restore caller BASE.
| lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
|.if FFI
| cmplwi TMP0, 1
|.endif
- | lwz PC, -16(RB) // Restore PC from [cont|PC].
- | subi TMP2, RD, 8
+ | lwz PC, FRAME_CONTPC(RB) // Restore PC from [cont|PC].
+ | addi BASEP4, BASE, 4
+ | addi TMP2, RD, WORD_HI-8
| lwz TMP1, LFUNC:TMP1->pc
| stwx TISNIL, RA, TMP2 // Ensure one valid arg.
+ |.if P64
+ | ld TMP3, 0(DISPATCH)
+ |.endif
|.if FFI
| ble >1
|.endif
+ |.if P64
+ | add TMP0, TMP0, TMP3
+ |.endif
| lwz KBASE, PC2PROTO(k)(TMP1)
| // BASE = base, RA = resultptr, RB = meta base
| mtctr TMP0
@@ -856,20 +979,20 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TSTR
| decode_RB8 RB, INS
- | stw STR:RC, 4(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
| add CARG2, BASE, RB
- | stw TMP0, 0(CARG3)
+ | stw TMP0, WORD_HI(CARG3)
| b >1
|
|->vmeta_tgets:
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TTAB
- | stw TAB:RB, 4(CARG2)
+ | stw TAB:RB, WORD_LO(CARG2)
| la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | stw TMP0, 0(CARG2)
+ | stw TMP0, WORD_HI(CARG2)
| li TMP1, LJ_TSTR
- | stw STR:RC, 4(CARG3)
- | stw TMP1, 0(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
+ | stw TMP1, WORD_HI(CARG3)
| b >1
|
|->vmeta_tgetb: // TMP0 = index
@@ -880,8 +1003,8 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| add CARG2, BASE, RB
|.if DUALNUM
- | stw TISNUM, 0(CARG3)
- | stw TMP0, 4(CARG3)
+ | stw TISNUM, WORD_HI(CARG3)
+ | stw TMP0, WORD_LO(CARG3)
|.else
| stfd f0, 0(CARG3)
|.endif
@@ -909,7 +1032,7 @@ static void build_subroutines(BuildCtx *ctx)
| // BASE = base, L->top = new base, stack = cont/func/t/k
| subfic TMP1, BASE, FRAME_CONT
| lp BASE, L->top
- | stw PC, -16(BASE) // [cont|PC]
+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
| add PC, TMP1, BASE
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
| li NARGS8:RC, 16 // 2 args for func(t, k).
@@ -923,7 +1046,7 @@ static void build_subroutines(BuildCtx *ctx)
| lfd f14, 0(CRET1)
| b ->BC_TGETR_Z
|1:
- | stwx TISNIL, BASE, RA
+ | stwx TISNIL, BASE_HI, RA
| b ->cont_nop
|
|//-----------------------------------------------------------------------
@@ -932,20 +1055,20 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TSTR
| decode_RB8 RB, INS
- | stw STR:RC, 4(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
| add CARG2, BASE, RB
- | stw TMP0, 0(CARG3)
+ | stw TMP0, WORD_HI(CARG3)
| b >1
|
|->vmeta_tsets:
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TTAB
- | stw TAB:RB, 4(CARG2)
+ | stw TAB:RB, WORD_LO(CARG2)
| la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | stw TMP0, 0(CARG2)
+ | stw TMP0, WORD_HI(CARG2)
| li TMP1, LJ_TSTR
- | stw STR:RC, 4(CARG3)
- | stw TMP1, 0(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
+ | stw TMP1, WORD_HI(CARG3)
| b >1
|
|->vmeta_tsetb: // TMP0 = index
@@ -956,8 +1079,8 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| add CARG2, BASE, RB
|.if DUALNUM
- | stw TISNUM, 0(CARG3)
- | stw TMP0, 4(CARG3)
+ | stw TISNUM, WORD_HI(CARG3)
+ | stw TMP0, WORD_LO(CARG3)
|.else
| stfd f0, 0(CARG3)
|.endif
@@ -986,7 +1109,7 @@ static void build_subroutines(BuildCtx *ctx)
| // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
| subfic TMP1, BASE, FRAME_CONT
| lp BASE, L->top
- | stw PC, -16(BASE) // [cont|PC]
+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
| add PC, TMP1, BASE
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
| li NARGS8:RC, 24 // 3 args for func(t, k, v)
@@ -1006,17 +1129,9 @@ static void build_subroutines(BuildCtx *ctx)
|->vmeta_comp:
| mr CARG1, L
| subi PC, PC, 4
- |.if DUALNUM
- | mr CARG2, RA
- |.else
| add CARG2, BASE, RA
- |.endif
| stw PC, SAVE_PC
- |.if DUALNUM
- | mr CARG3, RD
- |.else
| add CARG3, BASE, RD
- |.endif
| stp BASE, L->base
| decode_OP1 CARG4, INS
| bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
@@ -1043,7 +1158,7 @@ static void build_subroutines(BuildCtx *ctx)
| b ->cont_nop
|
|->cont_condt: // RA = resultptr
- | lwz TMP0, 0(RA)
+ | lwz TMP0, WORD_HI(RA)
| .gpr64 extsw TMP0, TMP0
| subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true.
| subfe CRET1, CRET1, CRET1
@@ -1051,7 +1166,7 @@ static void build_subroutines(BuildCtx *ctx)
| b <4
|
|->cont_condf: // RA = resultptr
- | lwz TMP0, 0(RA)
+ | lwz TMP0, WORD_HI(RA)
| .gpr64 extsw TMP0, TMP0
| subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false.
| subfe CRET1, CRET1, CRET1
@@ -1103,8 +1218,8 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->vmeta_unm:
- | mr CARG3, RD
- | mr CARG4, RD
+ | add CARG3, BASE, RD
+ | add CARG4, BASE, RD
| b >1
|
|->vmeta_arith_vn:
@@ -1139,7 +1254,7 @@ static void build_subroutines(BuildCtx *ctx)
|->vmeta_binop:
| // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
| sub TMP1, CRET1, BASE
- | stw PC, -16(CRET1) // [cont|PC]
+ | stw PC, FRAME_CONTPC(CRET1) // [cont|PC]
| mr TMP2, BASE
| addi PC, TMP1, FRAME_CONT
| mr BASE, CRET1
@@ -1150,7 +1265,7 @@ static void build_subroutines(BuildCtx *ctx)
#if LJ_52
| mr SAVE0, CARG1
#endif
- | mr CARG2, RD
+ | add CARG2, BASE, RD
| stp BASE, L->base
| mr CARG1, L
| stw PC, SAVE_PC
@@ -1227,25 +1342,25 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_1, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lwz CARG1, 4(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz CARG1, WORD_LO(BASE)
| blt ->fff_fallback
|.endmacro
|
|.macro .ffunc_2, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
- | lwz CARG4, 8(BASE)
- | lwz CARG1, 4(BASE)
- | lwz CARG2, 12(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz CARG4, WORD_HI+8(BASE)
+ | lwz CARG1, WORD_LO(BASE)
+ | lwz CARG2, WORD_LO+8(BASE)
| blt ->fff_fallback
|.endmacro
|
|.macro .ffunc_n, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
+ | lwz CARG3, WORD_HI(BASE)
| lfd FARG1, 0(BASE)
| blt ->fff_fallback
| checknum CARG3; bge ->fff_fallback
@@ -1254,9 +1369,9 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_nn, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz CARG3, WORD_HI(BASE)
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ | lwz CARG4, WORD_HI+8(BASE)
| lfd FARG2, 8(BASE)
| blt ->fff_fallback
| checknum CARG3; bge ->fff_fallback
@@ -1279,9 +1394,9 @@ static void build_subroutines(BuildCtx *ctx)
| cmplw cr1, CARG3, TMP1
| lwz PC, FRAME_PC(BASE)
| bge cr1, ->fff_fallback
- | stw CARG3, 0(RA)
+ | stw CARG3, WORD_HI(RA)
| addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
- | stw CARG1, 4(RA)
+ | stw CARG1, WORD_LO(RA)
| beq ->fff_res // Done if exactly 1 argument.
| li TMP1, 8
| subi RC, RC, 8
@@ -1295,17 +1410,36 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc type
| cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
+ | lwz CARG1, WORD_HI(BASE) // Load the WORD_HI half of the first argument slot.
| blt ->fff_fallback
| .gpr64 extsw CARG1, CARG1
+ |.if P64
+ | li TMP0, LJ_TNUMX // P64 subtracts LJ_TNUMX here instead of the TISNUM register.
+ | srawi TMP3, CARG1, 15 // TMP3 = CARG1 >> 15 (arithmetic); compared with -2 below.
+ | subfc TMP1, TMP0, CARG1 // TMP1 = CARG1 - LJ_TNUMX; sets the carry consumed by subfe.
+ |.else
| subfc TMP0, TISNUM, CARG1
- | subfe TMP2, CARG1, CARG1
+ |.endif
+ | subfe TMP2, CARG1, CARG1
+ |.if P64
+ | cmpwi TMP3, -2 // NOTE(review): presumably detects the 64 bit lightuserdata tag range - confirm.
+ | orc TMP1, TMP2, TMP1
+ | subf TMP1, TMP0, TMP1 // TMP1 -= LJ_TNUMX.
+ | beq >1 // TMP3 == -2: take the fixed LJ_TLIGHTUD offset at 1:.
+ |.else
| orc TMP1, TMP2, TMP0
- | addi TMP1, TMP1, ~LJ_TISNUM+1
+ | subf TMP1, TISNUM, TMP1 // TMP1 -= TISNUM (replaces the immediate ~LJ_TISNUM+1).
+ |.endif
| slwi TMP1, TMP1, 3
+ |2:
| la TMP2, CFUNC:RB->upvalue
| lfdx FARG1, TMP2, TMP1
| b ->fff_resn
+ |.if P64
+ |1:
+ | li TMP1, ~LJ_TLIGHTUD<<3 // Already scaled by 8, so rejoin after the slwi at 2:.
+ | b <2
+ |.endif
|
|//-- Base library: getters and setters ---------------------------------
|
@@ -1328,10 +1462,10 @@ static void build_subroutines(BuildCtx *ctx)
| sub TMP1, TMP0, TMP1
| add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|3: // Rearranged logic, because we expect _not_ to find the key.
- | lwz CARG4, NODE:TMP2->key
- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
- | lwz CARG2, NODE:TMP2->val
- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
+ | lwz CARG4, WORD_HI+offsetof(Node, key)(NODE:TMP2)
+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
| checkstr CARG4; bne >4
| cmpw TMP0, STR:RC; beq >5
|4:
@@ -1349,14 +1483,33 @@ static void build_subroutines(BuildCtx *ctx)
|6:
| cmpwi CARG3, LJ_TUDATA; beq <1
| .gpr64 extsw CARG3, CARG3
+ |.if P64
+ | li TMP0, LJ_TNUMX // P64 subtracts LJ_TNUMX here instead of the TISNUM register.
+ | srawi TMP3, CARG3, 15 // TMP3 = CARG3 >> 15 (arithmetic); compared with -2 below.
+ | subfc TMP1, TMP0, CARG3 // TMP1 = CARG3 - LJ_TNUMX; sets the carry consumed by subfe.
+ |.else
| subfc TMP0, TISNUM, CARG3
+ |.endif
| subfe TMP2, CARG3, CARG3
+ |.if P64
+ | cmpwi TMP3, -2 // NOTE(review): presumably detects the 64 bit lightuserdata tag range - confirm.
+ | orc TMP1, TMP2, TMP1
+ | subf TMP1, TMP0, TMP1 // TMP1 -= LJ_TNUMX.
+ | beq >7 // TMP3 == -2: take the fixed LJ_TLIGHTUD offset at 7:.
+ |.else
| orc TMP1, TMP2, TMP0
- | addi TMP1, TMP1, ~LJ_TISNUM+1
+ | subf TMP1, TISNUM, TMP1 // TMP1 -= TISNUM (replaces the immediate ~LJ_TISNUM+1).
+ |.endif
| slwi TMP1, TMP1, 2
+ |8:
| la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
| lwzx TAB:CARG1, TMP2, TMP1
| b <2
+ |.if P64
+ |7:
+ | li TMP1, ~LJ_TLIGHTUD<<2 // Already scaled by 4, so rejoin after the slwi at 8:.
+ | b <8
+ |.endif
|
|.ffunc_2 setmetatable
| // Fast path: no mt for table yet and not clearing the mt.
@@ -1374,8 +1527,8 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc rawget
| cmplwi NARGS8:RC, 16
- | lwz CARG4, 0(BASE)
- | lwz TAB:CARG2, 4(BASE)
+ | lwz CARG4, WORD_HI(BASE)
+ | lwz TAB:CARG2, WORD_LO(BASE)
| blt ->fff_fallback
| checktab CARG4; bne ->fff_fallback
| la CARG3, 8(BASE)
@@ -1390,7 +1543,7 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc tonumber
| // Only handles the number case inline (without a base argument).
| cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
+ | lwz CARG1, WORD_HI(BASE)
| lfd FARG1, 0(BASE)
| bne ->fff_fallback // Exactly one argument.
| checknum CARG1; bgt ->fff_fallback
@@ -1425,10 +1578,15 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc next
| cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
- | lwz TAB:CARG2, 4(BASE)
+ | lwz CARG1, WORD_HI(BASE) // WORD_HI half of arg 1; checked by checktab below.
+ | lwz TAB:CARG2, WORD_LO(BASE) // WORD_LO half of arg 1, used as the table pointer.
| blt ->fff_fallback
+ |.if ENDIAN_LE
+ | add TMP1, BASE, NARGS8:RC // stwx takes no displacement, so form the slot address first.
+ | stw TISNIL, WORD_HI(TMP1) // Set missing 2nd arg to nil.
+ |.else
| stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
+ |.endif
| checktab CARG1
| lwz PC, FRAME_PC(BASE)
| bne ->fff_fallback
@@ -1464,18 +1622,18 @@ static void build_subroutines(BuildCtx *ctx)
| lfd f0, CFUNC:RB->upvalue[0]
| la RA, -8(BASE)
#endif
- | stw TISNIL, 8(BASE)
+ | stw TISNIL, 8+WORD_HI(BASE)
| li RD, (3+1)*8
| stfd f0, 0(RA)
| b ->fff_res
|
|.ffunc ipairs_aux
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
- | lwz TAB:CARG1, 4(BASE)
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz TAB:CARG1, WORD_LO(BASE)
+ | lwz CARG4, 8+WORD_HI(BASE)
|.if DUALNUM
- | lwz TMP2, 12(BASE)
+ | lwz TMP2, 8+WORD_LO(BASE)
|.else
| lfd FARG2, 8(BASE)
|.endif
@@ -1504,16 +1662,16 @@ static void build_subroutines(BuildCtx *ctx)
| la RA, -8(BASE)
| cmplw TMP0, TMP2
|.if DUALNUM
- | stw TISNUM, 0(RA)
+ | stw TISNUM, WORD_HI(RA)
| slwi TMP3, TMP2, 3
- | stw TMP2, 4(RA)
+ | stw TMP2, WORD_LO(RA)
|.else
| slwi TMP3, TMP2, 3
| stfd FARG2, 0(RA)
|.endif
| ble >2 // Not in array part?
- | lwzx TMP2, TMP1, TMP3
- | lfdx f0, TMP1, TMP3
+ | lfdux f0, TMP1, TMP3
+ | lwz TMP2, WORD_HI(TMP1)
|1:
| checknil TMP2
| li RD, (0+1)*8
@@ -1532,7 +1690,7 @@ static void build_subroutines(BuildCtx *ctx)
| cmplwi CRET1, 0
| li RD, (0+1)*8
| beq ->fff_res
- | lwz TMP2, 0(CRET1)
+ | lwz TMP2, WORD_HI(CRET1)
| lfd f0, 0(CRET1)
| b <1
|
@@ -1551,11 +1709,11 @@ static void build_subroutines(BuildCtx *ctx)
| la RA, -8(BASE)
#endif
|.if DUALNUM
- | stw TISNUM, 8(BASE)
+ | stw TISNUM, 8+WORD_HI(BASE)
|.else
- | stw ZERO, 8(BASE)
+ | stw ZERO, 8+WORD_HI(BASE)
|.endif
- | stw ZERO, 12(BASE)
+ | stw ZERO, 8+WORD_LO(BASE)
| li RD, (3+1)*8
| stfd f0, 0(RA)
| b ->fff_res
@@ -1576,7 +1734,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc xpcall
| cmplwi NARGS8:RC, 16
- | lwz CARG4, 8(BASE)
+ | lwz CARG4, 8+WORD_HI(BASE)
| lfd FARG2, 8(BASE)
| lfd FARG1, 0(BASE)
| blt ->fff_fallback
@@ -1673,7 +1831,7 @@ static void build_subroutines(BuildCtx *ctx)
|.if resume
| li TMP1, LJ_TTRUE
| la RA, -8(BASE)
- | stw TMP1, -8(BASE) // Prepend true to results.
+ | stw TMP1, WORD_HI-8(BASE) // Prepend true to results.
| addi RD, RD, 16
|.else
| mr RA, BASE
@@ -1693,7 +1851,7 @@ static void build_subroutines(BuildCtx *ctx)
| lfd f0, 0(TMP3)
| stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
| li RD, (2+1)*8
- | stw TMP1, -8(BASE) // Prepend false to results.
+ | stw TMP1, WORD_HI-8(BASE) // Prepend false to results.
| la RA, -8(BASE)
| stfd f0, 0(BASE) // Copy error message.
| b <7
@@ -1746,8 +1904,8 @@ static void build_subroutines(BuildCtx *ctx)
|->fff_resi:
| lwz PC, FRAME_PC(BASE)
| la RA, -8(BASE)
- | stw TISNUM, -8(BASE)
- | stw CRET1, -4(BASE)
+ | stw TISNUM, WORD_HI-8(BASE)
+ | stw CRET1, WORD_LO-8(BASE)
| b ->fff_res1
|1:
| lus CARG3, 0x41e0 // 2^31.
@@ -1762,9 +1920,9 @@ static void build_subroutines(BuildCtx *ctx)
|->fff_restv:
| // CARG3/CARG1 = TValue result.
| lwz PC, FRAME_PC(BASE)
- | stw CARG3, -8(BASE)
+ | stw CARG3, WORD_HI-8(BASE)
| la RA, -8(BASE)
- | stw CARG1, -4(BASE)
+ | stw CARG1, WORD_LO-8(BASE)
|->fff_res1:
| // RA = results, PC = return.
| li RD, (1+1)*8
@@ -1782,10 +1940,11 @@ static void build_subroutines(BuildCtx *ctx)
| ins_next1
| // Adjust BASE. KBASE is assumed to be set for the calling frame.
| sub BASE, RA, TMP0
+ | addi BASEP4, BASE, 4
| ins_next2
|
|6: // Fill up results with nil.
- | subi TMP1, RD, 8
+ | addi TMP1, RD, WORD_HI-8
| addi RD, RD, 8
| stwx TISNIL, RA, TMP1
| b <5
@@ -1898,7 +2057,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc math_log
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
+ | lwz CARG3, WORD_HI(BASE)
| lfd FARG1, 0(BASE)
| bne ->fff_fallback // Need exactly 1 argument.
| checknum CARG3; bge ->fff_fallback
@@ -1923,13 +2082,13 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
|.ffunc math_ldexp
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz CARG3, WORD_HI(BASE)
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ | lwz CARG4, WORD_HI+8(BASE)
|.if GPR64
- | lwz CARG2, 12(BASE)
+ | lwz CARG2, WORD_LO+8(BASE)
|.else
- | lwz CARG1, 12(BASE)
+ | lwz CARG1, WORD_LO+8(BASE)
|.endif
| blt ->fff_fallback
| checknum CARG3; bge ->fff_fallback
@@ -1961,8 +2120,8 @@ static void build_subroutines(BuildCtx *ctx)
| stfd FARG1, 0(RA)
| li RD, (2+1)*8
|.if DUALNUM
- | stw TISNUM, 8(RA)
- | stw TMP1, 12(RA)
+ | stw TISNUM, WORD_HI+8(RA)
+ | stw TMP1, WORD_LO+8(RA)
|.else
| stfd FARG2, 8(RA)
|.endif
@@ -1989,9 +2148,9 @@ static void build_subroutines(BuildCtx *ctx)
| add TMP2, BASE, NARGS8:RC
| bne >4
|1: // Handle integers.
- | lwz CARG4, 0(TMP1)
+ | lwz CARG4, WORD_HI(TMP1)
| cmplw cr1, TMP1, TMP2
- | lwz CARG2, 4(TMP1)
+ | lwz CARG2, WORD_LO(TMP1)
| bge cr1, ->fff_resi
| checknum CARG4
| xoris TMP0, CARG1, 0x8000
@@ -2020,7 +2179,7 @@ static void build_subroutines(BuildCtx *ctx)
| lfd FARG1, 0(BASE)
| bge ->fff_fallback
|5: // Handle numbers.
- | lwz CARG4, 0(TMP1)
+ | lwz CARG4, WORD_HI(TMP1)
| cmplw cr1, TMP1, TMP2
| lfd FARG2, 0(TMP1)
| bge cr1, ->fff_resn
@@ -2035,7 +2194,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| b <5
|7: // Convert integer to number and continue above.
- | lwz CARG2, 4(TMP1)
+ | lwz CARG2, WORD_LO(TMP1)
| bne ->fff_fallback
| tonum_i FARG2, CARG2
| b <6
@@ -2043,7 +2202,12 @@ static void build_subroutines(BuildCtx *ctx)
| .ffunc_n name
| li TMP1, 8
|1:
+ |.if ENDIAN_LE
+ | add CARG2, BASE, TMP1
+ | lwz CARG2, WORD_HI(CARG2)
+ |.else
| lwzx CARG2, BASE, TMP1
+ |.endif
| lfdx FARG2, BASE, TMP1
| cmplw cr1, TMP1, NARGS8:RC
| checknum CARG2
@@ -2067,8 +2231,8 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc string_byte // Only handle the 1-arg case here.
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lwz STR:CARG1, 4(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz STR:CARG1, WORD_LO(BASE)
| bne ->fff_fallback // Need exactly 1 argument.
| checkstr CARG3
| bne ->fff_fallback
@@ -2099,12 +2263,12 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc string_char // Only handle the 1-arg case here.
| ffgccheck
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
+ | lwz CARG3, WORD_HI(BASE)
|.if DUALNUM
- | lwz TMP0, 4(BASE)
+ | lwz TMP0, WORD_LO(BASE)
| bne ->fff_fallback // Exactly 1 argument.
| checknum CARG3; bne ->fff_fallback
- | la CARG2, 7(BASE)
+ | la CARG2, WORD_BLO(BASE)
|.else
| lfd FARG1, 0(BASE)
| bne ->fff_fallback // Exactly 1 argument.
@@ -2128,16 +2292,16 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc string_sub
| ffgccheck
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 16(BASE)
+ | lwz CARG3, WORD_HI+16(BASE)
|.if not DUALNUM
| lfd f0, 16(BASE)
|.endif
- | lwz TMP0, 0(BASE)
- | lwz STR:CARG1, 4(BASE)
+ | lwz TMP0, WORD_HI(BASE)
+ | lwz STR:CARG1, WORD_LO(BASE)
| blt ->fff_fallback
- | lwz CARG2, 8(BASE)
+ | lwz CARG2, WORD_HI+8(BASE)
|.if DUALNUM
- | lwz TMP1, 12(BASE)
+ | lwz TMP1, WORD_LO+8(BASE)
|.else
| lfd f1, 8(BASE)
|.endif
@@ -2145,7 +2309,7 @@ static void build_subroutines(BuildCtx *ctx)
| beq >1
|.if DUALNUM
| checknum CARG3
- | lwz TMP2, 20(BASE)
+ | lwz TMP2, WORD_LO+16(BASE)
| bne ->fff_fallback
|1:
| checknum CARG2; bne ->fff_fallback
@@ -2201,8 +2365,8 @@ static void build_subroutines(BuildCtx *ctx)
| .ffunc string_ .. name
| ffgccheck
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lwz STR:CARG2, 4(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz STR:CARG2, WORD_LO(BASE)
| blt ->fff_fallback
| checkstr CARG3
| la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
@@ -2240,10 +2404,10 @@ static void build_subroutines(BuildCtx *ctx)
| addi TMP1, BASE, 8
| add TMP2, BASE, NARGS8:RC
|1:
- | lwz CARG4, 0(TMP1)
+ | lwz CARG4, WORD_HI(TMP1)
| cmplw cr1, TMP1, TMP2
|.if DUALNUM
- | lwz CARG2, 4(TMP1)
+ | lwz CARG2, WORD_LO(TMP1)
|.else
| lfd FARG1, 0(TMP1)
|.endif
@@ -2344,20 +2508,23 @@ static void build_subroutines(BuildCtx *ctx)
|
|->fff_fallback: // Call fast function fallback handler.
| // BASE = new base, RB = CFUNC, RC = nargs*8
- | lp TMP3, CFUNC:RB->f
+ | lp FUNCREG, CFUNC:RB->f // Load the C function field into FUNCREG (was TMP3).
| add TMP1, BASE, NARGS8:RC
| lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
| addi TMP0, TMP1, 8*LUA_MINSTACK
| lwz TMP2, L->maxstack
| stw PC, SAVE_PC // Redundant (but a defined value).
- | .toc lp TMP3, 0(TMP3)
+ | .opd lp TOCREG, TOC_OFS(FUNCREG) // OPD: load TOCREG from TOC_OFS of what CFUNC->f points to.
+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG) // OPDENV: load ENVREG from ENV_OFS as well.
+ | .opd lp FUNCREG, 0(FUNCREG) // OPD: offset 0 holds the address moved to CTR below.
| cmplw TMP0, TMP2
| stp BASE, L->base
| stp TMP1, L->top
| mr CARG1, L
| bgt >5 // Need to grow stack.
- | mtctr TMP3
+ | mtctr FUNCREG
| bctrl // (lua_State *L)
+ | .toc lp TOCREG, SAVE_TOC // Reload TOCREG from SAVE_TOC after the call returns.
| // Either throws an error, or recovers and returns -1, 0 or nresults+1.
| lp BASE, L->base
| cmpwi CRET1, 0
@@ -2459,6 +2626,7 @@ static void build_subroutines(BuildCtx *ctx)
|3:
| lp BASE, L->base
|4: // Re-dispatch to static ins.
+ | addi BASEP4, BASE, 4
| lwz INS, -4(PC)
| decode_OPP TMP1, INS
| decode_RB8 RB, INS
@@ -2472,7 +2640,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_hook: // Continue from hook yield.
| addi PC, PC, 4
- | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
+ | lwz MULTRES, WORD_LO-24(RB) // Restore MULTRES for *M ins.
| b <4
|
|->vm_hotloop: // Hot loop counter underflow.
@@ -2514,6 +2682,7 @@ static void build_subroutines(BuildCtx *ctx)
| lp BASE, L->base
| lp TMP0, L->top
| stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
+ | addi BASEP4, BASE, 4
| sub NARGS8:RC, TMP0, BASE
| add RA, BASE, RA
| lwz LFUNC:RB, FRAME_FUNC(BASE)
@@ -2525,7 +2694,7 @@ static void build_subroutines(BuildCtx *ctx)
|.if JIT
| // RA = resultptr, RB = meta base
| lwz INS, -4(PC)
- | lwz TRACE:TMP2, -20(RB) // Save previous trace.
+ | lwz TRACE:TMP2, WORD_LO-24(RB) // Save previous trace.
| addic. TMP1, MULTRES, -8
| decode_RA8 RC, INS // Call base.
| beq >2
@@ -2560,10 +2729,16 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG2, PC
| bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
| lp BASE, L->base
+ | addi BASEP4, BASE, 4 // Recompute BASEP4 from the reloaded BASE.
| b ->cont_nop
|
|9:
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4 // BASEP4 = BASE+4, the index base for the store below.
+ | stwx TISNIL, BASEP4, RC // LE: TISNIL is written at BASE+4+RC.
+ |.else
| stwx TISNIL, BASE, RC
+ |.endif
| addi RC, RC, 8
| b <3
|.endif
@@ -2578,6 +2753,7 @@ static void build_subroutines(BuildCtx *ctx)
| // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
| lp BASE, L->base
| subi PC, PC, 4
+ | addi BASEP4, BASE, 4
| b ->cont_nop
#endif
|
@@ -2586,39 +2762,72 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b, c, d
- | stfd f..a, 16+a*8(sp)
- | stfd f..b, 16+b*8(sp)
- | stfd f..c, 16+c*8(sp)
- | stfd f..d, 16+d*8(sp)
+ | stfd f..a, EXIT_OFFSET+a*8(sp) // FPR n is saved at EXIT_OFFSET + n*8.
+ | stfd f..b, EXIT_OFFSET+b*8(sp)
+ | stfd f..c, EXIT_OFFSET+c*8(sp)
+ | stfd f..d, EXIT_OFFSET+d*8(sp)
+ |.endmacro
+ |
+ |.macro saver, a
+ | stp r..a, EXIT_OFFSET+32*8+a*PSIZE(sp) // GPR n is saved after the 32 FPRs, PSIZE bytes each.
|.endmacro
|
|->vm_exit_handler:
|.if JIT
- | addi sp, sp, -(16+32*8+32*4)
- | stmw r2, 16+32*8+2*4(sp)
+ | addi sp, sp, -(EXIT_OFFSET+32*8+32*PSIZE) // Allocate the exit frame; TMP0 must stay intact for the exit number.
+ | saver 3 // CARG1
+ | saver 4 // CARG2
+ | saver 5 // CARG3
+ | saver 17 // DISPATCH
| addi DISPATCH, JGL, -GG_DISP2G-32768
| li CARG2, ~LJ_VMST_EXIT
- | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain.
+ | lp CARG1, EXIT_OFFSET+32*8+32*PSIZE(sp) // Get stack chain.
| stw CARG2, DISPATCH_GL(vmstate)(DISPATCH)
+ | saver 2 // r2 and r6-r13: individual stores replace the old stmw.
+ | saver 6
+ | saver 7
+ | saver 8
+ | saver 9
+ | saver 10
+ | saver 11
+ | saver 12
+ | saver 13
| savex_ 0,1,2,3
- | stw CARG1, 0(sp) // Store extended stack chain.
- | clrso TMP1
+ | stp CARG1, 0(sp) // Store extended stack chain.
+
| savex_ 4,5,6,7
- | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp.
+ | saver 14
+ | saver 15
+ | saver 16
+ | saver 18
+ | addi CARG2, sp, EXIT_OFFSET+32*8+32*PSIZE // Recompute original value of sp.
| savex_ 8,9,10,11
- | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP.
+ | stp CARG2, EXIT_OFFSET+32*8+1*PSIZE(sp) // Store sp in RID_SP.
| savex_ 12,13,14,15
| mflr CARG3
| li TMP1, 0
| savex_ 16,17,18,19
- | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP.
+ | stp TMP1, EXIT_OFFSET+32*8+0*PSIZE(sp) // Clear RID_TMP (whole PSIZE-wide slot).
| savex_ 20,21,22,23
| lhz CARG4, 2(CARG3) // Load trace number.
| savex_ 24,25,26,27
| lwz L, DISPATCH_GL(cur_L)(DISPATCH)
| savex_ 28,29,30,31
+ | saver 19 // Remaining GPRs r19-r31.
+ | saver 20
+ | saver 21
+ | saver 22
+ | saver 23
+ | saver 24
+ | saver 25
+ | saver 26
+ | saver 27
+ | saver 28
+ | saver 29
+ | saver 30
+ | saver 31
| sub CARG3, TMP0, CARG3 // Compute exit number.
- | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
+ | lwz BASE, DISPATCH_GL(jit_base)(DISPATCH) // 32 bit load, matching the stw to jit_base below.
| srwi CARG3, CARG3, 2
| stp L, DISPATCH_J(L)(DISPATCH)
| subi CARG3, CARG3, 2
@@ -2627,11 +2836,11 @@ static void build_subroutines(BuildCtx *ctx)
| stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
| addi CARG1, DISPATCH, GG_DISP2J
| stw CARG3, DISPATCH_J(exitno)(DISPATCH)
- | addi CARG2, sp, 16
+ | addi CARG2, sp, EXIT_OFFSET
| bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
| // Returns MULTRES (unscaled) or negated error code.
| lp TMP1, L->cframe
- | lwz TMP2, 0(sp)
+ | lp TMP2, 0(sp)
| lp BASE, L->base
|.if GPR64
| rldicr sp, TMP1, 0, 61
@@ -2639,7 +2848,7 @@ static void build_subroutines(BuildCtx *ctx)
| rlwinm sp, TMP1, 0, 0, 29
|.endif
| lwz PC, SAVE_PC // Get SAVE_PC.
- | stw TMP2, 0(sp)
+ | stp TMP2, 0(sp)
| stw L, SAVE_L // Set SAVE_L (on-trace resume/yield).
| b >1
|.endif
@@ -2660,7 +2869,12 @@ static void build_subroutines(BuildCtx *ctx)
| stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
| lwz KBASE, PC2PROTO(k)(TMP1)
| // Setup type comparison constants.
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM
+ |.endif
| lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| stw TMP3, TMPD
| li ZERO, 0
@@ -2680,14 +2894,14 @@ static void build_subroutines(BuildCtx *ctx)
| decode_RA8 RA, INS
| lpx TMP0, DISPATCH, TMP1
| mtctr TMP0
- | cmplwi TMP1, BC_FUNCF*4 // Function header?
+ | cmplwi TMP1, BC_FUNCF*PSIZE // Function header?
| bge >2
| decode_RB8 RB, INS
| decode_RD8 RD, INS
| decode_RC8 RC, INS
| bctr
|2:
- | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
+ | cmplwi TMP1, (BC_FUNCC+2)*PSIZE // Fast function?
| blt >3
| // Check frame below fast function.
| lwz TMP1, FRAME_PC(BASE)
@@ -2697,7 +2911,7 @@ static void build_subroutines(BuildCtx *ctx)
| lwz TMP2, -4(TMP1)
| decode_RA8 TMP0, TMP2
| sub TMP1, BASE, TMP0
- | lwz LFUNC:TMP2, -12(TMP1)
+ | lwz LFUNC:TMP2, WORD_LO-16(TMP1)
| lwz TMP1, LFUNC:TMP2->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
|3:
@@ -2718,6 +2932,8 @@ static void build_subroutines(BuildCtx *ctx)
|// NYI: Use internal implementations of floor, ceil, trunc.
|
|->vm_modi:
+ | li TMP1, 0
+ | mtxer TMP1 // Zero XER so a stale SO bit cannot make the bso below fire.
| divwo. TMP0, CARG1, CARG2
| bso >1
|.if GPR64
@@ -2736,7 +2952,8 @@ static void build_subroutines(BuildCtx *ctx)
| cmpwi CARG2, 0
| li CARG1, 0
| beqlr
- | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0.
+ | // Clear SO for -2147483648 % -1 and return 0.
+ | crxor 4*cr0+so, 4*cr0+so, 4*cr0+so // x XOR x = 0: clears the cr0 SO bit without a scratch register.
| blr
|
|//-----------------------------------------------------------------------
@@ -2749,10 +2966,18 @@ static void build_subroutines(BuildCtx *ctx)
|->vm_cachesync:
|.if JIT or FFI
| // Compute start of first cache line and number of cache lines.
+ | .if GPR64
+ | rldicr CARG1, CARG1, 0, 58 // 64 bit form: clear the low 5 bits, keep all upper bits.
+ | .else
| rlwinm CARG1, CARG1, 0, 0, 26
+ | .endif
| sub CARG2, CARG2, CARG1
| addi CARG2, CARG2, 31
+ | .if GPR64
+ | srdi. CARG2, CARG2, 5 // 64 bit shift right by 5; record form sets cr0 for beqlr.
+ | .else
| rlwinm. CARG2, CARG2, 27, 5, 31
+ | .endif
| beqlr
| mtctr CARG2
| mr CARG3, CARG1
@@ -2774,39 +2999,70 @@ static void build_subroutines(BuildCtx *ctx)
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
- |// Handler for callback functions. Callback slot number in r11, g in r12.
+ |// Handler for callback functions.
+ |// 32-bit: Callback slot number in r12, g in r11.
+ |// 64-bit v1: Callback slot number in bits 47+ of r11, g in 0-46, TOC in r2.
+ |// 64-bit v2: Callback slot number in bits 2-11 of r12, g in r11,
+ |// vm_ffi_callback in r2.
|->vm_ffi_callback:
|.if FFI
|.type CTSTATE, CTState, PC
+ | .if OPD
+ | rldicl r12, r11, 17, 47 // r12 = top 17 bits of r11 (slot number).
+ | rldicl r11, r11, 0, 17 // r11 = low 47 bits of r11 (g).
+ | .endif
+ | .if ELFV2
+ | rlwinm r12, r12, 30, 22, 31 // r12 = bits 2-11 of r12 (slot number).
+ | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha // High part of (.TOC. - lj_vm_ffi_callback).
+ | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l // Low part; TOCREG now points at .TOC.
+ | .endif
| saveregs
- | lwz CTSTATE, GL:r12->ctype_state
- | addi DISPATCH, r12, GG_G2DISP
- | stw r11, CTSTATE->cb.slot
- | stw r3, CTSTATE->cb.gpr[0]
+ | lwz CTSTATE, GL:r11->ctype_state // g is now taken from r11 (was r12).
+ | addi DISPATCH, r11, GG_G2DISP
+ | stw r12, CTSTATE->cb.slot // Slot number is now taken from r12 (was r11).
+ | stp r3, CTSTATE->cb.gpr[0] // GPR arguments are stored pointer-sized (stp, was stw).
| stfd f1, CTSTATE->cb.fpr[0]
- | stw r4, CTSTATE->cb.gpr[1]
+ | stp r4, CTSTATE->cb.gpr[1]
| stfd f2, CTSTATE->cb.fpr[1]
- | stw r5, CTSTATE->cb.gpr[2]
+ | stp r5, CTSTATE->cb.gpr[2]
| stfd f3, CTSTATE->cb.fpr[2]
- | stw r6, CTSTATE->cb.gpr[3]
+ | stp r6, CTSTATE->cb.gpr[3]
| stfd f4, CTSTATE->cb.fpr[3]
- | stw r7, CTSTATE->cb.gpr[4]
+ | stp r7, CTSTATE->cb.gpr[4]
| stfd f5, CTSTATE->cb.fpr[4]
- | stw r8, CTSTATE->cb.gpr[5]
+ | stp r8, CTSTATE->cb.gpr[5]
| stfd f6, CTSTATE->cb.fpr[5]
- | stw r9, CTSTATE->cb.gpr[6]
+ | stp r9, CTSTATE->cb.gpr[6]
| stfd f7, CTSTATE->cb.fpr[6]
- | stw r10, CTSTATE->cb.gpr[7]
+ | stp r10, CTSTATE->cb.gpr[7]
| stfd f8, CTSTATE->cb.fpr[7]
+ | .if GPR64
+ | stfd f9, CTSTATE->cb.fpr[8] // GPR64 additionally saves f9-f13 into fpr[8..12].
+ | stfd f10, CTSTATE->cb.fpr[9]
+ | stfd f11, CTSTATE->cb.fpr[10]
+ | stfd f12, CTSTATE->cb.fpr[11]
+ | stfd f13, CTSTATE->cb.fpr[12]
+ | .endif
+ | .if ELFV2
+ | addi TMP0, sp, CFRAME_SPACE+96 // ELFv2: cb.stack = sp + CFRAME_SPACE + 96.
+ | .elif GPR64
+ | addi TMP0, sp, CFRAME_SPACE+112 // Other GPR64 builds: cb.stack = sp + CFRAME_SPACE + 112.
+ | .else
| addi TMP0, sp, CFRAME_SPACE+8
- | stw TMP0, CTSTATE->cb.stack
+ | .endif
+ | stp TMP0, CTSTATE->cb.stack // Pointer-sized store of the stack argument address.
| mr CARG1, CTSTATE
| stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
| mr CARG2, sp
| bl extern lj_ccallback_enter // (CTState *cts, void *cf)
| // Returns lua_State *.
| lp BASE, L:CRET1->base
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff // OR in the low 16 bits of LJ_TISNUM.
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp RC, L:CRET1->top
| lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| li ZERO, 0
@@ -2835,9 +3091,21 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG1, CTSTATE
| mr CARG2, RA
| bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
- | lwz CRET1, CTSTATE->cb.gpr[0]
+ | lp CRET1, CTSTATE->cb.gpr[0] // Pointer-sized reload of the first return GPR.
| lfd FARG1, CTSTATE->cb.fpr[0]
- | lwz CRET2, CTSTATE->cb.gpr[1]
+ | lp CRET2, CTSTATE->cb.gpr[1] // Pointer-sized reload of the second return GPR.
+ | .if GPR64
+ | lfd FARG2, CTSTATE->cb.fpr[1] // GPR64: also reload FARG2 from fpr[1].
+ | .else
+ | lp CARG3, CTSTATE->cb.gpr[2] // 32-bit: also reload gpr[2] and gpr[3].
+ | lp CARG4, CTSTATE->cb.gpr[3]
+ | .endif
+ | .elfv2 lfd f3, CTSTATE->cb.fpr[2] // ELFv2: reload f3-f8 from fpr[2..7].
+ | .elfv2 lfd f4, CTSTATE->cb.fpr[3]
+ | .elfv2 lfd f5, CTSTATE->cb.fpr[4]
+ | .elfv2 lfd f6, CTSTATE->cb.fpr[5]
+ | .elfv2 lfd f7, CTSTATE->cb.fpr[6]
+ | .elfv2 lfd f8, CTSTATE->cb.fpr[7]
| b ->vm_leave_unw
|.endif
|
@@ -2850,23 +3118,46 @@ static void build_subroutines(BuildCtx *ctx)
| lbz CARG2, CCSTATE->nsp
| lbz CARG3, CCSTATE->nfpr
| neg TMP1, TMP1
+ | .if GPR64
+ | std TMP0, 16(sp) // GPR64: TMP0 is saved as a doubleword at offset 16 (was word at 4).
+ | .else
| stw TMP0, 4(sp)
+ | .endif
| cmpwi cr1, CARG3, 0
| mr TMP2, sp
| addic. CARG2, CARG2, -1
+ | .if GPR64
+ | stdux sp, sp, TMP1 // GPR64: doubleword store-with-update; sp += TMP1.
+ | .else
| stwux sp, sp, TMP1
+ | .endif
| crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
- | stw r14, -4(TMP2)
- | stw CCSTATE, -8(TMP2)
+ | .if GPR64
+ | std r14, -8(TMP2) // GPR64: r14 and CCSTATE use 8 byte slots below the old sp.
+ | std CCSTATE, -16(TMP2)
+ | .else
+ | stw r14, -4(TMP2)
+ | stw CCSTATE, -8(TMP2)
+ | .endif
| mr r14, TMP2
| la TMP1, CCSTATE->stack
+ | .if GPR64
+ | sldi CARG2, CARG2, 3 // GPR64: scale the last slot index by 8 bytes.
+ | .else
| slwi CARG2, CARG2, 2
+ | .endif
| blty >2
- | la TMP2, 8(sp)
+ | .if ELFV2
+ | la TMP2, 96(sp) // ELFv2: stack slots are copied to sp+96.
+ | .elif GPR64
+ | la TMP2, 112(sp) // Other GPR64 builds: stack slots are copied to sp+112.
+ | .else
+ | la TMP2, 8(sp)
+ | .endif
|1:
- | lwzx TMP0, TMP1, CARG2
- | stwx TMP0, TMP2, CARG2
- | addic. CARG2, CARG2, -4
+ | lpx TMP0, TMP1, CARG2 // Pointer-sized copy from CCSTATE->stack ...
+ | stpx TMP0, TMP2, CARG2 // ... to the outgoing area at TMP2.
+ | addic. CARG2, CARG2, -PSIZE // Step down one pointer-sized slot; loop while >= 0.
| bge <1
|2:
| bney cr1, >3
@@ -2878,28 +3169,55 @@ static void build_subroutines(BuildCtx *ctx)
| lfd f6, CCSTATE->fpr[5]
| lfd f7, CCSTATE->fpr[6]
| lfd f8, CCSTATE->fpr[7]
+ | .if GPR64
+ | lfd f9, CCSTATE->fpr[8] // GPR64 additionally loads f9-f13 from fpr[8..12].
+ | lfd f10, CCSTATE->fpr[9]
+ | lfd f11, CCSTATE->fpr[10]
+ | lfd f12, CCSTATE->fpr[11]
+ | lfd f13, CCSTATE->fpr[12]
+ | .endif
|3:
- | lp TMP0, CCSTATE->func
- | lwz CARG2, CCSTATE->gpr[1]
- | lwz CARG3, CCSTATE->gpr[2]
- | lwz CARG4, CCSTATE->gpr[3]
- | lwz CARG5, CCSTATE->gpr[4]
- | mtctr TMP0
- | lwz r8, CCSTATE->gpr[5]
- | lwz r9, CCSTATE->gpr[6]
- | lwz r10, CCSTATE->gpr[7]
- | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
+ | .toc std TOCREG, SAVE_TOC // Save TOCREG to SAVE_TOC; reloaded after bctrl.
+ | lp FUNCREG, CCSTATE->func // Call target goes in FUNCREG (was TMP0).
+ | lp CARG2, CCSTATE->gpr[1] // GPR arguments are loaded pointer-sized (lp, was lwz).
+ | lp CARG3, CCSTATE->gpr[2]
+ | .opd lp TOCREG, TOC_OFS(FUNCREG) // OPD: load TOCREG from TOC_OFS of what func points to.
+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG) // OPDENV: load ENVREG from ENV_OFS as well.
+ | .opd lp FUNCREG, 0(FUNCREG) // OPD: offset 0 holds the address moved to CTR below.
+ | lp CARG4, CCSTATE->gpr[3]
+ | lp CARG5, CCSTATE->gpr[4]
+ | mtctr FUNCREG
+ | lp r8, CCSTATE->gpr[5]
+ | lp r9, CCSTATE->gpr[6]
+ | lp r10, CCSTATE->gpr[7]
+ | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
| bctrl
- | lwz CCSTATE:TMP1, -8(r14)
- | lwz TMP2, -4(r14)
+ | .toc lp TOCREG, SAVE_TOC // Reload TOCREG from SAVE_TOC after the call returns.
+ | .if GPR64
+ | ld CCSTATE:TMP1, -16(r14) // GPR64: reload CCSTATE from its 8 byte slot at -16.
+ | ld TMP2, -8(r14) // GPR64: reload the r14 slot at -8 into TMP2.
+ | ld TMP0, 16(r14) // GPR64: reload the value moved to LR below.
+ | .else
+ | lwz CCSTATE:TMP1, -8(r14)
+ | lwz TMP2, -4(r14)
| lwz TMP0, 4(r14)
- | stw CARG1, CCSTATE:TMP1->gpr[0]
+ | .endif
+ | stp CARG1, CCSTATE:TMP1->gpr[0] // Results are stored back pointer-sized (stp, was stw).
| stfd FARG1, CCSTATE:TMP1->fpr[0]
- | stw CARG2, CCSTATE:TMP1->gpr[1]
+ | stp CARG2, CCSTATE:TMP1->gpr[1]
+ | .if GPR64
+ | stfd FARG2, CCSTATE:TMP1->fpr[1] // GPR64: also store FARG2 to fpr[1].
+ | .endif
+ | .elfv2 stfd FARG3, CCSTATE:TMP1->fpr[2] // ELFv2: also store FARG3-FARG8 to fpr[2..7].
+ | .elfv2 stfd FARG4, CCSTATE:TMP1->fpr[3]
+ | .elfv2 stfd FARG5, CCSTATE:TMP1->fpr[4]
+ | .elfv2 stfd FARG6, CCSTATE:TMP1->fpr[5]
+ | .elfv2 stfd FARG7, CCSTATE:TMP1->fpr[6]
+ | .elfv2 stfd FARG8, CCSTATE:TMP1->fpr[7]
| mtlr TMP0
- | stw CARG3, CCSTATE:TMP1->gpr[2]
+ | stp CARG3, CCSTATE:TMP1->gpr[2]
| mr sp, r14
- | stw CARG4, CCSTATE:TMP1->gpr[3]
+ | stp CARG4, CCSTATE:TMP1->gpr[3]
| mr r14, TMP2
| blr
|.endif
@@ -2923,13 +3241,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzx TMP0, BASE_HI, RA
| addi PC, PC, 4
- | lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
+ | lwzx CARG2, BASE_LO, RA
+ | lwzx TMP1, BASE_HI, RD
| lwz TMP2, -4(PC)
| checknum cr0, TMP0
- | lwz CARG3, 4(RD)
+ | lwzx CARG3, BASE_LO, RD
| decode_RD4 TMP2, TMP2
| checknum cr1, TMP1
| addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
@@ -2953,7 +3271,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|7: // RA is not an integer.
| bgt cr0, ->vmeta_comp
| // RA is a number.
- | lfd f0, 0(RA)
+ | lfdx f0, BASE, RA
| bgt cr1, ->vmeta_comp
| blt cr1, >4
| // RA is a number, RD is an integer.
@@ -2965,7 +3283,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA is an integer, RD is a number.
| tonum_i f0, CARG2
|4:
- | lfd f1, 0(RD)
+ | lfdx f1, BASE, RD
|5:
| fcmpu cr0, f0, f1
if (op == BC_ISLT) {
@@ -2981,10 +3299,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| b <1
|.else
- | lwzx TMP0, BASE, RA
+ | lwzx TMP0, BASE_HI, RA
| addi PC, PC, 4
| lfdx f0, BASE, RA
- | lwzx TMP1, BASE, RD
+ | lwzx TMP1, BASE_HI, RD
| checknum cr0, TMP0
| lwz TMP2, -4(PC)
| lfdx f1, BASE, RD
@@ -3015,15 +3333,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQV;
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzx TMP0, BASE_HI, RA // Non-update load: RA stays an offset (was lwzux).
| addi PC, PC, 4
- | lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
+ | lwzx CARG2, BASE_LO, RA // Indexed load of the other word of the RA slot.
+ | .if ENDIAN_LE
+ | lwzx TMP1, BASE_HI, RD // LE: plain load; RD is updated by the lwzux further down.
+ | .else
+ | lwzux TMP1, RD, BASE_HI // BE: update form, RD += BASE_HI.
+ | .endif
| checknum cr0, TMP0
| lwz TMP2, -4(PC)
| checknum cr1, TMP1
| decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
+ | .if ENDIAN_LE
+ | lwzux CARG3, RD, BASE_LO // LE: update form, RD += BASE_LO.
+ | .else
+ | lwz CARG3, WORD_LO(RD) // BE: RD was already updated by the lwzux above.
+ | .endif
| cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
| addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
if (vk) {
@@ -3032,14 +3358,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ble cr7, ->BC_ISNEN_Z
}
|.else
- | lwzux TMP0, RA, BASE
+ | lwzx TMP0, BASE_HI, RA
| lwz TMP2, 0(PC)
- | lfd f0, 0(RA)
+ | lfdx f0, BASE, RA
| addi PC, PC, 4
- | lwzux TMP1, RD, BASE
+ | lwzx TMP1, BASE_HI, RD
| checknum cr0, TMP0
| decode_RD4 TMP2, TMP2
- | lfd f1, 0(RD)
+ | lfdx f1, BASE, RD
| checknum cr1, TMP1
| addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
| bge cr0, >5
@@ -3057,8 +3383,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
|5: // Either or both types are not numbers.
|.if not DUALNUM
- | lwz CARG2, 4(RA)
- | lwz CARG3, 4(RD)
+ | lwzx CARG2, BASE_LO, RA
+ | lwzx CARG3, BASE_LO, RD
|.endif
|.if FFI
| cmpwi cr7, TMP0, LJ_TCDATA
@@ -3074,10 +3400,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.if FFI
| beq cr7, ->vmeta_equal_cd
|.endif
+ |.if P64
+ | cmplwi cr7, TMP3, ~LJ_TUDATA // Avoid 64 bit lightuserdata.
+ |.endif
| cmplw cr5, CARG2, CARG3
| crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
| crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
| crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
+ |.if P64
+ | cror 4*cr6+lt, 4*cr6+lt, 4*cr7+gt
+ |.endif
| mr SAVE0, PC
| cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
| cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
@@ -3116,9 +3448,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISEQS: case BC_ISNES:
vk = op == BC_ISEQS;
| // RA = src*8, RD = str_const*8 (~), JMP with RD = target
- | lwzux TMP0, RA, BASE
+ | lwzx TMP0, BASE_HI, RA
| srwi RD, RD, 1
- | lwz STR:TMP3, 4(RA)
+ | lwzx STR:TMP3, BASE_LO, RA
| lwz TMP2, 0(PC)
| subfic RD, RD, -4
| addi PC, PC, 4
@@ -3150,15 +3482,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQN;
| // RA = src*8, RD = num_const*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzx TMP0, BASE_HI, RA
| addi PC, PC, 4
- | lwz CARG2, 4(RA)
- | lwzux TMP1, RD, KBASE
+ | lwzx CARG2, BASE_LO, RA
+ | lwzux2 TMP1, CARG3, RD, KBASE
| checknum cr0, TMP0
| lwz TMP2, -4(PC)
| checknum cr1, TMP1
| decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
| addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
if (vk) {
|->BC_ISEQN_Z:
@@ -3175,7 +3506,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
} else {
|->BC_ISNEN_Z: // Dummy label.
}
- | lwzx TMP0, BASE, RA
+ | lwzx TMP0, BASE_HI, RA
| addi PC, PC, 4
| lfdx f0, BASE, RA
| lwz TMP2, -4(PC)
@@ -3213,7 +3544,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|7: // RA is not an integer.
| bge cr0, <3
| // RA is a number.
- | lfd f0, 0(RA)
+ | lfdx f0, BASE, RA
| blt cr1, >1
| // RA is a number, RD is an integer.
| tonum_i f1, CARG3
@@ -3232,7 +3563,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISEQP: case BC_ISNEP:
vk = op == BC_ISEQP;
| // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
- | lwzx TMP0, BASE, RA
+ | lwzx TMP0, BASE_HI, RA
| srwi TMP1, RD, 3
| lwz TMP2, 0(PC)
| not TMP1, TMP1
@@ -3262,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
| // RA = dst*8 or unused, RD = src*8, JMP with RD = target
- | lwzx TMP0, BASE, RD
+ | lwzx TMP0, BASE_HI, RD
| lwz INS, 0(PC)
| addi PC, PC, 4
if (op == BC_IST || op == BC_ISF) {
@@ -3297,7 +3628,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISTYPE:
| // RA = src*8, RD = -type*8
- | lwzx TMP0, BASE, RA
+ | lwzx TMP0, BASE_HI, RA
| srwi TMP1, RD, 3
| ins_next1
|.if not PPE and not GPR64
@@ -3311,7 +3642,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_ISNUM:
| // RA = src*8, RD = -(TISNUM-1)*8
- | lwzx TMP0, BASE, RA
+ | lwzx TMP0, BASE_HI, RA
| ins_next1
| checknum TMP0
| bge ->vmeta_istype
@@ -3330,17 +3661,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_NOT:
| // RA = dst*8, RD = src*8
| ins_next1
- | lwzx TMP0, BASE, RD
+ | lwzx TMP0, BASE_HI, RD
| .gpr64 extsw TMP0, TMP0
| subfic TMP1, TMP0, LJ_TTRUE
| adde TMP0, TMP0, TMP1
- | stwx TMP0, BASE, RA
+ | stwx TMP0, BASE_HI, RA
| ins_next2
break;
case BC_UNM:
| // RA = dst*8, RD = src*8
- | lwzux TMP1, RD, BASE
- | lwz TMP0, 4(RD)
+ | lwzx TMP1, BASE_HI, RD
+ | lwzx TMP0, BASE_LO, RD
+ |.if DUALNUM and not GPR64
+ | mtxer ZERO
+ |.endif
| checknum TMP1
|.if DUALNUM
| bne >5
@@ -3352,18 +3686,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.else
| nego. TMP0, TMP0
| bso >4
- |1:
|.endif
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
|3:
| ins_next2
|4:
- |.if not GPR64
- | // Potential overflow.
- | checkov TMP1, <1 // Ignore unrelated overflow.
- |.endif
| lus TMP1, 0x41e0 // 2^31.
| li TMP0, 0
| b >7
@@ -3373,8 +3702,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| xoris TMP1, TMP1, 0x8000
|7:
| ins_next1
- | stwux TMP1, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TMP1, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
|.if DUALNUM
| b <3
|.else
@@ -3383,15 +3712,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_LEN:
| // RA = dst*8, RD = src*8
- | lwzux TMP0, RD, BASE
- | lwz CARG1, 4(RD)
+ | lwzx TMP0, BASE_HI, RD
+ | lwzx CARG1, BASE_LO, RD
| checkstr TMP0; bne >2
| lwz CRET1, STR:CARG1->len
|1:
|.if DUALNUM
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw CRET1, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx CRET1, BASE_LO, RA
|.else
| tonum_u f0, CRET1 // Result is a non-negative integer.
| ins_next1
@@ -3426,9 +3755,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzx TMP1, BASE, RB
+ | .if ENDIAN_LE and DUALNUM
+ | addi TMP2, RC, 4
+ | .endif
+ | lwzx TMP1, BASE_HI, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | .if ENDIAN_LE
+ | lwzx TMP2, KBASE, TMP2
+ | .else
+ | lwzx TMP2, KBASE, RC
+ | .endif
| .endif
| lfdx f14, BASE, RB
| lfdx f15, KBASE, RC
@@ -3442,9 +3778,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| .endif
|| break;
||case 1:
- | lwzx TMP1, BASE, RB
+ | .if ENDIAN_LE and DUALNUM
+ | addi TMP2, RC, 4
+ | .endif
+ | lwzx TMP1, BASE_HI, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | .if ENDIAN_LE
+ | lwzx TMP2, KBASE, TMP2
+ | .else
+ | lwzx TMP2, KBASE, RC
+ | .endif
| .endif
| lfdx f15, BASE, RB
| lfdx f14, KBASE, RC
@@ -3458,8 +3801,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| .endif
|| break;
||default:
- | lwzx TMP1, BASE, RB
- | lwzx TMP2, BASE, RC
+ | lwzx TMP1, BASE_HI, RB
+ | lwzx TMP2, BASE_HI, RC
| lfdx f14, BASE, RB
| lfdx f15, BASE, RC
| checknum cr0, TMP1
@@ -3514,41 +3857,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | .if ENDIAN_LE
+ | lwzux CARG2, RC, KBASE
+ | lwzx TMP1, RB, BASE_HI
+ | lwz TMP2, 4(RC)
+ | checknum cr0, TMP1
+ | lwzux CARG1, RB, BASE
+ | .else
+ | lwzux TMP1, RB, BASE
+ | lwzux TMP2, RC, KBASE
+ | lwz CARG1, 4(RB)
+ | checknum cr0, TMP1
+ | lwz CARG2, 4(RC)
+ | .endif
|| break;
||case 1:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG2, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG1, 4(RC)
+ | .if ENDIAN_LE
+ | lwzux CARG1, RC, KBASE
+ | lwzx TMP1, RB, BASE_HI
+ | lwz TMP2, 4(RC)
+ | checknum cr0, TMP1
+ | lwzux CARG2, RB, BASE
+ | .else
+ | lwzux TMP1, RB, BASE
+ | lwzux TMP2, RC, KBASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, TMP1
+ | lwz CARG1, 4(RC)
+ | .endif
|| break;
||default:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, BASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | .if ENDIAN_LE
+ | lwzx TMP1, RB, BASE_HI
+ | lwzx TMP2, RC, BASE_HI
+ | lwzux CARG1, RB, BASE
+ | checknum cr0, TMP1
+ | lwzux CARG2, RC, BASE
+ | .else
+ | lwzux TMP1, RB, BASE
+ | lwzux TMP2, RC, BASE
+ | lwz CARG1, 4(RB)
+ | checknum cr0, TMP1
+ | lwz CARG2, 4(RC)
+ | .endif
|| break;
||}
+ | mtxer ZERO
| checknum cr1, TMP2
| bne >5
| bne cr1, >5
| intins CARG1, CARG1, CARG2
- | bso >4
- |1:
+ | ins_arithfallback bso
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw CARG1, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx CARG1, BASE_LO, RA
|2:
| ins_next2
- |4: // Overflow.
- | checkov TMP0, <1 // Ignore unrelated overflow.
- | ins_arithfallback b
|5: // FP variant.
||if (vk == 1) {
| lfd f15, 0(RB)
@@ -3620,9 +3984,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_POW:
| // NYI: (partial) integer arithmetic.
- | lwzx TMP1, BASE, RB
+ | lwzx TMP1, BASE_HI, RB
| lfdx FARG1, BASE, RB
- | lwzx TMP2, BASE, RC
+ | lwzx TMP2, BASE_HI, RC
| lfdx FARG2, BASE, RC
| checknum cr0, TMP1
| checknum cr1, TMP2
@@ -3648,6 +4012,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Returns NULL (finished) or TValue * (metamethod).
| cmplwi CRET1, 0
| lp BASE, L->base
+ | addi BASEP4, BASE, 4
| bne ->vmeta_binop
| ins_next1
| lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
@@ -3664,8 +4029,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next1
| lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
| li TMP2, LJ_TSTR
- | stwux TMP2, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TMP2, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
| ins_next2
break;
case BC_KCDATA:
@@ -3676,8 +4041,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next1
| lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
| li TMP2, LJ_TCDATA
- | stwux TMP2, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TMP2, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
| ins_next2
|.endif
break;
@@ -3687,14 +4052,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi RD, RD, 13
| srawi RD, RD, 16
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw RD, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx RD, BASE_LO, RA
| ins_next2
|.else
| // The soft-float approach is faster.
| slwi RD, RD, 13
| srawi TMP1, RD, 31
| xor TMP2, TMP1, RD
+ | .gpr64 extsw RD, RD
| sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
| cntlzw TMP3, TMP2
| subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
@@ -3706,8 +4072,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add RD, RD, TMP1 // hi = hi + exponent-1
| and RD, RD, TMP0 // hi = x == 0 ? 0 : hi
| ins_next1
- | stwux RD, RA, BASE
- | stw ZERO, 4(RA)
+ | stwx RD, BASE_HI, RA
+ | stwx ZERO, BASE_LO, RA
| ins_next2
|.endif
break;
@@ -3723,15 +4089,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| srwi TMP1, RD, 3
| not TMP0, TMP1
| ins_next1
- | stwx TMP0, BASE, RA
+ | stwx TMP0, BASE_HI, RA
| ins_next2
break;
case BC_KNIL:
| // RA = base*8, RD = end*8
- | stwx TISNIL, BASE, RA
+ | stwx TISNIL, BASE_HI, RA
| addi RA, RA, 8
|1:
- | stwx TISNIL, BASE, RA
+ | stwx TISNIL, BASE_HI, RA
| cmpw RA, RD
| addi RA, RA, 8
| blt <1
@@ -3763,10 +4129,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz CARG2, UPVAL:RB->v
| andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbz TMP0, UPVAL:RB->closed
- | lwz TMP2, 0(RD)
+ | lwz TMP2, WORD_HI(RD)
| stfd f0, 0(CARG2)
| cmplwi cr1, TMP0, 0
- | lwz TMP1, 4(RD)
+ | lwz TMP1, WORD_LO(RD)
| cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
| subi TMP2, TMP2, (LJ_TNUMX+1)
| bne >2 // Upvalue is closed and black?
@@ -3799,8 +4165,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lbz TMP3, STR:TMP1->marked
| lbz TMP2, UPVAL:RB->closed
| li TMP0, LJ_TSTR
- | stw STR:TMP1, 4(CARG2)
- | stw TMP0, 0(CARG2)
+ | stw STR:TMP1, WORD_LO(CARG2)
+ | stw TMP0, WORD_HI(CARG2)
| bne >2
|1:
| ins_next
@@ -3837,7 +4203,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwzx UPVAL:RB, LFUNC:RB, RA
| ins_next1
| lwz TMP1, UPVAL:RB->v
- | stw TMP0, 0(TMP1)
+ | stw TMP0, WORD_HI(TMP1)
| ins_next2
break;
@@ -3852,6 +4218,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add CARG2, BASE, RA
| bl extern lj_func_closeuv // (lua_State *L, TValue *level)
| lp BASE, L->base
+ | addi BASEP4, BASE, 4
|1:
| ins_next
break;
@@ -3870,8 +4237,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Returns GCfuncL *.
| lp BASE, L->base
| li TMP0, LJ_TFUNC
- | stwux TMP0, RA, BASE
- | stw LFUNC:CRET1, 4(RA)
+ | addi BASEP4, BASE, 4
+ | stwx TMP0, BASE_HI, RA
+ | stwx LFUNC:CRET1, BASE_LO, RA
| ins_next
break;
@@ -3904,8 +4272,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| lp BASE, L->base
| li TMP0, LJ_TTAB
- | stwux TMP0, RA, BASE
- | stw TAB:CRET1, 4(RA)
+ | addi BASEP4, BASE, 4
+ | stwx TMP0, BASE_HI, RA
+ | stwx TAB:CRET1, BASE_LO, RA
| ins_next
if (op == BC_TNEW) {
|3:
@@ -3938,13 +4307,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_TGETV:
| // RA = dst*8, RB = table*8, RC = key*8
- | lwzux CARG1, RB, BASE
- | lwzux CARG2, RC, BASE
- | lwz TAB:RB, 4(RB)
+ | lwzx CARG1, BASE_HI, RB
+ | lwzx CARG2, BASE_HI, RC
+ | lwzx TAB:RB, BASE_LO, RB
|.if DUALNUM
- | lwz RC, 4(RC)
+ | lwzx RC, BASE_LO, RC
|.else
- | lfd f0, 0(RC)
+ | lfdx f0, BASE, RC
|.endif
| checktab CARG1
| checknum cr1, CARG2
@@ -3971,8 +4340,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi TMP2, TMP2, 3
|.endif
| ble ->vmeta_tgetv // Integer key and in array part?
- | lwzx TMP0, TMP1, TMP2
- | lfdx f14, TMP1, TMP2
+ | .if ENDIAN_LE
+ | lfdux f14, TMP1, TMP2
+ | lwz TMP0, WORD_HI(TMP1)
+ | .else
+ | lwzx TMP0, TMP1, TMP2
+ | lfdx f14, TMP1, TMP2
+ | .endif
| checknil TMP0; beq >2
|1:
| ins_next1
@@ -3991,15 +4365,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5:
| checkstr CARG2; bne ->vmeta_tgetv
|.if not DUALNUM
- | lwz STR:RC, 4(RC)
+ | lwzx STR:RC, BASE_LO, RC
|.endif
| b ->BC_TGETS_Z // String key?
break;
case BC_TGETS:
| // RA = dst*8, RB = table*8, RC = str_const*8 (~)
- | lwzux CARG1, RB, BASE
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP1, RC, 1
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| subfic TMP1, TMP1, -4
| checktab CARG1
| lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
@@ -4015,16 +4389,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sub TMP1, TMP0, TMP1
| add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|1:
- | lwz CARG1, NODE:TMP2->key
- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
- | lwz CARG2, NODE:TMP2->val
- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
| checkstr CARG1; bne >4
| cmpw TMP0, STR:RC; bne >4
| checknil CARG2; beq >5 // Key found, but nil value?
|3:
- | stwux CARG2, RA, BASE
- | stw TMP1, 4(RA)
+ | stwx CARG2, BASE_HI, RA
+ | stwx TMP1, BASE_LO, RA
| ins_next
|
|4: // Follow hash chain.
@@ -4045,15 +4419,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TGETB:
| // RA = dst*8, RB = table*8, RC = index*8
- | lwzux CARG1, RB, BASE
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP0, RC, 3
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| checktab CARG1; bne ->vmeta_tgetb
| lwz TMP1, TAB:RB->asize
| lwz TMP2, TAB:RB->array
| cmplw TMP0, TMP1; bge ->vmeta_tgetb
- | lwzx TMP1, TMP2, RC
- | lfdx f0, TMP2, RC
+ | .if ENDIAN_LE
+ | lfdux f0, TMP2, RC
+ | lwz TMP1, WORD_HI(TMP2)
+ | .else
+ | lwzx TMP1, TMP2, RC
+ | lfdx f0, TMP2, RC
+ | .endif
| checknil TMP1; beq >5
|1:
| ins_next1
@@ -4071,12 +4450,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TGETR:
| // RA = dst*8, RB = table*8, RC = key*8
- | add RB, BASE, RB
- | lwz TAB:CARG1, 4(RB)
+ | lwzx TAB:CARG1, BASE_LO, RB
|.if DUALNUM
- | add RC, BASE, RC
| lwz TMP0, TAB:CARG1->asize
- | lwz CARG2, 4(RC)
+ | lwzx CARG2, BASE_LO, RC
| lwz TMP1, TAB:CARG1->array
|.else
| lfdx f0, BASE, RC
@@ -4096,13 +4473,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_TSETV:
| // RA = src*8, RB = table*8, RC = key*8
- | lwzux CARG1, RB, BASE
- | lwzux CARG2, RC, BASE
- | lwz TAB:RB, 4(RB)
+ | lwzx CARG1, BASE_HI, RB
+ | lwzx CARG2, BASE_HI, RC
+ | lwzx TAB:RB, BASE_LO, RB
|.if DUALNUM
- | lwz RC, 4(RC)
+ | lwzx RC, BASE_LO, RC
|.else
- | lfd f0, 0(RC)
+ | lfdx f0, BASE, RC
|.endif
| checktab CARG1
| checknum cr1, CARG2
@@ -4129,7 +4506,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi TMP0, TMP2, 3
|.endif
| ble ->vmeta_tsetv // Integer key and in array part?
+ | .if ENDIAN_LE
+ | addi TMP2, TMP1, 4
+ | lwzx TMP2, TMP2, TMP0
+ | .else
| lwzx TMP2, TMP1, TMP0
+ | .endif
| lbz TMP3, TAB:RB->marked
| lfdx f14, BASE, RA
| checknil TMP2; beq >3
@@ -4152,7 +4534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5:
| checkstr CARG2; bne ->vmeta_tsetv
|.if not DUALNUM
- | lwz STR:RC, 4(RC)
+ | lwzx STR:RC, BASE_LO, RC
|.endif
| b ->BC_TSETS_Z // String key?
|
@@ -4162,9 +4544,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TSETS:
| // RA = src*8, RB = table*8, RC = str_const*8 (~)
- | lwzux CARG1, RB, BASE
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP1, RC, 1
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| subfic TMP1, TMP1, -4
| checktab CARG1
| lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
@@ -4183,9 +4565,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lbz TMP3, TAB:RB->marked
| add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|1:
- | lwz CARG1, NODE:TMP2->key
- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
- | lwz CARG2, NODE:TMP2->val
+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
| lwz NODE:TMP1, NODE:TMP2->next
| checkstr CARG1; bne >5
| cmpw TMP0, STR:RC; bne >5
@@ -4225,13 +4607,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|6:
| li TMP0, LJ_TSTR
- | stw STR:RC, 4(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
| mr CARG2, TAB:RB
- | stw TMP0, 0(CARG3)
+ | stw TMP0, WORD_HI(CARG3)
| bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
| // Returns TValue *.
| lp BASE, L->base
| stfd f14, 0(CRET1)
+ | addi BASEP4, BASE, 4
| b <3 // No 2nd write barrier needed.
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4240,9 +4623,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TSETB:
| // RA = src*8, RB = table*8, RC = index*8
- | lwzux CARG1, RB, BASE
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP0, RC, 3
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| checktab CARG1; bne ->vmeta_tsetb
| lwz TMP1, TAB:RB->asize
| lwz TMP2, TAB:RB->array
@@ -4250,7 +4633,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cmplw TMP0, TMP1
| lfdx f14, BASE, RA
| bge ->vmeta_tsetb
- | lwzx TMP1, TMP2, RC
+ | .if ENDIAN_LE
+ | addi TMP1, TMP2, 4
+ | lwzx TMP1, TMP1, RC
+ | .else
+ | lwzx TMP1, TMP2, RC
+ | .endif
| checknil TMP1; beq >5
|1:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
@@ -4274,13 +4662,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TSETR:
| // RA = dst*8, RB = table*8, RC = key*8
- | add RB, BASE, RB
- | lwz TAB:CARG2, 4(RB)
+ | lwzx TAB:CARG2, BASE_LO, RB
|.if DUALNUM
- | add RC, BASE, RC
| lbz TMP3, TAB:CARG2->marked
| lwz TMP0, TAB:CARG2->asize
- | lwz CARG3, 4(RC)
+ | lwzx CARG3, BASE_LO, RC
| lwz TMP1, TAB:CARG2->array
|.else
| lfdx f0, BASE, RC
@@ -4311,9 +4697,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add RA, BASE, RA
|1:
| add TMP3, KBASE, RD
- | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
+ | lwz TAB:CARG2, WORD_LO-8(RA) // Guaranteed to be a table.
| addic. TMP0, MULTRES, -8
- | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
+ | lwz TMP3, WORD_LO(TMP3) // Integer constant is in lo-word.
| srwi CARG3, TMP0, 3
| beq >4 // Nothing to copy?
| add CARG3, CARG3, TMP3
@@ -4362,8 +4748,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_CALL:
| // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
| mr TMP2, BASE
- | lwzux TMP0, BASE, RA
- | lwz LFUNC:RB, 4(BASE)
+ | lwzux2 TMP0, LFUNC:RB, BASE, RA
| subi NARGS8:RC, NARGS8:RC, 8
| addi BASE, BASE, 8
| checkfunc TMP0; bne ->vmeta_call
@@ -4377,8 +4762,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_CALLT:
| // RA = base*8, (RB = 0,) RC = (nargs+1)*8
- | lwzux TMP0, RA, BASE
- | lwz LFUNC:RB, 4(RA)
+ | lwzux2 TMP0, LFUNC:RB, RA, BASE
| subi NARGS8:RC, NARGS8:RC, 8
| lwz TMP1, FRAME_PC(BASE)
| checkfunc TMP0
@@ -4430,12 +4814,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
| mr TMP2, BASE
| add BASE, BASE, RA
- | lwz TMP1, -24(BASE)
- | lwz LFUNC:RB, -20(BASE)
+ | lwz TMP1, WORD_HI-24(BASE)
+ | lwz LFUNC:RB, WORD_LO-24(BASE)
| lfd f1, -8(BASE)
| lfd f0, -16(BASE)
- | stw TMP1, 0(BASE) // Copy callable.
- | stw LFUNC:RB, 4(BASE)
+ | stw TMP1, WORD_HI(BASE) // Copy callable.
+ | stw LFUNC:RB, WORD_LO(BASE)
| checkfunc TMP1
| stfd f1, 16(BASE) // Copy control var.
| li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -4450,8 +4834,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // NYI: add hotloop, record BC_ITERN.
|.endif
| add RA, BASE, RA
- | lwz TAB:RB, -12(RA)
- | lwz RC, -4(RA) // Get index from control var.
+ | lwz TAB:RB, WORD_LO-16(RA)
+ | lwz RC, WORD_LO-8(RA) // Get index from control var.
| lwz TMP0, TAB:RB->asize
| lwz TMP1, TAB:RB->array
| addi PC, PC, 4
@@ -4459,14 +4843,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cmplw RC, TMP0
| slwi TMP3, RC, 3
| bge >5 // Index points after array part?
- | lwzx TMP2, TMP1, TMP3
- | lfdx f0, TMP1, TMP3
+ | lfdux f0, TMP3, TMP1
+ | lwz TMP2, WORD_HI(TMP3)
| checknil TMP2
| lwz INS, -4(PC)
| beq >4
|.if DUALNUM
- | stw RC, 4(RA)
- | stw TISNUM, 0(RA)
+ | stw RC, WORD_LO(RA)
+ | stw TISNUM, WORD_HI(RA)
|.else
| tonum_u f1, RC
|.endif
@@ -4474,7 +4858,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addis TMP3, PC, -(BCBIAS_J*4 >> 16)
| stfd f0, 8(RA)
| decode_RD4 TMP1, INS
- | stw RC, -4(RA) // Update control var.
+ | stw RC, WORD_LO-8(RA) // Update control var.
| add PC, TMP1, TMP3
|.if not DUALNUM
| stfd f1, 0(RA)
@@ -4496,9 +4880,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bgty <3
| slwi RB, RC, 3
| sub TMP3, TMP3, RB
- | lwzx RB, TMP2, TMP3
- | lfdx f0, TMP2, TMP3
- | add NODE:TMP3, TMP2, TMP3
+ | lfdux f0, TMP3, TMP2
+ | lwz RB, WORD_HI(TMP3)
| checknil RB
| lwz INS, -4(PC)
| beq >7
@@ -4510,7 +4893,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stfd f1, 0(RA)
| addi RC, RC, 1
| add PC, TMP1, TMP2
- | stw RC, -4(RA) // Update control var.
+ | stw RC, WORD_LO-8(RA) // Update control var.
| b <3
|
|7: // Skip holes in hash part.
@@ -4521,10 +4904,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISNEXT:
| // RA = base*8, RD = target (points to ITERN)
| add RA, BASE, RA
- | lwz TMP0, -24(RA)
- | lwz CFUNC:TMP1, -20(RA)
- | lwz TMP2, -16(RA)
- | lwz TMP3, -8(RA)
+ | lwz TMP0, WORD_HI-24(RA)
+ | lwz CFUNC:TMP1, WORD_LO-24(RA)
+ | lwz TMP2, WORD_HI-16(RA)
+ | lwz TMP3, WORD_HI-8(RA)
| cmpwi cr0, TMP2, LJ_TTAB
| cmpwi cr1, TMP0, LJ_TFUNC
| cmpwi cr6, TMP3, LJ_TNIL
@@ -4538,17 +4921,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bne cr0, >5
| lus TMP1, 0xfffe
| ori TMP1, TMP1, 0x7fff
- | stw ZERO, -4(RA) // Initialize control var.
- | stw TMP1, -8(RA)
+ | stw ZERO, WORD_LO-8(RA) // Initialize control var.
+ | stw TMP1, WORD_HI-8(RA)
| addis PC, TMP3, -(BCBIAS_J*4 >> 16)
|1:
| ins_next
|5: // Despecialize bytecode if any of the checks fail.
| li TMP0, BC_JMP
| li TMP1, BC_ITERC
+ | .if ENDIAN_LE
+ | stb TMP0, -4(PC)
+ | .else
| stb TMP0, -1(PC)
+ | .endif
| addis PC, TMP3, -(BCBIAS_J*4 >> 16)
+ | .if ENDIAN_LE
+ | stb TMP1, 0(PC)
+ | .else
| stb TMP1, 3(PC)
+ | .endif
| b <1
break;
@@ -4582,7 +4973,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addi RA, RA, 8
| blt cr1, <1 // More vararg slots?
|2: // Fill up remainder with nil.
- | stw TISNIL, 0(RA)
+ | stw TISNIL, WORD_HI(RA)
| cmplw RA, TMP2
| addi RA, RA, 8
| blt <2
@@ -4619,6 +5010,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add RA, BASE, RA
| add RC, BASE, SAVE0
| subi TMP3, BASE, 8
+ | addi BASEP4, BASE, 4
| b <6
break;
@@ -4667,13 +5059,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bgt >6
| sub BASE, TMP2, RA
| lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+ | addi BASEP4, BASE, 4
| ins_next1
| lwz TMP1, LFUNC:TMP1->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
| ins_next2
|
|6: // Fill up results with nil.
- | subi TMP1, RD, 8
+ | addi TMP1, RD, WORD_HI-8
| addi RD, RD, 8
| stwx TISNIL, TMP2, TMP1
| b <5
@@ -4709,13 +5102,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bgt >6
| sub BASE, TMP2, RA
| lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+ | addi BASEP4, BASE, 4
| ins_next1
| lwz TMP1, LFUNC:TMP1->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
| ins_next2
|
|6: // Fill up results with nil.
- | subi TMP1, RD, 8
+ | addi TMP1, RD, WORD_HI-8
| addi RD, RD, 8
| stwx TISNIL, TMP2, TMP1
| b <5
@@ -4741,11 +5135,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = (op == BC_IFORL || op == BC_JFORL);
|.if DUALNUM
| // Integer loop.
- | lwzux TMP1, RA, BASE
- | lwz CARG1, FORL_IDX*8+4(RA)
+ | lwzux2 TMP1, CARG1, RA, BASE
+ if (vk) {
+ | mtxer ZERO
+ }
| cmplw cr0, TMP1, TISNUM
if (vk) {
- | lwz CARG3, FORL_STEP*8+4(RA)
+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
| bne >9
|.if GPR64
| // Need to check overflow for (a<<32) + (b<<32).
@@ -4757,15 +5153,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addo. CARG1, CARG1, CARG3
|.endif
| cmpwi cr6, CARG3, 0
- | lwz CARG2, FORL_STOP*8+4(RA)
- | bso >6
+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
+ | bso >2
|4:
- | stw CARG1, FORL_IDX*8+4(RA)
+ | stw CARG1, FORL_IDX*8+WORD_LO(RA)
} else {
- | lwz TMP3, FORL_STEP*8(RA)
- | lwz CARG3, FORL_STEP*8+4(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | lwz CARG2, FORL_STOP*8+4(RA)
+ | lwz TMP3, FORL_STEP*8+WORD_HI(RA)
+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
+ | lwz TMP2, FORL_STOP*8+WORD_HI(RA)
+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
| cmplw cr7, TMP3, TISNUM
| cmplw cr1, TMP2, TISNUM
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
@@ -4776,11 +5172,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| blt cr6, >5
| cmpw CARG1, CARG2
|1:
- | stw TISNUM, FORL_EXT*8(RA)
+ | stw TISNUM, FORL_EXT*8+WORD_HI(RA)
if (op != BC_JFORL) {
| srwi RD, RD, 1
}
- | stw CARG1, FORL_EXT*8+4(RA)
+ | stw CARG1, FORL_EXT*8+WORD_LO(RA)
if (op != BC_JFORL) {
| add RD, PC, RD
}
@@ -4800,11 +5196,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Invert check for negative step.
| cmpw CARG2, CARG1
| b <1
- if (vk) {
- |6: // Potential overflow.
- | checkov TMP0, <4 // Ignore unrelated overflow.
- | b <2
- }
|.endif
if (vk) {
|.if DUALNUM
@@ -4815,14 +5206,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| lfd f3, FORL_STEP*8(RA)
| lfd f2, FORL_STOP*8(RA)
- | lwz TMP3, FORL_STEP*8(RA)
+ | lwz TMP3, FORL_STEP*8+WORD_HI(RA)
| fadd f1, f1, f3
| stfd f1, FORL_IDX*8(RA)
} else {
|.if DUALNUM
|9: // FP loop.
|.else
+ |.if ENDIAN_LE
+ | lwzx TMP1, RA, BASE_LO
+ | add RA, RA, BASE
+ |.else
| lwzux TMP1, RA, BASE
+ |.endif
| lwz TMP3, FORL_STEP*8(RA)
| lwz TMP2, FORL_STOP*8(RA)
| cmplw cr0, TMP1, TISNUM
@@ -4903,17 +5299,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
#endif
case BC_IITERL:
| // RA = base*8, RD = target
- | lwzux TMP1, RA, BASE
- | lwz TMP2, 4(RA)
+ | lwzux2 TMP1, TMP2, RA, BASE
| checknil TMP1; beq >1 // Stop if iterator returned nil.
if (op == BC_JITERL) {
- | stw TMP1, -8(RA)
- | stw TMP2, -4(RA)
+ | stw TMP1, WORD_HI-8(RA)
+ | stw TMP2, WORD_LO-8(RA)
| b =>BC_JLOOP
} else {
| branch_RD // Otherwise save control var + branch.
- | stw TMP1, -8(RA)
- | stw TMP2, -4(RA)
+ | stw TMP1, WORD_HI-8(RA)
+ | stw TMP2, WORD_LO-8(RA)
}
|1:
| ins_next
@@ -4942,7 +5337,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Traces on PPC don't store the trace number, so use 0.
| stw ZERO, DISPATCH_GL(vmstate)(DISPATCH)
| lwzx TRACE:TMP2, TMP1, RD
- | clrso TMP1
+ | mtxer ZERO
| lp TMP2, TRACE:TMP2->mcode
| stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
| mtctr TMP2
@@ -4994,7 +5389,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
|
|3: // Clear missing parameters.
- | stwx TISNIL, BASE, NARGS8:RC
+ | stwx TISNIL, BASE_HI, NARGS8:RC
| addi NARGS8:RC, NARGS8:RC, 8
| b <2
break;
@@ -5011,11 +5406,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz TMP2, L->maxstack
| add TMP1, BASE, RC
| add TMP0, RA, RC
- | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
+ | stw LFUNC:RB, WORD_LO(TMP1) // Store copy of LFUNC.
| addi TMP3, RC, 8+FRAME_VARG
| lwz KBASE, -4+PC2PROTO(k)(PC)
| cmplw TMP0, TMP2
- | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
+ | stw TMP3, WORD_HI(TMP1) // Store delta + FRAME_VARG.
| bge ->vm_growstack_l
| lbz TMP2, -4+PC2PROTO(numparams)(PC)
| mr RA, BASE
@@ -5026,18 +5421,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beq >3
|1:
| cmplw RA, RC // Less args than parameters?
- | lwz TMP0, 0(RA)
- | lwz TMP3, 4(RA)
+ | lwz TMP0, WORD_HI(RA)
+ | lwz TMP3, WORD_LO(RA)
| bge >4
- | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
+ | stw TISNIL, WORD_HI(RA) // Clear old fixarg slot (help the GC).
| addi RA, RA, 8
|2:
| addic. TMP2, TMP2, -1
- | stw TMP0, 8(TMP1)
- | stw TMP3, 12(TMP1)
+ | stw TMP0, WORD_HI+8(TMP1)
+ | stw TMP3, WORD_LO+8(TMP1)
| addi TMP1, TMP1, 8
| bne <1
|3:
+ | addi BASEP4, BASE, 4
| ins_next2
|
|4: // Clear missing parameters.
@@ -5049,35 +5445,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_FUNCCW:
| // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
if (op == BC_FUNCC) {
- | lp RD, CFUNC:RB->f
+ | lp FUNCREG, CFUNC:RB->f
} else {
- | lp RD, DISPATCH_GL(wrapf)(DISPATCH)
+ | lp FUNCREG, DISPATCH_GL(wrapf)(DISPATCH)
}
| add TMP1, RA, NARGS8:RC
| lwz TMP2, L->maxstack
- | .toc lp TMP3, 0(RD)
+ | .opd lp TMP3, 0(FUNCREG)
| add RC, BASE, NARGS8:RC
| stp BASE, L->base
| cmplw TMP1, TMP2
| stp RC, L->top
| li_vmstate C
- |.if TOC
+ |.if OPD
| mtctr TMP3
|.else
- | mtctr RD
+ | mtctr FUNCREG
|.endif
if (op == BC_FUNCCW) {
| lp CARG2, CFUNC:RB->f
}
| mr CARG1, L
| bgt ->vm_growstack_c // Need to grow stack.
- | .toc lp TOCREG, TOC_OFS(RD)
- | .tocenv lp ENVREG, ENV_OFS(RD)
+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
| st_vmstate
| bctrl // (lua_State *L [, lua_CFunction f])
+ | .toc lp TOCREG, SAVE_TOC
| // Returns nresults.
| lp BASE, L->base
- | .toc ld TOCREG, SAVE_TOC
| slwi RD, CRET1, 3
| lp TMP1, L->top
| li_vmstate INTERP
@@ -5128,7 +5524,11 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.byte 0x1\n"
"\t.string \"\"\n"
"\t.uleb128 0x1\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.sleb128 -8\n"
+#else
"\t.sleb128 -4\n"
+#endif
"\t.byte 65\n"
"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
"\t.align 2\n"
@@ -5141,14 +5541,24 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long .Lbegin\n"
"\t.long %d\n"
"\t.byte 0xe\n\t.uleb128 %d\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
+#endif
fcofs, CFRAME_SIZE);
for (i = 14; i <= 31; i++)
fprintf(ctx->fp,
"\t.byte %d\n\t.uleb128 %d\n"
"\t.byte %d\n\t.uleb128 %d\n",
- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
+#if LJ_ARCH_PPC32ON64
+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
+#else
+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
+#endif
+ );
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE0:\n\n");
@@ -5164,8 +5574,12 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long lj_vm_ffi_call\n"
#endif
"\t.long %d\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x8e\n\t.uleb128 2\n"
+#endif
+ "\t.byte 0x8e\n\t.uleb128 1\n"
"\t.byte 0xd\n\t.uleb128 0xe\n"
"\t.align 2\n"
".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
@@ -5180,7 +5594,11 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.byte 0x1\n"
"\t.string \"zPR\"\n"
"\t.uleb128 0x1\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.sleb128 -8\n"
+#else
"\t.sleb128 -4\n"
+#endif
"\t.byte 65\n"
"\t.uleb128 6\n" /* augmentation length */
"\t.byte 0x1b\n" /* pcrel|sdata4 */
@@ -5198,14 +5616,24 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long %d\n"
"\t.uleb128 0\n" /* augmentation length */
"\t.byte 0xe\n\t.uleb128 %d\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
+#endif
fcofs, CFRAME_SIZE);
for (i = 14; i <= 31; i++)
fprintf(ctx->fp,
"\t.byte %d\n\t.uleb128 %d\n"
"\t.byte %d\n\t.uleb128 %d\n",
- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
+#if LJ_ARCH_PPC32ON64
+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
+#else
+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
+#endif
+ );
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE2:\n\n");
@@ -5233,8 +5661,12 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long lj_vm_ffi_call-.\n"
"\t.long %d\n"
"\t.uleb128 0\n" /* augmentation length */
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x8e\n\t.uleb128 2\n"
+#endif
+ "\t.byte 0x8e\n\t.uleb128 1\n"
"\t.byte 0xd\n\t.uleb128 0xe\n"
"\t.align 2\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs);