Dirk Mueller
0ed966f036
- drop: valgrind-3.9.0-merge.patches.from.Paul.McKenney.patch, valgrind-3.9.0-ppc64le-abiv2.patch - add: VEX-r2803.diff, VEX-r2808.diff, VEX-r2816.diff VEX-r2904.diff, VEX-r2910.diff, VEX-r2914.diff, VEX-r2915.diff, VEX-r2916.diff, r13767.diff, r13770.diff, r14184.diff, r14238.diff, r14239.diff, r14240.diff, r14246.diff OBS-URL: https://build.opensuse.org/package/show/devel:tools/valgrind?expand=0&rev=114
21177 lines
816 KiB
Diff
21177 lines
816 KiB
Diff
Index: VEX/auxprogs/genoffsets.c
|
|
===================================================================
|
|
--- VEX/auxprogs/genoffsets.c.orig
|
|
+++ VEX/auxprogs/genoffsets.c
|
|
@@ -51,6 +51,7 @@
|
|
#include "../pub/libvex_guest_ppc32.h"
|
|
#include "../pub/libvex_guest_ppc64.h"
|
|
#include "../pub/libvex_guest_arm.h"
|
|
+#include "../pub/libvex_guest_arm64.h"
|
|
#include "../pub/libvex_guest_s390x.h"
|
|
#include "../pub/libvex_guest_mips32.h"
|
|
#include "../pub/libvex_guest_mips64.h"
|
|
@@ -159,6 +160,19 @@ void foo ( void )
|
|
GENOFFSET(ARM,arm,R14);
|
|
GENOFFSET(ARM,arm,R15T);
|
|
|
|
+ // arm64
|
|
+ GENOFFSET(ARM64,arm64,X0);
|
|
+ GENOFFSET(ARM64,arm64,X1);
|
|
+ GENOFFSET(ARM64,arm64,X2);
|
|
+ GENOFFSET(ARM64,arm64,X3);
|
|
+ GENOFFSET(ARM64,arm64,X4);
|
|
+ GENOFFSET(ARM64,arm64,X5);
|
|
+ GENOFFSET(ARM64,arm64,X6);
|
|
+ GENOFFSET(ARM64,arm64,X7);
|
|
+ GENOFFSET(ARM64,arm64,X8);
|
|
+ GENOFFSET(ARM64,arm64,SP);
|
|
+ GENOFFSET(ARM64,arm64,PC);
|
|
+
|
|
// s390x
|
|
GENOFFSET(S390X,s390x,r2);
|
|
GENOFFSET(S390X,s390x,r3);
|
|
Index: VEX/priv/host_arm64_defs.c
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ VEX/priv/host_arm64_defs.c
|
|
@@ -0,0 +1,5930 @@
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- begin host_arm64_defs.c ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+/*
|
|
+ This file is part of Valgrind, a dynamic binary instrumentation
|
|
+ framework.
|
|
+
|
|
+ Copyright (C) 2013-2013 OpenWorks
|
|
+ info@open-works.net
|
|
+
|
|
+ This program is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU General Public License as
|
|
+ published by the Free Software Foundation; either version 2 of the
|
|
+ License, or (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful, but
|
|
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ 02110-1301, USA.
|
|
+
|
|
+ The GNU General Public License is contained in the file COPYING.
|
|
+*/
|
|
+
|
|
+#include "libvex_basictypes.h"
|
|
+#include "libvex.h"
|
|
+#include "libvex_trc_values.h"
|
|
+
|
|
+#include "main_util.h"
|
|
+#include "host_generic_regs.h"
|
|
+#include "host_arm64_defs.h"
|
|
+
|
|
+//ZZ UInt arm_hwcaps = 0;
|
|
+
|
|
+
|
|
+/* --------- Registers. --------- */
|
|
+
|
|
+/* The usual HReg abstraction. We use the following classes only:
|
|
+ X regs (64 bit int)
|
|
+ D regs (64 bit float, also used for 32 bit float)
|
|
+ Q regs (128 bit vector)
|
|
+*/
|
|
+
|
|
+void ppHRegARM64 ( HReg reg ) {
|
|
+ Int r;
|
|
+ /* Be generic for all virtual regs. */
|
|
+ if (hregIsVirtual(reg)) {
|
|
+ ppHReg(reg);
|
|
+ return;
|
|
+ }
|
|
+ /* But specific for real regs. */
|
|
+ switch (hregClass(reg)) {
|
|
+ case HRcInt64:
|
|
+ r = hregNumber(reg);
|
|
+ vassert(r >= 0 && r < 31);
|
|
+ vex_printf("x%d", r);
|
|
+ return;
|
|
+ case HRcFlt64:
|
|
+ r = hregNumber(reg);
|
|
+ vassert(r >= 0 && r < 32);
|
|
+ vex_printf("d%d", r);
|
|
+ return;
|
|
+ case HRcVec128:
|
|
+ r = hregNumber(reg);
|
|
+ vassert(r >= 0 && r < 32);
|
|
+ vex_printf("q%d", r);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("ppHRegARM64");
|
|
+ }
|
|
+}
|
|
+
|
|
+static void ppHRegARM64asSreg ( HReg reg ) {
|
|
+ ppHRegARM64(reg);
|
|
+ vex_printf("(S-reg)");
|
|
+}
|
|
+
|
|
+HReg hregARM64_X0 ( void ) { return mkHReg(0, HRcInt64, False); }
|
|
+HReg hregARM64_X1 ( void ) { return mkHReg(1, HRcInt64, False); }
|
|
+HReg hregARM64_X2 ( void ) { return mkHReg(2, HRcInt64, False); }
|
|
+HReg hregARM64_X3 ( void ) { return mkHReg(3, HRcInt64, False); }
|
|
+HReg hregARM64_X4 ( void ) { return mkHReg(4, HRcInt64, False); }
|
|
+HReg hregARM64_X5 ( void ) { return mkHReg(5, HRcInt64, False); }
|
|
+HReg hregARM64_X6 ( void ) { return mkHReg(6, HRcInt64, False); }
|
|
+HReg hregARM64_X7 ( void ) { return mkHReg(7, HRcInt64, False); }
|
|
+//ZZ HReg hregARM_R8 ( void ) { return mkHReg(8, HRcInt32, False); }
|
|
+HReg hregARM64_X9 ( void ) { return mkHReg(9, HRcInt64, False); }
|
|
+HReg hregARM64_X10 ( void ) { return mkHReg(10, HRcInt64, False); }
|
|
+HReg hregARM64_X11 ( void ) { return mkHReg(11, HRcInt64, False); }
|
|
+HReg hregARM64_X12 ( void ) { return mkHReg(12, HRcInt64, False); }
|
|
+HReg hregARM64_X13 ( void ) { return mkHReg(13, HRcInt64, False); }
|
|
+HReg hregARM64_X14 ( void ) { return mkHReg(14, HRcInt64, False); }
|
|
+HReg hregARM64_X15 ( void ) { return mkHReg(15, HRcInt64, False); }
|
|
+HReg hregARM64_X21 ( void ) { return mkHReg(21, HRcInt64, False); }
|
|
+HReg hregARM64_X22 ( void ) { return mkHReg(22, HRcInt64, False); }
|
|
+HReg hregARM64_X23 ( void ) { return mkHReg(23, HRcInt64, False); }
|
|
+HReg hregARM64_X24 ( void ) { return mkHReg(24, HRcInt64, False); }
|
|
+HReg hregARM64_X25 ( void ) { return mkHReg(25, HRcInt64, False); }
|
|
+HReg hregARM64_X26 ( void ) { return mkHReg(26, HRcInt64, False); }
|
|
+HReg hregARM64_X27 ( void ) { return mkHReg(27, HRcInt64, False); }
|
|
+HReg hregARM64_X28 ( void ) { return mkHReg(28, HRcInt64, False); }
|
|
+
|
|
+// Should really use D8 .. D15 for class F64, since they are callee
|
|
+// save
|
|
+HReg hregARM64_D8 ( void ) { return mkHReg(8, HRcFlt64, False); }
|
|
+HReg hregARM64_D9 ( void ) { return mkHReg(9, HRcFlt64, False); }
|
|
+HReg hregARM64_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
|
|
+HReg hregARM64_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
|
|
+HReg hregARM64_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
|
|
+HReg hregARM64_D13 ( void ) { return mkHReg(13, HRcFlt64, False); }
|
|
+//ZZ HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
|
|
+//ZZ HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
|
|
+//ZZ HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
|
|
+//ZZ HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
|
|
+//ZZ HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
|
|
+HReg hregARM64_Q16 ( void ) { return mkHReg(16, HRcVec128, False); }
|
|
+HReg hregARM64_Q17 ( void ) { return mkHReg(17, HRcVec128, False); }
|
|
+HReg hregARM64_Q18 ( void ) { return mkHReg(18, HRcVec128, False); }
|
|
+//ZZ HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
|
|
+//ZZ HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
|
|
+//ZZ HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
|
|
+//ZZ HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
|
|
+//ZZ HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
|
|
+
|
|
+void getAllocableRegs_ARM64 ( Int* nregs, HReg** arr )
|
|
+{
|
|
+ Int i = 0;
|
|
+ *nregs = 24;
|
|
+ *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
|
|
+
|
|
+ // callee saves ones (22 to 28) are listed first, since we prefer
|
|
+ // them if they're available
|
|
+ (*arr)[i++] = hregARM64_X22();
|
|
+ (*arr)[i++] = hregARM64_X23();
|
|
+ (*arr)[i++] = hregARM64_X24();
|
|
+ (*arr)[i++] = hregARM64_X25();
|
|
+ (*arr)[i++] = hregARM64_X26();
|
|
+ (*arr)[i++] = hregARM64_X27();
|
|
+ (*arr)[i++] = hregARM64_X28();
|
|
+
|
|
+ (*arr)[i++] = hregARM64_X0();
|
|
+ (*arr)[i++] = hregARM64_X1();
|
|
+ (*arr)[i++] = hregARM64_X2();
|
|
+ (*arr)[i++] = hregARM64_X3();
|
|
+ (*arr)[i++] = hregARM64_X4();
|
|
+ (*arr)[i++] = hregARM64_X5();
|
|
+ (*arr)[i++] = hregARM64_X6();
|
|
+ (*arr)[i++] = hregARM64_X7();
|
|
+ // X8 .. who knows.
|
|
+ // X9 is a chaining/spill temporary, not available to regalloc.
|
|
+
|
|
+ // Do we really need all these?
|
|
+ //(*arr)[i++] = hregARM64_X10();
|
|
+ //(*arr)[i++] = hregARM64_X11();
|
|
+ //(*arr)[i++] = hregARM64_X12();
|
|
+ //(*arr)[i++] = hregARM64_X13();
|
|
+ //(*arr)[i++] = hregARM64_X14();
|
|
+ //(*arr)[i++] = hregARM64_X15();
|
|
+ // X21 is the guest state pointer, not available to regalloc.
|
|
+
|
|
+ // vector regs. Unfortunately not callee-saved.
|
|
+ (*arr)[i++] = hregARM64_Q16();
|
|
+ (*arr)[i++] = hregARM64_Q17();
|
|
+ (*arr)[i++] = hregARM64_Q18();
|
|
+
|
|
+ // F64 regs, all of which are callee-saved
|
|
+ (*arr)[i++] = hregARM64_D8();
|
|
+ (*arr)[i++] = hregARM64_D9();
|
|
+ (*arr)[i++] = hregARM64_D10();
|
|
+ (*arr)[i++] = hregARM64_D11();
|
|
+ (*arr)[i++] = hregARM64_D12();
|
|
+ (*arr)[i++] = hregARM64_D13();
|
|
+
|
|
+ // unavail: x21 as GSP
|
|
+ // x9 is used as a spill/reload/chaining/call temporary
|
|
+ // x8 is unassigned
|
|
+ // x30 as LR
|
|
+ // x31 because dealing with the SP-vs-ZR overloading is too
|
|
+ // confusing, and we don't need to do so, so let's just avoid
|
|
+ // the problem
|
|
+ //
|
|
+ // Currently, we have 15 allocatable integer registers:
|
|
+ // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
|
|
+ //
|
|
+ // Hence for the allocatable integer registers we have:
|
|
+ //
|
|
+ // callee-saved: 22 23 24 25 26 27 28
|
|
+ // caller-saved: 0 1 2 3 4 5 6 7
|
|
+ //
|
|
+ // If the set of available registers changes or if the e/r status
|
|
+ // changes, be sure to re-check/sync the definition of
|
|
+ // getHRegUsage for ARMInstr_Call too.
|
|
+ vassert(i == *nregs);
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/* --------- Condition codes, ARM64 encoding. --------- */
|
|
+
|
|
+static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
|
|
+ switch (cond) {
|
|
+ case ARM64cc_EQ: return "eq";
|
|
+ case ARM64cc_NE: return "ne";
|
|
+ case ARM64cc_CS: return "cs";
|
|
+ case ARM64cc_CC: return "cc";
|
|
+ case ARM64cc_MI: return "mi";
|
|
+ case ARM64cc_PL: return "pl";
|
|
+ case ARM64cc_VS: return "vs";
|
|
+ case ARM64cc_VC: return "vc";
|
|
+ case ARM64cc_HI: return "hi";
|
|
+ case ARM64cc_LS: return "ls";
|
|
+ case ARM64cc_GE: return "ge";
|
|
+ case ARM64cc_LT: return "lt";
|
|
+ case ARM64cc_GT: return "gt";
|
|
+ case ARM64cc_LE: return "le";
|
|
+ case ARM64cc_AL: return "al"; // default
|
|
+ case ARM64cc_NV: return "nv";
|
|
+ default: vpanic("showARM64CondCode");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* --------- Memory address expressions (amodes). --------- */
|
|
+
|
|
+ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
|
|
+ ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
|
|
+ am->tag = ARM64am_RI9;
|
|
+ am->ARM64am.RI9.reg = reg;
|
|
+ am->ARM64am.RI9.simm9 = simm9;
|
|
+ vassert(-256 <= simm9 && simm9 <= 255);
|
|
+ return am;
|
|
+}
|
|
+
|
|
+ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
|
|
+ ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
|
|
+ am->tag = ARM64am_RI12;
|
|
+ am->ARM64am.RI12.reg = reg;
|
|
+ am->ARM64am.RI12.uimm12 = uimm12;
|
|
+ am->ARM64am.RI12.szB = szB;
|
|
+ vassert(uimm12 >= 0 && uimm12 <= 4095);
|
|
+ switch (szB) {
|
|
+ case 1: case 2: case 4: case 8: break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ return am;
|
|
+}
|
|
+
|
|
+ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
|
|
+ ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
|
|
+ am->tag = ARM64am_RR;
|
|
+ am->ARM64am.RR.base = base;
|
|
+ am->ARM64am.RR.index = index;
|
|
+ return am;
|
|
+}
|
|
+
|
|
+static void ppARM64AMode ( ARM64AMode* am ) {
|
|
+ switch (am->tag) {
|
|
+ case ARM64am_RI9:
|
|
+ vex_printf("%d(", am->ARM64am.RI9.simm9);
|
|
+ ppHRegARM64(am->ARM64am.RI9.reg);
|
|
+ vex_printf(")");
|
|
+ break;
|
|
+ case ARM64am_RI12:
|
|
+ vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
|
|
+ * (UInt)am->ARM64am.RI12.uimm12);
|
|
+ ppHRegARM64(am->ARM64am.RI12.reg);
|
|
+ vex_printf(")");
|
|
+ break;
|
|
+ case ARM64am_RR:
|
|
+ vex_printf("(");
|
|
+ ppHRegARM64(am->ARM64am.RR.base);
|
|
+ vex_printf(",");
|
|
+ ppHRegARM64(am->ARM64am.RR.index);
|
|
+ vex_printf(")");
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
|
|
+ switch (am->tag) {
|
|
+ case ARM64am_RI9:
|
|
+ addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
|
|
+ return;
|
|
+ case ARM64am_RI12:
|
|
+ addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
|
|
+ return;
|
|
+ case ARM64am_RR:
|
|
+ addHRegUse(u, HRmRead, am->ARM64am.RR.base);
|
|
+ addHRegUse(u, HRmRead, am->ARM64am.RR.index);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("addRegUsage_ARM64Amode");
|
|
+ }
|
|
+}
|
|
+
|
|
+static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
|
|
+ switch (am->tag) {
|
|
+ case ARM64am_RI9:
|
|
+ am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
|
|
+ return;
|
|
+ case ARM64am_RI12:
|
|
+ am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
|
|
+ return;
|
|
+ case ARM64am_RR:
|
|
+ am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
|
|
+ am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("mapRegs_ARM64Amode");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+//ZZ /* --------- Mem AModes: Addressing Mode 2 --------- */
|
|
+//ZZ
|
|
+//ZZ ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
|
|
+//ZZ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
|
|
+//ZZ am->tag = ARMam2_RI;
|
|
+//ZZ am->ARMam2.RI.reg = reg;
|
|
+//ZZ am->ARMam2.RI.simm9 = simm9;
|
|
+//ZZ vassert(-255 <= simm9 && simm9 <= 255);
|
|
+//ZZ return am;
|
|
+//ZZ }
|
|
+//ZZ ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
|
|
+//ZZ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
|
|
+//ZZ am->tag = ARMam2_RR;
|
|
+//ZZ am->ARMam2.RR.base = base;
|
|
+//ZZ am->ARMam2.RR.index = index;
|
|
+//ZZ return am;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ void ppARMAMode2 ( ARMAMode2* am ) {
|
|
+//ZZ switch (am->tag) {
|
|
+//ZZ case ARMam2_RI:
|
|
+//ZZ vex_printf("%d(", am->ARMam2.RI.simm9);
|
|
+//ZZ ppHRegARM(am->ARMam2.RI.reg);
|
|
+//ZZ vex_printf(")");
|
|
+//ZZ break;
|
|
+//ZZ case ARMam2_RR:
|
|
+//ZZ vex_printf("(");
|
|
+//ZZ ppHRegARM(am->ARMam2.RR.base);
|
|
+//ZZ vex_printf(",");
|
|
+//ZZ ppHRegARM(am->ARMam2.RR.index);
|
|
+//ZZ vex_printf(")");
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ vassert(0);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
|
|
+//ZZ switch (am->tag) {
|
|
+//ZZ case ARMam2_RI:
|
|
+//ZZ addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
|
|
+//ZZ return;
|
|
+//ZZ case ARMam2_RR:
|
|
+//ZZ // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
|
|
+//ZZ // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
|
|
+//ZZ // return;
|
|
+//ZZ default:
|
|
+//ZZ vpanic("addRegUsage_ARMAmode2");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
|
|
+//ZZ switch (am->tag) {
|
|
+//ZZ case ARMam2_RI:
|
|
+//ZZ am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
|
|
+//ZZ return;
|
|
+//ZZ case ARMam2_RR:
|
|
+//ZZ //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
|
|
+//ZZ //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
|
|
+//ZZ //return;
|
|
+//ZZ default:
|
|
+//ZZ vpanic("mapRegs_ARMAmode2");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /* --------- Mem AModes: Addressing Mode VFP --------- */
|
|
+//ZZ
|
|
+//ZZ ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
|
|
+//ZZ ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
|
|
+//ZZ vassert(simm11 >= -1020 && simm11 <= 1020);
|
|
+//ZZ vassert(0 == (simm11 & 3));
|
|
+//ZZ am->reg = reg;
|
|
+//ZZ am->simm11 = simm11;
|
|
+//ZZ return am;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ void ppARMAModeV ( ARMAModeV* am ) {
|
|
+//ZZ vex_printf("%d(", am->simm11);
|
|
+//ZZ ppHRegARM(am->reg);
|
|
+//ZZ vex_printf(")");
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
|
|
+//ZZ addHRegUse(u, HRmRead, am->reg);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
|
|
+//ZZ am->reg = lookupHRegRemap(m, am->reg);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /* --------- Mem AModes: Addressing Mode Neon ------- */
|
|
+//ZZ
|
|
+//ZZ ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
|
|
+//ZZ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
|
|
+//ZZ am->tag = ARMamN_RR;
|
|
+//ZZ am->ARMamN.RR.rN = rN;
|
|
+//ZZ am->ARMamN.RR.rM = rM;
|
|
+//ZZ return am;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMAModeN *mkARMAModeN_R ( HReg rN ) {
|
|
+//ZZ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
|
|
+//ZZ am->tag = ARMamN_R;
|
|
+//ZZ am->ARMamN.R.rN = rN;
|
|
+//ZZ return am;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
|
|
+//ZZ if (am->tag == ARMamN_R) {
|
|
+//ZZ addHRegUse(u, HRmRead, am->ARMamN.R.rN);
|
|
+//ZZ } else {
|
|
+//ZZ addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
|
|
+//ZZ addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
|
|
+//ZZ if (am->tag == ARMamN_R) {
|
|
+//ZZ am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
|
|
+//ZZ } else {
|
|
+//ZZ am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
|
|
+//ZZ am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ void ppARMAModeN ( ARMAModeN* am ) {
|
|
+//ZZ vex_printf("[");
|
|
+//ZZ if (am->tag == ARMamN_R) {
|
|
+//ZZ ppHRegARM(am->ARMamN.R.rN);
|
|
+//ZZ } else {
|
|
+//ZZ ppHRegARM(am->ARMamN.RR.rN);
|
|
+//ZZ }
|
|
+//ZZ vex_printf("]");
|
|
+//ZZ if (am->tag == ARMamN_RR) {
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(am->ARMamN.RR.rM);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/* --------- Reg or uimm12<<{0,12} operands --------- */
|
|
+
|
|
+ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
|
|
+ ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA));
|
|
+ riA->tag = ARM64riA_I12;
|
|
+ riA->ARM64riA.I12.imm12 = imm12;
|
|
+ riA->ARM64riA.I12.shift = shift;
|
|
+ vassert(imm12 < 4096);
|
|
+ vassert(shift == 0 || shift == 12);
|
|
+ return riA;
|
|
+}
|
|
+ARM64RIA* ARM64RIA_R ( HReg reg ) {
|
|
+ ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA));
|
|
+ riA->tag = ARM64riA_R;
|
|
+ riA->ARM64riA.R.reg = reg;
|
|
+ return riA;
|
|
+}
|
|
+
|
|
+static void ppARM64RIA ( ARM64RIA* riA ) {
|
|
+ switch (riA->tag) {
|
|
+ case ARM64riA_I12:
|
|
+ vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
|
|
+ << riA->ARM64riA.I12.shift));
|
|
+ break;
|
|
+ case ARM64riA_R:
|
|
+ ppHRegARM64(riA->ARM64riA.R.reg);
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
|
|
+ switch (riA->tag) {
|
|
+ case ARM64riA_I12:
|
|
+ return;
|
|
+ case ARM64riA_R:
|
|
+ addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("addRegUsage_ARM64RIA");
|
|
+ }
|
|
+}
|
|
+
|
|
+static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
|
|
+ switch (riA->tag) {
|
|
+ case ARM64riA_I12:
|
|
+ return;
|
|
+ case ARM64riA_R:
|
|
+ riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("mapRegs_ARM64RIA");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* --------- Reg or "bitfield" (logic immediate) operands --------- */
|
|
+
|
|
+ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
|
|
+ ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL));
|
|
+ riL->tag = ARM64riL_I13;
|
|
+ riL->ARM64riL.I13.bitN = bitN;
|
|
+ riL->ARM64riL.I13.immR = immR;
|
|
+ riL->ARM64riL.I13.immS = immS;
|
|
+ vassert(bitN < 2);
|
|
+ vassert(immR < 64);
|
|
+ vassert(immS < 64);
|
|
+ return riL;
|
|
+}
|
|
+ARM64RIL* ARM64RIL_R ( HReg reg ) {
|
|
+ ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL));
|
|
+ riL->tag = ARM64riL_R;
|
|
+ riL->ARM64riL.R.reg = reg;
|
|
+ return riL;
|
|
+}
|
|
+
|
|
+static void ppARM64RIL ( ARM64RIL* riL ) {
|
|
+ switch (riL->tag) {
|
|
+ case ARM64riL_I13:
|
|
+ vex_printf("#nrs(%u,%u,%u)",
|
|
+ (UInt)riL->ARM64riL.I13.bitN,
|
|
+ (UInt)riL->ARM64riL.I13.immR,
|
|
+ (UInt)riL->ARM64riL.I13.immS);
|
|
+ break;
|
|
+ case ARM64riL_R:
|
|
+ ppHRegARM64(riL->ARM64riL.R.reg);
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
|
|
+ switch (riL->tag) {
|
|
+ case ARM64riL_I13:
|
|
+ return;
|
|
+ case ARM64riL_R:
|
|
+ addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("addRegUsage_ARM64RIL");
|
|
+ }
|
|
+}
|
|
+
|
|
+static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
|
|
+ switch (riL->tag) {
|
|
+ case ARM64riL_I13:
|
|
+ return;
|
|
+ case ARM64riL_R:
|
|
+ riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("mapRegs_ARM64RIL");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* --------------- Reg or uimm6 operands --------------- */
|
|
+
|
|
+ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
|
|
+ ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6));
|
|
+ ri6->tag = ARM64ri6_I6;
|
|
+ ri6->ARM64ri6.I6.imm6 = imm6;
|
|
+ vassert(imm6 > 0 && imm6 < 64);
|
|
+ return ri6;
|
|
+}
|
|
+ARM64RI6* ARM64RI6_R ( HReg reg ) {
|
|
+ ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6));
|
|
+ ri6->tag = ARM64ri6_R;
|
|
+ ri6->ARM64ri6.R.reg = reg;
|
|
+ return ri6;
|
|
+}
|
|
+
|
|
+static void ppARM64RI6 ( ARM64RI6* ri6 ) {
|
|
+ switch (ri6->tag) {
|
|
+ case ARM64ri6_I6:
|
|
+ vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
|
|
+ break;
|
|
+ case ARM64ri6_R:
|
|
+ ppHRegARM64(ri6->ARM64ri6.R.reg);
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
|
|
+ switch (ri6->tag) {
|
|
+ case ARM64ri6_I6:
|
|
+ return;
|
|
+ case ARM64ri6_R:
|
|
+ addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("addRegUsage_ARM64RI6");
|
|
+ }
|
|
+}
|
|
+
|
|
+static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
|
|
+ switch (ri6->tag) {
|
|
+ case ARM64ri6_I6:
|
|
+ return;
|
|
+ case ARM64ri6_R:
|
|
+ ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
|
|
+ return;
|
|
+ default:
|
|
+ vpanic("mapRegs_ARM64RI6");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+//ZZ /* -------- Neon Immediate operand --------- */
|
|
+//ZZ
|
|
+//ZZ ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
|
|
+//ZZ ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
|
|
+//ZZ i->type = type;
|
|
+//ZZ i->imm8 = imm8;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
|
|
+//ZZ int i, j;
|
|
+//ZZ ULong y, x = imm->imm8;
|
|
+//ZZ switch (imm->type) {
|
|
+//ZZ case 3:
|
|
+//ZZ x = x << 8; /* fallthrough */
|
|
+//ZZ case 2:
|
|
+//ZZ x = x << 8; /* fallthrough */
|
|
+//ZZ case 1:
|
|
+//ZZ x = x << 8; /* fallthrough */
|
|
+//ZZ case 0:
|
|
+//ZZ return (x << 32) | x;
|
|
+//ZZ case 5:
|
|
+//ZZ case 6:
|
|
+//ZZ if (imm->type == 5)
|
|
+//ZZ x = x << 8;
|
|
+//ZZ else
|
|
+//ZZ x = (x << 8) | x;
|
|
+//ZZ /* fallthrough */
|
|
+//ZZ case 4:
|
|
+//ZZ x = (x << 16) | x;
|
|
+//ZZ return (x << 32) | x;
|
|
+//ZZ case 8:
|
|
+//ZZ x = (x << 8) | 0xFF;
|
|
+//ZZ /* fallthrough */
|
|
+//ZZ case 7:
|
|
+//ZZ x = (x << 8) | 0xFF;
|
|
+//ZZ return (x << 32) | x;
|
|
+//ZZ case 9:
|
|
+//ZZ x = 0;
|
|
+//ZZ for (i = 7; i >= 0; i--) {
|
|
+//ZZ y = ((ULong)imm->imm8 >> i) & 1;
|
|
+//ZZ for (j = 0; j < 8; j++) {
|
|
+//ZZ x = (x << 1) | y;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ return x;
|
|
+//ZZ case 10:
|
|
+//ZZ x |= (x & 0x80) << 5;
|
|
+//ZZ x |= (~x & 0x40) << 5;
|
|
+//ZZ x &= 0x187F; /* 0001 1000 0111 1111 */
|
|
+//ZZ x |= (x & 0x40) << 4;
|
|
+//ZZ x |= (x & 0x40) << 3;
|
|
+//ZZ x |= (x & 0x40) << 2;
|
|
+//ZZ x |= (x & 0x40) << 1;
|
|
+//ZZ x = x << 19;
|
|
+//ZZ x = (x << 32) | x;
|
|
+//ZZ return x;
|
|
+//ZZ default:
|
|
+//ZZ vpanic("ARMNImm_to_Imm64");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
|
|
+//ZZ ARMNImm tmp;
|
|
+//ZZ if ((x & 0xFFFFFFFF) == (x >> 32)) {
|
|
+//ZZ if ((x & 0xFFFFFF00) == 0)
|
|
+//ZZ return ARMNImm_TI(0, x & 0xFF);
|
|
+//ZZ if ((x & 0xFFFF00FF) == 0)
|
|
+//ZZ return ARMNImm_TI(1, (x >> 8) & 0xFF);
|
|
+//ZZ if ((x & 0xFF00FFFF) == 0)
|
|
+//ZZ return ARMNImm_TI(2, (x >> 16) & 0xFF);
|
|
+//ZZ if ((x & 0x00FFFFFF) == 0)
|
|
+//ZZ return ARMNImm_TI(3, (x >> 24) & 0xFF);
|
|
+//ZZ if ((x & 0xFFFF00FF) == 0xFF)
|
|
+//ZZ return ARMNImm_TI(7, (x >> 8) & 0xFF);
|
|
+//ZZ if ((x & 0xFF00FFFF) == 0xFFFF)
|
|
+//ZZ return ARMNImm_TI(8, (x >> 16) & 0xFF);
|
|
+//ZZ if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
|
|
+//ZZ if ((x & 0xFF00) == 0)
|
|
+//ZZ return ARMNImm_TI(4, x & 0xFF);
|
|
+//ZZ if ((x & 0x00FF) == 0)
|
|
+//ZZ return ARMNImm_TI(5, (x >> 8) & 0xFF);
|
|
+//ZZ if ((x & 0xFF) == ((x >> 8) & 0xFF))
|
|
+//ZZ return ARMNImm_TI(6, x & 0xFF);
|
|
+//ZZ }
|
|
+//ZZ if ((x & 0x7FFFF) == 0) {
|
|
+//ZZ tmp.type = 10;
|
|
+//ZZ tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
|
|
+//ZZ if (ARMNImm_to_Imm64(&tmp) == x)
|
|
+//ZZ return ARMNImm_TI(tmp.type, tmp.imm8);
|
|
+//ZZ }
|
|
+//ZZ } else {
|
|
+//ZZ /* This can only be type 9. */
|
|
+//ZZ tmp.imm8 = (((x >> 56) & 1) << 7)
|
|
+//ZZ | (((x >> 48) & 1) << 6)
|
|
+//ZZ | (((x >> 40) & 1) << 5)
|
|
+//ZZ | (((x >> 32) & 1) << 4)
|
|
+//ZZ | (((x >> 24) & 1) << 3)
|
|
+//ZZ | (((x >> 16) & 1) << 2)
|
|
+//ZZ | (((x >> 8) & 1) << 1)
|
|
+//ZZ | (((x >> 0) & 1) << 0);
|
|
+//ZZ tmp.type = 9;
|
|
+//ZZ if (ARMNImm_to_Imm64 (&tmp) == x)
|
|
+//ZZ return ARMNImm_TI(tmp.type, tmp.imm8);
|
|
+//ZZ }
|
|
+//ZZ return NULL;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ void ppARMNImm (ARMNImm* i) {
|
|
+//ZZ ULong x = ARMNImm_to_Imm64(i);
|
|
+//ZZ vex_printf("0x%llX%llX", x, x);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* -- Register or scalar operand --- */
|
|
+//ZZ
|
|
+//ZZ ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
|
|
+//ZZ {
|
|
+//ZZ ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
|
|
+//ZZ p->tag = tag;
|
|
+//ZZ p->reg = reg;
|
|
+//ZZ p->index = index;
|
|
+//ZZ return p;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ void ppARMNRS(ARMNRS *p)
|
|
+//ZZ {
|
|
+//ZZ ppHRegARM(p->reg);
|
|
+//ZZ if (p->tag == ARMNRS_Scalar) {
|
|
+//ZZ vex_printf("[%d]", p->index);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+
|
|
+/* --------- Instructions. --------- */
|
|
+
|
|
+static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
|
|
+ switch (op) {
|
|
+ case ARM64lo_AND: return "and";
|
|
+ case ARM64lo_OR: return "orr";
|
|
+ case ARM64lo_XOR: return "eor";
|
|
+ default: vpanic("showARM64LogicOp");
|
|
+ }
|
|
+}
|
|
+
|
|
+static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
|
|
+ switch (op) {
|
|
+ case ARM64sh_SHL: return "lsl";
|
|
+ case ARM64sh_SHR: return "lsr";
|
|
+ case ARM64sh_SAR: return "asr";
|
|
+ default: vpanic("showARM64ShiftOp");
|
|
+ }
|
|
+}
|
|
+
|
|
+static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
|
|
+ switch (op) {
|
|
+ case ARM64un_NEG: return "neg";
|
|
+ case ARM64un_NOT: return "not";
|
|
+ case ARM64un_CLZ: return "clz";
|
|
+ default: vpanic("showARM64UnaryOp");
|
|
+ }
|
|
+}
|
|
+
|
|
+static const HChar* showARM64MulOp ( ARM64MulOp op ) {
|
|
+ switch (op) {
|
|
+ case ARM64mul_PLAIN: return "mul ";
|
|
+ case ARM64mul_ZX: return "umulh";
|
|
+ case ARM64mul_SX: return "smulh";
|
|
+ default: vpanic("showARM64MulOp");
|
|
+ }
|
|
+}
|
|
+
|
|
+static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
|
|
+ /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
|
|
+ ARM64CvtOp op ) {
|
|
+ switch (op) {
|
|
+ case ARM64cvt_F32_I32S:
|
|
+ *syn = 's'; *fszB = 4; *iszB = 4; break;
|
|
+ case ARM64cvt_F64_I32S:
|
|
+ *syn = 's'; *fszB = 8; *iszB = 4; break;
|
|
+ case ARM64cvt_F32_I64S:
|
|
+ *syn = 's'; *fszB = 4; *iszB = 8; break;
|
|
+ case ARM64cvt_F64_I64S:
|
|
+ *syn = 's'; *fszB = 8; *iszB = 8; break;
|
|
+ case ARM64cvt_F32_I32U:
|
|
+ *syn = 'u'; *fszB = 4; *iszB = 4; break;
|
|
+ case ARM64cvt_F64_I32U:
|
|
+ *syn = 'u'; *fszB = 8; *iszB = 4; break;
|
|
+ case ARM64cvt_F32_I64U:
|
|
+ *syn = 'u'; *fszB = 4; *iszB = 8; break;
|
|
+ case ARM64cvt_F64_I64U:
|
|
+ *syn = 'u'; *fszB = 8; *iszB = 8; break;
|
|
+ default:
|
|
+ vpanic("characteriseARM64CvtOp");
|
|
+ }
|
|
+}
|
|
+
|
|
+static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
|
|
+ switch (op) {
|
|
+ case ARM64fpb_ADD: return "add";
|
|
+ case ARM64fpb_SUB: return "sub";
|
|
+ case ARM64fpb_MUL: return "mul";
|
|
+ case ARM64fpb_DIV: return "div";
|
|
+ default: vpanic("showARM64FpBinOp");
|
|
+ }
|
|
+}
|
|
+
|
|
+static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
|
|
+ switch (op) {
|
|
+ case ARM64fpu_NEG: return "neg ";
|
|
+ case ARM64fpu_ABS: return "abs ";
|
|
+ case ARM64fpu_SQRT: return "sqrt ";
|
|
+ case ARM64fpu_RINT: return "rinti";
|
|
+ default: vpanic("showARM64FpUnaryOp");
|
|
+ }
|
|
+}
|
|
+
|
|
+//ZZ const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_VAND: return "vand";
|
|
+//ZZ case ARMneon_VORR: return "vorr";
|
|
+//ZZ case ARMneon_VXOR: return "veor";
|
|
+//ZZ case ARMneon_VADD: return "vadd";
|
|
+//ZZ case ARMneon_VRHADDS: return "vrhadd";
|
|
+//ZZ case ARMneon_VRHADDU: return "vrhadd";
|
|
+//ZZ case ARMneon_VADDFP: return "vadd";
|
|
+//ZZ case ARMneon_VPADDFP: return "vpadd";
|
|
+//ZZ case ARMneon_VABDFP: return "vabd";
|
|
+//ZZ case ARMneon_VSUB: return "vsub";
|
|
+//ZZ case ARMneon_VSUBFP: return "vsub";
|
|
+//ZZ case ARMneon_VMINU: return "vmin";
|
|
+//ZZ case ARMneon_VMINS: return "vmin";
|
|
+//ZZ case ARMneon_VMINF: return "vmin";
|
|
+//ZZ case ARMneon_VMAXU: return "vmax";
|
|
+//ZZ case ARMneon_VMAXS: return "vmax";
|
|
+//ZZ case ARMneon_VMAXF: return "vmax";
|
|
+//ZZ case ARMneon_VQADDU: return "vqadd";
|
|
+//ZZ case ARMneon_VQADDS: return "vqadd";
|
|
+//ZZ case ARMneon_VQSUBU: return "vqsub";
|
|
+//ZZ case ARMneon_VQSUBS: return "vqsub";
|
|
+//ZZ case ARMneon_VCGTU: return "vcgt";
|
|
+//ZZ case ARMneon_VCGTS: return "vcgt";
|
|
+//ZZ case ARMneon_VCGTF: return "vcgt";
|
|
+//ZZ case ARMneon_VCGEF: return "vcgt";
|
|
+//ZZ case ARMneon_VCGEU: return "vcge";
|
|
+//ZZ case ARMneon_VCGES: return "vcge";
|
|
+//ZZ case ARMneon_VCEQ: return "vceq";
|
|
+//ZZ case ARMneon_VCEQF: return "vceq";
|
|
+//ZZ case ARMneon_VPADD: return "vpadd";
|
|
+//ZZ case ARMneon_VPMINU: return "vpmin";
|
|
+//ZZ case ARMneon_VPMINS: return "vpmin";
|
|
+//ZZ case ARMneon_VPMINF: return "vpmin";
|
|
+//ZZ case ARMneon_VPMAXU: return "vpmax";
|
|
+//ZZ case ARMneon_VPMAXS: return "vpmax";
|
|
+//ZZ case ARMneon_VPMAXF: return "vpmax";
|
|
+//ZZ case ARMneon_VEXT: return "vext";
|
|
+//ZZ case ARMneon_VMUL: return "vmuli";
|
|
+//ZZ case ARMneon_VMULLU: return "vmull";
|
|
+//ZZ case ARMneon_VMULLS: return "vmull";
|
|
+//ZZ case ARMneon_VMULP: return "vmul";
|
|
+//ZZ case ARMneon_VMULFP: return "vmul";
|
|
+//ZZ case ARMneon_VMULLP: return "vmul";
|
|
+//ZZ case ARMneon_VQDMULH: return "vqdmulh";
|
|
+//ZZ case ARMneon_VQRDMULH: return "vqrdmulh";
|
|
+//ZZ case ARMneon_VQDMULL: return "vqdmull";
|
|
+//ZZ case ARMneon_VTBL: return "vtbl";
|
|
+//ZZ case ARMneon_VRECPS: return "vrecps";
|
|
+//ZZ case ARMneon_VRSQRTS: return "vrecps";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonBinOp");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_VAND:
|
|
+//ZZ case ARMneon_VORR:
|
|
+//ZZ case ARMneon_VXOR:
|
|
+//ZZ return "";
|
|
+//ZZ case ARMneon_VADD:
|
|
+//ZZ case ARMneon_VSUB:
|
|
+//ZZ case ARMneon_VEXT:
|
|
+//ZZ case ARMneon_VMUL:
|
|
+//ZZ case ARMneon_VPADD:
|
|
+//ZZ case ARMneon_VTBL:
|
|
+//ZZ case ARMneon_VCEQ:
|
|
+//ZZ return ".i";
|
|
+//ZZ case ARMneon_VRHADDU:
|
|
+//ZZ case ARMneon_VMINU:
|
|
+//ZZ case ARMneon_VMAXU:
|
|
+//ZZ case ARMneon_VQADDU:
|
|
+//ZZ case ARMneon_VQSUBU:
|
|
+//ZZ case ARMneon_VCGTU:
|
|
+//ZZ case ARMneon_VCGEU:
|
|
+//ZZ case ARMneon_VMULLU:
|
|
+//ZZ case ARMneon_VPMINU:
|
|
+//ZZ case ARMneon_VPMAXU:
|
|
+//ZZ return ".u";
|
|
+//ZZ case ARMneon_VRHADDS:
|
|
+//ZZ case ARMneon_VMINS:
|
|
+//ZZ case ARMneon_VMAXS:
|
|
+//ZZ case ARMneon_VQADDS:
|
|
+//ZZ case ARMneon_VQSUBS:
|
|
+//ZZ case ARMneon_VCGTS:
|
|
+//ZZ case ARMneon_VCGES:
|
|
+//ZZ case ARMneon_VQDMULL:
|
|
+//ZZ case ARMneon_VMULLS:
|
|
+//ZZ case ARMneon_VPMINS:
|
|
+//ZZ case ARMneon_VPMAXS:
|
|
+//ZZ case ARMneon_VQDMULH:
|
|
+//ZZ case ARMneon_VQRDMULH:
|
|
+//ZZ return ".s";
|
|
+//ZZ case ARMneon_VMULP:
|
|
+//ZZ case ARMneon_VMULLP:
|
|
+//ZZ return ".p";
|
|
+//ZZ case ARMneon_VADDFP:
|
|
+//ZZ case ARMneon_VABDFP:
|
|
+//ZZ case ARMneon_VPADDFP:
|
|
+//ZZ case ARMneon_VSUBFP:
|
|
+//ZZ case ARMneon_VMULFP:
|
|
+//ZZ case ARMneon_VMINF:
|
|
+//ZZ case ARMneon_VMAXF:
|
|
+//ZZ case ARMneon_VPMINF:
|
|
+//ZZ case ARMneon_VPMAXF:
|
|
+//ZZ case ARMneon_VCGTF:
|
|
+//ZZ case ARMneon_VCGEF:
|
|
+//ZZ case ARMneon_VCEQF:
|
|
+//ZZ case ARMneon_VRECPS:
|
|
+//ZZ case ARMneon_VRSQRTS:
|
|
+//ZZ return ".f";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonBinOpDataType");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_COPY: return "vmov";
|
|
+//ZZ case ARMneon_COPYLS: return "vmov";
|
|
+//ZZ case ARMneon_COPYLU: return "vmov";
|
|
+//ZZ case ARMneon_COPYN: return "vmov";
|
|
+//ZZ case ARMneon_COPYQNSS: return "vqmovn";
|
|
+//ZZ case ARMneon_COPYQNUS: return "vqmovun";
|
|
+//ZZ case ARMneon_COPYQNUU: return "vqmovn";
|
|
+//ZZ case ARMneon_NOT: return "vmvn";
|
|
+//ZZ case ARMneon_EQZ: return "vceq";
|
|
+//ZZ case ARMneon_CNT: return "vcnt";
|
|
+//ZZ case ARMneon_CLS: return "vcls";
|
|
+//ZZ case ARMneon_CLZ: return "vclz";
|
|
+//ZZ case ARMneon_DUP: return "vdup";
|
|
+//ZZ case ARMneon_PADDLS: return "vpaddl";
|
|
+//ZZ case ARMneon_PADDLU: return "vpaddl";
|
|
+//ZZ case ARMneon_VQSHLNSS: return "vqshl";
|
|
+//ZZ case ARMneon_VQSHLNUU: return "vqshl";
|
|
+//ZZ case ARMneon_VQSHLNUS: return "vqshlu";
|
|
+//ZZ case ARMneon_REV16: return "vrev16";
|
|
+//ZZ case ARMneon_REV32: return "vrev32";
|
|
+//ZZ case ARMneon_REV64: return "vrev64";
|
|
+//ZZ case ARMneon_VCVTFtoU: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTFtoS: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTUtoF: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTStoF: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTFtoFixedU: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTFtoFixedS: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTFixedUtoF: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTFixedStoF: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTF32toF16: return "vcvt";
|
|
+//ZZ case ARMneon_VCVTF16toF32: return "vcvt";
|
|
+//ZZ case ARMneon_VRECIP: return "vrecip";
|
|
+//ZZ case ARMneon_VRECIPF: return "vrecipf";
|
|
+//ZZ case ARMneon_VNEGF: return "vneg";
|
|
+//ZZ case ARMneon_ABS: return "vabs";
|
|
+//ZZ case ARMneon_VABSFP: return "vabsfp";
|
|
+//ZZ case ARMneon_VRSQRTEFP: return "vrsqrtefp";
|
|
+//ZZ case ARMneon_VRSQRTE: return "vrsqrte";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonUnOp");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_COPY:
|
|
+//ZZ case ARMneon_NOT:
|
|
+//ZZ return "";
|
|
+//ZZ case ARMneon_COPYN:
|
|
+//ZZ case ARMneon_EQZ:
|
|
+//ZZ case ARMneon_CNT:
|
|
+//ZZ case ARMneon_DUP:
|
|
+//ZZ case ARMneon_REV16:
|
|
+//ZZ case ARMneon_REV32:
|
|
+//ZZ case ARMneon_REV64:
|
|
+//ZZ return ".i";
|
|
+//ZZ case ARMneon_COPYLU:
|
|
+//ZZ case ARMneon_PADDLU:
|
|
+//ZZ case ARMneon_COPYQNUU:
|
|
+//ZZ case ARMneon_VQSHLNUU:
|
|
+//ZZ case ARMneon_VRECIP:
|
|
+//ZZ case ARMneon_VRSQRTE:
|
|
+//ZZ return ".u";
|
|
+//ZZ case ARMneon_CLS:
|
|
+//ZZ case ARMneon_CLZ:
|
|
+//ZZ case ARMneon_COPYLS:
|
|
+//ZZ case ARMneon_PADDLS:
|
|
+//ZZ case ARMneon_COPYQNSS:
|
|
+//ZZ case ARMneon_COPYQNUS:
|
|
+//ZZ case ARMneon_VQSHLNSS:
|
|
+//ZZ case ARMneon_VQSHLNUS:
|
|
+//ZZ case ARMneon_ABS:
|
|
+//ZZ return ".s";
|
|
+//ZZ case ARMneon_VRECIPF:
|
|
+//ZZ case ARMneon_VNEGF:
|
|
+//ZZ case ARMneon_VABSFP:
|
|
+//ZZ case ARMneon_VRSQRTEFP:
|
|
+//ZZ return ".f";
|
|
+//ZZ case ARMneon_VCVTFtoU: return ".u32.f32";
|
|
+//ZZ case ARMneon_VCVTFtoS: return ".s32.f32";
|
|
+//ZZ case ARMneon_VCVTUtoF: return ".f32.u32";
|
|
+//ZZ case ARMneon_VCVTStoF: return ".f32.s32";
|
|
+//ZZ case ARMneon_VCVTF16toF32: return ".f32.f16";
|
|
+//ZZ case ARMneon_VCVTF32toF16: return ".f16.f32";
|
|
+//ZZ case ARMneon_VCVTFtoFixedU: return ".u32.f32";
|
|
+//ZZ case ARMneon_VCVTFtoFixedS: return ".s32.f32";
|
|
+//ZZ case ARMneon_VCVTFixedUtoF: return ".f32.u32";
|
|
+//ZZ case ARMneon_VCVTFixedStoF: return ".f32.s32";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonUnOpDataType");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_SETELEM: return "vmov";
|
|
+//ZZ case ARMneon_GETELEMU: return "vmov";
|
|
+//ZZ case ARMneon_GETELEMS: return "vmov";
|
|
+//ZZ case ARMneon_VDUP: return "vdup";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonUnarySOp");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_SETELEM:
|
|
+//ZZ case ARMneon_VDUP:
|
|
+//ZZ return ".i";
|
|
+//ZZ case ARMneon_GETELEMS:
|
|
+//ZZ return ".s";
|
|
+//ZZ case ARMneon_GETELEMU:
|
|
+//ZZ return ".u";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonUnarySOp");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_VSHL: return "vshl";
|
|
+//ZZ case ARMneon_VSAL: return "vshl";
|
|
+//ZZ case ARMneon_VQSHL: return "vqshl";
|
|
+//ZZ case ARMneon_VQSAL: return "vqshl";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonShiftOp");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_VSHL:
|
|
+//ZZ case ARMneon_VQSHL:
|
|
+//ZZ return ".u";
|
|
+//ZZ case ARMneon_VSAL:
|
|
+//ZZ case ARMneon_VQSAL:
|
|
+//ZZ return ".s";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonShiftOpDataType");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_TRN: return "vtrn";
|
|
+//ZZ case ARMneon_ZIP: return "vzip";
|
|
+//ZZ case ARMneon_UZP: return "vuzp";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonDualOp");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
|
|
+//ZZ switch (op) {
|
|
+//ZZ case ARMneon_TRN:
|
|
+//ZZ case ARMneon_ZIP:
|
|
+//ZZ case ARMneon_UZP:
|
|
+//ZZ return "i";
|
|
+//ZZ /* ... */
|
|
+//ZZ default: vpanic("showARMNeonDualOp");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static const HChar* showARMNeonDataSize_wrk ( UInt size )
|
|
+//ZZ {
|
|
+//ZZ switch (size) {
|
|
+//ZZ case 0: return "8";
|
|
+//ZZ case 1: return "16";
|
|
+//ZZ case 2: return "32";
|
|
+//ZZ case 3: return "64";
|
|
+//ZZ default: vpanic("showARMNeonDataSize");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static const HChar* showARMNeonDataSize ( ARMInstr* i )
|
|
+//ZZ {
|
|
+//ZZ switch (i->tag) {
|
|
+//ZZ case ARMin_NBinary:
|
|
+//ZZ if (i->ARMin.NBinary.op == ARMneon_VEXT)
|
|
+//ZZ return "8";
|
|
+//ZZ if (i->ARMin.NBinary.op == ARMneon_VAND ||
|
|
+//ZZ i->ARMin.NBinary.op == ARMneon_VORR ||
|
|
+//ZZ i->ARMin.NBinary.op == ARMneon_VXOR)
|
|
+//ZZ return "";
|
|
+//ZZ return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
|
|
+//ZZ case ARMin_NUnary:
|
|
+//ZZ if (i->ARMin.NUnary.op == ARMneon_COPY ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_NOT ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
|
|
+//ZZ return "";
|
|
+//ZZ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
|
|
+//ZZ UInt size;
|
|
+//ZZ size = i->ARMin.NUnary.size;
|
|
+//ZZ if (size & 0x40)
|
|
+//ZZ return "64";
|
|
+//ZZ if (size & 0x20)
|
|
+//ZZ return "32";
|
|
+//ZZ if (size & 0x10)
|
|
+//ZZ return "16";
|
|
+//ZZ if (size & 0x08)
|
|
+//ZZ return "8";
|
|
+//ZZ vpanic("showARMNeonDataSize");
|
|
+//ZZ }
|
|
+//ZZ return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
|
|
+//ZZ case ARMin_NUnaryS:
|
|
+//ZZ if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
|
|
+//ZZ int size;
|
|
+//ZZ size = i->ARMin.NUnaryS.size;
|
|
+//ZZ if ((size & 1) == 1)
|
|
+//ZZ return "8";
|
|
+//ZZ if ((size & 3) == 2)
|
|
+//ZZ return "16";
|
|
+//ZZ if ((size & 7) == 4)
|
|
+//ZZ return "32";
|
|
+//ZZ vpanic("showARMNeonDataSize");
|
|
+//ZZ }
|
|
+//ZZ return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
|
|
+//ZZ case ARMin_NShift:
|
|
+//ZZ return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
|
|
+//ZZ case ARMin_NDual:
|
|
+//ZZ return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
|
|
+//ZZ default:
|
|
+//ZZ vpanic("showARMNeonDataSize");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+
|
|
+/* Allocate an ARM64in_Arith instruction.  ppARM64Instr shows it as
|
|
+   "add dst, argL, argR" when isAdd is true and as "sub ..." otherwise. */
|
|
+ARM64Instr* ARM64Instr_Arith ( HReg dst, HReg argL,
|
|
+                               ARM64RIA* argR, Bool isAdd )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                 = ARM64in_Arith;
|
|
+   insn->ARM64in.Arith.isAdd = isAdd;
|
|
+   insn->ARM64in.Arith.dst   = dst;
|
|
+   insn->ARM64in.Arith.argL  = argL;
|
|
+   insn->ARM64in.Arith.argR  = argR;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_Cmp instruction over argL and argR.  is64 is
|
|
+   stored as given; ppARM64Instr marks the non-64 form with "(w)". */
|
|
+ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag              = ARM64in_Cmp;
|
|
+   insn->ARM64in.Cmp.is64 = is64;
|
|
+   insn->ARM64in.Cmp.argL = argL;
|
|
+   insn->ARM64in.Cmp.argR = argR;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_Logic instruction.  op chooses the mnemonic
|
|
+   (see showARM64LogicOp); printed as "<op> dst, argL, argR". */
|
|
+ARM64Instr* ARM64Instr_Logic ( HReg dst, HReg argL,
|
|
+                               ARM64RIL* argR, ARM64LogicOp op )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                = ARM64in_Logic;
|
|
+   insn->ARM64in.Logic.op   = op;
|
|
+   insn->ARM64in.Logic.dst  = dst;
|
|
+   insn->ARM64in.Logic.argL = argL;
|
|
+   insn->ARM64in.Logic.argR = argR;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_Test instruction (printed as "tst argL, argR"). */
|
|
+ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag               = ARM64in_Test;
|
|
+   insn->ARM64in.Test.argR = argR;
|
|
+   insn->ARM64in.Test.argL = argL;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_Shift instruction.  op chooses the mnemonic
|
|
+   (see showARM64ShiftOp); printed as "<op> dst, argL, argR". */
|
|
+ARM64Instr* ARM64Instr_Shift ( HReg dst, HReg argL,
|
|
+                               ARM64RI6* argR, ARM64ShiftOp op )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                = ARM64in_Shift;
|
|
+   insn->ARM64in.Shift.op   = op;
|
|
+   insn->ARM64in.Shift.dst  = dst;
|
|
+   insn->ARM64in.Shift.argL = argL;
|
|
+   insn->ARM64in.Shift.argR = argR;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_Unary instruction.  op chooses the mnemonic
|
|
+   (see showARM64UnaryOp); printed as "<op> dst, src". */
|
|
+ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag               = ARM64in_Unary;
|
|
+   insn->ARM64in.Unary.op  = op;
|
|
+   insn->ARM64in.Unary.src = src;
|
|
+   insn->ARM64in.Unary.dst = dst;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_MovI register move (printed as "mov dst, src").
|
|
+   Both registers are asserted to be of class HRcInt64. */
|
|
+ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   /* Check operand classes first; src is checked before dst. */
|
|
+   vassert(hregClass(src) == HRcInt64);
|
|
+   vassert(hregClass(dst) == HRcInt64);
|
|
+   insn->tag              = ARM64in_MovI;
|
|
+   insn->ARM64in.MovI.src = src;
|
|
+   insn->ARM64in.MovI.dst = dst;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_Imm64 instruction pairing register dst with the
|
|
+   64-bit constant imm64 (printed as "imm64 dst, 0x..."). */
|
|
+ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                 = ARM64in_Imm64;
|
|
+   insn->ARM64in.Imm64.imm64 = imm64;
|
|
+   insn->ARM64in.Imm64.dst   = dst;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_LdSt64 instruction.  isLoad selects the load form
|
|
+   (printed "ldr rD, amode") over the store form ("str amode, rD"). */
|
|
+ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                   = ARM64in_LdSt64;
|
|
+   insn->ARM64in.LdSt64.amode  = amode;
|
|
+   insn->ARM64in.LdSt64.rD     = rD;
|
|
+   insn->ARM64in.LdSt64.isLoad = isLoad;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_LdSt32 instruction.  isLoad selects the load form
|
|
+   (printed "ldruw rD, amode") over the store form ("strw amode, rD"). */
|
|
+ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                   = ARM64in_LdSt32;
|
|
+   insn->ARM64in.LdSt32.amode  = amode;
|
|
+   insn->ARM64in.LdSt32.rD     = rD;
|
|
+   insn->ARM64in.LdSt32.isLoad = isLoad;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_LdSt16 instruction.  isLoad selects the load form
|
|
+   (printed "ldruh rD, amode") over the store form ("strh amode, rD"). */
|
|
+ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                   = ARM64in_LdSt16;
|
|
+   insn->ARM64in.LdSt16.amode  = amode;
|
|
+   insn->ARM64in.LdSt16.rD     = rD;
|
|
+   insn->ARM64in.LdSt16.isLoad = isLoad;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_LdSt8 instruction.  isLoad selects the load form
|
|
+   (printed "ldrub rD, amode") over the store form ("strb amode, rD"). */
|
|
+ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                  = ARM64in_LdSt8;
|
|
+   insn->ARM64in.LdSt8.amode  = amode;
|
|
+   insn->ARM64in.LdSt8.rD     = rD;
|
|
+   insn->ARM64in.LdSt8.isLoad = isLoad;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_XDirect exit.  It records the constant guest
|
|
+   destination dstGA, the amode amPC that ppARM64Instr shows dstGA being
|
|
+   stored to, the guarding condition cond, and toFastEP, which picks the
|
|
+   "fast" rather than "slow" chain-me entry point in the printed form. */
|
|
+ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
|
|
+                                 ARM64CondCode cond, Bool toFastEP )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                      = ARM64in_XDirect;
|
|
+   insn->ARM64in.XDirect.cond     = cond;
|
|
+   insn->ARM64in.XDirect.toFastEP = toFastEP;
|
|
+   insn->ARM64in.XDirect.dstGA    = dstGA;
|
|
+   insn->ARM64in.XDirect.amPC     = amPC;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_XIndir exit.  Unlike XDirect, the destination
|
|
+   dstGA is supplied in a register; amPC and cond are stored as given. */
|
|
+ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
|
|
+                                ARM64CondCode cond )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                  = ARM64in_XIndir;
|
|
+   insn->ARM64in.XIndir.cond  = cond;
|
|
+   insn->ARM64in.XIndir.dstGA = dstGA;
|
|
+   insn->ARM64in.XIndir.amPC  = amPC;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_XAssisted exit.  Takes the same operands as
|
|
+   XIndir plus the IR jump kind jk, all stored as given. */
|
|
+ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
|
|
+                                   ARM64CondCode cond, IRJumpKind jk )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                     = ARM64in_XAssisted;
|
|
+   insn->ARM64in.XAssisted.jk    = jk;
|
|
+   insn->ARM64in.XAssisted.cond  = cond;
|
|
+   insn->ARM64in.XAssisted.dstGA = dstGA;
|
|
+   insn->ARM64in.XAssisted.amPC  = amPC;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_CSel instruction holding destination dst, the two
|
|
+   candidate operands argL/argR and the selecting condition cond. */
|
|
+ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
|
|
+                              ARM64CondCode cond )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag               = ARM64in_CSel;
|
|
+   insn->ARM64in.CSel.cond = cond;
|
|
+   insn->ARM64in.CSel.dst  = dst;
|
|
+   insn->ARM64in.CSel.argL = argL;
|
|
+   insn->ARM64in.CSel.argR = argR;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_Call instruction from condition cond, call
|
|
+   target, argument-register count nArgRegs and return location rloc.
|
|
+   rloc is asserted to satisfy is_sane_RetLoc. */
|
|
+ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, HWord target,
|
|
+                              Int nArgRegs, RetLoc rloc )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   vassert(is_sane_RetLoc(rloc));
|
|
+   insn->tag                   = ARM64in_Call;
|
|
+   insn->ARM64in.Call.rloc     = rloc;
|
|
+   insn->ARM64in.Call.nArgRegs = nArgRegs;
|
|
+   insn->ARM64in.Call.target   = target;
|
|
+   insn->ARM64in.Call.cond     = cond;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_AddToSP instruction carrying the signed amount
|
|
+   simm.  simm is asserted to lie strictly between -4096 and 4096 and
|
|
+   to have its low four bits clear (i.e. be a multiple of 16).
|
|
+   The redundant 'extern' on this definition was dropped so that it
|
|
+   matches the other constructors; linkage is unaffected. */
|
|
+ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
|
|
+   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
|
|
+   i->tag = ARM64in_AddToSP;
|
|
+   i->ARM64in.AddToSP.simm = simm;
|
|
+   vassert(-4096 < simm && simm < 4096);
|
|
+   vassert(0 == (simm & 0xF));
|
|
+   return i;
|
|
+}
|
|
+/* Allocate an ARM64in_FromSP instruction whose only operand is the
|
|
+   destination register dst.  The redundant 'extern' on this definition
|
|
+   was dropped so that it matches the other constructors; linkage is
|
|
+   unaffected. */
|
|
+ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
|
|
+   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
|
|
+   i->tag = ARM64in_FromSP;
|
|
+   i->ARM64in.FromSP.dst = dst;
|
|
+   return i;
|
|
+}
|
|
+/* Allocate an ARM64in_Mul instruction with destination dst, operands
|
|
+   argL/argR and the multiply variant op. */
|
|
+ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
|
|
+                             ARM64MulOp op )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag              = ARM64in_Mul;
|
|
+   insn->ARM64in.Mul.op   = op;
|
|
+   insn->ARM64in.Mul.dst  = dst;
|
|
+   insn->ARM64in.Mul.argL = argL;
|
|
+   insn->ARM64in.Mul.argR = argR;
|
|
+   return insn;
|
|
+}
|
|
+//ZZ ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_Mul;
|
|
+//ZZ i->ARMin.Mul.op = op;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_LdrEX ( Int szB ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_LdrEX;
|
|
+//ZZ i->ARMin.LdrEX.szB = szB;
|
|
+//ZZ vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_StrEX ( Int szB ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_StrEX;
|
|
+//ZZ i->ARMin.StrEX.szB = szB;
|
|
+//ZZ vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+/* Allocate an ARM64in_VLdStS instruction from isLoad, register sD, base
|
|
+   register rN and offset uimm12.  The offset is asserted to be below
|
|
+   16384 and a multiple of 4.
|
|
+   NOTE(review): despite its name, uimm12 looks like an already-scaled
|
|
+   byte offset rather than a raw 12-bit field -- confirm at the emitter. */
|
|
+ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN,
|
|
+                                UInt uimm12 )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
|
|
+   insn->tag                   = ARM64in_VLdStS;
|
|
+   insn->ARM64in.VLdStS.uimm12 = uimm12;
|
|
+   insn->ARM64in.VLdStS.rN     = rN;
|
|
+   insn->ARM64in.VLdStS.sD     = sD;
|
|
+   insn->ARM64in.VLdStS.isLoad = isLoad;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VLdStD instruction from isLoad, register dD, base
|
|
+   register rN and offset uimm12.  The offset is asserted to be below
|
|
+   32768 and a multiple of 8.
|
|
+   NOTE(review): despite its name, uimm12 looks like an already-scaled
|
|
+   byte offset rather than a raw 12-bit field -- confirm at the emitter. */
|
|
+ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN,
|
|
+                                UInt uimm12 )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
|
|
+   insn->tag                   = ARM64in_VLdStD;
|
|
+   insn->ARM64in.VLdStD.uimm12 = uimm12;
|
|
+   insn->ARM64in.VLdStD.rN     = rN;
|
|
+   insn->ARM64in.VLdStD.dD     = dD;
|
|
+   insn->ARM64in.VLdStD.isLoad = isLoad;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VLdStQ instruction from isLoad, register rQ and
|
|
+   base register rN.  There is no offset operand. */
|
|
+ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                   = ARM64in_VLdStQ;
|
|
+   insn->ARM64in.VLdStQ.rN     = rN;
|
|
+   insn->ARM64in.VLdStQ.rQ     = rQ;
|
|
+   insn->ARM64in.VLdStQ.isLoad = isLoad;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VCvtI2F instruction; how names the conversion,
|
|
+   rD is the destination register and rS the source register. */
|
|
+ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                 = ARM64in_VCvtI2F;
|
|
+   insn->ARM64in.VCvtI2F.rS  = rS;
|
|
+   insn->ARM64in.VCvtI2F.rD  = rD;
|
|
+   insn->ARM64in.VCvtI2F.how = how;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VCvtF2I instruction; how names the conversion,
|
|
+   rD/rS are destination/source registers and armRM is a rounding-mode
|
|
+   code that is asserted to be at most 3. */
|
|
+ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
|
|
+                                 UChar armRM )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   vassert(armRM <= 3);
|
|
+   insn->tag                   = ARM64in_VCvtF2I;
|
|
+   insn->ARM64in.VCvtF2I.armRM = armRM;
|
|
+   insn->ARM64in.VCvtF2I.rS    = rS;
|
|
+   insn->ARM64in.VCvtF2I.rD    = rD;
|
|
+   insn->ARM64in.VCvtF2I.how   = how;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VCvtSD instruction; sToD is the direction flag,
|
|
+   dst and src the registers involved. */
|
|
+ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                 = ARM64in_VCvtSD;
|
|
+   insn->ARM64in.VCvtSD.src  = src;
|
|
+   insn->ARM64in.VCvtSD.dst  = dst;
|
|
+   insn->ARM64in.VCvtSD.sToD = sToD;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VUnaryD instruction applying FP unary op from
|
|
+   src to dst. */
|
|
+ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                 = ARM64in_VUnaryD;
|
|
+   insn->ARM64in.VUnaryD.src = src;
|
|
+   insn->ARM64in.VUnaryD.dst = dst;
|
|
+   insn->ARM64in.VUnaryD.op  = op;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VUnaryS instruction applying FP unary op from
|
|
+   src to dst. */
|
|
+ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                 = ARM64in_VUnaryS;
|
|
+   insn->ARM64in.VUnaryS.src = src;
|
|
+   insn->ARM64in.VUnaryS.dst = dst;
|
|
+   insn->ARM64in.VUnaryS.op  = op;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VBinD instruction: FP binary op with destination
|
|
+   dst and operands argL, argR. */
|
|
+ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg dst,
|
|
+                               HReg argL, HReg argR )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                = ARM64in_VBinD;
|
|
+   insn->ARM64in.VBinD.argR = argR;
|
|
+   insn->ARM64in.VBinD.argL = argL;
|
|
+   insn->ARM64in.VBinD.dst  = dst;
|
|
+   insn->ARM64in.VBinD.op   = op;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VBinS instruction: FP binary op with destination
|
|
+   dst and operands argL, argR. */
|
|
+ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, HReg dst,
|
|
+                               HReg argL, HReg argR )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                = ARM64in_VBinS;
|
|
+   insn->ARM64in.VBinS.argR = argR;
|
|
+   insn->ARM64in.VBinS.argL = argL;
|
|
+   insn->ARM64in.VBinS.dst  = dst;
|
|
+   insn->ARM64in.VBinS.op   = op;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VCmpD instruction over registers argL and argR. */
|
|
+ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                = ARM64in_VCmpD;
|
|
+   insn->ARM64in.VCmpD.argR = argR;
|
|
+   insn->ARM64in.VCmpD.argL = argL;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VCmpS instruction over registers argL and argR. */
|
|
+ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                = ARM64in_VCmpS;
|
|
+   insn->ARM64in.VCmpS.argR = argR;
|
|
+   insn->ARM64in.VCmpS.argL = argL;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_FPCR instruction from the flag toFPCR and the
|
|
+   integer register iReg.
|
|
+   NOTE(review): presumably toFPCR picks the transfer direction
|
|
+   (iReg -> FPCR when true) -- confirm against the emitter. */
|
|
+ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                 = ARM64in_FPCR;
|
|
+   insn->ARM64in.FPCR.iReg   = iReg;
|
|
+   insn->ARM64in.FPCR.toFPCR = toFPCR;
|
|
+   return insn;
|
|
+}
|
|
+//ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_VAluS;
|
|
+//ZZ i->ARMin.VAluS.op = op;
|
|
+//ZZ i->ARMin.VAluS.dst = dst;
|
|
+//ZZ i->ARMin.VAluS.argL = argL;
|
|
+//ZZ i->ARMin.VAluS.argR = argR;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_VCMovD;
|
|
+//ZZ i->ARMin.VCMovD.cond = cond;
|
|
+//ZZ i->ARMin.VCMovD.dst = dst;
|
|
+//ZZ i->ARMin.VCMovD.src = src;
|
|
+//ZZ vassert(cond != ARMcc_AL);
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_VCMovS;
|
|
+//ZZ i->ARMin.VCMovS.cond = cond;
|
|
+//ZZ i->ARMin.VCMovS.dst = dst;
|
|
+//ZZ i->ARMin.VCMovS.src = src;
|
|
+//ZZ vassert(cond != ARMcc_AL);
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_VXferD;
|
|
+//ZZ i->ARMin.VXferD.toD = toD;
|
|
+//ZZ i->ARMin.VXferD.dD = dD;
|
|
+//ZZ i->ARMin.VXferD.rHi = rHi;
|
|
+//ZZ i->ARMin.VXferD.rLo = rLo;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_VXferS;
|
|
+//ZZ i->ARMin.VXferS.toS = toS;
|
|
+//ZZ i->ARMin.VXferS.fD = fD;
|
|
+//ZZ i->ARMin.VXferS.rLo = rLo;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
|
|
+//ZZ HReg dst, HReg src ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_VCvtID;
|
|
+//ZZ i->ARMin.VCvtID.iToD = iToD;
|
|
+//ZZ i->ARMin.VCvtID.syned = syned;
|
|
+//ZZ i->ARMin.VCvtID.dst = dst;
|
|
+//ZZ i->ARMin.VCvtID.src = src;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_MFence ( void ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_MFence;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_CLREX( void ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_CLREX;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NLdStD;
|
|
+//ZZ i->ARMin.NLdStD.isLoad = isLoad;
|
|
+//ZZ i->ARMin.NLdStD.dD = dD;
|
|
+//ZZ i->ARMin.NLdStD.amode = amode;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
|
|
+//ZZ UInt size, Bool Q ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NUnary;
|
|
+//ZZ i->ARMin.NUnary.op = op;
|
|
+//ZZ i->ARMin.NUnary.src = nQ;
|
|
+//ZZ i->ARMin.NUnary.dst = dQ;
|
|
+//ZZ i->ARMin.NUnary.size = size;
|
|
+//ZZ i->ARMin.NUnary.Q = Q;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
|
|
+//ZZ UInt size, Bool Q ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NUnaryS;
|
|
+//ZZ i->ARMin.NUnaryS.op = op;
|
|
+//ZZ i->ARMin.NUnaryS.src = src;
|
|
+//ZZ i->ARMin.NUnaryS.dst = dst;
|
|
+//ZZ i->ARMin.NUnaryS.size = size;
|
|
+//ZZ i->ARMin.NUnaryS.Q = Q;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
|
|
+//ZZ UInt size, Bool Q ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NDual;
|
|
+//ZZ i->ARMin.NDual.op = op;
|
|
+//ZZ i->ARMin.NDual.arg1 = nQ;
|
|
+//ZZ i->ARMin.NDual.arg2 = mQ;
|
|
+//ZZ i->ARMin.NDual.size = size;
|
|
+//ZZ i->ARMin.NDual.Q = Q;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
|
|
+//ZZ HReg dst, HReg argL, HReg argR,
|
|
+//ZZ UInt size, Bool Q ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NBinary;
|
|
+//ZZ i->ARMin.NBinary.op = op;
|
|
+//ZZ i->ARMin.NBinary.argL = argL;
|
|
+//ZZ i->ARMin.NBinary.argR = argR;
|
|
+//ZZ i->ARMin.NBinary.dst = dst;
|
|
+//ZZ i->ARMin.NBinary.size = size;
|
|
+//ZZ i->ARMin.NBinary.Q = Q;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+
|
|
+/* Allocate an ARM64in_VImmQ instruction pairing register rQ with the
|
|
+   16-bit immediate imm.  No range check is applied to imm here. */
|
|
+ARM64Instr* ARM64Instr_VImmQ ( HReg rQ, UShort imm )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag               = ARM64in_VImmQ;
|
|
+   insn->ARM64in.VImmQ.imm = imm;
|
|
+   insn->ARM64in.VImmQ.rQ  = rQ;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VDfromX instruction with operands rD and rX. */
|
|
+ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                = ARM64in_VDfromX;
|
|
+   insn->ARM64in.VDfromX.rX = rX;
|
|
+   insn->ARM64in.VDfromX.rD = rD;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VQfromXX instruction with operands rQ, rXhi and
|
|
+   rXlo. */
|
|
+ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                   = ARM64in_VQfromXX;
|
|
+   insn->ARM64in.VQfromXX.rXlo = rXlo;
|
|
+   insn->ARM64in.VQfromXX.rXhi = rXhi;
|
|
+   insn->ARM64in.VQfromXX.rQ   = rQ;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VXfromQ instruction with operands rX, rQ and the
|
|
+   lane index laneNo, which is asserted to be 0 or 1. */
|
|
+ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   vassert(laneNo <= 1);
|
|
+   insn->tag                    = ARM64in_VXfromQ;
|
|
+   insn->ARM64in.VXfromQ.laneNo = laneNo;
|
|
+   insn->ARM64in.VXfromQ.rQ     = rQ;
|
|
+   insn->ARM64in.VXfromQ.rX     = rX;
|
|
+   return insn;
|
|
+}
|
|
+/* Allocate an ARM64in_VMov instruction copying src to dst, with szB
|
|
+   giving the operand size.  Only two sizes are accepted: 16, where both
|
|
+   registers must be of class HRcVec128, and 8, where both must be of
|
|
+   class HRcFlt64.  Any other szB panics. */
|
|
+ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag              = ARM64in_VMov;
|
|
+   insn->ARM64in.VMov.src = src;
|
|
+   insn->ARM64in.VMov.dst = dst;
|
|
+   insn->ARM64in.VMov.szB = szB;
|
|
+   /* Register classes must agree with the requested size. */
|
|
+   if (szB == 16) {
|
|
+      vassert(hregClass(src) == HRcVec128);
|
|
+      vassert(hregClass(dst) == HRcVec128);
|
|
+   } else if (szB == 8) {
|
|
+      vassert(hregClass(src) == HRcFlt64);
|
|
+      vassert(hregClass(dst) == HRcFlt64);
|
|
+   } else {
|
|
+      vpanic("ARM64Instr_VMov");
|
|
+   }
|
|
+   return insn;
|
|
+}
|
|
+
|
|
+//ZZ ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NCMovQ;
|
|
+//ZZ i->ARMin.NCMovQ.cond = cond;
|
|
+//ZZ i->ARMin.NCMovQ.dst = dst;
|
|
+//ZZ i->ARMin.NCMovQ.src = src;
|
|
+//ZZ vassert(cond != ARMcc_AL);
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
|
|
+//ZZ HReg dst, HReg argL, HReg argR,
|
|
+//ZZ UInt size, Bool Q ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NShift;
|
|
+//ZZ i->ARMin.NShift.op = op;
|
|
+//ZZ i->ARMin.NShift.argL = argL;
|
|
+//ZZ i->ARMin.NShift.argR = argR;
|
|
+//ZZ i->ARMin.NShift.dst = dst;
|
|
+//ZZ i->ARMin.NShift.size = size;
|
|
+//ZZ i->ARMin.NShift.Q = Q;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
|
|
+//ZZ {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_NShl64;
|
|
+//ZZ i->ARMin.NShl64.dst = dst;
|
|
+//ZZ i->ARMin.NShl64.src = src;
|
|
+//ZZ i->ARMin.NShl64.amt = amt;
|
|
+//ZZ vassert(amt >= 1 && amt <= 63);
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* Helper copy-pasted from isel.c */
|
|
+//ZZ static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
|
|
+//ZZ {
|
|
+//ZZ UInt i;
|
|
+//ZZ for (i = 0; i < 16; i++) {
|
|
+//ZZ if (0 == (u & 0xFFFFFF00)) {
|
|
+//ZZ *u8 = u;
|
|
+//ZZ *u4 = i;
|
|
+//ZZ return True;
|
|
+//ZZ }
|
|
+//ZZ u = ROR32(u, 30);
|
|
+//ZZ }
|
|
+//ZZ vassert(i == 16);
|
|
+//ZZ return False;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
|
|
+//ZZ UInt u8, u4;
|
|
+//ZZ ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ /* Try to generate single ADD if possible */
|
|
+//ZZ if (fitsIn8x4(&u8, &u4, imm32)) {
|
|
+//ZZ i->tag = ARMin_Alu;
|
|
+//ZZ i->ARMin.Alu.op = ARMalu_ADD;
|
|
+//ZZ i->ARMin.Alu.dst = rD;
|
|
+//ZZ i->ARMin.Alu.argL = rN;
|
|
+//ZZ i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
|
|
+//ZZ } else {
|
|
+//ZZ i->tag = ARMin_Add32;
|
|
+//ZZ i->ARMin.Add32.rD = rD;
|
|
+//ZZ i->ARMin.Add32.rN = rN;
|
|
+//ZZ i->ARMin.Add32.imm32 = imm32;
|
|
+//ZZ }
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+
|
|
+/* Allocate an ARM64in_EvCheck instruction recording the two amodes
|
|
+   amCounter and amFailAddr. */
|
|
+ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
|
|
+                                 ARM64AMode* amFailAddr )
|
|
+{
|
|
+   ARM64Instr* insn = LibVEX_Alloc(sizeof(*insn));
|
|
+   insn->tag                        = ARM64in_EvCheck;
|
|
+   insn->ARM64in.EvCheck.amFailAddr = amFailAddr;
|
|
+   insn->ARM64in.EvCheck.amCounter  = amCounter;
|
|
+   return insn;
|
|
+}
|
|
+
|
|
+//ZZ ARMInstr* ARMInstr_ProfInc ( void ) {
|
|
+//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
|
|
+//ZZ i->tag = ARMin_ProfInc;
|
|
+//ZZ return i;
|
|
+//ZZ }
|
|
+
|
|
+/* ... */
|
|
+
|
|
+void ppARM64Instr ( ARM64Instr* i ) { /* Print a textual form of *i via vex_printf, chosen by i->tag; no newline is emitted and an unknown tag panics. */
|
|
+ switch (i->tag) {
|
|
+ case ARM64in_Arith: /* "add" or "sub" (per isAdd), then dst, argL, argR */
|
|
+ vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
|
|
+ ppHRegARM64(i->ARM64in.Arith.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.Arith.argL);
|
|
+ vex_printf(", ");
|
|
+ ppARM64RIA(i->ARM64in.Arith.argR);
|
|
+ return;
|
|
+ case ARM64in_Cmp: /* the "(w)" suffix is printed when is64 is false */
|
|
+ vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
|
|
+ ppHRegARM64(i->ARM64in.Cmp.argL);
|
|
+ vex_printf(", ");
|
|
+ ppARM64RIA(i->ARM64in.Cmp.argR);
|
|
+ return;
|
|
+ case ARM64in_Logic: /* mnemonic from showARM64LogicOp, then dst, argL, argR */
|
|
+ vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
|
|
+ ppHRegARM64(i->ARM64in.Logic.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.Logic.argL);
|
|
+ vex_printf(", ");
|
|
+ ppARM64RIL(i->ARM64in.Logic.argR);
|
|
+ return;
|
|
+ case ARM64in_Test: /* "tst argL, argR"; there is no destination operand */
|
|
+ vex_printf("tst ");
|
|
+ ppHRegARM64(i->ARM64in.Test.argL);
|
|
+ vex_printf(", ");
|
|
+ ppARM64RIL(i->ARM64in.Test.argR);
|
|
+ return;
|
|
+ case ARM64in_Shift: /* mnemonic from showARM64ShiftOp, then dst, argL, argR */
|
|
+ vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
|
|
+ ppHRegARM64(i->ARM64in.Shift.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.Shift.argL);
|
|
+ vex_printf(", ");
|
|
+ ppARM64RI6(i->ARM64in.Shift.argR);
|
|
+ return;
|
|
+ case ARM64in_Unary: /* mnemonic from showARM64UnaryOp, then dst, src */
|
|
+ vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
|
|
+ ppHRegARM64(i->ARM64in.Unary.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.Unary.src);
|
|
+ return;
|
|
+ case ARM64in_MovI: /* "mov dst, src" */
|
|
+ vex_printf("mov ");
|
|
+ ppHRegARM64(i->ARM64in.MovI.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.MovI.src);
|
|
+ return;
|
|
+ case ARM64in_Imm64: /* "imm64 dst, 0x<imm64 in hex>" */
|
|
+ vex_printf("imm64 ");
|
|
+ ppHRegARM64(i->ARM64in.Imm64.dst);
|
|
+ vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
|
|
+ return;
|
|
+ case ARM64in_LdSt64: /* load prints "ldr rD, amode"; store prints "str amode, rD" (address first) */
|
|
+ if (i->ARM64in.LdSt64.isLoad) {
|
|
+ vex_printf("ldr ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt64.rD);
|
|
+ vex_printf(", ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt64.amode);
|
|
+ } else {
|
|
+ vex_printf("str ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt64.amode);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt64.rD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_LdSt32: /* same operand order as LdSt64, with mnemonics "ldruw" / "strw" */
|
|
+ if (i->ARM64in.LdSt32.isLoad) {
|
|
+ vex_printf("ldruw ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt32.rD);
|
|
+ vex_printf(", ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt32.amode);
|
|
+ } else {
|
|
+ vex_printf("strw ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt32.amode);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt32.rD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_LdSt16: /* same operand order as LdSt64, with mnemonics "ldruh" / "strh" */
|
|
+ if (i->ARM64in.LdSt16.isLoad) {
|
|
+ vex_printf("ldruh ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt16.rD);
|
|
+ vex_printf(", ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt16.amode);
|
|
+ } else {
|
|
+ vex_printf("strh ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt16.amode);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt16.rD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_LdSt8: /* same operand order as LdSt64, with mnemonics "ldrub" / "strb" */
|
|
+ if (i->ARM64in.LdSt8.isLoad) {
|
|
+ vex_printf("ldrub ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt8.rD);
|
|
+ vex_printf(", ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt8.amode);
|
|
+ } else {
|
|
+ vex_printf("strb ");
|
|
+ ppARM64AMode(i->ARM64in.LdSt8.amode);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.LdSt8.rD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_XDirect: /* printed as a guarded pseudo-sequence: "if (%pstate.<cond>) { ... }" */
|
|
+ vex_printf("(xDirect) ");
|
|
+ vex_printf("if (%%pstate.%s) { ",
|
|
+ showARM64CondCode(i->ARM64in.XDirect.cond));
|
|
+ vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
|
|
+ vex_printf("str x9,");
|
|
+ ppARM64AMode(i->ARM64in.XDirect.amPC);
|
|
+ vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
|
|
+ i->ARM64in.XDirect.toFastEP ? "fast" : "slow"); /* toFastEP picks the "fast" or "slow" name in the printed symbol */
|
|
+ vex_printf("blr x9 }");
|
|
+ return;
|
|
+ case ARM64in_XIndir: /* like XDirect, but dstGA is printed as a register and the jump text is "br x9" */
|
|
+ vex_printf("(xIndir) ");
|
|
+ vex_printf("if (%%pstate.%s) { ",
|
|
+ showARM64CondCode(i->ARM64in.XIndir.cond));
|
|
+ vex_printf("str ");
|
|
+ ppHRegARM64(i->ARM64in.XIndir.dstGA);
|
|
+ vex_printf(",");
|
|
+ ppARM64AMode(i->ARM64in.XIndir.amPC);
|
|
+ vex_printf("; imm64 x9,$disp_cp_xindir; ");
|
|
+ vex_printf("br x9 }");
|
|
+ return;
|
|
+ case ARM64in_XAssisted: /* like XIndir, plus a "movw x21,..." line showing jk as an integer */
|
|
+ vex_printf("(xAssisted) ");
|
|
+ vex_printf("if (%%pstate.%s) { ",
|
|
+ showARM64CondCode(i->ARM64in.XAssisted.cond));
|
|
+ vex_printf("str ");
|
|
+ ppHRegARM64(i->ARM64in.XAssisted.dstGA);
|
|
+ vex_printf(",");
|
|
+ ppARM64AMode(i->ARM64in.XAssisted.amPC);
|
|
+ vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
|
|
+ (Int)i->ARM64in.XAssisted.jk);
|
|
+ vex_printf("imm64 x9,$disp_cp_xassisted; ");
|
|
+ vex_printf("br x9 }");
|
|
+ return;
|
|
+ case ARM64in_CSel: /* "csel dst, argL, argR, <cond>" */
|
|
+ vex_printf("csel ");
|
|
+ ppHRegARM64(i->ARM64in.CSel.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.CSel.argL);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.CSel.argR);
|
|
+ vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
|
|
+ return;
|
|
+ case ARM64in_Call: /* condition name is replaced by blank padding when cond is ARM64cc_AL */
|
|
+ vex_printf("call%s ",
|
|
+ i->ARM64in.Call.cond==ARM64cc_AL
|
|
+ ? " " : showARM64CondCode(i->ARM64in.Call.cond));
|
|
+ vex_printf("0x%lx [nArgRegs=%d, ",
|
|
+ i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
|
|
+ ppRetLoc(i->ARM64in.Call.rloc);
|
|
+ vex_printf("]");
|
|
+ return;
|
|
+ case ARM64in_AddToSP: { /* a negative simm is shown as "sub" of its magnitude, otherwise "add" */
|
|
+ Int simm = i->ARM64in.AddToSP.simm;
|
|
+ vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
|
|
+ simm < 0 ? -simm : simm);
|
|
+ return;
|
|
+ }
|
|
+ case ARM64in_FromSP: /* "mov dst, xsp" */
|
|
+ vex_printf("mov ");
|
|
+ ppHRegARM64(i->ARM64in.FromSP.dst);
|
|
+ vex_printf(", xsp");
|
|
+ return;
|
|
+ case ARM64in_Mul: /* mnemonic from showARM64MulOp, then dst, argL, argR */
|
|
+ vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
|
|
+ ppHRegARM64(i->ARM64in.Mul.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.Mul.argL);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.Mul.argR);
|
|
+ return; /* NOTE(review): the //ZZ lines in this switch are commented-out cases that use ARMin_* names rather than ARM64in_*; they contribute no code */
|
|
+//ZZ case ARMin_LdrEX: {
|
|
+//ZZ const HChar* sz = "";
|
|
+//ZZ switch (i->ARMin.LdrEX.szB) {
|
|
+//ZZ case 1: sz = "b"; break; case 2: sz = "h"; break;
|
|
+//ZZ case 8: sz = "d"; break; case 4: break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ vex_printf("ldrex%s %sr2, [r4]",
|
|
+//ZZ sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_StrEX: {
|
|
+//ZZ const HChar* sz = "";
|
|
+//ZZ switch (i->ARMin.StrEX.szB) {
|
|
+//ZZ case 1: sz = "b"; break; case 2: sz = "h"; break;
|
|
+//ZZ case 8: sz = "d"; break; case 4: break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ vex_printf("strex%s r0, %sr2, [r4]",
|
|
+//ZZ sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+ case ARM64in_VLdStS: /* sD goes through ppHRegARM64asSreg; the address is shown as "uimm12(rN)" */
|
|
+ if (i->ARM64in.VLdStS.isLoad) {
|
|
+ vex_printf("ldr ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
|
|
+ vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
|
|
+ ppHRegARM64(i->ARM64in.VLdStS.rN);
|
|
+ vex_printf(")");
|
|
+ } else {
|
|
+ vex_printf("str ");
|
|
+ vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
|
|
+ ppHRegARM64(i->ARM64in.VLdStS.rN);
|
|
+ vex_printf("), ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_VLdStD: /* same layout as VLdStS, but dD is printed with plain ppHRegARM64 */
|
|
+ if (i->ARM64in.VLdStD.isLoad) {
|
|
+ vex_printf("ldr ");
|
|
+ ppHRegARM64(i->ARM64in.VLdStD.dD);
|
|
+ vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
|
|
+ ppHRegARM64(i->ARM64in.VLdStD.rN);
|
|
+ vex_printf(")");
|
|
+ } else {
|
|
+ vex_printf("str ");
|
|
+ vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
|
|
+ ppHRegARM64(i->ARM64in.VLdStD.rN);
|
|
+ vex_printf("), ");
|
|
+ ppHRegARM64(i->ARM64in.VLdStD.dD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_VLdStQ: /* "ld1.2d {rQ}, [rN]" or "st1.2d {rQ}, [rN]"; only the mnemonic depends on isLoad */
|
|
+ if (i->ARM64in.VLdStQ.isLoad)
|
|
+ vex_printf("ld1.2d {");
|
|
+ else
|
|
+ vex_printf("st1.2d {");
|
|
+ ppHRegARM64(i->ARM64in.VLdStQ.rQ);
|
|
+ vex_printf("}, [");
|
|
+ ppHRegARM64(i->ARM64in.VLdStQ.rN);
|
|
+ vex_printf("]");
|
|
+ return;
|
|
+ case ARM64in_VCvtI2F: { /* "<syn>cvtf rD(S|D-reg), rS(W|X-reg)" */
|
|
+ HChar syn = '?';
|
|
+ UInt fszB = 0;
|
|
+ UInt iszB = 0;
|
|
+ characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how); /* syn/fszB/iszB are passed by address; presumably filled in from 'how' -- helper not visible here */
|
|
+ vex_printf("%ccvtf ", syn);
|
|
+ ppHRegARM64(i->ARM64in.VCvtI2F.rD);
|
|
+ vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D'); /* any fszB other than 4 is shown as 'D' */
|
|
+ ppHRegARM64(i->ARM64in.VCvtI2F.rS);
|
|
+ vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X'); /* any iszB other than 4 is shown as 'X' */
|
|
+ return;
|
|
+ }
|
|
+ case ARM64in_VCvtF2I: { /* "fcvt<rmo><syn> rD(W|X-reg), rS(S|D-reg)" */
|
|
+ HChar syn = '?';
|
|
+ UInt fszB = 0;
|
|
+ UInt iszB = 0;
|
|
+ HChar rmo = '?';
|
|
+ characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
|
|
+ UChar armRM = i->ARM64in.VCvtF2I.armRM;
|
|
+ if (armRM < 4) rmo = "npmz"[armRM]; /* armRM 0..3 selects 'n','p','m','z'; any other value leaves '?' */
|
|
+ vex_printf("fcvt%c%c ", rmo, syn);
|
|
+ ppHRegARM64(i->ARM64in.VCvtF2I.rD);
|
|
+ vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
|
|
+ ppHRegARM64(i->ARM64in.VCvtF2I.rS);
|
|
+ vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
|
|
+ return;
|
|
+ }
|
|
+ case ARM64in_VCvtSD: /* the single-precision side (src for s2d, dst for d2s) is printed via ppHRegARM64asSreg */
|
|
+ vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
|
|
+ if (i->ARM64in.VCvtSD.sToD) {
|
|
+ ppHRegARM64(i->ARM64in.VCvtSD.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
|
|
+ } else {
|
|
+ ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VCvtSD.src);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_VUnaryD: /* "f<op> dst, src", registers printed with ppHRegARM64 */
|
|
+ vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
|
|
+ ppHRegARM64(i->ARM64in.VUnaryD.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VUnaryD.src);
|
|
+ return;
|
|
+ case ARM64in_VUnaryS: /* as VUnaryD, but registers printed with ppHRegARM64asSreg */
|
|
+ vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
|
|
+ ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
|
|
+ return;
|
|
+ case ARM64in_VBinD: /* "f<op> dst, argL, argR", registers printed with ppHRegARM64 */
|
|
+ vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
|
|
+ ppHRegARM64(i->ARM64in.VBinD.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VBinD.argL);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VBinD.argR);
|
|
+ return;
|
|
+ case ARM64in_VBinS: /* as VBinD, but registers printed with ppHRegARM64asSreg */
|
|
+ vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
|
|
+ ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
|
|
+ return;
|
|
+ case ARM64in_VCmpD: /* "fcmp argL, argR" */
|
|
+ vex_printf("fcmp ");
|
|
+ ppHRegARM64(i->ARM64in.VCmpD.argL);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VCmpD.argR);
|
|
+ return;
|
|
+ case ARM64in_VCmpS: /* same text as VCmpD, with operands printed via ppHRegARM64asSreg */
|
|
+ vex_printf("fcmp ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
|
|
+ return;
|
|
+ case ARM64in_FPCR: /* toFPCR: "msr fpcr, iReg"; otherwise "mrs iReg, fpcr" */
|
|
+ if (i->ARM64in.FPCR.toFPCR) {
|
|
+ vex_printf("msr fpcr, ");
|
|
+ ppHRegARM64(i->ARM64in.FPCR.iReg);
|
|
+ } else {
|
|
+ vex_printf("mrs ");
|
|
+ ppHRegARM64(i->ARM64in.FPCR.iReg);
|
|
+ vex_printf(", fpcr");
|
|
+ }
|
|
+ return;
|
|
+//ZZ case ARMin_VAluS:
|
|
+//ZZ vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
|
|
+//ZZ ppHRegARM(i->ARMin.VAluS.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VAluS.argL);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VAluS.argR);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VCMovD:
|
|
+//ZZ vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
|
|
+//ZZ ppHRegARM(i->ARMin.VCMovD.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VCMovD.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VCMovS:
|
|
+//ZZ vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
|
|
+//ZZ ppHRegARM(i->ARMin.VCMovS.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VCMovS.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VXferD:
|
|
+//ZZ vex_printf("vmov ");
|
|
+//ZZ if (i->ARMin.VXferD.toD) {
|
|
+//ZZ ppHRegARM(i->ARMin.VXferD.dD);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VXferD.rLo);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VXferD.rHi);
|
|
+//ZZ } else {
|
|
+//ZZ ppHRegARM(i->ARMin.VXferD.rLo);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VXferD.rHi);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VXferD.dD);
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VXferS:
|
|
+//ZZ vex_printf("vmov ");
|
|
+//ZZ if (i->ARMin.VXferS.toS) {
|
|
+//ZZ ppHRegARM(i->ARMin.VXferS.fD);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VXferS.rLo);
|
|
+//ZZ } else {
|
|
+//ZZ ppHRegARM(i->ARMin.VXferS.rLo);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VXferS.fD);
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VCvtID: {
|
|
+//ZZ const HChar* nm = "?";
|
|
+//ZZ if (i->ARMin.VCvtID.iToD) {
|
|
+//ZZ nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
|
|
+//ZZ } else {
|
|
+//ZZ nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
|
|
+//ZZ }
|
|
+//ZZ vex_printf("%s ", nm);
|
|
+//ZZ ppHRegARM(i->ARMin.VCvtID.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.VCvtID.src);
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_MFence:
|
|
+//ZZ vex_printf("(mfence) dsb sy; dmb sy; isb");
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_CLREX:
|
|
+//ZZ vex_printf("clrex");
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NLdStD:
|
|
+//ZZ if (i->ARMin.NLdStD.isLoad)
|
|
+//ZZ vex_printf("vld1.32 {");
|
|
+//ZZ else
|
|
+//ZZ vex_printf("vst1.32 {");
|
|
+//ZZ ppHRegARM(i->ARMin.NLdStD.dD);
|
|
+//ZZ vex_printf("} ");
|
|
+//ZZ ppARMAModeN(i->ARMin.NLdStD.amode);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NUnary:
|
|
+//ZZ vex_printf("%s%s%s ",
|
|
+//ZZ showARMNeonUnOp(i->ARMin.NUnary.op),
|
|
+//ZZ showARMNeonUnOpDataType(i->ARMin.NUnary.op),
|
|
+//ZZ showARMNeonDataSize(i));
|
|
+//ZZ ppHRegARM(i->ARMin.NUnary.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NUnary.src);
|
|
+//ZZ if (i->ARMin.NUnary.op == ARMneon_EQZ)
|
|
+//ZZ vex_printf(", #0");
|
|
+//ZZ if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
|
|
+//ZZ vex_printf(", #%d", i->ARMin.NUnary.size);
|
|
+//ZZ }
|
|
+//ZZ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
|
|
+//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
|
|
+//ZZ UInt size;
|
|
+//ZZ size = i->ARMin.NUnary.size;
|
|
+//ZZ if (size & 0x40) {
|
|
+//ZZ vex_printf(", #%d", size - 64);
|
|
+//ZZ } else if (size & 0x20) {
|
|
+//ZZ vex_printf(", #%d", size - 32);
|
|
+//ZZ } else if (size & 0x10) {
|
|
+//ZZ vex_printf(", #%d", size - 16);
|
|
+//ZZ } else if (size & 0x08) {
|
|
+//ZZ vex_printf(", #%d", size - 8);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NUnaryS:
|
|
+//ZZ vex_printf("%s%s%s ",
|
|
+//ZZ showARMNeonUnOpS(i->ARMin.NUnaryS.op),
|
|
+//ZZ showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
|
|
+//ZZ showARMNeonDataSize(i));
|
|
+//ZZ ppARMNRS(i->ARMin.NUnaryS.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppARMNRS(i->ARMin.NUnaryS.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NShift:
|
|
+//ZZ vex_printf("%s%s%s ",
|
|
+//ZZ showARMNeonShiftOp(i->ARMin.NShift.op),
|
|
+//ZZ showARMNeonShiftOpDataType(i->ARMin.NShift.op),
|
|
+//ZZ showARMNeonDataSize(i));
|
|
+//ZZ ppHRegARM(i->ARMin.NShift.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NShift.argL);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NShift.argR);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NShl64:
|
|
+//ZZ vex_printf("vshl.i64 ");
|
|
+//ZZ ppHRegARM(i->ARMin.NShl64.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NShl64.src);
|
|
+//ZZ vex_printf(", #%u", i->ARMin.NShl64.amt);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NDual:
|
|
+//ZZ vex_printf("%s%s%s ",
|
|
+//ZZ showARMNeonDualOp(i->ARMin.NDual.op),
|
|
+//ZZ showARMNeonDualOpDataType(i->ARMin.NDual.op),
|
|
+//ZZ showARMNeonDataSize(i));
|
|
+//ZZ ppHRegARM(i->ARMin.NDual.arg1);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NDual.arg2);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NBinary:
|
|
+//ZZ vex_printf("%s%s%s",
|
|
+//ZZ showARMNeonBinOp(i->ARMin.NBinary.op),
|
|
+//ZZ showARMNeonBinOpDataType(i->ARMin.NBinary.op),
|
|
+//ZZ showARMNeonDataSize(i));
|
|
+//ZZ vex_printf(" ");
|
|
+//ZZ ppHRegARM(i->ARMin.NBinary.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NBinary.argL);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NBinary.argR);
|
|
+//ZZ return;
|
|
+ case ARM64in_VImmQ: /* imm is printed in hex inside the literal text "Bits16toBytes16(...)" */
|
|
+ vex_printf("qimm ");
|
|
+ ppHRegARM64(i->ARM64in.VImmQ.rQ);
|
|
+ vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
|
|
+ return;
|
|
+ case ARM64in_VDfromX: /* "fmov rD, rX" */
|
|
+ vex_printf("fmov ");
|
|
+ ppHRegARM64(i->ARM64in.VDfromX.rD);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VDfromX.rX);
|
|
+ return;
|
|
+ case ARM64in_VQfromXX: /* "qFromXX rQ, rXhi, rXlo" -- high half is printed before low half */
|
|
+ vex_printf("qFromXX ");
|
|
+ ppHRegARM64(i->ARM64in.VQfromXX.rQ);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
|
|
+ return;
|
|
+ case ARM64in_VXfromQ: /* "mov rX, rQ.d[laneNo]" */
|
|
+ vex_printf("mov ");
|
|
+ ppHRegARM64(i->ARM64in.VXfromQ.rX);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VXfromQ.rQ);
|
|
+ vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
|
|
+ return;
|
|
+ case ARM64in_VMov: { /* "mov(<aux>) dst, src", where aux reflects szB */
|
|
+ UChar aux = '?';
|
|
+ switch (i->ARM64in.VMov.szB) { /* 16/8/4 bytes map to 'q'/'d'/'s'; any other size keeps '?' */
|
|
+ case 16: aux = 'q'; break;
|
|
+ case 8: aux = 'd'; break;
|
|
+ case 4: aux = 's'; break;
|
|
+ default: break;
|
|
+ }
|
|
+ vex_printf("mov(%c) ", aux);
|
|
+ ppHRegARM64(i->ARM64in.VMov.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VMov.src);
|
|
+ return;
|
|
+ }
|
|
+//ZZ case ARMin_NCMovQ:
|
|
+//ZZ vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
|
|
+//ZZ ppHRegARM(i->ARMin.NCMovQ.dst);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.NCMovQ.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_Add32:
|
|
+//ZZ vex_printf("add32 ");
|
|
+//ZZ ppHRegARM(i->ARMin.Add32.rD);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ ppHRegARM(i->ARMin.Add32.rN);
|
|
+//ZZ vex_printf(", ");
|
|
+//ZZ vex_printf("%d", i->ARMin.Add32.imm32);
|
|
+//ZZ return;
|
|
+ case ARM64in_EvCheck: /* printed as a pseudo-sequence; amCounter appears twice (load and store), amFailAddr once */
|
|
+ vex_printf("(evCheck) ldr w9,");
|
|
+ ppARM64AMode(i->ARM64in.EvCheck.amCounter);
|
|
+ vex_printf("; subs w9,w9,$1; str w9,");
|
|
+ ppARM64AMode(i->ARM64in.EvCheck.amCounter);
|
|
+ vex_printf("; bpl nofail; ldr x9,");
|
|
+ ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
|
|
+ vex_printf("; br x9; nofail:");
|
|
+ return;
|
|
+//ZZ case ARMin_ProfInc:
|
|
+//ZZ vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
|
|
+//ZZ "movw r12,HI16($NotKnownYet); "
|
|
+//ZZ "ldr r11,[r12]; "
|
|
+//ZZ "adds r11,r11,$1; "
|
|
+//ZZ "str r11,[r12]; "
|
|
+//ZZ "ldr r11,[r12+4]; "
|
|
+//ZZ "adc r11,r11,$0; "
|
|
+//ZZ "str r11,[r12+4]");
|
|
+//ZZ return;
|
|
+ default: /* any tag without a case above: report the numeric tag, then panic */
|
|
+ vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
|
|
+ vpanic("ppARM64Instr(1)");
|
|
+ return; /* presumably never reached if vpanic does not return -- TODO confirm */
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* --------- Helpers for register allocation. --------- */
|
|
+
|
|
+void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 ) /* Fill *u with the registers that *i reads (HRmRead) and writes (HRmWrite), chosen by i->tag. */
|
|
+{
|
|
+ vassert(mode64 == True); /* only mode64 == True is accepted */
|
|
+ initHRegUsage(u); /* (re)initialise *u before any uses are added */
|
|
+ switch (i->tag) {
|
|
+ case ARM64in_Arith: /* dst written, argL read; argR's use is added by the RIA helper */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
|
|
+ addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
|
|
+ return;
|
|
+ case ARM64in_Cmp: /* no register is recorded as written */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
|
|
+ addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
|
|
+ return;
|
|
+ case ARM64in_Logic: /* dst written, argL read; argR's use is added by the RIL helper */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
|
|
+ addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
|
|
+ return;
|
|
+ case ARM64in_Test: /* no register is recorded as written */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
|
|
+ addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
|
|
+ return;
|
|
+ case ARM64in_Shift: /* dst written, argL read; argR's use is added by the RI6 helper */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
|
|
+ addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
|
|
+ return;
|
|
+ case ARM64in_Unary: /* dst written, src read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
|
|
+ return;
|
|
+ case ARM64in_MovI: /* dst written, src read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
|
|
+ return;
|
|
+ case ARM64in_Imm64: /* only dst is recorded (as written) */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
|
|
+ return;
|
|
+ case ARM64in_LdSt64: /* amode registers come from the helper; rD is written by a load and read by a store */
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
|
|
+ if (i->ARM64in.LdSt64.isLoad) {
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
|
|
+ } else {
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_LdSt32: /* same pattern as LdSt64 */
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
|
|
+ if (i->ARM64in.LdSt32.isLoad) {
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
|
|
+ } else {
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_LdSt16: /* same pattern as LdSt64 */
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
|
|
+ if (i->ARM64in.LdSt16.isLoad) {
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
|
|
+ } else {
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_LdSt8: /* same pattern as LdSt64 */
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
|
|
+ if (i->ARM64in.LdSt8.isLoad) {
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
|
|
+ } else {
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
|
|
+ }
|
|
+ return;
|
|
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
|
|
+ conditionally exit the block. Hence we only need to list (1)
|
|
+ the registers that they read, and (2) the registers that they
|
|
+ write in the case where the block is not exited. (2) is
|
|
+ empty, hence only (1) is relevant here. */
|
|
+ case ARM64in_XDirect: /* only the registers of amPC are added */
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
|
|
+ return;
|
|
+ case ARM64in_XIndir: /* dstGA is read, plus the registers of amPC */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
|
|
+ return;
|
|
+ case ARM64in_XAssisted: /* dstGA is read, plus the registers of amPC */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
|
|
+ return;
|
|
+ case ARM64in_CSel: /* dst written; argL and argR both read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
|
|
+ return;
|
|
+ case ARM64in_Call:
|
|
+ /* logic and comments copied/modified from x86 back end */
|
|
+ /* This is a bit subtle. */
|
|
+ /* First off, claim it trashes all the caller-saved regs
|
|
+ which fall within the register allocator's jurisdiction.
|
|
+ These I believe to be x0 to x7. Also need to be
|
|
+ careful about vector regs. */
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X0());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X1());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X2());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X3());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X4());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X5());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X6());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X7());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_Q16()); /* q16..q18 are the vector registers declared trashed here */
|
|
+ addHRegUse(u, HRmWrite, hregARM64_Q17());
|
|
+ addHRegUse(u, HRmWrite, hregARM64_Q18());
|
|
+ /* Now we have to state any parameter-carrying registers
|
|
+ which might be read. This depends on nArgRegs. */
|
|
+ switch (i->ARM64in.Call.nArgRegs) { /* cases fall through downwards, so nArgRegs == k marks x0 .. x(k-1) as read */
|
|
+ case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
|
|
+ case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
|
|
+ case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
|
|
+ case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
|
|
+ case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
|
|
+ case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
|
|
+ case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
|
|
+ case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
|
|
+ case 0: break;
|
|
+ default: vpanic("getRegUsage_ARM64:Call:regparms"); /* nArgRegs outside 0..8 */
|
|
+ }
|
|
+ /* Finally, there is the issue that the insn trashes a
|
|
+ register because the literal target address has to be
|
|
+ loaded into a register. However, we reserve x9 for that
|
|
+ purpose so there's no further complexity here. Stating x9
|
|
+ as trashed is pointless since it's not under the control
|
|
+ of the allocator, but what the hell. */
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X9());
|
|
+ return;
|
|
+ case ARM64in_AddToSP:
|
|
+ /* Only changes SP, but regalloc doesn't control that, hence
|
|
+ we don't care. */
|
|
+ return;
|
|
+ case ARM64in_FromSP: /* only dst is recorded (as written) */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
|
|
+ return;
|
|
+ case ARM64in_Mul: /* dst written; argL and argR read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
|
|
+ return; /* NOTE(review): the //ZZ lines in this switch are commented-out cases that use ARMin_* names rather than ARM64in_*; they contribute no code */
|
|
+//ZZ case ARMin_LdrEX:
|
|
+//ZZ addHRegUse(u, HRmRead, hregARM_R4());
|
|
+//ZZ addHRegUse(u, HRmWrite, hregARM_R2());
|
|
+//ZZ if (i->ARMin.LdrEX.szB == 8)
|
|
+//ZZ addHRegUse(u, HRmWrite, hregARM_R3());
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_StrEX:
|
|
+//ZZ addHRegUse(u, HRmRead, hregARM_R4());
|
|
+//ZZ addHRegUse(u, HRmWrite, hregARM_R0());
|
|
+//ZZ addHRegUse(u, HRmRead, hregARM_R2());
|
|
+//ZZ if (i->ARMin.StrEX.szB == 8)
|
|
+//ZZ addHRegUse(u, HRmRead, hregARM_R3());
|
|
+//ZZ return;
|
|
+ case ARM64in_VLdStS: /* base rN is always read; sD is written by a load, read by a store */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
|
|
+ if (i->ARM64in.VLdStS.isLoad) {
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
|
|
+ } else {
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_VLdStD: /* same pattern as VLdStS, with dD as the data register */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
|
|
+ if (i->ARM64in.VLdStD.isLoad) {
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
|
|
+ } else {
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
|
|
+ }
|
|
+ return;
|
|
+ case ARM64in_VLdStQ: /* same pattern as VLdStS, with rQ as the data register */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
|
|
+ if (i->ARM64in.VLdStQ.isLoad)
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
|
|
+ else
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
|
|
+ return;
|
|
+ case ARM64in_VCvtI2F: /* rS read, rD written */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
|
|
+ return;
|
|
+ case ARM64in_VCvtF2I: /* rS read, rD written */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
|
|
+ return;
|
|
+ case ARM64in_VCvtSD: /* dst written, src read, regardless of sToD */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
|
|
+ return;
|
|
+ case ARM64in_VUnaryD: /* dst written, src read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
|
|
+ return;
|
|
+ case ARM64in_VUnaryS: /* dst written, src read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
|
|
+ return;
|
|
+ case ARM64in_VBinD: /* dst written; argL and argR read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
|
|
+ return;
|
|
+ case ARM64in_VBinS: /* dst written; argL and argR read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
|
|
+ return;
|
|
+ case ARM64in_VCmpD: /* both operands read; no register recorded as written */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
|
|
+ return;
|
|
+ case ARM64in_VCmpS: /* both operands read; no register recorded as written */
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
|
|
+ return;
|
|
+ case ARM64in_FPCR: /* iReg is read when toFPCR is set, written otherwise */
|
|
+ if (i->ARM64in.FPCR.toFPCR)
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
|
|
+ else
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
|
|
+ return;
|
|
+//ZZ case ARMin_VAluS:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VUnaryS:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VCMovD:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VCMovS:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VXferD:
|
|
+//ZZ if (i->ARMin.VXferD.toD) {
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
|
|
+//ZZ } else {
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VXferS:
|
|
+//ZZ if (i->ARMin.VXferS.toS) {
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
|
|
+//ZZ } else {
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_VCvtID:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_MFence:
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_CLREX:
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NLdStD:
|
|
+//ZZ if (i->ARMin.NLdStD.isLoad)
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
|
|
+//ZZ else
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
|
|
+//ZZ addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NUnary:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NUnaryS:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NShift:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NShl64:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NDual:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
|
|
+//ZZ return;
|
|
+ case ARM64in_VImmQ: /* only rQ is recorded (as written) */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
|
|
+ return;
|
|
+ case ARM64in_VDfromX: /* rD written, rX read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
|
|
+ return;
|
|
+ case ARM64in_VQfromXX: /* rQ written; rXhi and rXlo read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
|
|
+ return;
|
|
+ case ARM64in_VXfromQ: /* rX written, rQ read; laneNo is not a register and is not recorded */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
|
|
+ return;
|
|
+ case ARM64in_VMov: /* dst written, src read */
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
|
|
+ return;
|
|
+//ZZ case ARMin_NBinary:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
|
|
+//ZZ /* TODO: sometimes dst is also being read! */
|
|
+//ZZ // XXX fix this
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_NCMovQ:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
|
|
+//ZZ return;
|
|
+//ZZ case ARMin_Add32:
|
|
+//ZZ addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
|
|
+//ZZ addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
|
|
+//ZZ return;
|
|
+ case ARM64in_EvCheck:
|
|
+ /* We expect both amodes only to mention x21, so this is in
|
|
+ fact pointless, since x21 isn't allocatable, but
|
|
+ anyway.. */
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
|
|
+ addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
|
|
+ addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
|
|
+ return;
|
|
+//ZZ case ARMin_ProfInc:
|
|
+//ZZ addHRegUse(u, HRmWrite, hregARM_R12());
|
|
+//ZZ addHRegUse(u, HRmWrite, hregARM_R11());
|
|
+//ZZ return;
|
|
+ default: /* any tag without a case above: print the instruction, then panic */
|
|
+ ppARM64Instr(i);
|
|
+ vpanic("getRegUsage_ARM64Instr");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* Rewrite, in place, every register field of |i| with the value that
+   lookupHRegRemap returns for it under the remapping |m|.  Operand
+   sub-structures (RIA / RIL / RI6 operands and address modes) are
+   handed to their own mapRegs_* helpers.  |mode64| must be True.
+   An instruction tag with no case below is printed and causes a
+   panic. */
+void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
+{
+   vassert(mode64 == True);
+   switch (i->tag) {
+      case ARM64in_Arith:
+         i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
+         i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
+         mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
+         return;
+      case ARM64in_Cmp:
+         i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
+         mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
+         return;
+      case ARM64in_Logic:
+         i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
+         i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
+         mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
+         return;
+      case ARM64in_Test:
+         i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
+         /* Use the Test member of the union, not Logic: reading
+            Logic.argR here only works if the two fields happen to
+            share an offset. */
+         mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
+         return;
+      case ARM64in_Shift:
+         i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
+         i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
+         mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
+         return;
+      case ARM64in_Unary:
+         i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
+         i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
+         return;
+      case ARM64in_MovI:
+         i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
+         i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
+         return;
+      case ARM64in_Imm64:
+         i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
+         return;
+      case ARM64in_LdSt64:
+         i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
+         mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
+         return;
+      case ARM64in_LdSt32:
+         i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
+         mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
+         return;
+      case ARM64in_LdSt16:
+         i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
+         mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
+         return;
+      case ARM64in_LdSt8:
+         i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
+         mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
+         return;
+      case ARM64in_XDirect:
+         mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
+         return;
+      case ARM64in_XIndir:
+         i->ARM64in.XIndir.dstGA
+            = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
+         mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
+         return;
+      case ARM64in_XAssisted:
+         i->ARM64in.XAssisted.dstGA
+            = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
+         mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
+         return;
+      case ARM64in_CSel:
+         i->ARM64in.CSel.dst  = lookupHRegRemap(m, i->ARM64in.CSel.dst);
+         i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
+         i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
+         return;
+      case ARM64in_Call:
+         /* no register fields are remapped for this tag */
+         return;
+      case ARM64in_AddToSP:
+         /* no register fields are remapped for this tag */
+         return;
+      case ARM64in_FromSP:
+         i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
+         return;
+      case ARM64in_Mul:
+         i->ARM64in.Mul.dst  = lookupHRegRemap(m, i->ARM64in.Mul.dst);
+         i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
+         i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
+         return;
+//ZZ       case ARMin_Mul:
+//ZZ          return;
+//ZZ       case ARMin_LdrEX:
+//ZZ          return;
+//ZZ       case ARMin_StrEX:
+//ZZ          return;
+      case ARM64in_VLdStS:
+         i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
+         i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
+         return;
+      case ARM64in_VLdStD:
+         i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
+         i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
+         return;
+      case ARM64in_VLdStQ:
+         i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
+         i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
+         return;
+      case ARM64in_VCvtI2F:
+         i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
+         i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
+         return;
+      case ARM64in_VCvtF2I:
+         i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
+         i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
+         return;
+      case ARM64in_VCvtSD:
+         i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
+         i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
+         return;
+      case ARM64in_VUnaryD:
+         i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
+         i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
+         return;
+      case ARM64in_VUnaryS:
+         i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
+         i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
+         return;
+      case ARM64in_VBinD:
+         i->ARM64in.VBinD.dst  = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
+         i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
+         i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
+         return;
+      case ARM64in_VBinS:
+         i->ARM64in.VBinS.dst  = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
+         i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
+         i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
+         return;
+      case ARM64in_VCmpD:
+         i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
+         i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
+         return;
+      case ARM64in_VCmpS:
+         i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
+         i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
+         return;
+      case ARM64in_FPCR:
+         i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
+         return;
+//ZZ       case ARMin_VAluS:
+//ZZ          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
+//ZZ          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
+//ZZ          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
+//ZZ          return;
+//ZZ       case ARMin_VCMovD:
+//ZZ          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
+//ZZ          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
+//ZZ          return;
+//ZZ       case ARMin_VCMovS:
+//ZZ          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
+//ZZ          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
+//ZZ          return;
+//ZZ       case ARMin_VXferD:
+//ZZ          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
+//ZZ          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
+//ZZ          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
+//ZZ          return;
+//ZZ       case ARMin_VXferS:
+//ZZ          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
+//ZZ          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
+//ZZ          return;
+//ZZ       case ARMin_VCvtID:
+//ZZ          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
+//ZZ          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
+//ZZ          return;
+//ZZ       case ARMin_MFence:
+//ZZ          return;
+//ZZ       case ARMin_CLREX:
+//ZZ          return;
+//ZZ       case ARMin_NLdStD:
+//ZZ          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
+//ZZ          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
+//ZZ          return;
+//ZZ       case ARMin_NUnary:
+//ZZ          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
+//ZZ          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
+//ZZ          return;
+//ZZ       case ARMin_NUnaryS:
+//ZZ          i->ARMin.NUnaryS.src->reg
+//ZZ             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
+//ZZ          i->ARMin.NUnaryS.dst->reg
+//ZZ             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
+//ZZ          return;
+//ZZ       case ARMin_NShift:
+//ZZ          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
+//ZZ          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
+//ZZ          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
+//ZZ          return;
+//ZZ       case ARMin_NShl64:
+//ZZ          i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
+//ZZ          i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
+//ZZ          return;
+//ZZ       case ARMin_NDual:
+//ZZ          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
+//ZZ          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
+//ZZ          return;
+      case ARM64in_VImmQ:
+         i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
+         return;
+      case ARM64in_VDfromX:
+         i->ARM64in.VDfromX.rD
+            = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
+         i->ARM64in.VDfromX.rX
+            = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
+         return;
+      case ARM64in_VQfromXX:
+         i->ARM64in.VQfromXX.rQ
+            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
+         i->ARM64in.VQfromXX.rXhi
+            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
+         i->ARM64in.VQfromXX.rXlo
+            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
+         return;
+      case ARM64in_VXfromQ:
+         i->ARM64in.VXfromQ.rX
+            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
+         i->ARM64in.VXfromQ.rQ
+            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
+         return;
+      case ARM64in_VMov:
+         i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
+         i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
+         return;
+
+//ZZ       case ARMin_NBinary:
+//ZZ          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
+//ZZ          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
+//ZZ          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
+//ZZ          return;
+//ZZ       case ARMin_NCMovQ:
+//ZZ          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
+//ZZ          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
+//ZZ          return;
+//ZZ       case ARMin_Add32:
+//ZZ          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
+//ZZ          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
+//ZZ          return;
+      case ARM64in_EvCheck:
+         /* We expect both amodes only to mention x21, so this is in
+            fact pointless, since x21 isn't allocatable, but
+            anyway.. */
+         mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
+         mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
+         return;
+//ZZ       case ARMin_ProfInc:
+//ZZ          /* hardwires r11 and r12 -- nothing to modify. */
+//ZZ          return;
+      default:
+         ppARM64Instr(i);
+         vpanic("mapRegs_ARM64Instr");
+   }
+}
|
|
+
|
|
+/* Decide whether |i| is a register-to-register move.  For the two tags
+   recognised here (ARM64in_MovI and ARM64in_VMov) the instruction's
+   source and destination are stored through |src| and |dst| and True
+   is returned.  Any other tag yields False with *src and *dst left
+   unwritten -- when in doubt the answer is No.  Used by the register
+   allocator to do move coalescing.
+*/
+Bool isMove_ARM64Instr ( ARM64Instr* i, HReg* src, HReg* dst )
+{
+   if (i->tag == ARM64in_MovI) {
+      *src = i->ARM64in.MovI.src;
+      *dst = i->ARM64in.MovI.dst;
+      return True;
+   }
+   if (i->tag == ARM64in_VMov) {
+      *src = i->ARM64in.VMov.src;
+      *dst = i->ARM64in.VMov.dst;
+      return True;
+   }
+   return False;
+}
|
|
+
|
|
+
|
|
+/* Generate arm64 spill/reload instructions under the direction of the
+   register allocator.  Note it's critical these don't write the
+   condition codes. */
+
+/* Build the instruction(s) that store real register |rreg| at byte
+   offset |offsetB| from x21 (the baseblock pointer).  The result is
+   returned in *i1 and, when a second instruction is required, *i2;
+   both are first set to NULL, so an unused slot stays NULL.
+
+   Preconditions, all enforced with vassert: |offsetB| is
+   non-negative, |rreg| is not virtual, |mode64| is True, and the
+   offset satisfies the per-class alignment and range limits below.
+   A register class other than HRcInt64 / HRcFlt64 / HRcVec128 is
+   printed and causes a panic. */
+void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+                      HReg rreg, Int offsetB, Bool mode64 )
+{
+   HRegClass rclass;
+   vassert(offsetB >= 0);
+   vassert(!hregIsVirtual(rreg));
+   vassert(mode64 == True);
+   *i1 = *i2 = NULL;
+   rclass = hregClass(rreg);
+   switch (rclass) {
+      case HRcInt64:
+         /* The RI12 amode takes its offset in units of 8 bytes. */
+         vassert(0 == (offsetB & 7));
+         offsetB >>= 3;
+         vassert(offsetB < 4096);
+         *i1 = ARM64Instr_LdSt64(
+                  False/*!isLoad*/,
+                  rreg,
+                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
+               );
+         return;
+      case HRcFlt64:
+         /* offsetB >= 0 was already checked on entry. */
+         vassert(0 == (offsetB & 7));
+         vassert(offsetB < 32768);
+         *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
+                                 rreg, hregARM64_X21(), offsetB);
+         return;
+      case HRcVec128: {
+         HReg x21  = hregARM64_X21();  // baseblock
+         HReg x9   = hregARM64_X9();   // spill temporary
+         vassert(0 == (offsetB & 15)); // check sane alignment
+         vassert(offsetB < 4096);
+         /* Two instructions: form the address in x9, then store
+            through it.  (The final True is presumably "isAdd" --
+            confirm against ARM64Instr_Arith.) */
+         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
+         *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
+         return;
+      }
+      default:
+         ppHRegClass(rclass);
+         vpanic("genSpill_ARM64: unimplemented regclass");
+   }
+}
|
|
+
|
|
+/* Build the instruction(s) that load real register |rreg| from byte
+   offset |offsetB| relative to x21 (the baseblock pointer).  The
+   result is returned in *i1 and, when a second instruction is
+   required, *i2; both are first set to NULL, so an unused slot stays
+   NULL.
+
+   Preconditions, all enforced with vassert: |offsetB| is
+   non-negative, |rreg| is not virtual, |mode64| is True, and the
+   offset satisfies the per-class alignment and range limits below.
+   A register class other than HRcInt64 / HRcFlt64 / HRcVec128 is
+   printed and causes a panic. */
+void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+                       HReg rreg, Int offsetB, Bool mode64 )
+{
+   HRegClass rclass;
+   vassert(offsetB >= 0);
+   vassert(!hregIsVirtual(rreg));
+   vassert(mode64 == True);
+   *i1 = *i2 = NULL;
+   rclass = hregClass(rreg);
+   switch (rclass) {
+      case HRcInt64:
+         /* The RI12 amode takes its offset in units of 8 bytes. */
+         vassert(0 == (offsetB & 7));
+         offsetB >>= 3;
+         vassert(offsetB < 4096);
+         *i1 = ARM64Instr_LdSt64(
+                  True/*isLoad*/,
+                  rreg,
+                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
+               );
+         return;
+      case HRcFlt64:
+         /* offsetB >= 0 was already checked on entry. */
+         vassert(0 == (offsetB & 7));
+         vassert(offsetB < 32768);
+         *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
+                                 rreg, hregARM64_X21(), offsetB);
+         return;
+      case HRcVec128: {
+         HReg x21  = hregARM64_X21();  // baseblock
+         HReg x9   = hregARM64_X9();   // spill temporary
+         vassert(0 == (offsetB & 15)); // check sane alignment
+         vassert(offsetB < 4096);
+         /* Two instructions: form the address in x9, then load
+            through it.  (The final True is presumably "isAdd" --
+            confirm against ARM64Instr_Arith.) */
+         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
+         *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
+         return;
+      }
+      default:
+         ppHRegClass(rclass);
+         vpanic("genReload_ARM64: unimplemented regclass");
+   }
+}
|
|
+
|
|
+
|
|
+//ZZ /* Emit an instruction into buf and return the number of bytes used.
|
|
+//ZZ Note that buf is not the insn's final place, and therefore it is
|
|
+//ZZ imperative to emit position-independent code. */
|
|
+
|
|
+/* Return hregNumber(r) as a UChar.  Asserts that |r| is a non-virtual
+   register of class HRcInt64 whose number is at most 30. */
+static inline UChar iregNo ( HReg r )
+{
+   vassert(hregClass(r) == HRcInt64);
+   vassert(!hregIsVirtual(r));
+   UInt regNum = hregNumber(r);
+   vassert(regNum <= 30);
+   return toUChar(regNum);
+}
|
|
+
|
|
+/* Return hregNumber(r) as a UChar.  Asserts that |r| is a non-virtual
+   register of class HRcFlt64 whose number is at most 31. */
+static inline UChar dregNo ( HReg r )
+{
+   vassert(hregClass(r) == HRcFlt64);
+   vassert(!hregIsVirtual(r));
+   UInt regNum = hregNumber(r);
+   vassert(regNum <= 31);
+   return toUChar(regNum);
+}
|
|
+
|
|
+/* Return hregNumber(r) as a UChar.  Asserts that |r| is a non-virtual
+   register of class HRcVec128 whose number is at most 31. */
+static inline UChar qregNo ( HReg r )
+{
+   vassert(hregClass(r) == HRcVec128);
+   vassert(!hregIsVirtual(r));
+   UInt regNum = hregNumber(r);
+   vassert(regNum <= 31);
+   return toUChar(regNum);
+}
|
|
+
|
|
+/* Named bit patterns.  Each Xbbbb.. constant has the value spelled
+   out by the binary digits in its name; BITS4 and BITS8 assemble a
+   value from individual bits, most significant first. */
+
+#define BITS4(zzb3,zzb2,zzb1,zzb0) \
+   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
+
+#define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
+   ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
+
+/* --- 2 bit patterns --- */
+#define X00        BITS4(0,0, 0,0)
+#define X01        BITS4(0,0, 0,1)
+#define X10        BITS4(0,0, 1,0)
+#define X11        BITS4(0,0, 1,1)
+
+/* --- 3 bit patterns --- */
+#define X000       BITS4(0, 0,0,0)
+#define X001       BITS4(0, 0,0,1)
+#define X010       BITS4(0, 0,1,0)
+#define X011       BITS4(0, 0,1,1)
+#define X100       BITS4(0, 1,0,0)
+#define X101       BITS4(0, 1,0,1)
+#define X110       BITS4(0, 1,1,0)
+#define X111       BITS4(0, 1,1,1)
+
+/* --- 4 bit patterns --- */
+#define X0000      BITS4(0,0,0,0)
+#define X0001      BITS4(0,0,0,1)
+#define X0010      BITS4(0,0,1,0)
+#define X0011      BITS4(0,0,1,1)
+
+/* --- 5 bit patterns --- */
+#define X00000     BITS8(0,0,0, 0,0,0,0,0)
+#define X00111     BITS8(0,0,0, 0,0,1,1,1)
+#define X01000     BITS8(0,0,0, 0,1,0,0,0)
+#define X10000     BITS8(0,0,0, 1,0,0,0,0)
+#define X11000     BITS8(0,0,0, 1,1,0,0,0)
+#define X11110     BITS8(0,0,0, 1,1,1,1,0)
+#define X11111     BITS8(0,0,0, 1,1,1,1,1)
+
+/* --- 6 bit patterns --- */
+#define X000000    BITS8(0,0, 0,0,0,0,0,0)
+#define X000100    BITS8(0,0, 0,0,0,1,0,0)
+#define X001000    BITS8(0,0, 0,0,1,0,0,0)
+#define X001001    BITS8(0,0, 0,0,1,0,0,1)
+#define X001010    BITS8(0,0, 0,0,1,0,1,0)
+#define X001111    BITS8(0,0, 0,0,1,1,1,1)
+#define X010000    BITS8(0,0, 0,1,0,0,0,0)
+#define X010001    BITS8(0,0, 0,1,0,0,0,1)
+#define X011010    BITS8(0,0, 0,1,1,0,1,0)
+#define X011111    BITS8(0,0, 0,1,1,1,1,1)
+#define X100100    BITS8(0,0, 1,0,0,1,0,0)
+#define X100101    BITS8(0,0, 1,0,0,1,0,1)
+#define X100110    BITS8(0,0, 1,0,0,1,1,0)
+#define X110000    BITS8(0,0, 1,1,0,0,0,0)
+#define X110001    BITS8(0,0, 1,1,0,0,0,1)
+#define X111000    BITS8(0,0, 1,1,1,0,0,0)
+#define X111001    BITS8(0,0, 1,1,1,0,0,1)
+#define X111101    BITS8(0,0, 1,1,1,1,0,1)
+
+/* --- 8 bit patterns --- */
+#define X00100000  BITS8(0,0,1,0,0,0,0,0)
+#define X00100001  BITS8(0,0,1,0,0,0,0,1)
+#define X00100010  BITS8(0,0,1,0,0,0,1,0)
+#define X00100011  BITS8(0,0,1,0,0,0,1,1)
+#define X01010000  BITS8(0,1,0,1,0,0,0,0)
+#define X01010001  BITS8(0,1,0,1,0,0,0,1)
+#define X01010100  BITS8(0,1,0,1,0,1,0,0)
+#define X01011000  BITS8(0,1,0,1,1,0,0,0)
+#define X01100000  BITS8(0,1,1,0,0,0,0,0)
+#define X01100001  BITS8(0,1,1,0,0,0,0,1)
+#define X01100010  BITS8(0,1,1,0,0,0,1,0)
+#define X01100011  BITS8(0,1,1,0,0,0,1,1)
+#define X01110000  BITS8(0,1,1,1,0,0,0,0)
+#define X11000001  BITS8(1,1,0,0,0,0,0,1)
+#define X11000011  BITS8(1,1,0,0,0,0,1,1)
+#define X11010100  BITS8(1,1,0,1,0,1,0,0)
+#define X11010110  BITS8(1,1,0,1,0,1,1,0)
+#define X11011000  BITS8(1,1,0,1,1,0,0,0)
+#define X11011010  BITS8(1,1,0,1,1,0,1,0)
+#define X11011110  BITS8(1,1,0,1,1,1,1,0)
+#define X11110001  BITS8(1,1,1,1,0,0,0,1)
+#define X11110011  BITS8(1,1,1,1,0,0,1,1)
|
|
+
|
|
+
|
|
+/* --- 4 fields --- */
|
|
+
|
|
+/* Pack four fields of widths 8, 19, 1 and 4 bits (f1 most significant)
+   into one 32-bit word.  Each field is asserted to fit its width. */
+static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
+   vassert(8+19+1+4 == 32);
+   vassert(f1 < (1<<8));
+   vassert(f2 < (1<<19));
+   vassert(f3 < (1<<1));
+   vassert(f4 < (1<<4));
+   /* bit positions: f1 @ 24, f2 @ 5, f3 @ 4, f4 @ 0 */
+   return (f1 << 24) | (f2 << 5) | (f3 << 4) | f4;
+}
|
|
+
|
|
+/* --- 5 fields --- */
|
|
+
|
|
+/* Pack five fields of widths 3, 6, 2, 16 and 5 bits (f1 most
+   significant) into one 32-bit word.  Each field is asserted to fit
+   its width. */
+static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
+                                  UInt f3, UInt f4, UInt f5 ) {
+   vassert(3+6+2+16+5 == 32);
+   vassert(f1 < (1<<3));
+   vassert(f2 < (1<<6));
+   vassert(f3 < (1<<2));
+   vassert(f4 < (1<<16));
+   vassert(f5 < (1<<5));
+   /* bit positions: f1 @ 29, f2 @ 23, f3 @ 21, f4 @ 5, f5 @ 0 */
+   return (f1 << 29) | (f2 << 23) | (f3 << 21) | (f4 << 5) | f5;
+}
|
|
+
|
|
+/* --- 6 fields --- */
|
|
+
|
|
+/* Pack six fields of widths 2, 6, 2, 12, 5 and 5 bits (f1 most
+   significant) into one 32-bit word.  Each field is asserted to fit
+   its width. */
+static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
+                                    UInt f4, UInt f5, UInt f6 ) {
+   vassert(2+6+2+12+5+5 == 32);
+   vassert(f1 < (1<<2));
+   vassert(f2 < (1<<6));
+   vassert(f3 < (1<<2));
+   vassert(f4 < (1<<12));
+   vassert(f5 < (1<<5));
+   vassert(f6 < (1<<5));
+   /* bit positions: f1 @ 30, f2 @ 24, f3 @ 22, f4 @ 10, f5 @ 5, f6 @ 0 */
+   return (f1 << 30) | (f2 << 24) | (f3 << 22)
+          | (f4 << 10) | (f5 << 5) | f6;
+}
|
|
+
|
|
+/* Pack six fields of widths 3, 8, 5, 6, 5 and 5 bits (f1 most
+   significant) into one 32-bit word.  Each field is asserted to fit
+   its width. */
+static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
+                                   UInt f4, UInt f5, UInt f6 ) {
+   vassert(3+8+5+6+5+5 == 32);
+   vassert(f1 < (1<<3));
+   vassert(f2 < (1<<8));
+   vassert(f3 < (1<<5));
+   vassert(f4 < (1<<6));
+   vassert(f5 < (1<<5));
+   vassert(f6 < (1<<5));
+   /* bit positions: f1 @ 29, f2 @ 21, f3 @ 16, f4 @ 10, f5 @ 5, f6 @ 0 */
+   return (f1 << 29) | (f2 << 21) | (f3 << 16)
+          | (f4 << 10) | (f5 << 5) | f6;
+}
|
|
+
|
|
+/* Pack six fields of widths 3, 5, 8, 6, 5 and 5 bits (f1 most
+   significant) into one 32-bit word.  Each field is asserted to fit
+   its width. */
+static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
+                                   UInt f4, UInt f5, UInt f6 ) {
+   /* widths listed in this function's own field order */
+   vassert(3+5+8+6+5+5 == 32);
+   vassert(f1 < (1<<3));
+   vassert(f2 < (1<<5));
+   vassert(f3 < (1<<8));
+   vassert(f4 < (1<<6));
+   vassert(f5 < (1<<5));
+   vassert(f6 < (1<<5));
+   /* bit positions: f1 @ 29, f2 @ 24, f3 @ 16, f4 @ 10, f5 @ 5, f6 @ 0 */
+   return (f1 << 29) | (f2 << 24) | (f3 << 16)
+          | (f4 << 10) | (f5 << 5) | f6;
+}
|
|
+
|
|
+/* --- 7 fields --- */
|
|
+
|
|
+/* Pack seven fields of widths 2, 6, 3, 9, 2, 5 and 5 bits (f1 most
+   significant) into one 32-bit word.  Each field is asserted to fit
+   its width. */
+static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
+                                     UInt f4, UInt f5, UInt f6, UInt f7 ) {
+   vassert(2+6+3+9+2+5+5 == 32);
+   vassert(f1 < (1<<2));
+   vassert(f2 < (1<<6));
+   vassert(f3 < (1<<3));
+   vassert(f4 < (1<<9));
+   vassert(f5 < (1<<2));
+   vassert(f6 < (1<<5));
+   vassert(f7 < (1<<5));
+   /* bit positions: f1 @ 30, f2 @ 24, f3 @ 21, f4 @ 12, f5 @ 10,
+      f6 @ 5, f7 @ 0 */
+   return (f1 << 30) | (f2 << 24) | (f3 << 21) | (f4 << 12)
+          | (f5 << 10) | (f6 << 5) | f7;
+}
|
|
+
|
|
+/* Pack seven fields of widths 3, 6, 1, 6, 6, 5 and 5 bits (f1 most
+   significant) into one 32-bit word.  Each field is asserted to fit
+   its width. */
+static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
+                                     UInt f4, UInt f5, UInt f6, UInt f7 ) {
+   vassert(3+6+1+6+6+5+5 == 32);
+   vassert(f1 < (1<<3));
+   vassert(f2 < (1<<6));
+   vassert(f3 < (1<<1));
+   vassert(f4 < (1<<6));
+   vassert(f5 < (1<<6));
+   vassert(f6 < (1<<5));
+   vassert(f7 < (1<<5));
+   /* bit positions: f1 @ 29, f2 @ 23, f3 @ 22, f4 @ 16, f5 @ 10,
+      f6 @ 5, f7 @ 0 */
+   return (f1 << 29) | (f2 << 23) | (f3 << 22) | (f4 << 16)
+          | (f5 << 10) | (f6 << 5) | f7;
+}
|
|
+
|
|
+
|
|
+//ZZ #define X0000 BITS4(0,0,0,0)
|
|
+//ZZ #define X0001 BITS4(0,0,0,1)
|
|
+//ZZ #define X0010 BITS4(0,0,1,0)
|
|
+//ZZ #define X0011 BITS4(0,0,1,1)
|
|
+//ZZ #define X0100 BITS4(0,1,0,0)
|
|
+//ZZ #define X0101 BITS4(0,1,0,1)
|
|
+//ZZ #define X0110 BITS4(0,1,1,0)
|
|
+//ZZ #define X0111 BITS4(0,1,1,1)
|
|
+//ZZ #define X1000 BITS4(1,0,0,0)
|
|
+//ZZ #define X1001 BITS4(1,0,0,1)
|
|
+//ZZ #define X1010 BITS4(1,0,1,0)
|
|
+//ZZ #define X1011 BITS4(1,0,1,1)
|
|
+//ZZ #define X1100 BITS4(1,1,0,0)
|
|
+//ZZ #define X1101 BITS4(1,1,0,1)
|
|
+//ZZ #define X1110 BITS4(1,1,1,0)
|
|
+//ZZ #define X1111 BITS4(1,1,1,1)
|
|
+/*
|
|
+#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
|
|
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
|
|
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
|
|
+ (((zzx3) & 0xF) << 12))
|
|
+
|
|
+#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
|
|
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
|
|
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
|
|
+ (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
|
|
+
|
|
+#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
|
|
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
|
|
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
|
|
+ (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
|
|
+
|
|
+#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
|
|
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
|
|
+ (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
|
|
+ (((zzx0) & 0xF) << 0))
|
|
+
|
|
+#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
|
|
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
|
|
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
|
|
+ (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
|
|
+ (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
|
|
+
|
|
+#define XX______(zzx7,zzx6) \
|
|
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
|
|
+*/
|
|
+//ZZ /* Generate a skeletal insn that involves an a RI84 shifter operand.
|
|
+//ZZ Returns a word which is all zeroes apart from bits 25 and 11..0,
|
|
+//ZZ since it is those that encode the shifter operand (at least to the
|
|
+//ZZ extent that we care about it.) */
|
|
+//ZZ static UInt skeletal_RI84 ( ARMRI84* ri )
|
|
+//ZZ {
|
|
+//ZZ UInt instr;
|
|
+//ZZ if (ri->tag == ARMri84_I84) {
|
|
+//ZZ vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
|
|
+//ZZ vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
|
|
+//ZZ instr = 1 << 25;
|
|
+//ZZ instr |= (ri->ARMri84.I84.imm4 << 8);
|
|
+//ZZ instr |= ri->ARMri84.I84.imm8;
|
|
+//ZZ } else {
|
|
+//ZZ instr = 0 << 25;
|
|
+//ZZ instr |= iregNo(ri->ARMri84.R.reg);
|
|
+//ZZ }
|
|
+//ZZ return instr;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
|
|
+//ZZ 11..7. */
|
|
+//ZZ static UInt skeletal_RI5 ( ARMRI5* ri )
|
|
+//ZZ {
|
|
+//ZZ UInt instr;
|
|
+//ZZ if (ri->tag == ARMri5_I5) {
|
|
+//ZZ UInt imm5 = ri->ARMri5.I5.imm5;
|
|
+//ZZ vassert(imm5 >= 1 && imm5 <= 31);
|
|
+//ZZ instr = 0 << 4;
|
|
+//ZZ instr |= imm5 << 7;
|
|
+//ZZ } else {
|
|
+//ZZ instr = 1 << 4;
|
|
+//ZZ instr |= iregNo(ri->ARMri5.R.reg) << 8;
|
|
+//ZZ }
|
|
+//ZZ return instr;
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/* Emit at |p| the shortest MOVZ/MOVK sequence handled here that loads
+   |imm64| into integer register number |xD|, touching no other
+   register.  Returns the advanced write pointer.  Between one and
+   four instruction words are written. */
+static UInt* imm64_to_iregNo ( UInt* p, Int xD, ULong imm64 )
+{
+   // Zero needs its own path: the scan below emits nothing when
+   // every halfword is zero, which would leave xD unchanged.
+   if (imm64 == 0) {
+      // MOVZ xD, #0, LSL #0
+      *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
+      return p;
+   }
+
+   // Walk the four 16-bit lanes from least to most significant.
+   // The first nonzero lane is installed with MOVZ, which also
+   // zeroes the rest of the register; every later nonzero lane is
+   // patched in with MOVK.  Zero lanes need no instruction.
+   Bool emittedMovz = False;
+   UInt lane;
+   for (lane = 0; lane < 4; lane++) {
+      UInt hw = (UInt)((imm64 >> (16 * lane)) & 0xFFFF);
+      if (hw == 0)
+         continue;
+      // MOVZ (X110) or MOVK (X111)  xD, hw, LSL (16*lane)
+      UInt opc = emittedMovz ? X111 : X110;
+      *p++ = X_3_6_2_16_5(opc, X100101, lane, hw, xD);
+      emittedMovz = True;
+   }
+   // imm64 was nonzero, so some lane must have been nonzero.
+   vassert(emittedMovz);
+
+   return p;
+}
|
|
+
|
|
+/* Load |imm64| into integer register number |xD| using only that
+   register and always exactly four instructions (one MOVZ followed
+   by three MOVKs), whatever the value.  This is used when generating
+   sections of code that need to be patched later, so as to guarantee
+   a specific size.  Returns the advanced write pointer. */
+static UInt* imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
+{
+   UInt lane;
+   for (lane = 0; lane < 4; lane++) {
+      UInt hw  = (UInt)((imm64 >> (16 * lane)) & 0xFFFF);
+      // lane 0:    MOVZ xD, hw, LSL 0
+      // lanes 1-3: MOVK xD, hw, LSL (16*lane)
+      UInt opc = (lane == 0) ? X110 : X111;
+      *p++ = X_3_6_2_16_5(opc, X100101, lane, hw, xD);
+   }
+   return p;
+}
|
|
+
|
|
+/* Return True iff the four words at |p| are exactly the sequence that
+   imm64_to_iregNo_EXACTLY4() would have produced for |xD| and
+   |imm64|.  The words are compared in order and the first mismatch
+   gives False. */
+static Bool is_imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
+{
+   UInt lane;
+   for (lane = 0; lane < 4; lane++) {
+      UInt hw  = (UInt)((imm64 >> (16 * lane)) & 0xFFFF);
+      // lane 0:    MOVZ xD, hw, LSL 0
+      // lanes 1-3: MOVK xD, hw, LSL (16*lane)
+      UInt opc = (lane == 0) ? X110 : X111;
+      UInt want = X_3_6_2_16_5(opc, X100101, lane, hw, xD);
+      if (p[lane] != want)
+         return False;
+   }
+   return True;
+}
|
|
+
|
|
+
|
|
+/* Emit one instruction at |p|: an 8 bit store from, or an 8-to-64
+   unsigned widening load to, register number |wD|, addressed by
+   |am|.  Returns the advanced write pointer.  Panics if |am| is not
+   one of the RI9 / RI12 / RR forms. */
+static UInt* do_load_or_store8 ( UInt* p,
+                                 Bool isLoad, UInt wD, ARM64AMode* am )
+{
+   vassert(wD <= 30);
+   switch (am->tag) {
+      case ARM64am_RI9: {
+         /* STURB Wd, [Xn|SP + simm9]:  00 111000 000 simm9 00 n d
+            LDURB Wd, [Xn|SP + simm9]:  00 111000 010 simm9 00 n d
+         */
+         Int off9 = am->ARM64am.RI9.simm9;
+         vassert(-256 <= off9 && off9 <= 255);
+         UInt baseNo = iregNo(am->ARM64am.RI9.reg);
+         UInt opc    = isLoad ? X010 : X000;
+         *p++ = X_2_6_3_9_2_5_5(X00, X111000, opc,
+                                off9 & 0x1FF, X00, baseNo, wD);
+         return p;
+      }
+      case ARM64am_RI12: {
+         /* STRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 00 imm12 n d
+            LDRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 01 imm12 n d
+         */
+         UInt off12 = am->ARM64am.RI12.uimm12;
+         UInt unitB = am->ARM64am.RI12.szB;
+         vassert(unitB == 1); /* failure of this is serious.  Do not ignore. */
+         UInt baseNo = iregNo(am->ARM64am.RI12.reg);
+         vassert(baseNo <= 30);
+         UInt opc = isLoad ? X01 : X00;
+         *p++ = X_2_6_2_12_5_5(X00, X111001, opc, off12, baseNo, wD);
+         return p;
+      }
+      case ARM64am_RR: {
+         /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
+            LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
+         */
+         UInt baseNo  = iregNo(am->ARM64am.RR.base);
+         UInt indexNo = iregNo(am->ARM64am.RR.index);
+         vassert(baseNo <= 30);
+         UInt opc = isLoad ? X11000011 : X11000001;
+         *p++ = X_3_8_5_6_5_5(X001, opc, indexNo, X011010, baseNo, wD);
+         return p;
+      }
+      default:
+         break;
+   }
+   vpanic("do_load_or_store8");
+   vassert(0);
+}
|
|
+
|
|
+
|
|
+/* Emit one instruction at |p|: a 16 bit store from, or a 16-to-64
+   unsigned widening load to, register number |wD|, addressed by
+   |am|.  Returns the advanced write pointer.  Panics if |am| is not
+   one of the RI9 / RI12 / RR forms. */
+static UInt* do_load_or_store16 ( UInt* p,
+                                  Bool isLoad, UInt wD, ARM64AMode* am )
+{
+   vassert(wD <= 30);
+   switch (am->tag) {
+      case ARM64am_RI9: {
+         /* STURH Wd, [Xn|SP + simm9]:  01 111000 000 simm9 00 n d
+            LDURH Wd, [Xn|SP + simm9]:  01 111000 010 simm9 00 n d
+         */
+         Int off9 = am->ARM64am.RI9.simm9;
+         vassert(-256 <= off9 && off9 <= 255);
+         UInt baseNo = iregNo(am->ARM64am.RI9.reg);
+         UInt opc    = isLoad ? X010 : X000;
+         *p++ = X_2_6_3_9_2_5_5(X01, X111000, opc,
+                                off9 & 0x1FF, X00, baseNo, wD);
+         return p;
+      }
+      case ARM64am_RI12: {
+         /* STRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 00 imm12 n d
+            LDRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 01 imm12 n d
+         */
+         UInt off12 = am->ARM64am.RI12.uimm12;
+         UInt unitB = am->ARM64am.RI12.szB;
+         vassert(unitB == 2); /* failure of this is serious.  Do not ignore. */
+         UInt baseNo = iregNo(am->ARM64am.RI12.reg);
+         vassert(baseNo <= 30);
+         UInt opc = isLoad ? X01 : X00;
+         *p++ = X_2_6_2_12_5_5(X01, X111001, opc, off12, baseNo, wD);
+         return p;
+      }
+      case ARM64am_RR: {
+         /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
+            LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
+         */
+         UInt baseNo  = iregNo(am->ARM64am.RR.base);
+         UInt indexNo = iregNo(am->ARM64am.RR.index);
+         vassert(baseNo <= 30);
+         UInt opc = isLoad ? X11000011 : X11000001;
+         *p++ = X_3_8_5_6_5_5(X011, opc, indexNo, X011010, baseNo, wD);
+         return p;
+      }
+      default:
+         break;
+   }
+   vpanic("do_load_or_store16");
+   vassert(0);
+}
|
|
+
|
|
+
|
|
+/* Emit one instruction at |p|: a 32 bit store from, or a 32-to-64
+   unsigned widening load to, register number |wD|, addressed by
+   |am|.  Returns the advanced write pointer.  Panics if |am| is not
+   one of the RI9 / RI12 / RR forms. */
+static UInt* do_load_or_store32 ( UInt* p,
+                                  Bool isLoad, UInt wD, ARM64AMode* am )
+{
+   vassert(wD <= 30);
+   switch (am->tag) {
+      case ARM64am_RI9: {
+         /* STUR Wd, [Xn|SP + simm9]:  10 111000 000 simm9 00 n d
+            LDUR Wd, [Xn|SP + simm9]:  10 111000 010 simm9 00 n d
+         */
+         Int off9 = am->ARM64am.RI9.simm9;
+         vassert(-256 <= off9 && off9 <= 255);
+         UInt baseNo = iregNo(am->ARM64am.RI9.reg);
+         UInt opc    = isLoad ? X010 : X000;
+         *p++ = X_2_6_3_9_2_5_5(X10, X111000, opc,
+                                off9 & 0x1FF, X00, baseNo, wD);
+         return p;
+      }
+      case ARM64am_RI12: {
+         /* STR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 00 imm12 n d
+            LDR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 01 imm12 n d
+         */
+         UInt off12 = am->ARM64am.RI12.uimm12;
+         UInt unitB = am->ARM64am.RI12.szB;
+         vassert(unitB == 4); /* failure of this is serious.  Do not ignore. */
+         UInt baseNo = iregNo(am->ARM64am.RI12.reg);
+         vassert(baseNo <= 30);
+         UInt opc = isLoad ? X01 : X00;
+         *p++ = X_2_6_2_12_5_5(X10, X111001, opc, off12, baseNo, wD);
+         return p;
+      }
+      case ARM64am_RR: {
+         /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
+            LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
+         */
+         UInt baseNo  = iregNo(am->ARM64am.RR.base);
+         UInt indexNo = iregNo(am->ARM64am.RR.index);
+         vassert(baseNo <= 30);
+         UInt opc = isLoad ? X11000011 : X11000001;
+         *p++ = X_3_8_5_6_5_5(X101, opc, indexNo, X011010, baseNo, wD);
+         return p;
+      }
+      default:
+         break;
+   }
+   vpanic("do_load_or_store32");
+   vassert(0);
+}
|
|
+
|
|
+
|
|
+/* Generate a 64 bit load or store to/from xD, using the given amode
|
|
+ for the address. Exactly one 32-bit instruction word is written at p and the returned pointer is p advanced past it. isLoad selects the load encoding, otherwise the store encoding is used. Handles the RI9, RI12 and RR amode tags; any other tag ends in vpanic. */
|
|
+static UInt* do_load_or_store64 ( UInt* p,
|
|
+ Bool isLoad, UInt xD, ARM64AMode* am )
|
|
+{
|
|
+ /* In all these cases, Rn can't be 31 since that means SP. */
|
|
+ vassert(xD <= 30); /* NOTE(review): this checks the data register xD, whereas the comment above speaks of Rn; each case below checks the base register separately */
|
|
+ if (am->tag == ARM64am_RI9) {
|
|
+ /* Base register plus signed 9-bit byte offset.
|
|
+ STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
|
|
+ LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
|
|
+ */
|
|
+ Int simm9 = am->ARM64am.RI9.simm9;
|
|
+ vassert(-256 <= simm9 && simm9 <= 255); /* offset must be representable in the 9-bit signed field */
|
|
+ UInt xN = iregNo(am->ARM64am.RI9.reg);
|
|
+ vassert(xN <= 30);
|
|
+ UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
|
|
+ simm9 & 0x1FF, X00, xN, xD); /* simm9 is masked to its low 9 bits */
|
|
+ *p++ = instr;
|
|
+ return p;
|
|
+ }
|
|
+ if (am->tag == ARM64am_RI12) {
|
|
+ /* Base register plus unsigned 12-bit offset.
|
|
+ STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
|
|
+ LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
|
|
+ The uimm12 value is placed in the instruction exactly as stored in the amode; the "* 8" above indicates it counts 8-byte units, which is presumably why szB must be 8 below. */
|
|
+ UInt uimm12 = am->ARM64am.RI12.uimm12;
|
|
+ UInt scale = am->ARM64am.RI12.szB;
|
|
+ vassert(scale == 8); /* failure of this is serious. Do not ignore. */
|
|
+ UInt xN = iregNo(am->ARM64am.RI12.reg);
|
|
+ vassert(xN <= 30);
|
|
+ UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
|
|
+ uimm12, xN, xD);
|
|
+ *p++ = instr;
|
|
+ return p;
|
|
+ }
|
|
+ if (am->tag == ARM64am_RR) {
|
|
+ /* Base register plus index register.
|
|
+ STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
|
|
+ LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
|
|
+ */
|
|
+ UInt xN = iregNo(am->ARM64am.RR.base);
|
|
+ UInt xM = iregNo(am->ARM64am.RR.index);
|
|
+ vassert(xN <= 30);
|
|
+ UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
|
|
+ xM, X011010, xN, xD);
|
|
+ *p++ = instr;
|
|
+ return p;
|
|
+ }
|
|
+ vpanic("do_load_or_store64"); /* reached only when the amode tag matched none of the cases above */
|
|
+ vassert(0); /* NOTE(review): presumably unreachable if vpanic does not return -- confirm */
|
|
+}
|
|
+
|
|
+
|
|
+/* Emit an instruction into buf and return the number of bytes used.
|
|
+ Note that buf is not the insn's final place, and therefore it is
|
|
+ imperative to emit position-independent code. If the emitted
|
|
+ instruction was a profiler inc, set *is_profInc to True, else
|
|
+ leave it unchanged. */
|
|
+
|
|
+Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
|
|
+ UChar* buf, Int nbuf, ARM64Instr* i,
|
|
+ Bool mode64,
|
|
+ void* disp_cp_chain_me_to_slowEP,
|
|
+ void* disp_cp_chain_me_to_fastEP,
|
|
+ void* disp_cp_xindir,
|
|
+ void* disp_cp_xassisted )
|
|
+{
|
|
+ UInt* p = (UInt*)buf;
|
|
+ vassert(nbuf >= 32);
|
|
+ vassert(mode64 == True);
|
|
+ vassert(0 == (((HWord)buf) & 3));
|
|
+
|
|
+ switch (i->tag) {
|
|
+ case ARM64in_Arith: {
|
|
+ UInt rD = iregNo(i->ARM64in.Arith.dst);
|
|
+ UInt rN = iregNo(i->ARM64in.Arith.argL);
|
|
+ ARM64RIA* argR = i->ARM64in.Arith.argR;
|
|
+ switch (argR->tag) {
|
|
+ case ARM64riA_I12:
|
|
+ *p++ = X_2_6_2_12_5_5(
|
|
+ i->ARM64in.Arith.isAdd ? X10 : X11,
|
|
+ X010001,
|
|
+ argR->ARM64riA.I12.shift == 12 ? X01 : X00,
|
|
+ argR->ARM64riA.I12.imm12, rN, rD
|
|
+ );
|
|
+ break;
|
|
+ case ARM64riA_R: {
|
|
+ UInt rM = iregNo(i->ARM64in.Arith.argR->ARM64riA.R.reg);
|
|
+ *p++ = X_3_8_5_6_5_5(
|
|
+ i->ARM64in.Arith.isAdd ? X100 : X110,
|
|
+ X01011000, rM, X000000, rN, rD
|
|
+ );
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ goto bad;
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_Cmp: {
|
|
+ UInt rD = 31; /* XZR, we are going to dump the result */
|
|
+ UInt rN = iregNo(i->ARM64in.Cmp.argL);
|
|
+ ARM64RIA* argR = i->ARM64in.Cmp.argR;
|
|
+ Bool is64 = i->ARM64in.Cmp.is64;
|
|
+ switch (argR->tag) {
|
|
+ case ARM64riA_I12:
|
|
+ /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
|
|
+ /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
|
|
+ *p++ = X_2_6_2_12_5_5(
|
|
+ is64 ? X11 : X01, X110001,
|
|
+ argR->ARM64riA.I12.shift == 12 ? X01 : X00,
|
|
+ argR->ARM64riA.I12.imm12, rN, rD);
|
|
+ break;
|
|
+ case ARM64riA_R: {
|
|
+ /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
|
|
+ /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
|
|
+ UInt rM = iregNo(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
|
|
+ *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
|
|
+ X01011000, rM, X000000, rN, rD);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ goto bad;
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_Logic: {
|
|
+ UInt rD = iregNo(i->ARM64in.Logic.dst);
|
|
+ UInt rN = iregNo(i->ARM64in.Logic.argL);
|
|
+ ARM64RIL* argR = i->ARM64in.Logic.argR;
|
|
+ UInt opc = 0; /* invalid */
|
|
+ vassert(rD < 31);
|
|
+ vassert(rN < 31);
|
|
+ switch (i->ARM64in.Logic.op) {
|
|
+ case ARM64lo_OR: opc = X101; break;
|
|
+ case ARM64lo_AND: opc = X100; break;
|
|
+ case ARM64lo_XOR: opc = X110; break;
|
|
+ default: break;
|
|
+ }
|
|
+ vassert(opc != 0);
|
|
+ switch (argR->tag) {
|
|
+ case ARM64riL_I13: {
|
|
+ /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
|
|
+ /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
|
|
+ /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
|
|
+ *p++ = X_3_6_1_6_6_5_5(
|
|
+ opc, X100100, argR->ARM64riL.I13.bitN,
|
|
+ argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
|
|
+ rN, rD
|
|
+ );
|
|
+ break;
|
|
+ }
|
|
+ case ARM64riL_R: {
|
|
+ /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
|
|
+ /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
|
|
+ /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
|
|
+ UInt rM = iregNo(argR->ARM64riL.R.reg);
|
|
+ vassert(rM < 31);
|
|
+ *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ goto bad;
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_Test: {
|
|
+ UInt rD = 31; /* XZR, we are going to dump the result */
|
|
+ UInt rN = iregNo(i->ARM64in.Test.argL);
|
|
+ ARM64RIL* argR = i->ARM64in.Test.argR;
|
|
+ switch (argR->tag) {
|
|
+ case ARM64riL_I13: {
|
|
+ /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
|
|
+ *p++ = X_3_6_1_6_6_5_5(
|
|
+ X111, X100100, argR->ARM64riL.I13.bitN,
|
|
+ argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
|
|
+ rN, rD
|
|
+ );
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ goto bad;
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_Shift: {
|
|
+ UInt rD = iregNo(i->ARM64in.Shift.dst);
|
|
+ UInt rN = iregNo(i->ARM64in.Shift.argL);
|
|
+ ARM64RI6* argR = i->ARM64in.Shift.argR;
|
|
+ vassert(rD < 31);
|
|
+ vassert(rN < 31);
|
|
+ switch (argR->tag) {
|
|
+ case ARM64ri6_I6: {
|
|
+ /* 110 1001101 (64-sh) (63-sh) nn dd LSL Xd, Xn, sh */
|
|
+ /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
|
|
+ /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
|
|
+ UInt sh = argR->ARM64ri6.I6.imm6;
|
|
+ vassert(sh > 0 && sh < 64);
|
|
+ switch (i->ARM64in.Shift.op) {
|
|
+ case ARM64sh_SHL:
|
|
+ *p++ = X_3_6_1_6_6_5_5(X110, X100110,
|
|
+ 1, 64-sh, 63-sh, rN, rD);
|
|
+ break;
|
|
+ case ARM64sh_SHR:
|
|
+ *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
|
|
+ break;
|
|
+ case ARM64sh_SAR:
|
|
+ *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ case ARM64ri6_R: {
|
|
+ /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
|
|
+ /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
|
|
+ /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
|
|
+ UInt rM = iregNo(argR->ARM64ri6.R.reg);
|
|
+ vassert(rM < 31);
|
|
+ UInt subOpc = 0;
|
|
+ switch (i->ARM64in.Shift.op) {
|
|
+ case ARM64sh_SHL: subOpc = X001000; break;
|
|
+ case ARM64sh_SHR: subOpc = X001001; break;
|
|
+ case ARM64sh_SAR: subOpc = X001010; break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_Unary: {
|
|
+ UInt rDst = iregNo(i->ARM64in.Unary.dst);
|
|
+ UInt rSrc = iregNo(i->ARM64in.Unary.src);
|
|
+ switch (i->ARM64in.Unary.op) {
|
|
+ case ARM64un_CLZ:
|
|
+ /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
|
|
+ /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
|
|
+ *p++ = X_3_8_5_6_5_5(X110,
|
|
+ X11010110, X00000, X000100, rSrc, rDst);
|
|
+ goto done;
|
|
+ case ARM64un_NEG:
|
|
+ /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
|
|
+ /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
|
|
+ *p++ = X_3_8_5_6_5_5(X110,
|
|
+ X01011000, rSrc, X000000, X11111, rDst);
|
|
+ goto done;
|
|
+ case ARM64un_NOT: {
|
|
+ /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
|
|
+ *p++ = X_3_8_5_6_5_5(X101,
|
|
+ X01010001, rSrc, X000000, X11111, rDst);
|
|
+ goto done;
|
|
+ }
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ goto bad;
|
|
+ }
|
|
+ case ARM64in_MovI: {
|
|
+ /* We generate the "preferred form", ORR Xd, XZR, Xm
|
|
+ 101 01010 00 0 m 000000 11111 d
|
|
+ */
|
|
+ UInt instr = 0xAA0003E0;
|
|
+ UInt d = iregNo(i->ARM64in.MovI.dst);
|
|
+ UInt m = iregNo(i->ARM64in.MovI.src);
|
|
+ *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_Imm64: {
|
|
+ p = imm64_to_iregNo( p, iregNo(i->ARM64in.Imm64.dst),
|
|
+ i->ARM64in.Imm64.imm64 );
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_LdSt64: {
|
|
+ p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
|
|
+ iregNo(i->ARM64in.LdSt64.rD),
|
|
+ i->ARM64in.LdSt64.amode );
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_LdSt32: {
|
|
+ p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
|
|
+ iregNo(i->ARM64in.LdSt32.rD),
|
|
+ i->ARM64in.LdSt32.amode );
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_LdSt16: {
|
|
+ p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
|
|
+ iregNo(i->ARM64in.LdSt16.rD),
|
|
+ i->ARM64in.LdSt16.amode );
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_LdSt8: {
|
|
+ p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
|
|
+ iregNo(i->ARM64in.LdSt8.rD),
|
|
+ i->ARM64in.LdSt8.amode );
|
|
+ goto done;
|
|
+ }
|
|
+//ZZ case ARMin_LdSt32:
|
|
+//ZZ case ARMin_LdSt8U: {
|
|
+//ZZ UInt bL, bB;
|
|
+//ZZ HReg rD;
|
|
+//ZZ ARMAMode1* am;
|
|
+//ZZ ARMCondCode cc;
|
|
+//ZZ if (i->tag == ARMin_LdSt32) {
|
|
+//ZZ bB = 0;
|
|
+//ZZ bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
|
|
+//ZZ am = i->ARMin.LdSt32.amode;
|
|
+//ZZ rD = i->ARMin.LdSt32.rD;
|
|
+//ZZ cc = i->ARMin.LdSt32.cc;
|
|
+//ZZ } else {
|
|
+//ZZ bB = 1;
|
|
+//ZZ bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
|
|
+//ZZ am = i->ARMin.LdSt8U.amode;
|
|
+//ZZ rD = i->ARMin.LdSt8U.rD;
|
|
+//ZZ cc = i->ARMin.LdSt8U.cc;
|
|
+//ZZ }
|
|
+//ZZ vassert(cc != ARMcc_NV);
|
|
+//ZZ if (am->tag == ARMam1_RI) {
|
|
+//ZZ Int simm12;
|
|
+//ZZ UInt instr, bP;
|
|
+//ZZ if (am->ARMam1.RI.simm13 < 0) {
|
|
+//ZZ bP = 0;
|
|
+//ZZ simm12 = -am->ARMam1.RI.simm13;
|
|
+//ZZ } else {
|
|
+//ZZ bP = 1;
|
|
+//ZZ simm12 = am->ARMam1.RI.simm13;
|
|
+//ZZ }
|
|
+//ZZ vassert(simm12 >= 0 && simm12 <= 4095);
|
|
+//ZZ instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
|
|
+//ZZ iregNo(am->ARMam1.RI.reg),
|
|
+//ZZ iregNo(rD));
|
|
+//ZZ instr |= simm12;
|
|
+//ZZ *p++ = instr;
|
|
+//ZZ goto done;
|
|
+//ZZ } else {
|
|
+//ZZ // RR case
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ case ARMin_LdSt16: {
|
|
+//ZZ HReg rD = i->ARMin.LdSt16.rD;
|
|
+//ZZ UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
|
|
+//ZZ UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
|
|
+//ZZ ARMAMode2* am = i->ARMin.LdSt16.amode;
|
|
+//ZZ ARMCondCode cc = i->ARMin.LdSt16.cc;
|
|
+//ZZ vassert(cc != ARMcc_NV);
|
|
+//ZZ if (am->tag == ARMam2_RI) {
|
|
+//ZZ HReg rN = am->ARMam2.RI.reg;
|
|
+//ZZ Int simm8;
|
|
+//ZZ UInt bP, imm8hi, imm8lo, instr;
|
|
+//ZZ if (am->ARMam2.RI.simm9 < 0) {
|
|
+//ZZ bP = 0;
|
|
+//ZZ simm8 = -am->ARMam2.RI.simm9;
|
|
+//ZZ } else {
|
|
+//ZZ bP = 1;
|
|
+//ZZ simm8 = am->ARMam2.RI.simm9;
|
|
+//ZZ }
|
|
+//ZZ vassert(simm8 >= 0 && simm8 <= 255);
|
|
+//ZZ imm8hi = (simm8 >> 4) & 0xF;
|
|
+//ZZ imm8lo = simm8 & 0xF;
|
|
+//ZZ vassert(!(bL == 0 && bS == 1)); // "! signed store"
|
|
+//ZZ /**/ if (bL == 0 && bS == 0) {
|
|
+//ZZ // strh
|
|
+//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregNo(rN),
|
|
+//ZZ iregNo(rD), imm8hi, X1011, imm8lo);
|
|
+//ZZ *p++ = instr;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ else if (bL == 1 && bS == 0) {
|
|
+//ZZ // ldrh
|
|
+//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
|
|
+//ZZ iregNo(rD), imm8hi, X1011, imm8lo);
|
|
+//ZZ *p++ = instr;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ else if (bL == 1 && bS == 1) {
|
|
+//ZZ // ldrsh
|
|
+//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
|
|
+//ZZ iregNo(rD), imm8hi, X1111, imm8lo);
|
|
+//ZZ *p++ = instr;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ else vassert(0); // ill-constructed insn
|
|
+//ZZ } else {
|
|
+//ZZ // RR case
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ case ARMin_Ld8S: {
|
|
+//ZZ HReg rD = i->ARMin.Ld8S.rD;
|
|
+//ZZ ARMAMode2* am = i->ARMin.Ld8S.amode;
|
|
+//ZZ ARMCondCode cc = i->ARMin.Ld8S.cc;
|
|
+//ZZ vassert(cc != ARMcc_NV);
|
|
+//ZZ if (am->tag == ARMam2_RI) {
|
|
+//ZZ HReg rN = am->ARMam2.RI.reg;
|
|
+//ZZ Int simm8;
|
|
+//ZZ UInt bP, imm8hi, imm8lo, instr;
|
|
+//ZZ if (am->ARMam2.RI.simm9 < 0) {
|
|
+//ZZ bP = 0;
|
|
+//ZZ simm8 = -am->ARMam2.RI.simm9;
|
|
+//ZZ } else {
|
|
+//ZZ bP = 1;
|
|
+//ZZ simm8 = am->ARMam2.RI.simm9;
|
|
+//ZZ }
|
|
+//ZZ vassert(simm8 >= 0 && simm8 <= 255);
|
|
+//ZZ imm8hi = (simm8 >> 4) & 0xF;
|
|
+//ZZ imm8lo = simm8 & 0xF;
|
|
+//ZZ // ldrsb
|
|
+//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
|
|
+//ZZ iregNo(rD), imm8hi, X1101, imm8lo);
|
|
+//ZZ *p++ = instr;
|
|
+//ZZ goto done;
|
|
+//ZZ } else {
|
|
+//ZZ // RR case
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+
|
|
+ case ARM64in_XDirect: {
|
|
+ /* NB: what goes on here has to be very closely coordinated
|
|
+ with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
|
|
+ /* We're generating chain-me requests here, so we need to be
|
|
+ sure this is actually allowed -- no-redir translations
|
|
+ can't use chain-me's. Hence: */
|
|
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
|
|
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
|
|
+
|
|
+ /* Use ptmp for backpatching conditional jumps. */
|
|
+ UInt* ptmp = NULL;
|
|
+
|
|
+ /* First off, if this is conditional, create a conditional
|
|
+ jump over the rest of it. Or at least, leave a space for
|
|
+ it that we will shortly fill in. */
|
|
+ if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
|
|
+ vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
|
|
+ ptmp = p;
|
|
+ *p++ = 0;
|
|
+ }
|
|
+
|
|
+ /* Update the guest PC. */
|
|
+ /* imm64 x9, dstGA */
|
|
+ /* str x9, amPC */
|
|
+ p = imm64_to_iregNo(p, /*x*/9, i->ARM64in.XDirect.dstGA);
|
|
+ p = do_load_or_store64(p, False/*!isLoad*/,
|
|
+ /*x*/9, i->ARM64in.XDirect.amPC);
|
|
+
|
|
+ /* --- FIRST PATCHABLE BYTE follows --- */
|
|
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
|
|
+ calling to) backs up the return address, so as to find the
|
|
+ address of the first patchable byte. So: don't change the
|
|
+ number of instructions (5) below. */
|
|
+ /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
|
|
+ /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
|
|
+ /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
|
|
+ /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
|
|
+ /* blr x9 */
|
|
+ void* disp_cp_chain_me
|
|
+ = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
|
|
+ : disp_cp_chain_me_to_slowEP;
|
|
+ p = imm64_to_iregNo_EXACTLY4(p, /*x*/9,
|
|
+ Ptr_to_ULong(disp_cp_chain_me));
|
|
+ *p++ = 0xD63F0120;
|
|
+ /* --- END of PATCHABLE BYTES --- */
|
|
+
|
|
+ /* Fix up the conditional jump, if there was one. */
|
|
+ if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
|
|
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
|
|
+ vassert(delta > 0 && delta < 40);
|
|
+ vassert((delta & 3) == 0);
|
|
+ UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
|
|
+ vassert(notCond <= 13); /* Neither AL nor NV */
|
|
+ vassert(ptmp != NULL);
|
|
+ delta = delta >> 2;
|
|
+ *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_XIndir: {
|
|
+ // XIndir is more or less the same as XAssisted, except
|
|
+ // we don't have a trc value to hand back, so there's no
|
|
+ // write to r21
|
|
+ /* Use ptmp for backpatching conditional jumps. */
|
|
+ //UInt* ptmp = NULL;
|
|
+
|
|
+ /* First off, if this is conditional, create a conditional
|
|
+ jump over the rest of it. Or at least, leave a space for
|
|
+ it that we will shortly fill in. */
|
|
+ if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
|
|
+ vassert(0); //ATC
|
|
+//ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
|
|
+//ZZ ptmp = p;
|
|
+//ZZ *p++ = 0;
|
|
+ }
|
|
+
|
|
+ /* Update the guest PC. */
|
|
+ /* str r-dstGA, amPC */
|
|
+ p = do_load_or_store64(p, False/*!isLoad*/,
|
|
+ iregNo(i->ARM64in.XIndir.dstGA),
|
|
+ i->ARM64in.XIndir.amPC);
|
|
+
|
|
+ /* imm64 x9, VG_(disp_cp_xindir) */
|
|
+ /* br x9 */
|
|
+ p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xindir));
|
|
+ *p++ = 0xD61F0120; /* br x9 */
|
|
+
|
|
+ /* Fix up the conditional jump, if there was one. */
|
|
+ if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
|
|
+ vassert(0); //ATC
|
|
+//ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
|
|
+//ZZ vassert(delta > 0 && delta < 40);
|
|
+//ZZ vassert((delta & 3) == 0);
|
|
+//ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
|
|
+//ZZ vassert(notCond <= 13); /* Neither AL nor NV */
|
|
+//ZZ delta = (delta >> 2) - 2;
|
|
+//ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_XAssisted: {
|
|
+ /* Use ptmp for backpatching conditional jumps. */
|
|
+ UInt* ptmp = NULL;
|
|
+
|
|
+ /* First off, if this is conditional, create a conditional
|
|
+ jump over the rest of it. Or at least, leave a space for
|
|
+ it that we will shortly fill in. I think this can only
|
|
+ ever happen when VEX is driven by the switchbacker. */
|
|
+ if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
|
|
+ vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
|
|
+ ptmp = p;
|
|
+ *p++ = 0;
|
|
+ }
|
|
+
|
|
+ /* Update the guest PC. */
|
|
+ /* str r-dstGA, amPC */
|
|
+ p = do_load_or_store64(p, False/*!isLoad*/,
|
|
+ iregNo(i->ARM64in.XAssisted.dstGA),
|
|
+ i->ARM64in.XAssisted.amPC);
|
|
+
|
|
+ /* movw r21, $magic_number */
|
|
+ UInt trcval = 0;
|
|
+ switch (i->ARM64in.XAssisted.jk) {
|
|
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
|
|
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
|
|
+ //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
|
|
+ //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
|
|
+ //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
|
|
+ //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
|
|
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
|
|
+ //case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
|
|
+ //case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
|
|
+ //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
|
|
+ //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
|
|
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
|
|
+ /* We don't expect to see the following being assisted. */
|
|
+ //case Ijk_Ret:
|
|
+ //case Ijk_Call:
|
|
+ /* fallthrough */
|
|
+ default:
|
|
+ ppIRJumpKind(i->ARM64in.XAssisted.jk);
|
|
+ vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
|
|
+ "unexpected jump kind");
|
|
+ }
|
|
+ vassert(trcval != 0);
|
|
+ p = imm64_to_iregNo(p, /*x*/21, (ULong)trcval);
|
|
+
|
|
+ /* imm64 x9, VG_(disp_cp_xassisted) */
|
|
+ /* br x9 */
|
|
+ p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xassisted));
|
|
+ *p++ = 0xD61F0120; /* br x9 */
|
|
+
|
|
+ /* Fix up the conditional jump, if there was one. */
|
|
+ if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
|
|
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
|
|
+ vassert(delta > 0 && delta < 40);
|
|
+ vassert((delta & 3) == 0);
|
|
+ UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
|
|
+ vassert(notCond <= 13); /* Neither AL nor NV */
|
|
+ vassert(ptmp != NULL);
|
|
+ delta = delta >> 2;
|
|
+ *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_CSel: {
|
|
+ /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
|
|
+ UInt dd = iregNo(i->ARM64in.CSel.dst);
|
|
+ UInt nn = iregNo(i->ARM64in.CSel.argL);
|
|
+ UInt mm = iregNo(i->ARM64in.CSel.argR);
|
|
+ UInt cond = (UInt)i->ARM64in.CSel.cond;
|
|
+ vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
|
|
+ *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_Call: {
|
|
+ /* We'll use x9 as a scratch register to put the target
|
|
+ address in. */
|
|
+ if (i->ARM64in.Call.cond != ARM64cc_AL
|
|
+ && i->ARM64in.Call.rloc.pri != RLPri_None) {
|
|
+ /* The call might not happen (it isn't unconditional) and
|
|
+ it returns a result. In this case we will need to
|
|
+ generate a control flow diamond to put 0x555..555 in
|
|
+ the return register(s) in the case where the call
|
|
+ doesn't happen. If this ever becomes necessary, maybe
|
|
+ copy code from the 32-bit ARM equivalent. Until that
|
|
+ day, just give up. */
|
|
+ goto bad;
|
|
+ }
|
|
+
|
|
+ UInt* ptmp = NULL;
|
|
+ if (i->ARM64in.Call.cond != ARM64cc_AL) {
|
|
+ /* Create a hole to put a conditional branch in. We'll
|
|
+ patch it once we know the branch length. */
|
|
+ ptmp = p;
|
|
+ *p++ = 0;
|
|
+ }
|
|
+
|
|
+ // x9 = &target
|
|
+ p = imm64_to_iregNo( (UInt*)p,
|
|
+ /*x*/9, (ULong)i->ARM64in.Call.target );
|
|
+ // blr x9
|
|
+ *p++ = 0xD63F0120;
|
|
+
|
|
+ // Patch the hole if necessary
|
|
+ if (i->ARM64in.Call.cond != ARM64cc_AL) {
|
|
+ ULong dist = (ULong)(p - ptmp);
|
|
+ /* imm64_to_iregNo produces between 1 and 4 insns, and
|
|
+ then there's the BLR itself. Hence: */
|
|
+ vassert(dist >= 2 && dist <= 5);
|
|
+ vassert(ptmp != NULL);
|
|
+ // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
|
|
+ *ptmp = X_8_19_1_4(X01010100, dist, 0,
|
|
+ 1 ^ (UInt)i->ARM64in.Call.cond);
|
|
+ } else {
|
|
+ vassert(ptmp == NULL);
|
|
+ }
|
|
+
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_AddToSP: {
|
|
+ /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
|
|
+ 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
|
|
+ */
|
|
+ Int simm12 = i->ARM64in.AddToSP.simm;
|
|
+ vassert(-4096 < simm12 && simm12 < 4096);
|
|
+ vassert(0 == (simm12 & 0xF));
|
|
+ if (simm12 >= 0) {
|
|
+ *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
|
|
+ } else {
|
|
+ *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_FromSP: {
|
|
+ /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
|
|
+ UInt dd = iregNo(i->ARM64in.FromSP.dst);
|
|
+ vassert(dd < 31);
|
|
+ *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_Mul: {
|
|
+ /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
|
|
+ 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
|
|
+ 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
|
|
+ */
|
|
+ UInt dd = iregNo(i->ARM64in.Mul.dst);
|
|
+ UInt nn = iregNo(i->ARM64in.Mul.argL);
|
|
+ UInt mm = iregNo(i->ARM64in.Mul.argR);
|
|
+ vassert(dd < 31 && nn < 31 && mm < 31);
|
|
+ switch (i->ARM64in.Mul.op) {
|
|
+ case ARM64mul_ZX:
|
|
+ *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
|
|
+ goto done;
|
|
+ //case ARM64mul_SX:
|
|
+ // *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
|
|
+ // goto done;
|
|
+ case ARM64mul_PLAIN:
|
|
+ *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
|
|
+ goto done;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ goto bad;
|
|
+ }
|
|
+//ZZ case ARMin_LdrEX: {
|
|
+//ZZ /* E1D42F9F ldrexb r2, [r4]
|
|
+//ZZ E1F42F9F ldrexh r2, [r4]
|
|
+//ZZ E1942F9F ldrex r2, [r4]
|
|
+//ZZ E1B42F9F ldrexd r2, r3, [r4]
|
|
+//ZZ */
|
|
+//ZZ switch (i->ARMin.LdrEX.szB) {
|
|
+//ZZ case 1: *p++ = 0xE1D42F9F; goto done;
|
|
+//ZZ case 2: *p++ = 0xE1F42F9F; goto done;
|
|
+//ZZ case 4: *p++ = 0xE1942F9F; goto done;
|
|
+//ZZ case 8: *p++ = 0xE1B42F9F; goto done;
|
|
+//ZZ default: break;
|
|
+//ZZ }
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_StrEX: {
|
|
+//ZZ /* E1C40F92 strexb r0, r2, [r4]
|
|
+//ZZ E1E40F92 strexh r0, r2, [r4]
|
|
+//ZZ E1840F92 strex r0, r2, [r4]
|
|
+//ZZ E1A40F92 strexd r0, r2, r3, [r4]
|
|
+//ZZ */
|
|
+//ZZ switch (i->ARMin.StrEX.szB) {
|
|
+//ZZ case 1: *p++ = 0xE1C40F92; goto done;
|
|
+//ZZ case 2: *p++ = 0xE1E40F92; goto done;
|
|
+//ZZ case 4: *p++ = 0xE1840F92; goto done;
|
|
+//ZZ case 8: *p++ = 0xE1A40F92; goto done;
|
|
+//ZZ default: break;
|
|
+//ZZ }
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+ case ARM64in_VLdStS: {
|
|
+ /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
|
|
+ 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
|
|
+ */
|
|
+ UInt sD = dregNo(i->ARM64in.VLdStS.sD);
|
|
+ UInt rN = iregNo(i->ARM64in.VLdStS.rN);
|
|
+ UInt uimm12 = i->ARM64in.VLdStS.uimm12;
|
|
+ Bool isLD = i->ARM64in.VLdStS.isLoad;
|
|
+ vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
|
|
+ uimm12 >>= 2;
|
|
+ vassert(uimm12 < (1<<12));
|
|
+ vassert(sD < 32);
|
|
+ vassert(rN < 31);
|
|
+ *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
|
|
+ uimm12, rN, sD);
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VLdStD: {
|
|
+ /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
|
|
+ 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
|
|
+ */
|
|
+ UInt dD = dregNo(i->ARM64in.VLdStD.dD);
|
|
+ UInt rN = iregNo(i->ARM64in.VLdStD.rN);
|
|
+ UInt uimm12 = i->ARM64in.VLdStD.uimm12;
|
|
+ Bool isLD = i->ARM64in.VLdStD.isLoad;
|
|
+ vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
|
|
+ uimm12 >>= 3;
|
|
+ vassert(uimm12 < (1<<12));
|
|
+ vassert(dD < 32);
|
|
+ vassert(rN < 31);
|
|
+ *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
|
|
+ uimm12, rN, dD);
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VLdStQ: {
|
|
+ /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
|
|
+ 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
|
|
+ */
|
|
+ UInt rQ = qregNo(i->ARM64in.VLdStQ.rQ);
|
|
+ UInt rN = iregNo(i->ARM64in.VLdStQ.rN);
|
|
+ vassert(rQ < 32);
|
|
+ vassert(rN < 31);
|
|
+ if (i->ARM64in.VLdStQ.isLoad) {
|
|
+ *p++ = 0x4C407C00 | (rN << 5) | rQ;
|
|
+ } else {
|
|
+ *p++ = 0x4C007C00 | (rN << 5) | rQ;
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VCvtI2F: {
|
|
+ /* 31 28 23 21 20 18 15 9 4
|
|
+ 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
|
|
+ 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
|
|
+ 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn x
|
|
+ 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
|
|
+ 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
|
|
+ 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
|
|
+ 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
|
|
+ 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
|
|
+ */
|
|
+ UInt rN = iregNo(i->ARM64in.VCvtI2F.rS);
|
|
+ UInt rD = dregNo(i->ARM64in.VCvtI2F.rD);
|
|
+ ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
|
|
+ /* Just handle cases as they show up. */
|
|
+ switch (how) {
|
|
+ case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
|
|
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
|
|
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
|
|
+ break;
|
|
+ /* UCVTF Sd, Wn ATC */
|
|
+ case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
|
|
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
|
|
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
|
|
+ break;
|
|
+ default:
|
|
+ goto bad; //ATC
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VCvtF2I: {
|
|
+ /* 30 23 20 18 15 9 4
|
|
+ sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
|
|
+ sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
|
|
+ ---------------- 01 -------------- FCVTP-------- (round to +inf)
|
|
+ ---------------- 10 -------------- FCVTM-------- (round to -inf)
|
|
+ ---------------- 11 -------------- FCVTZ-------- (round to zero)
|
|
+
|
|
+ Rd is Xd when sf==1, Wd when sf==0
|
|
+ Fn is Dn when x==1, Sn when x==0
|
|
+ 20:19 carry the rounding mode, using the same encoding as FPCR
|
|
+ */
|
|
+ UInt rD = iregNo(i->ARM64in.VCvtF2I.rD);
|
|
+ UInt rN = dregNo(i->ARM64in.VCvtF2I.rS);
|
|
+ ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
|
|
+ UChar armRM = i->ARM64in.VCvtF2I.armRM;
|
|
+ /* Just handle cases as they show up. */
|
|
+ switch (how) {
|
|
+ case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
|
|
+ X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
|
|
+ X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
|
|
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
|
|
+ X000000, rN, rD);
|
|
+ break;
|
|
+ case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
|
|
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
|
|
+ X000000, rN, rD);
|
|
+ break;
|
|
+ /* */
|
|
+ case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
|
|
+ X000000, rN, rD);
|
|
+ break;
|
|
+ /* */
|
|
+ case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
|
|
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
|
|
+ X000000, rN, rD);
|
|
+ break;
|
|
+ default:
|
|
+ goto bad; //ATC
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VCvtSD: {
|
|
+ /* 31 23 21 16 14 9 4
|
|
+ 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
|
|
+ ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
|
|
+ Rounding, when dst is smaller than src, is per the FPCR.
|
|
+ */
|
|
+ UInt dd = dregNo(i->ARM64in.VCvtSD.dst);
|
|
+ UInt nn = dregNo(i->ARM64in.VCvtSD.src);
|
|
+ if (i->ARM64in.VCvtSD.sToD) {
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
|
|
+ } else {
|
|
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
|
|
+ }
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_FPCR: {
|
|
+ Bool toFPCR = i->ARM64in.FPCR.toFPCR;
|
|
+ UInt iReg = iregNo(i->ARM64in.FPCR.iReg);
|
|
+ if (toFPCR) {
|
|
+ /* 0xD51B44 000 Rt MSR fpcr, rT */
|
|
+ *p++ = 0xD51B4400 | (iReg & 0x1F);
|
|
+ goto done;
|
|
+ }
|
|
+ goto bad; // FPCR -> iReg case currently ATC
|
|
+ }
|
|
+ case ARM64in_VUnaryD: {
|
|
+ /* 31 23 21 16 14 9 4
|
|
+ 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
|
|
+ ------------------- 0,1 --------- FABS ------
|
|
+ ------------------- 1,0 --------- FNEG ------
|
|
+ ------------------- 1,1 --------- FSQRT -----
|
|
+ */
|
|
+ UInt dD = dregNo(i->ARM64in.VUnaryD.dst);
|
|
+ UInt dN = dregNo(i->ARM64in.VUnaryD.src);
|
|
+ UInt b16 = 2; /* impossible */
|
|
+ UInt b15 = 2; /* impossible */
|
|
+ switch (i->ARM64in.VUnaryD.op) {
|
|
+ case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
|
|
+ case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
|
|
+ case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
|
|
+ default: break;
|
|
+ }
|
|
+ if (b16 < 2 && b15 < 2) {
|
|
+ *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
|
|
+ (b15 << 5) | X10000, dN, dD);
|
|
+ goto done;
|
|
+ }
|
|
+ /*
|
|
+ 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR)
|
|
+ */
|
|
+ if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
|
|
+ *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
|
|
+ goto done;
|
|
+ }
|
|
+ goto bad;
|
|
+ }
|
|
+ case ARM64in_VUnaryS: {
|
|
+ /* 31 23 21 16 14 9 4
|
|
+ 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
|
|
+ ------------------- 0,1 --------- FABS ------
|
|
+ ------------------- 1,0 --------- FNEG ------
|
|
+ ------------------- 1,1 --------- FSQRT -----
|
|
+ */
|
|
+ UInt sD = dregNo(i->ARM64in.VUnaryS.dst);
|
|
+ UInt sN = dregNo(i->ARM64in.VUnaryS.src);
|
|
+ UInt b16 = 2; /* impossible */
|
|
+ UInt b15 = 2; /* impossible */
|
|
+ switch (i->ARM64in.VUnaryS.op) {
|
|
+ case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
|
|
+ case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
|
|
+ case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
|
|
+ default: break;
|
|
+ }
|
|
+ if (b16 < 2 && b15 < 2) {
|
|
+ *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
|
|
+ (b15 << 5) | X10000, sN, sD);
|
|
+ goto done;
|
|
+ }
|
|
+ /*
|
|
+ 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sn (round per FPCR)
|
|
+ */
|
|
+ if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
|
|
+ *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
|
|
+ goto done;
|
|
+ }
|
|
+ goto bad;
|
|
+ }
|
|
+ case ARM64in_VBinD: {
|
|
+ /* 31 23 20 15 11 9 4
|
|
+ ---------------- 0000 ------ FMUL --------
|
|
+ 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
|
|
+ ---------------- 0010 ------ FADD --------
|
|
+ ---------------- 0011 ------ FSUB --------
|
|
+ */
|
|
+ UInt dD = dregNo(i->ARM64in.VBinD.dst);
|
|
+ UInt dN = dregNo(i->ARM64in.VBinD.argL);
|
|
+ UInt dM = dregNo(i->ARM64in.VBinD.argR);
|
|
+ UInt b1512 = 16; /* impossible */
|
|
+ switch (i->ARM64in.VBinD.op) {
|
|
+ case ARM64fpb_DIV: b1512 = X0001; break;
|
|
+ case ARM64fpb_MUL: b1512 = X0000; break;
|
|
+ case ARM64fpb_SUB: b1512 = X0011; break;
|
|
+ case ARM64fpb_ADD: b1512 = X0010; break;
|
|
+ default: goto bad;
|
|
+ }
|
|
+ vassert(b1512 < 16);
|
|
+ *p++
|
|
+ = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VBinS: {
|
|
+ /* 31 23 20 15 11 9 4
|
|
+ ---------------- 0000 ------ FMUL --------
|
|
+ 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
|
|
+ ---------------- 0010 ------ FADD --------
|
|
+ ---------------- 0011 ------ FSUB --------
|
|
+ */
|
|
+ UInt sD = dregNo(i->ARM64in.VBinS.dst);
|
|
+ UInt sN = dregNo(i->ARM64in.VBinS.argL);
|
|
+ UInt sM = dregNo(i->ARM64in.VBinS.argR);
|
|
+ UInt b1512 = 16; /* impossible */
|
|
+ switch (i->ARM64in.VBinS.op) {
|
|
+ case ARM64fpb_DIV: b1512 = X0001; break;
|
|
+ case ARM64fpb_MUL: b1512 = X0000; break;
|
|
+ case ARM64fpb_SUB: b1512 = X0011; break;
|
|
+ case ARM64fpb_ADD: b1512 = X0010; break;
|
|
+ default: goto bad;
|
|
+ }
|
|
+ vassert(b1512 < 16);
|
|
+ *p++
|
|
+ = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VCmpD: {
|
|
+ /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
|
|
+ UInt dN = dregNo(i->ARM64in.VCmpD.argL);
|
|
+ UInt dM = dregNo(i->ARM64in.VCmpD.argR);
|
|
+ *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VCmpS: {
|
|
+ /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
|
|
+ UInt sN = dregNo(i->ARM64in.VCmpS.argL);
|
|
+ UInt sM = dregNo(i->ARM64in.VCmpS.argR);
|
|
+ *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
|
|
+ goto done;
|
|
+ }
|
|
+//ZZ case ARMin_VAluS: {
|
|
+//ZZ UInt dN = fregNo(i->ARMin.VAluS.argL);
|
|
+//ZZ UInt dD = fregNo(i->ARMin.VAluS.dst);
|
|
+//ZZ UInt dM = fregNo(i->ARMin.VAluS.argR);
|
|
+//ZZ UInt bN = dN & 1;
|
|
+//ZZ UInt bD = dD & 1;
|
|
+//ZZ UInt bM = dM & 1;
|
|
+//ZZ UInt pqrs = X1111; /* undefined */
|
|
+//ZZ switch (i->ARMin.VAluS.op) {
|
|
+//ZZ case ARMvfp_ADD: pqrs = X0110; break;
|
|
+//ZZ case ARMvfp_SUB: pqrs = X0111; break;
|
|
+//ZZ case ARMvfp_MUL: pqrs = X0100; break;
|
|
+//ZZ case ARMvfp_DIV: pqrs = X1000; break;
|
|
+//ZZ default: goto bad;
|
|
+//ZZ }
|
|
+//ZZ vassert(pqrs != X1111);
|
|
+//ZZ UInt bP = (pqrs >> 3) & 1;
|
|
+//ZZ UInt bQ = (pqrs >> 2) & 1;
|
|
+//ZZ UInt bR = (pqrs >> 1) & 1;
|
|
+//ZZ UInt bS = (pqrs >> 0) & 1;
|
|
+//ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
|
|
+//ZZ (dN >> 1), (dD >> 1),
|
|
+//ZZ X1010, BITS4(bN,bS,bM,0), (dM >> 1));
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_VUnaryS: {
|
|
+//ZZ UInt fD = fregNo(i->ARMin.VUnaryS.dst);
|
|
+//ZZ UInt fM = fregNo(i->ARMin.VUnaryS.src);
|
|
+//ZZ UInt insn = 0;
|
|
+//ZZ switch (i->ARMin.VUnaryS.op) {
|
|
+//ZZ case ARMvfpu_COPY:
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
|
|
+//ZZ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
|
|
+//ZZ (fM >> 1));
|
|
+//ZZ break;
|
|
+//ZZ case ARMvfpu_ABS:
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
|
|
+//ZZ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
|
|
+//ZZ (fM >> 1));
|
|
+//ZZ break;
|
|
+//ZZ case ARMvfpu_NEG:
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
|
|
+//ZZ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
|
|
+//ZZ (fM >> 1));
|
|
+//ZZ break;
|
|
+//ZZ case ARMvfpu_SQRT:
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
|
|
+//ZZ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
|
|
+//ZZ (fM >> 1));
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_VCMovD: {
|
|
+//ZZ UInt cc = (UInt)i->ARMin.VCMovD.cond;
|
|
+//ZZ UInt dD = dregNo(i->ARMin.VCMovD.dst);
|
|
+//ZZ UInt dM = dregNo(i->ARMin.VCMovD.src);
|
|
+//ZZ vassert(cc < 16 && cc != ARMcc_AL);
|
|
+//ZZ UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_VCMovS: {
|
|
+//ZZ UInt cc = (UInt)i->ARMin.VCMovS.cond;
|
|
+//ZZ UInt fD = fregNo(i->ARMin.VCMovS.dst);
|
|
+//ZZ UInt fM = fregNo(i->ARMin.VCMovS.src);
|
|
+//ZZ vassert(cc < 16 && cc != ARMcc_AL);
|
|
+//ZZ UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
|
|
+//ZZ X0000,(fD >> 1),X1010,
|
|
+//ZZ BITS4(0,1,(fM & 1),0), (fM >> 1));
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_VXferD: {
|
|
+//ZZ UInt dD = dregNo(i->ARMin.VXferD.dD);
|
|
+//ZZ UInt rHi = iregNo(i->ARMin.VXferD.rHi);
|
|
+//ZZ UInt rLo = iregNo(i->ARMin.VXferD.rLo);
|
|
+//ZZ /* vmov dD, rLo, rHi is
|
|
+//ZZ E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
|
|
+//ZZ vmov rLo, rHi, dD is
|
|
+//ZZ E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
|
|
+//ZZ */
|
|
+//ZZ UInt insn
|
|
+//ZZ = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
|
|
+//ZZ rHi, rLo, 0xB,
|
|
+//ZZ BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_VXferS: {
|
|
+//ZZ UInt fD = fregNo(i->ARMin.VXferS.fD);
|
|
+//ZZ UInt rLo = iregNo(i->ARMin.VXferS.rLo);
|
|
+//ZZ /* vmov fD, rLo is
|
|
+//ZZ E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
|
|
+//ZZ vmov rLo, fD is
|
|
+//ZZ E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
|
|
+//ZZ */
|
|
+//ZZ UInt insn
|
|
+//ZZ = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
|
|
+//ZZ (fD >> 1) & 0xF, rLo, 0xA,
|
|
+//ZZ BITS4((fD & 1),0,0,1), 0);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_VCvtID: {
|
|
+//ZZ Bool iToD = i->ARMin.VCvtID.iToD;
|
|
+//ZZ Bool syned = i->ARMin.VCvtID.syned;
|
|
+//ZZ if (iToD && syned) {
|
|
+//ZZ // FSITOD: I32S-in-freg to F64-in-dreg
|
|
+//ZZ UInt regF = fregNo(i->ARMin.VCvtID.src);
|
|
+//ZZ UInt regD = dregNo(i->ARMin.VCvtID.dst);
|
|
+//ZZ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
|
|
+//ZZ X1011, BITS4(1,1,(regF & 1),0),
|
|
+//ZZ (regF >> 1) & 0xF);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ if (iToD && (!syned)) {
|
|
+//ZZ // FUITOD: I32U-in-freg to F64-in-dreg
|
|
+//ZZ UInt regF = fregNo(i->ARMin.VCvtID.src);
|
|
+//ZZ UInt regD = dregNo(i->ARMin.VCvtID.dst);
|
|
+//ZZ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
|
|
+//ZZ X1011, BITS4(0,1,(regF & 1),0),
|
|
+//ZZ (regF >> 1) & 0xF);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ if ((!iToD) && syned) {
|
|
+//ZZ // FTOSID: F64-in-dreg to I32S-in-freg
|
|
+//ZZ UInt regD = dregNo(i->ARMin.VCvtID.src);
|
|
+//ZZ UInt regF = fregNo(i->ARMin.VCvtID.dst);
|
|
+//ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
|
|
+//ZZ X1101, (regF >> 1) & 0xF,
|
|
+//ZZ X1011, X0100, regD);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ if ((!iToD) && (!syned)) {
|
|
+//ZZ // FTOUID: F64-in-dreg to I32U-in-freg
|
|
+//ZZ UInt regD = dregNo(i->ARMin.VCvtID.src);
|
|
+//ZZ UInt regF = fregNo(i->ARMin.VCvtID.dst);
|
|
+//ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
|
|
+//ZZ X1100, (regF >> 1) & 0xF,
|
|
+//ZZ X1011, X0100, regD);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ /*UNREACHED*/
|
|
+//ZZ vassert(0);
|
|
+//ZZ }
|
|
+//ZZ case ARMin_MFence: {
|
|
+//ZZ // It's not clear (to me) how these relate to the ARMv7
|
|
+//ZZ // versions, so let's just use the v7 versions as they
|
|
+//ZZ // are at least well documented.
|
|
+//ZZ //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
|
|
+//ZZ //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
|
|
+//ZZ //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
|
|
+//ZZ *p++ = 0xF57FF04F; /* DSB sy */
|
|
+//ZZ *p++ = 0xF57FF05F; /* DMB sy */
|
|
+//ZZ *p++ = 0xF57FF06F; /* ISB */
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_CLREX: {
|
|
+//ZZ *p++ = 0xF57FF01F; /* clrex */
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NLdStD: {
|
|
+//ZZ UInt regD = dregNo(i->ARMin.NLdStD.dD);
|
|
+//ZZ UInt regN, regM;
|
|
+//ZZ UInt D = regD >> 4;
|
|
+//ZZ UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
|
|
+//ZZ UInt insn;
|
|
+//ZZ vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
|
|
+//ZZ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
|
|
+//ZZ regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
|
|
+//ZZ } else {
|
|
+//ZZ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
|
|
+//ZZ regM = 15;
|
|
+//ZZ }
|
|
+//ZZ insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
|
|
+//ZZ regN, regD, X0111, X1000, regM);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NUnaryS: {
|
|
+//ZZ UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
|
|
+//ZZ UInt regD, D;
|
|
+//ZZ UInt regM, M;
|
|
+//ZZ UInt size = i->ARMin.NUnaryS.size;
|
|
+//ZZ UInt insn;
|
|
+//ZZ UInt opc, opc1, opc2;
|
|
+//ZZ switch (i->ARMin.NUnaryS.op) {
|
|
+//ZZ case ARMneon_VDUP:
|
|
+//ZZ if (i->ARMin.NUnaryS.size >= 16)
|
|
+//ZZ goto bad;
|
|
+//ZZ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
|
|
+//ZZ goto bad;
|
|
+//ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
|
|
+//ZZ goto bad;
|
|
+//ZZ regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NUnaryS.dst->reg);
|
|
+//ZZ regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NUnaryS.src->reg);
|
|
+//ZZ D = regD >> 4;
|
|
+//ZZ M = regM >> 4;
|
|
+//ZZ regD &= 0xf;
|
|
+//ZZ regM &= 0xf;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
|
|
+//ZZ (i->ARMin.NUnaryS.size & 0xf), regD,
|
|
+//ZZ X1100, BITS4(0,Q,M,0), regM);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ case ARMneon_SETELEM:
|
|
+//ZZ regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
|
|
+//ZZ dregNo(i->ARMin.NUnaryS.dst->reg);
|
|
+//ZZ regM = iregNo(i->ARMin.NUnaryS.src->reg);
|
|
+//ZZ M = regM >> 4;
|
|
+//ZZ D = regD >> 4;
|
|
+//ZZ regM &= 0xF;
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
|
|
+//ZZ goto bad;
|
|
+//ZZ switch (size) {
|
|
+//ZZ case 0:
|
|
+//ZZ if (i->ARMin.NUnaryS.dst->index > 7)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X1000 | i->ARMin.NUnaryS.dst->index;
|
|
+//ZZ break;
|
|
+//ZZ case 1:
|
|
+//ZZ if (i->ARMin.NUnaryS.dst->index > 3)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
|
|
+//ZZ break;
|
|
+//ZZ case 2:
|
|
+//ZZ if (i->ARMin.NUnaryS.dst->index > 1)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ opc1 = (opc >> 2) & 3;
|
|
+//ZZ opc2 = opc & 3;
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
|
|
+//ZZ regD, regM, X1011,
|
|
+//ZZ BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ case ARMneon_GETELEMU:
|
|
+//ZZ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
|
|
+//ZZ dregNo(i->ARMin.NUnaryS.src->reg);
|
|
+//ZZ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
|
|
+//ZZ M = regM >> 4;
|
|
+//ZZ D = regD >> 4;
|
|
+//ZZ regM &= 0xF;
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
|
|
+//ZZ goto bad;
|
|
+//ZZ switch (size) {
|
|
+//ZZ case 0:
|
|
+//ZZ if (Q && i->ARMin.NUnaryS.src->index > 7) {
|
|
+//ZZ regM++;
|
|
+//ZZ i->ARMin.NUnaryS.src->index -= 8;
|
|
+//ZZ }
|
|
+//ZZ if (i->ARMin.NUnaryS.src->index > 7)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X1000 | i->ARMin.NUnaryS.src->index;
|
|
+//ZZ break;
|
|
+//ZZ case 1:
|
|
+//ZZ if (Q && i->ARMin.NUnaryS.src->index > 3) {
|
|
+//ZZ regM++;
|
|
+//ZZ i->ARMin.NUnaryS.src->index -= 4;
|
|
+//ZZ }
|
|
+//ZZ if (i->ARMin.NUnaryS.src->index > 3)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
|
|
+//ZZ break;
|
|
+//ZZ case 2:
|
|
+//ZZ goto bad;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ opc1 = (opc >> 2) & 3;
|
|
+//ZZ opc2 = opc & 3;
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
|
|
+//ZZ regM, regD, X1011,
|
|
+//ZZ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ case ARMneon_GETELEMS:
|
|
+//ZZ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
|
|
+//ZZ dregNo(i->ARMin.NUnaryS.src->reg);
|
|
+//ZZ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
|
|
+//ZZ M = regM >> 4;
|
|
+//ZZ D = regD >> 4;
|
|
+//ZZ regM &= 0xF;
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
|
|
+//ZZ goto bad;
|
|
+//ZZ switch (size) {
|
|
+//ZZ case 0:
|
|
+//ZZ if (Q && i->ARMin.NUnaryS.src->index > 7) {
|
|
+//ZZ regM++;
|
|
+//ZZ i->ARMin.NUnaryS.src->index -= 8;
|
|
+//ZZ }
|
|
+//ZZ if (i->ARMin.NUnaryS.src->index > 7)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X1000 | i->ARMin.NUnaryS.src->index;
|
|
+//ZZ break;
|
|
+//ZZ case 1:
|
|
+//ZZ if (Q && i->ARMin.NUnaryS.src->index > 3) {
|
|
+//ZZ regM++;
|
|
+//ZZ i->ARMin.NUnaryS.src->index -= 4;
|
|
+//ZZ }
|
|
+//ZZ if (i->ARMin.NUnaryS.src->index > 3)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
|
|
+//ZZ break;
|
|
+//ZZ case 2:
|
|
+//ZZ if (Q && i->ARMin.NUnaryS.src->index > 1) {
|
|
+//ZZ regM++;
|
|
+//ZZ i->ARMin.NUnaryS.src->index -= 2;
|
|
+//ZZ }
|
|
+//ZZ if (i->ARMin.NUnaryS.src->index > 1)
|
|
+//ZZ goto bad;
|
|
+//ZZ opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ opc1 = (opc >> 2) & 3;
|
|
+//ZZ opc2 = opc & 3;
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
|
|
+//ZZ regM, regD, X1011,
|
|
+//ZZ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NUnary: {
|
|
+//ZZ UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
|
|
+//ZZ UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NUnary.dst) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NUnary.dst);
|
|
+//ZZ UInt regM, M;
|
|
+//ZZ UInt D = regD >> 4;
|
|
+//ZZ UInt sz1 = i->ARMin.NUnary.size >> 1;
|
|
+//ZZ UInt sz2 = i->ARMin.NUnary.size & 1;
|
|
+//ZZ UInt sz = i->ARMin.NUnary.size;
|
|
+//ZZ UInt insn;
|
|
+//ZZ UInt F = 0; /* TODO: floating point EQZ ??? */
|
|
+//ZZ if (i->ARMin.NUnary.op != ARMneon_DUP) {
|
|
+//ZZ regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NUnary.src) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NUnary.src);
|
|
+//ZZ M = regM >> 4;
|
|
+//ZZ } else {
|
|
+//ZZ regM = iregNo(i->ARMin.NUnary.src);
|
|
+//ZZ M = regM >> 4;
|
|
+//ZZ }
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ regM &= 0xF;
|
|
+//ZZ switch (i->ARMin.NUnary.op) {
|
|
+//ZZ case ARMneon_COPY: /* VMOV reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
|
|
+//ZZ BITS4(M,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_COPYN: /* VMOVN regD, regQ */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
|
|
+//ZZ regD, X0010, BITS4(0,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
|
|
+//ZZ regD, X0010, BITS4(1,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
|
|
+//ZZ regD, X0010, BITS4(0,1,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
|
|
+//ZZ regD, X0010, BITS4(1,1,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_COPYLS: /* VMOVL regQ, regD */
|
|
+//ZZ if (sz >= 3)
|
|
+//ZZ goto bad;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010,
|
|
+//ZZ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
|
|
+//ZZ BITS4((sz == 0) ? 1 : 0,0,0,0),
|
|
+//ZZ regD, X1010, BITS4(0,0,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_COPYLU: /* VMOVL regQ, regD */
|
|
+//ZZ if (sz >= 3)
|
|
+//ZZ goto bad;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011,
|
|
+//ZZ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
|
|
+//ZZ BITS4((sz == 0) ? 1 : 0,0,0,0),
|
|
+//ZZ regD, X1010, BITS4(0,0,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_NOT: /* VMVN reg, reg*/
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
|
|
+//ZZ BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_EQZ:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
|
|
+//ZZ regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_CNT:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
|
|
+//ZZ BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_CLZ:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
|
|
+//ZZ regD, X0100, BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_CLS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
|
|
+//ZZ regD, X0100, BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_ABS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
|
|
+//ZZ regD, X0011, BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_DUP:
|
|
+//ZZ sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
|
|
+//ZZ sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
|
|
+//ZZ vassert(sz1 + sz2 < 2);
|
|
+//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
|
|
+//ZZ X1011, BITS4(D,0,sz2,1), X0000);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_REV16:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
|
|
+//ZZ regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_REV32:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
|
|
+//ZZ regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_REV64:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
|
|
+//ZZ regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_PADDLU:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
|
|
+//ZZ regD, X0010, BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_PADDLS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
|
|
+//ZZ regD, X0010, BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQSHLNUU:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011,
|
|
+//ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
|
|
+//ZZ sz & 0xf, regD, X0111,
|
|
+//ZZ BITS4(sz >> 6,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQSHLNSS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010,
|
|
+//ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
|
|
+//ZZ sz & 0xf, regD, X0111,
|
|
+//ZZ BITS4(sz >> 6,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQSHLNUS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011,
|
|
+//ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
|
|
+//ZZ sz & 0xf, regD, X0110,
|
|
+//ZZ BITS4(sz >> 6,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTFtoS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
|
|
+//ZZ BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTFtoU:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
|
|
+//ZZ BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTStoF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
|
|
+//ZZ BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTUtoF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
|
|
+//ZZ BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTFtoFixedU:
|
|
+//ZZ sz1 = (sz >> 5) & 1;
|
|
+//ZZ sz2 = (sz >> 4) & 1;
|
|
+//ZZ sz &= 0xf;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011,
|
|
+//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1111,
|
|
+//ZZ BITS4(0,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTFtoFixedS:
|
|
+//ZZ sz1 = (sz >> 5) & 1;
|
|
+//ZZ sz2 = (sz >> 4) & 1;
|
|
+//ZZ sz &= 0xf;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010,
|
|
+//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1111,
|
|
+//ZZ BITS4(0,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTFixedUtoF:
|
|
+//ZZ sz1 = (sz >> 5) & 1;
|
|
+//ZZ sz2 = (sz >> 4) & 1;
|
|
+//ZZ sz &= 0xf;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011,
|
|
+//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1110,
|
|
+//ZZ BITS4(0,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTFixedStoF:
|
|
+//ZZ sz1 = (sz >> 5) & 1;
|
|
+//ZZ sz2 = (sz >> 4) & 1;
|
|
+//ZZ sz &= 0xf;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010,
|
|
+//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1110,
|
|
+//ZZ BITS4(0,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTF32toF16:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
|
|
+//ZZ BITS4(0,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCVTF16toF32:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
|
|
+//ZZ BITS4(0,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRECIP:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
|
|
+//ZZ BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRECIPF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
|
|
+//ZZ BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VABSFP:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
|
|
+//ZZ BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRSQRTEFP:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
|
|
+//ZZ BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRSQRTE:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
|
|
+//ZZ BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VNEGF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
|
|
+//ZZ BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NDual: {
|
|
+//ZZ UInt Q = i->ARMin.NDual.Q ? 1 : 0;
|
|
+//ZZ UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NDual.arg1) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NDual.arg1);
|
|
+//ZZ UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NDual.arg2) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NDual.arg2);
|
|
+//ZZ UInt D = regD >> 4;
|
|
+//ZZ UInt M = regM >> 4;
|
|
+//ZZ UInt sz1 = i->ARMin.NDual.size >> 1;
|
|
+//ZZ UInt sz2 = i->ARMin.NDual.size & 1;
|
|
+//ZZ UInt insn;
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ regM &= 0xF;
|
|
+//ZZ switch (i->ARMin.NDual.op) {
|
|
+//ZZ case ARMneon_TRN: /* VTRN reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
|
|
+//ZZ regD, X0000, BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_ZIP: /* VZIP reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
|
|
+//ZZ regD, X0001, BITS4(1,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_UZP: /* VUZP reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
|
|
+//ZZ regD, X0001, BITS4(0,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NBinary: {
|
|
+//ZZ UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
|
|
+//ZZ UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NBinary.dst) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NBinary.dst);
|
|
+//ZZ UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NBinary.argL) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NBinary.argL);
|
|
+//ZZ UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NBinary.argR) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NBinary.argR);
|
|
+//ZZ UInt sz1 = i->ARMin.NBinary.size >> 1;
|
|
+//ZZ UInt sz2 = i->ARMin.NBinary.size & 1;
|
|
+//ZZ UInt D = regD >> 4;
|
|
+//ZZ UInt N = regN >> 4;
|
|
+//ZZ UInt M = regM >> 4;
|
|
+//ZZ UInt insn;
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ regM &= 0xF;
|
|
+//ZZ regN &= 0xF;
|
|
+//ZZ switch (i->ARMin.NBinary.op) {
|
|
+//ZZ case ARMneon_VAND: /* VAND reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
|
|
+//ZZ BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VORR: /* VORR reg, reg, reg*/
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
|
|
+//ZZ BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VXOR: /* VEOR reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
|
|
+//ZZ BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VADD: /* VADD reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1000, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VSUB: /* VSUB reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1000, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0110, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0110, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0110, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0110, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0001, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0001, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0000, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0000, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0010, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0010, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0011, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0011, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0011, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0011, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1000, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
|
|
+//ZZ if (i->ARMin.NBinary.size >= 16)
|
|
+//ZZ goto bad;
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
|
|
+//ZZ i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
|
|
+//ZZ regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMUL:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1001, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMULLU:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1100, BITS4(N,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMULLS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1100, BITS4(N,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMULP:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1001, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMULFP:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
|
|
+//ZZ X1101, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMULLP:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1110, BITS4(N,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQDMULH:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1011, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQRDMULH:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1011, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQDMULL:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1101, BITS4(N,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VTBL:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
|
|
+//ZZ X1000, BITS4(N,0,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPADD:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1011, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPADDFP:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
|
|
+//ZZ X1101, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPMINU:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1010, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPMINS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1010, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPMAXU:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1010, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPMAXS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X1010, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VADDFP: /* VADD reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
|
|
+//ZZ X1101, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
|
|
+//ZZ X1101, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VABDFP: /* VABD reg, reg, reg */
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
|
|
+//ZZ X1101, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMINF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
|
|
+//ZZ X1111, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VMAXF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
|
|
+//ZZ X1111, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPMINF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
|
|
+//ZZ X1111, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VPMAXF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
|
|
+//ZZ X1111, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRECPS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
|
|
+//ZZ BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCGTF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
|
|
+//ZZ BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCGEF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
|
|
+//ZZ BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VCEQF:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
|
|
+//ZZ BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VRSQRTS:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
|
|
+//ZZ BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NShift: {
|
|
+//ZZ UInt Q = i->ARMin.NShift.Q ? 1 : 0;
|
|
+//ZZ UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NShift.dst) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NShift.dst);
|
|
+//ZZ UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NShift.argL) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NShift.argL);
|
|
+//ZZ UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
|
|
+//ZZ ? (qregNo(i->ARMin.NShift.argR) << 1)
|
|
+//ZZ : dregNo(i->ARMin.NShift.argR);
|
|
+//ZZ UInt sz1 = i->ARMin.NShift.size >> 1;
|
|
+//ZZ UInt sz2 = i->ARMin.NShift.size & 1;
|
|
+//ZZ UInt D = regD >> 4;
|
|
+//ZZ UInt N = regN >> 4;
|
|
+//ZZ UInt M = regM >> 4;
|
|
+//ZZ UInt insn;
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ regM &= 0xF;
|
|
+//ZZ regN &= 0xF;
|
|
+//ZZ switch (i->ARMin.NShift.op) {
|
|
+//ZZ case ARMneon_VSHL:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0100, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VSAL:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0100, BITS4(N,Q,M,0), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQSHL:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0100, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ case ARMneon_VQSAL:
|
|
+//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
|
|
+//ZZ X0100, BITS4(N,Q,M,1), regM);
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ goto bad;
|
|
+//ZZ }
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NShl64: {
|
|
+//ZZ HReg regDreg = i->ARMin.NShl64.dst;
|
|
+//ZZ HReg regMreg = i->ARMin.NShl64.src;
|
|
+//ZZ UInt amt = i->ARMin.NShl64.amt;
|
|
+//ZZ vassert(amt >= 1 && amt <= 63);
|
|
+//ZZ vassert(hregClass(regDreg) == HRcFlt64);
|
|
+//ZZ vassert(hregClass(regMreg) == HRcFlt64);
|
|
+//ZZ UInt regD = dregNo(regDreg);
|
|
+//ZZ UInt regM = dregNo(regMreg);
|
|
+//ZZ UInt D = (regD >> 4) & 1;
|
|
+//ZZ UInt Vd = regD & 0xF;
|
|
+//ZZ UInt L = 1;
|
|
+//ZZ UInt Q = 0; /* always 64-bit */
|
|
+//ZZ UInt M = (regM >> 4) & 1;
|
|
+//ZZ UInt Vm = regM & 0xF;
|
|
+//ZZ UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
|
|
+//ZZ amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+ case ARM64in_VImmQ: {
|
|
+ UInt rQ = qregNo(i->ARM64in.VImmQ.rQ);
|
|
+ UShort imm = i->ARM64in.VImmQ.imm;
|
|
+ if (imm == 0) {
|
|
+ /* movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ */
|
|
+ vassert(rQ < 32);
|
|
+ *p++ = 0x4F000400 | rQ;
|
|
+ goto done;
|
|
+ }
|
|
+ goto bad; /* zero is the only handled case right now */
|
|
+ }
|
|
+
|
|
+ case ARM64in_VDfromX: {
|
|
+ /* INS Vd.D[0], rX
|
|
+ 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
|
|
+ This isn't wonderful, in the sense that the upper half of
|
|
+ the vector register stays unchanged and thus the insn is
|
|
+ data dependent on its output register. */
|
|
+ UInt dd = dregNo(i->ARM64in.VDfromX.rD);
|
|
+ UInt xx = iregNo(i->ARM64in.VDfromX.rX);
|
|
+ vassert(xx < 31);
|
|
+ *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_VQfromXX: {
|
|
+ /* What we really generate is a two insn sequence:
|
|
+ INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
|
|
+ 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
|
|
+ 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
|
|
+ */
|
|
+ UInt qq = qregNo(i->ARM64in.VQfromXX.rQ);
|
|
+ UInt xhi = iregNo(i->ARM64in.VQfromXX.rXhi);
|
|
+ UInt xlo = iregNo(i->ARM64in.VQfromXX.rXlo);
|
|
+ vassert(xhi < 31 && xlo < 31);
|
|
+ *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
|
|
+ *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_VXfromQ: {
|
|
+ /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0]
|
|
+ 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1]
|
|
+ */
|
|
+ UInt dd = iregNo(i->ARM64in.VXfromQ.rX);
|
|
+ UInt nn = qregNo(i->ARM64in.VXfromQ.rQ);
|
|
+ UInt laneNo = i->ARM64in.VXfromQ.laneNo;
|
|
+ vassert(dd < 31);
|
|
+ vassert(laneNo < 2);
|
|
+ *p++ = X_3_8_5_6_5_5(X010, X01110000,
|
|
+ laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ case ARM64in_VMov: {
|
|
+ /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
|
|
+ 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
|
|
+ 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
|
|
+ */
|
|
+ HReg rD = i->ARM64in.VMov.dst;
|
|
+ HReg rN = i->ARM64in.VMov.src;
|
|
+ switch (i->ARM64in.VMov.szB) {
|
|
+ case 8: {
|
|
+ UInt dd = dregNo(rD);
|
|
+ UInt nn = dregNo(rN);
|
|
+ *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
|
|
+ goto done;
|
|
+ }
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ goto bad;
|
|
+ }
|
|
+//ZZ case ARMin_NeonImm: {
|
|
+//ZZ UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
|
|
+//ZZ UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
|
|
+//ZZ dregNo(i->ARMin.NeonImm.dst);
|
|
+//ZZ UInt D = regD >> 4;
|
|
+//ZZ UInt imm = i->ARMin.NeonImm.imm->imm8;
|
|
+//ZZ UInt tp = i->ARMin.NeonImm.imm->type;
|
|
+//ZZ UInt j = imm >> 7;
|
|
+//ZZ UInt imm3 = (imm >> 4) & 0x7;
|
|
+//ZZ UInt imm4 = imm & 0xF;
|
|
+//ZZ UInt cmode, op;
|
|
+//ZZ UInt insn;
|
|
+//ZZ regD &= 0xF;
|
|
+//ZZ if (tp == 9)
|
|
+//ZZ op = 1;
|
|
+//ZZ else
|
|
+//ZZ op = 0;
|
|
+//ZZ switch (tp) {
|
|
+//ZZ case 0:
|
|
+//ZZ case 1:
|
|
+//ZZ case 2:
|
|
+//ZZ case 3:
|
|
+//ZZ case 4:
|
|
+//ZZ case 5:
|
|
+//ZZ cmode = tp << 1;
|
|
+//ZZ break;
|
|
+//ZZ case 9:
|
|
+//ZZ case 6:
|
|
+//ZZ cmode = 14;
|
|
+//ZZ break;
|
|
+//ZZ case 7:
|
|
+//ZZ cmode = 12;
|
|
+//ZZ break;
|
|
+//ZZ case 8:
|
|
+//ZZ cmode = 13;
|
|
+//ZZ break;
|
|
+//ZZ case 10:
|
|
+//ZZ cmode = 15;
|
|
+//ZZ break;
|
|
+//ZZ default:
|
|
+//ZZ vpanic("ARMin_NeonImm");
|
|
+//ZZ
|
|
+//ZZ }
|
|
+//ZZ insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
|
|
+//ZZ cmode, BITS4(0,Q,op,1), imm4);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_NCMovQ: {
|
|
+//ZZ UInt cc = (UInt)i->ARMin.NCMovQ.cond;
|
|
+//ZZ UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
|
|
+//ZZ UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
|
|
+//ZZ UInt vM = qM & 0xF;
|
|
+//ZZ UInt vD = qD & 0xF;
|
|
+//ZZ UInt M = (qM >> 4) & 1;
|
|
+//ZZ UInt D = (qD >> 4) & 1;
|
|
+//ZZ vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
|
|
+//ZZ /* b!cc here+8: !cc A00 0000 */
|
|
+//ZZ UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ /* vmov qD, qM */
|
|
+//ZZ insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
|
|
+//ZZ vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+//ZZ case ARMin_Add32: {
|
|
+//ZZ UInt regD = iregNo(i->ARMin.Add32.rD);
|
|
+//ZZ UInt regN = iregNo(i->ARMin.Add32.rN);
|
|
+//ZZ UInt imm32 = i->ARMin.Add32.imm32;
|
|
+//ZZ vassert(regD != regN);
|
|
+//ZZ /* MOV regD, imm32 */
|
|
+//ZZ p = imm32_to_iregNo((UInt *)p, regD, imm32);
|
|
+//ZZ /* ADD regD, regN, regD */
|
|
+//ZZ UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
|
|
+//ZZ *p++ = insn;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+
|
|
+ case ARM64in_EvCheck: {
|
|
+ /* The sequence is fixed (canned) except for the two amodes
|
|
+ supplied by the insn. These don't change the length, though.
|
|
+ We generate:
|
|
+ ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
|
|
+ subs w9, w9, #1
|
|
+ str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
|
|
+ bpl nofail
|
|
+ ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
|
|
+ br x9
|
|
+ nofail:
|
|
+ */
|
|
+ UInt* p0 = p;
|
|
+ p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
|
|
+ i->ARM64in.EvCheck.amCounter);
|
|
+ *p++ = 0x71000529; /* subs w9, w9, #1 */
|
|
+ p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
|
|
+ i->ARM64in.EvCheck.amCounter);
|
|
+ *p++ = 0x54000065; /* bpl nofail */
|
|
+ p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
|
|
+ i->ARM64in.EvCheck.amFailAddr);
|
|
+ *p++ = 0xD61F0120; /* br x9 */
|
|
+ /* nofail: */
|
|
+
|
|
+ /* Crosscheck */
|
|
+ vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+//ZZ case ARMin_ProfInc: {
|
|
+//ZZ /* We generate:
|
|
+//ZZ (ctrP is unknown now, so use 0x65556555 in the
|
|
+//ZZ expectation that a later call to LibVEX_patchProfCtr
|
|
+//ZZ will be used to fill in the immediate fields once the
|
|
+//ZZ right value is known.)
|
|
+//ZZ movw r12, lo16(0x65556555)
|
|
+//ZZ movt r12, hi16(0x65556555)
|
|
+//ZZ ldr r11, [r12]
|
|
+//ZZ adds r11, r11, #1
|
|
+//ZZ str r11, [r12]
|
|
+//ZZ ldr r11, [r12+4]
|
|
+//ZZ adc r11, r11, #0
|
|
+//ZZ str r11, [r12+4]
|
|
+//ZZ */
|
|
+//ZZ p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
|
|
+//ZZ *p++ = 0xE59CB000;
|
|
+//ZZ *p++ = 0xE29BB001;
|
|
+//ZZ *p++ = 0xE58CB000;
|
|
+//ZZ *p++ = 0xE59CB004;
|
|
+//ZZ *p++ = 0xE2ABB000;
|
|
+//ZZ *p++ = 0xE58CB004;
|
|
+//ZZ /* Tell the caller .. */
|
|
+//ZZ vassert(!(*is_profInc));
|
|
+//ZZ *is_profInc = True;
|
|
+//ZZ goto done;
|
|
+//ZZ }
|
|
+
|
|
+ /* ... */
|
|
+ default:
|
|
+ goto bad;
|
|
+ }
|
|
+
|
|
+ bad:
|
|
+ ppARM64Instr(i);
|
|
+ vpanic("emit_ARM64Instr");
|
|
+ /*NOTREACHED*/
|
|
+
|
|
+ done:
|
|
+ vassert(((UChar*)p) - &buf[0] <= 36);
|
|
+ return ((UChar*)p) - &buf[0];
|
|
+}
|
|
+
|
|
+
|
|
+/* How big is an event check? See case for ARM64in_EvCheck in
|
|
+ emit_ARM64Instr just above. That crosschecks what this returns, so
|
|
+ we can tell if we're inconsistent. */
|
|
+Int evCheckSzB_ARM64 ( void )
|
|
+{
|
|
+ return 24;
|
|
+}
|
|
+
|
|
+
|
|
+/* NB: what goes on here has to be very closely coordinated with the
|
|
+ emitInstr case for XDirect, above. */
|
|
+VexInvalRange chainXDirect_ARM64 ( void* place_to_chain,
|
|
+ void* disp_cp_chain_me_EXPECTED,
|
|
+ void* place_to_jump_to )
|
|
+{
|
|
+ /* What we're expecting to see is:
|
|
+ movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
|
|
+ movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
|
|
+ movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
|
|
+ movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
|
|
+ blr x9
|
|
+ viz
|
|
+ <16 bytes generated by imm64_to_iregNo_EXACTLY4>
|
|
+ D6 3F 01 20
|
|
+ */
|
|
+ UInt* p = (UInt*)place_to_chain;
|
|
+ vassert(0 == (3 & (HWord)p));
|
|
+ vassert(is_imm64_to_iregNo_EXACTLY4(
|
|
+ p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
|
|
+ vassert(p[4] == 0xD63F0120);
|
|
+ /* And what we want to change it to is:
|
|
+ movw x9, place_to_jump_to[15:0]
|
|
+ movk x9, place_to_jump_to[31:16], lsl 16
|
|
+ movk x9, place_to_jump_to[47:32], lsl 32
|
|
+ movk x9, place_to_jump_to[63:48], lsl 48
|
|
+ br x9
|
|
+ viz
|
|
+ <16 bytes generated by imm64_to_iregNo_EXACTLY4>
|
|
+ D6 1F 01 20
|
|
+
|
|
+ The replacement has the same length as the original.
|
|
+ */
|
|
+
|
|
+ (void)imm64_to_iregNo_EXACTLY4(
|
|
+ p, /*x*/9, Ptr_to_ULong(place_to_jump_to));
|
|
+ p[4] = 0xD61F0120;
|
|
+
|
|
+ VexInvalRange vir = {(HWord)p, 20};
|
|
+ return vir;
|
|
+}
|
|
+
|
|
+
|
|
+//ZZ /* NB: what goes on here has to be very closely coordinated with the
|
|
+//ZZ emitInstr case for XDirect, above. */
|
|
+//ZZ VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
|
|
+//ZZ void* place_to_jump_to_EXPECTED,
|
|
+//ZZ void* disp_cp_chain_me )
|
|
+//ZZ {
|
|
+//ZZ /* What we're expecting to see is:
|
|
+//ZZ (general case)
|
|
+//ZZ movw r12, lo16(place_to_jump_to_EXPECTED)
|
|
+//ZZ movt r12, hi16(place_to_jump_to_EXPECTED)
|
|
+//ZZ bx r12
|
|
+//ZZ viz
|
|
+//ZZ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
|
|
+//ZZ E1 2F FF 1C
|
|
+//ZZ ---OR---
|
|
+//ZZ in the case where the displacement falls within 26 bits
|
|
+//ZZ b disp24; undef; undef
|
|
+//ZZ viz
|
|
+//ZZ EA <3 bytes == disp24>
|
|
+//ZZ FF 00 00 00
|
|
+//ZZ FF 00 00 00
|
|
+//ZZ */
|
|
+//ZZ UInt* p = (UInt*)place_to_unchain;
|
|
+//ZZ vassert(0 == (3 & (HWord)p));
|
|
+//ZZ
|
|
+//ZZ Bool valid = False;
|
|
+//ZZ if (is_imm32_to_iregNo_EXACTLY2(
|
|
+//ZZ p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED))
|
|
+//ZZ && p[2] == 0xE12FFF1C) {
|
|
+//ZZ valid = True; /* it's the long form */
|
|
+//ZZ if (0)
|
|
+//ZZ vex_printf("QQQ unchainXDirect_ARM: found long form\n");
|
|
+//ZZ } else
|
|
+//ZZ if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
|
|
+//ZZ /* It's the short form. Check the displacement is right. */
|
|
+//ZZ Int simm24 = p[0] & 0x00FFFFFF;
|
|
+//ZZ simm24 <<= 8; simm24 >>= 8;
|
|
+//ZZ if ((UChar*)p + (simm24 << 2) + 8 == (UChar*)place_to_jump_to_EXPECTED) {
|
|
+//ZZ valid = True;
|
|
+//ZZ if (0)
|
|
+//ZZ vex_printf("QQQ unchainXDirect_ARM: found short form\n");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ vassert(valid);
|
|
+//ZZ
|
|
+//ZZ /* And what we want to change it to is:
|
|
+//ZZ movw r12, lo16(disp_cp_chain_me)
|
|
+//ZZ movt r12, hi16(disp_cp_chain_me)
|
|
+//ZZ blx r12
|
|
+//ZZ viz
|
|
+//ZZ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
|
|
+//ZZ E1 2F FF 3C
|
|
+//ZZ */
|
|
+//ZZ (void)imm32_to_iregNo_EXACTLY2(
|
|
+//ZZ p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
|
|
+//ZZ p[2] = 0xE12FFF3C;
|
|
+//ZZ VexInvalRange vir = {(HWord)p, 12};
|
|
+//ZZ return vir;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /* Patch the counter address into a profile inc point, as previously
|
|
+//ZZ created by the ARMin_ProfInc case for emit_ARMInstr. */
|
|
+//ZZ VexInvalRange patchProfInc_ARM ( void* place_to_patch,
|
|
+//ZZ ULong* location_of_counter )
|
|
+//ZZ {
|
|
+//ZZ vassert(sizeof(ULong*) == 4);
|
|
+//ZZ UInt* p = (UInt*)place_to_patch;
|
|
+//ZZ vassert(0 == (3 & (HWord)p));
|
|
+//ZZ vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
|
|
+//ZZ vassert(p[2] == 0xE59CB000);
|
|
+//ZZ vassert(p[3] == 0xE29BB001);
|
|
+//ZZ vassert(p[4] == 0xE58CB000);
|
|
+//ZZ vassert(p[5] == 0xE59CB004);
|
|
+//ZZ vassert(p[6] == 0xE2ABB000);
|
|
+//ZZ vassert(p[7] == 0xE58CB004);
|
|
+//ZZ imm32_to_iregNo_EXACTLY2(p, /*r*/12,
|
|
+//ZZ (UInt)Ptr_to_ULong(location_of_counter));
|
|
+//ZZ VexInvalRange vir = {(HWord)p, 8};
|
|
+//ZZ return vir;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ #undef BITS4
|
|
+//ZZ #undef X0000
|
|
+//ZZ #undef X0001
|
|
+//ZZ #undef X0010
|
|
+//ZZ #undef X0011
|
|
+//ZZ #undef X0100
|
|
+//ZZ #undef X0101
|
|
+//ZZ #undef X0110
|
|
+//ZZ #undef X0111
|
|
+//ZZ #undef X1000
|
|
+//ZZ #undef X1001
|
|
+//ZZ #undef X1010
|
|
+//ZZ #undef X1011
|
|
+//ZZ #undef X1100
|
|
+//ZZ #undef X1101
|
|
+//ZZ #undef X1110
|
|
+//ZZ #undef X1111
|
|
+//ZZ #undef XXXXX___
|
|
+//ZZ #undef XXXXXX__
|
|
+//ZZ #undef XXX___XX
|
|
+//ZZ #undef XXXXX__X
|
|
+//ZZ #undef XXXXXXXX
|
|
+//ZZ #undef XX______
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- end host_arm64_defs.c ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
Index: VEX/priv/host_arm64_defs.h
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ VEX/priv/host_arm64_defs.h
|
|
@@ -0,0 +1,1036 @@
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- begin host_arm64_defs.h ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+/*
|
|
+ This file is part of Valgrind, a dynamic binary instrumentation
|
|
+ framework.
|
|
+
|
|
+ Copyright (C) 2013-2013 OpenWorks
|
|
+ info@open-works.net
|
|
+
|
|
+ This program is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU General Public License as
|
|
+ published by the Free Software Foundation; either version 2 of the
|
|
+ License, or (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful, but
|
|
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ 02110-1301, USA.
|
|
+
|
|
+ The GNU General Public License is contained in the file COPYING.
|
|
+*/
|
|
+
|
|
+#ifndef __VEX_HOST_ARM64_DEFS_H
|
|
+#define __VEX_HOST_ARM64_DEFS_H
|
|
+
|
|
+#include "libvex_basictypes.h"
|
|
+#include "libvex.h" // VexArch
|
|
+#include "host_generic_regs.h" // HReg
|
|
+
|
|
+//ZZ extern UInt arm_hwcaps;
|
|
+
|
|
+
|
|
+/* --------- Registers. --------- */
|
|
+
|
|
+//ZZ /* The usual HReg abstraction.
|
|
+//ZZ There are 16 general purpose regs.
|
|
+//ZZ */
|
|
+
|
|
+extern void ppHRegARM64 ( HReg );
|
|
+
|
|
+extern HReg hregARM64_X0 ( void );
|
|
+extern HReg hregARM64_X1 ( void );
|
|
+extern HReg hregARM64_X2 ( void );
|
|
+extern HReg hregARM64_X3 ( void );
|
|
+extern HReg hregARM64_X4 ( void );
|
|
+extern HReg hregARM64_X5 ( void );
|
|
+extern HReg hregARM64_X6 ( void );
|
|
+extern HReg hregARM64_X7 ( void );
|
|
+//ZZ extern HReg hregARM_R8 ( void );
|
|
+extern HReg hregARM64_X9 ( void );
|
|
+extern HReg hregARM64_X10 ( void );
|
|
+extern HReg hregARM64_X11 ( void );
|
|
+extern HReg hregARM64_X12 ( void );
|
|
+extern HReg hregARM64_X13 ( void );
|
|
+extern HReg hregARM64_X14 ( void );
|
|
+extern HReg hregARM64_X15 ( void );
|
|
+extern HReg hregARM64_X21 ( void );
|
|
+extern HReg hregARM64_X22 ( void );
|
|
+extern HReg hregARM64_X23 ( void );
|
|
+extern HReg hregARM64_X24 ( void );
|
|
+extern HReg hregARM64_X25 ( void );
|
|
+extern HReg hregARM64_X26 ( void );
|
|
+extern HReg hregARM64_X27 ( void );
|
|
+extern HReg hregARM64_X28 ( void );
|
|
+extern HReg hregARM64_D8 ( void );
|
|
+extern HReg hregARM64_D9 ( void );
|
|
+extern HReg hregARM64_D10 ( void );
|
|
+extern HReg hregARM64_D11 ( void );
|
|
+extern HReg hregARM64_D12 ( void );
|
|
+extern HReg hregARM64_D13 ( void );
|
|
+extern HReg hregARM64_Q16 ( void );
|
|
+extern HReg hregARM64_Q17 ( void );
|
|
+extern HReg hregARM64_Q18 ( void );
|
|
+
|
|
+/* Number of registers used for argument passing in function calls */
|
|
+#define ARM64_N_ARGREGS 8 /* x0 .. x7 */
|
|
+
|
|
+
|
|
+/* --------- Condition codes. --------- */
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64cc_EQ = 0, /* equal : Z=1 */
|
|
+ ARM64cc_NE = 1, /* not equal : Z=0 */
|
|
+
|
|
+ ARM64cc_CS = 2, /* >=u (higher or same) : C=1 */
|
|
+ ARM64cc_CC = 3, /* <u (lower) : C=0 */
|
|
+
|
|
+ ARM64cc_MI = 4, /* minus (negative) : N=1 */
|
|
+ ARM64cc_PL = 5, /* plus (zero or +ve) : N=0 */
|
|
+
|
|
+ ARM64cc_VS = 6, /* overflow : V=1 */
|
|
+ ARM64cc_VC = 7, /* no overflow : V=0 */
|
|
+
|
|
+ ARM64cc_HI = 8, /* >u (higher) : C=1 && Z=0 */
|
|
+ ARM64cc_LS = 9, /* <=u (lower or same) : !(C=1 && Z=0) */
|
|
+
|
|
+ ARM64cc_GE = 10, /* >=s (signed greater or equal) : N=V */
|
|
+ ARM64cc_LT = 11, /* <s (signed less than) : !(N=V) */
|
|
+
|
|
+ ARM64cc_GT = 12, /* >s (signed greater) : Z=0 && N=V */
|
|
+ ARM64cc_LE = 13, /* <=s (signed less or equal) : !(Z=0 && N=V) */
|
|
+
|
|
+ ARM64cc_AL = 14, /* always (unconditional) */
|
|
+ ARM64cc_NV = 15 /* in 64-bit mode also means "always" */
|
|
+ }
|
|
+ ARM64CondCode;
|
|
+
|
|
+
|
|
+/* --------- Memory address expressions (amodes). --------- */
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64am_RI9=1, /* reg + simm9 */
|
|
+ ARM64am_RI12, /* reg + uimm12 * szB (iow, scaled by access size) */
|
|
+ ARM64am_RR /* reg1 + reg2 */
|
|
+ }
|
|
+ ARM64AModeTag;
|
|
+
|
|
+typedef
|
|
+ struct {
|
|
+ ARM64AModeTag tag;
|
|
+ union {
|
|
+ struct {
|
|
+ HReg reg;
|
|
+ Int simm9; /* -256 .. +255 */
|
|
+ } RI9;
|
|
+ struct {
|
|
+ HReg reg;
|
|
+ UInt uimm12; /* 0 .. 4095 */
|
|
+ UChar szB; /* 1, 2, 4, 8 (16 ?) */
|
|
+ } RI12;
|
|
+ struct {
|
|
+ HReg base;
|
|
+ HReg index;
|
|
+ } RR;
|
|
+ } ARM64am;
|
|
+ }
|
|
+ ARM64AMode;
|
|
+
|
|
+extern ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 );
|
|
+extern ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB );
|
|
+extern ARM64AMode* ARM64AMode_RR ( HReg base, HReg index );
|
|
+
|
|
+
|
|
+/* --------- Reg or uimm12 or (uimm12 << 12) operands --------- */
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64riA_I12=4, /* uimm12 << 0 or 12 only */
|
|
+ ARM64riA_R /* reg */
|
|
+ }
|
|
+ ARM64RIATag;
|
|
+
|
|
+typedef
|
|
+ struct {
|
|
+ ARM64RIATag tag;
|
|
+ union {
|
|
+ struct {
|
|
+ UShort imm12; /* 0 .. 4095 */
|
|
+ UChar shift; /* 0 or 12 only */
|
|
+ } I12;
|
|
+ struct {
|
|
+ HReg reg;
|
|
+ } R;
|
|
+ } ARM64riA;
|
|
+ }
|
|
+ ARM64RIA;
|
|
+
|
|
+extern ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift );
|
|
+extern ARM64RIA* ARM64RIA_R ( HReg );
|
|
+
|
|
+
|
|
+/* --------- Reg or "bitfield" (logic immediate) operands --------- */
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64riL_I13=6, /* weird-o bitfield immediate, 13 bits in total */
|
|
+ ARM64riL_R /* reg */
|
|
+ }
|
|
+ ARM64RILTag;
|
|
+
|
|
+typedef
|
|
+ struct {
|
|
+ ARM64RILTag tag;
|
|
+ union {
|
|
+ struct {
|
|
+ UChar bitN; /* 0 .. 1 */
|
|
+ UChar immR; /* 0 .. 63 */
|
|
+ UChar immS; /* 0 .. 63 */
|
|
+ } I13;
|
|
+ struct {
|
|
+ HReg reg;
|
|
+ } R;
|
|
+ } ARM64riL;
|
|
+ }
|
|
+ ARM64RIL;
|
|
+
|
|
+extern ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS );
|
|
+extern ARM64RIL* ARM64RIL_R ( HReg );
|
|
+
|
|
+
|
|
+/* --------------- Reg or uimm6 operands --------------- */
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64ri6_I6=8, /* uimm6, 1 .. 63 only */
|
|
+ ARM64ri6_R /* reg */
|
|
+ }
|
|
+ ARM64RI6Tag;
|
|
+
|
|
+typedef
|
|
+ struct {
|
|
+ ARM64RI6Tag tag;
|
|
+ union {
|
|
+ struct {
|
|
+ UInt imm6; /* 1 .. 63 */
|
|
+ } I6;
|
|
+ struct {
|
|
+ HReg reg;
|
|
+ } R;
|
|
+ } ARM64ri6;
|
|
+ }
|
|
+ ARM64RI6;
|
|
+
|
|
+extern ARM64RI6* ARM64RI6_I6 ( UInt imm6 );
|
|
+extern ARM64RI6* ARM64RI6_R ( HReg );
|
|
+
|
|
+
|
|
+/* --------------------- Instructions --------------------- */
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64lo_AND=10,
|
|
+ ARM64lo_OR,
|
|
+ ARM64lo_XOR
|
|
+ }
|
|
+ ARM64LogicOp;
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64sh_SHL=13,
|
|
+ ARM64sh_SHR,
|
|
+ ARM64sh_SAR
|
|
+ }
|
|
+ ARM64ShiftOp;
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64un_NEG=16,
|
|
+ ARM64un_NOT,
|
|
+ ARM64un_CLZ,
|
|
+ }
|
|
+ ARM64UnaryOp;
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64mul_PLAIN=60, /* lo64(64 * 64) */
|
|
+ ARM64mul_ZX, /* hi64(64 *u 64) */
|
|
+ ARM64mul_SX /* hi64(64 *s 64) */
|
|
+ }
|
|
+ ARM64MulOp;
|
|
+
|
|
+typedef
|
|
+ /* These characterise an integer-FP conversion, but don't imply any
|
|
+ particular direction. */
|
|
+ enum {
|
|
+ ARM64cvt_F32_I32S=65,
|
|
+ ARM64cvt_F64_I32S,
|
|
+ ARM64cvt_F32_I64S,
|
|
+ ARM64cvt_F64_I64S,
|
|
+ ARM64cvt_F32_I32U,
|
|
+ ARM64cvt_F64_I32U,
|
|
+ ARM64cvt_F32_I64U,
|
|
+ ARM64cvt_F64_I64U,
|
|
+ ARM64cvt_INVALID
|
|
+ }
|
|
+ ARM64CvtOp;
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64fpb_ADD=75,
|
|
+ ARM64fpb_SUB,
|
|
+ ARM64fpb_MUL,
|
|
+ ARM64fpb_DIV,
|
|
+ ARM64fpb_INVALID
|
|
+ }
|
|
+ ARM64FpBinOp;
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64fpu_NEG=82,
|
|
+ ARM64fpu_ABS,
|
|
+ ARM64fpu_SQRT,
|
|
+ ARM64fpu_RINT,
|
|
+ ARM64fpu_INVALID
|
|
+ }
|
|
+ ARM64FpUnaryOp;
|
|
+
|
|
+//ZZ extern const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op );
|
|
+//ZZ
|
|
+//ZZ typedef
|
|
+//ZZ enum {
|
|
+//ZZ ARMneon_VAND=90,
|
|
+//ZZ ARMneon_VORR,
|
|
+//ZZ ARMneon_VXOR,
|
|
+//ZZ ARMneon_VADD,
|
|
+//ZZ ARMneon_VADDFP,
|
|
+//ZZ ARMneon_VRHADDS,
|
|
+//ZZ ARMneon_VRHADDU,
|
|
+//ZZ ARMneon_VPADDFP,
|
|
+//ZZ ARMneon_VABDFP,
|
|
+//ZZ ARMneon_VSUB,
|
|
+//ZZ ARMneon_VSUBFP,
|
|
+//ZZ ARMneon_VMAXU,
|
|
+//ZZ ARMneon_VMAXS,
|
|
+//ZZ ARMneon_VMAXF,
|
|
+//ZZ ARMneon_VMINU,
|
|
+//ZZ ARMneon_VMINS,
|
|
+//ZZ ARMneon_VMINF,
|
|
+//ZZ ARMneon_VQADDU,
|
|
+//ZZ ARMneon_VQADDS,
|
|
+//ZZ ARMneon_VQSUBU,
|
|
+//ZZ ARMneon_VQSUBS,
|
|
+//ZZ ARMneon_VCGTU,
|
|
+//ZZ ARMneon_VCGTS,
|
|
+//ZZ ARMneon_VCGEU,
|
|
+//ZZ ARMneon_VCGES,
|
|
+//ZZ ARMneon_VCGTF,
|
|
+//ZZ ARMneon_VCGEF,
|
|
+//ZZ ARMneon_VCEQ,
|
|
+//ZZ ARMneon_VCEQF,
|
|
+//ZZ ARMneon_VEXT,
|
|
+//ZZ ARMneon_VMUL,
|
|
+//ZZ ARMneon_VMULFP,
|
|
+//ZZ ARMneon_VMULLU,
|
|
+//ZZ ARMneon_VMULLS,
|
|
+//ZZ ARMneon_VMULP,
|
|
+//ZZ ARMneon_VMULLP,
|
|
+//ZZ ARMneon_VQDMULH,
|
|
+//ZZ ARMneon_VQRDMULH,
|
|
+//ZZ ARMneon_VPADD,
|
|
+//ZZ ARMneon_VPMINU,
|
|
+//ZZ ARMneon_VPMINS,
|
|
+//ZZ ARMneon_VPMINF,
|
|
+//ZZ ARMneon_VPMAXU,
|
|
+//ZZ ARMneon_VPMAXS,
|
|
+//ZZ ARMneon_VPMAXF,
|
|
+//ZZ ARMneon_VTBL,
|
|
+//ZZ ARMneon_VQDMULL,
|
|
+//ZZ ARMneon_VRECPS,
|
|
+//ZZ ARMneon_VRSQRTS,
|
|
+//ZZ /* ... */
|
|
+//ZZ }
|
|
+//ZZ ARMNeonBinOp;
|
|
+//ZZ
|
|
+//ZZ typedef
|
|
+//ZZ enum {
|
|
+//ZZ ARMneon_VSHL=150,
|
|
+//ZZ ARMneon_VSAL, /* Yah, not SAR but SAL */
|
|
+//ZZ ARMneon_VQSHL,
|
|
+//ZZ ARMneon_VQSAL
|
|
+//ZZ }
|
|
+//ZZ ARMNeonShiftOp;
|
|
+//ZZ
|
|
+//ZZ typedef
|
|
+//ZZ enum {
|
|
+//ZZ ARMneon_COPY=160,
|
|
+//ZZ ARMneon_COPYLU,
|
|
+//ZZ ARMneon_COPYLS,
|
|
+//ZZ ARMneon_COPYN,
|
|
+//ZZ ARMneon_COPYQNSS,
|
|
+//ZZ ARMneon_COPYQNUS,
|
|
+//ZZ ARMneon_COPYQNUU,
|
|
+//ZZ ARMneon_NOT,
|
|
+//ZZ ARMneon_EQZ,
|
|
+//ZZ ARMneon_DUP,
|
|
+//ZZ ARMneon_PADDLS,
|
|
+//ZZ ARMneon_PADDLU,
|
|
+//ZZ ARMneon_CNT,
|
|
+//ZZ ARMneon_CLZ,
|
|
+//ZZ ARMneon_CLS,
|
|
+//ZZ ARMneon_VCVTxFPxINT,
|
|
+//ZZ ARMneon_VQSHLNSS,
|
|
+//ZZ ARMneon_VQSHLNUU,
|
|
+//ZZ ARMneon_VQSHLNUS,
|
|
+//ZZ ARMneon_VCVTFtoU,
|
|
+//ZZ ARMneon_VCVTFtoS,
|
|
+//ZZ ARMneon_VCVTUtoF,
|
|
+//ZZ ARMneon_VCVTStoF,
|
|
+//ZZ ARMneon_VCVTFtoFixedU,
|
|
+//ZZ ARMneon_VCVTFtoFixedS,
|
|
+//ZZ ARMneon_VCVTFixedUtoF,
|
|
+//ZZ ARMneon_VCVTFixedStoF,
|
|
+//ZZ ARMneon_VCVTF16toF32,
|
|
+//ZZ ARMneon_VCVTF32toF16,
|
|
+//ZZ ARMneon_REV16,
|
|
+//ZZ ARMneon_REV32,
|
|
+//ZZ ARMneon_REV64,
|
|
+//ZZ ARMneon_ABS,
|
|
+//ZZ ARMneon_VNEGF,
|
|
+//ZZ ARMneon_VRECIP,
|
|
+//ZZ ARMneon_VRECIPF,
|
|
+//ZZ ARMneon_VABSFP,
|
|
+//ZZ ARMneon_VRSQRTEFP,
|
|
+//ZZ ARMneon_VRSQRTE
|
|
+//ZZ /* ... */
|
|
+//ZZ }
|
|
+//ZZ ARMNeonUnOp;
|
|
+//ZZ
|
|
+//ZZ typedef
|
|
+//ZZ enum {
|
|
+//ZZ ARMneon_SETELEM=200,
|
|
+//ZZ ARMneon_GETELEMU,
|
|
+//ZZ ARMneon_GETELEMS,
|
|
+//ZZ ARMneon_VDUP,
|
|
+//ZZ }
|
|
+//ZZ ARMNeonUnOpS;
|
|
+//ZZ
|
|
+//ZZ typedef
|
|
+//ZZ enum {
|
|
+//ZZ ARMneon_TRN=210,
|
|
+//ZZ ARMneon_ZIP,
|
|
+//ZZ ARMneon_UZP
|
|
+//ZZ /* ... */
|
|
+//ZZ }
|
|
+//ZZ ARMNeonDualOp;
|
|
+//ZZ
|
|
+//ZZ extern const HChar* showARMNeonBinOp ( ARMNeonBinOp op );
|
|
+//ZZ extern const HChar* showARMNeonUnOp ( ARMNeonUnOp op );
|
|
+//ZZ extern const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op );
|
|
+//ZZ extern const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op );
|
|
+//ZZ extern const HChar* showARMNeonDualOp ( ARMNeonDualOp op );
|
|
+//ZZ extern const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op );
|
|
+//ZZ extern const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op );
|
|
+//ZZ extern const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op );
|
|
+//ZZ extern const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op );
|
|
+//ZZ extern const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op );
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ /* baseline */
|
|
+ ARM64in_Arith=1220,
|
|
+ ARM64in_Cmp,
|
|
+ ARM64in_Logic,
|
|
+ ARM64in_Test,
|
|
+ ARM64in_Shift,
|
|
+ ARM64in_Unary,
|
|
+ ARM64in_MovI, /* int reg-reg move */
|
|
+ ARM64in_Imm64,
|
|
+ ARM64in_LdSt64,
|
|
+ ARM64in_LdSt32, /* w/ ZX loads */
|
|
+ ARM64in_LdSt16, /* w/ ZX loads */
|
|
+ ARM64in_LdSt8, /* w/ ZX loads */
|
|
+ ARM64in_XDirect, /* direct transfer to GA */
|
|
+ ARM64in_XIndir, /* indirect transfer to GA */
|
|
+ ARM64in_XAssisted, /* assisted transfer to GA */
|
|
+ ARM64in_CSel,
|
|
+ ARM64in_Call,
|
|
+ ARM64in_AddToSP, /* move SP by small, signed constant */
|
|
+ ARM64in_FromSP, /* move SP to integer register */
|
|
+ ARM64in_Mul,
|
|
+//ZZ ARMin_LdrEX,
|
|
+//ZZ ARMin_StrEX,
|
|
+ /* vector */
|
|
+ ARM64in_VLdStS, /* 32-bit FP load/store, with imm offset */
|
|
+ ARM64in_VLdStD, /* 64-bit FP load/store, with imm offset */
|
|
+ ARM64in_VLdStQ,
|
|
+ ARM64in_VCvtI2F,
|
|
+ ARM64in_VCvtF2I,
|
|
+ ARM64in_VCvtSD,
|
|
+ ARM64in_VUnaryD,
|
|
+ ARM64in_VUnaryS,
|
|
+ ARM64in_VBinD,
|
|
+ ARM64in_VBinS,
|
|
+ ARM64in_VCmpD,
|
|
+ ARM64in_VCmpS,
|
|
+ ARM64in_FPCR,
|
|
+//ZZ ARMin_VAluS,
|
|
+//ZZ ARMin_VCMovD,
|
|
+//ZZ ARMin_VCMovS,
|
|
+//ZZ ARMin_VXferD,
|
|
+//ZZ ARMin_VXferS,
|
|
+//ZZ ARMin_VCvtID,
|
|
+//ZZ ARMin_MFence,
|
|
+//ZZ ARMin_CLREX,
|
|
+//ZZ /* Neon */
|
|
+//ZZ ARMin_NLdStD,
|
|
+//ZZ ARMin_NUnary,
|
|
+//ZZ ARMin_NUnaryS,
|
|
+//ZZ ARMin_NDual,
|
|
+//ZZ ARMin_NBinary,
|
|
+//ZZ ARMin_NBinaryS,
|
|
+//ZZ ARMin_NShift,
|
|
+//ZZ ARMin_NShl64, // special case 64-bit shift of Dreg by immediate
|
|
+ ARM64in_VImmQ,
|
|
+ ARM64in_VDfromX, /* Move an Xreg to a Dreg */
|
|
+ ARM64in_VQfromXX, /* Move 2 Xregs to a Qreg */
|
|
+ ARM64in_VXfromQ, /* Move half a Qreg to an Xreg */
|
|
+ ARM64in_VMov, /* vector reg-reg move, 16, 8 or 4 bytes */
|
|
+ /* infrastructure */
|
|
+ ARM64in_EvCheck, /* Event check */
|
|
+//ZZ ARMin_ProfInc /* 64-bit profile counter increment */
|
|
+ }
|
|
+ ARM64InstrTag;
|
|
+
|
|
+/* Destinations are on the LEFT (first operand) */
|
|
+
|
|
+typedef
|
|
+ struct {
|
|
+ ARM64InstrTag tag;
|
|
+ union {
|
|
+ /* --- INTEGER INSTRUCTIONS --- */
|
|
+ /* 64 bit ADD/SUB reg, reg or uimm12<<{0,12} */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ HReg argL;
|
|
+ ARM64RIA* argR;
|
|
+ Bool isAdd;
|
|
+ } Arith;
|
|
+ /* 64 or 32 bit CMP reg, reg or aimm (SUB and set flags) */
|
|
+ struct {
|
|
+ HReg argL;
|
|
+ ARM64RIA* argR;
|
|
+ Bool is64;
|
|
+ } Cmp;
|
|
+ /* 64 bit AND/OR/XOR reg, reg or bitfield-immediate */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ HReg argL;
|
|
+ ARM64RIL* argR;
|
|
+ ARM64LogicOp op;
|
|
+ } Logic;
|
|
+ /* 64 bit TST reg, reg or bimm (AND and set flags) */
|
|
+ struct {
|
|
+ HReg argL;
|
|
+ ARM64RIL* argR;
|
|
+ } Test;
|
|
+ /* 64 bit SHL/SHR/SAR, 2nd arg is reg or imm */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ HReg argL;
|
|
+ ARM64RI6* argR;
|
|
+ ARM64ShiftOp op;
|
|
+ } Shift;
|
|
+ /* NOT/NEG/CLZ, 64 bit only */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ HReg src;
|
|
+ ARM64UnaryOp op;
|
|
+ } Unary;
|
|
+ /* MOV dst, src -- reg-reg move for integer registers */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ HReg src;
|
|
+ } MovI;
|
|
+ /* Pseudo-insn; make a 64-bit immediate */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ ULong imm64;
|
|
+ } Imm64;
|
|
+ /* 64-bit load or store */
|
|
+ struct {
|
|
+ Bool isLoad;
|
|
+ HReg rD;
|
|
+ ARM64AMode* amode;
|
|
+ } LdSt64;
|
|
+ /* zx-32-to-64-bit load, or 32-bit store */
|
|
+ struct {
|
|
+ Bool isLoad;
|
|
+ HReg rD;
|
|
+ ARM64AMode* amode;
|
|
+ } LdSt32;
|
|
+ /* zx-16-to-64-bit load, or 16-bit store */
|
|
+ struct {
|
|
+ Bool isLoad;
|
|
+ HReg rD;
|
|
+ ARM64AMode* amode;
|
|
+ } LdSt16;
|
|
+ /* zx-8-to-64-bit load, or 8-bit store */
|
|
+ struct {
|
|
+ Bool isLoad;
|
|
+ HReg rD;
|
|
+ ARM64AMode* amode;
|
|
+ } LdSt8;
|
|
+ /* Update the guest PC value, then exit requesting to chain
|
|
+ to it. May be conditional. Urr, use of Addr64 implicitly
|
|
+ assumes that wordsize(guest) == wordsize(host). */
|
|
+ struct {
|
|
+ Addr64 dstGA; /* next guest address */
|
|
+ ARM64AMode* amPC; /* amode in guest state for PC */
|
|
+ ARM64CondCode cond; /* can be ARM64cc_AL */
|
|
+ Bool toFastEP; /* chain to the slow or fast point? */
|
|
+ } XDirect;
|
|
+ /* Boring transfer to a guest address not known at JIT time.
|
|
+ Not chainable. May be conditional. */
|
|
+ struct {
|
|
+ HReg dstGA;
|
|
+ ARM64AMode* amPC;
|
|
+ ARM64CondCode cond; /* can be ARM64cc_AL */
|
|
+ } XIndir;
|
|
+ /* Assisted transfer to a guest address, most general case.
|
|
+ Not chainable. May be conditional. */
|
|
+ struct {
|
|
+ HReg dstGA;
|
|
+ ARM64AMode* amPC;
|
|
+ ARM64CondCode cond; /* can be ARM64cc_AL */
|
|
+ IRJumpKind jk;
|
|
+ } XAssisted;
|
|
+ /* CSEL: dst = if cond then argL else argR. cond may be anything. */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ HReg argL;
|
|
+ HReg argR;
|
|
+ ARM64CondCode cond;
|
|
+ } CSel;
|
|
+ /* Pseudo-insn. Call target (an absolute address), on given
|
|
+ condition (which could be ARM64cc_AL). */
|
|
+ struct {
|
|
+ RetLoc rloc; /* where the return value will be */
|
|
+ HWord target;
|
|
+ ARM64CondCode cond;
|
|
+ Int nArgRegs; /* # regs carrying args: 0 .. 8 */
|
|
+ } Call;
|
|
+ /* move SP by small, signed constant */
|
|
+ struct {
|
|
+ Int simm; /* needs to be 0 % 16 and in the range -4095
|
|
+ .. 4095 inclusive */
|
|
+ } AddToSP;
|
|
+ /* move SP to integer register */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ } FromSP;
|
|
+ /* Integer multiply, with 3 variants:
|
|
+ (PLAIN) lo64(64 * 64)
|
|
+ (ZX) hi64(64 *u 64)
|
|
+ (SX) hi64(64 *s 64)
|
|
+ */
|
|
+ struct {
|
|
+ HReg dst;
|
|
+ HReg argL;
|
|
+ HReg argR;
|
|
+ ARM64MulOp op;
|
|
+ } Mul;
|
|
+//ZZ /* LDREX{,H,B} r2, [r4] and
|
|
+//ZZ LDREXD r2, r3, [r4] (on LE hosts, transferred value is r3:r2)
|
|
+//ZZ Again, hardwired registers since this is not performance
|
|
+//ZZ critical, and there are possibly constraints on the
|
|
+//ZZ registers that we can't express in the register allocator.*/
|
|
+//ZZ struct {
|
|
+//ZZ Int szB; /* 1, 2, 4 or 8 */
|
|
+//ZZ } LdrEX;
|
|
+//ZZ /* STREX{,H,B} r0, r2, [r4] and
|
|
+//ZZ STREXD r0, r2, r3, [r4] (on LE hosts, transferred value is r3:r2)
|
|
+//ZZ r0 = SC( [r4] = r2 ) (8, 16, 32 bit transfers)
|
|
+//ZZ r0 = SC( [r4] = r3:r2) (64 bit transfers)
|
|
+//ZZ Ditto comment re fixed registers. */
|
|
+//ZZ struct {
|
|
+//ZZ Int szB; /* 1, 2, 4 or 8 */
|
|
+//ZZ } StrEX;
|
|
+ /* --- INSTRUCTIONS INVOLVING VECTOR REGISTERS --- */
|
|
+ /* 32-bit Fp load/store */
|
|
+ struct {
|
|
+ Bool isLoad;
|
|
+ HReg sD;
|
|
+ HReg rN;
|
|
+ UInt uimm12; /* 0 .. 16380 inclusive, 0 % 4 */
|
|
+ } VLdStS;
|
|
+ /* 64-bit Fp load/store */
|
|
+ struct {
|
|
+ Bool isLoad;
|
|
+ HReg dD;
|
|
+ HReg rN;
|
|
+ UInt uimm12; /* 0 .. 32760 inclusive, 0 % 8 */
|
|
+ } VLdStD;
|
|
+ /* 128-bit Vector load/store. */
|
|
+ struct {
|
|
+ Bool isLoad;
|
|
+ HReg rQ; // data
|
|
+ HReg rN; // address
|
|
+ } VLdStQ;
|
|
+ /* Scalar conversion of int to float. */
|
|
+ struct {
|
|
+ ARM64CvtOp how;
|
|
+ HReg rD; // dst, a D or S register
|
|
+ HReg rS; // src, a W or X register
|
|
+ } VCvtI2F;
|
|
+ /* Scalar conversion of float to int, w/ specified RM. */
|
|
+ struct {
|
|
+ ARM64CvtOp how;
|
|
+ HReg rD; // dst, a W or X register
|
|
+ HReg rS; // src, a D or S register
|
|
+ UChar armRM; // ARM encoded RM:
|
|
+ // 00=nearest, 01=+inf, 10=-inf, 11=zero
|
|
+ } VCvtF2I;
|
|
+ /* Convert between 32-bit and 64-bit FP values (both
|
|
+ ways). (FCVT) */
|
|
+ struct {
|
|
+ Bool sToD; /* True: F32->F64. False: F64->F32 */
|
|
+ HReg dst;
|
|
+ HReg src;
|
|
+ } VCvtSD;
|
|
+ /* 64-bit FP unary */
|
|
+ struct {
|
|
+ ARM64FpUnaryOp op;
|
|
+ HReg dst;
|
|
+ HReg src;
|
|
+ } VUnaryD;
|
|
+ /* 32-bit FP unary */
|
|
+ struct {
|
|
+ ARM64FpUnaryOp op;
|
|
+ HReg dst;
|
|
+ HReg src;
|
|
+ } VUnaryS;
|
|
+ /* 64-bit FP binary arithmetic */
|
|
+ struct {
|
|
+ ARM64FpBinOp op;
|
|
+ HReg dst;
|
|
+ HReg argL;
|
|
+ HReg argR;
|
|
+ } VBinD;
|
|
+ /* 32-bit FP binary arithmetic */
|
|
+ struct {
|
|
+ ARM64FpBinOp op;
|
|
+ HReg dst;
|
|
+ HReg argL;
|
|
+ HReg argR;
|
|
+ } VBinS;
|
|
+ /* 64-bit FP compare */
|
|
+ struct {
|
|
+ HReg argL;
|
|
+ HReg argR;
|
|
+ } VCmpD;
|
|
+ /* 32-bit FP compare */
|
|
+ struct {
|
|
+ HReg argL;
|
|
+ HReg argR;
|
|
+ } VCmpS;
|
|
+ /* Move a 32-bit value to/from the FPCR */
|
|
+ struct {
|
|
+ Bool toFPCR;
|
|
+ HReg iReg;
|
|
+ } FPCR;
|
|
+//ZZ /* 32-bit FP binary arithmetic */
|
|
+//ZZ struct {
|
|
+//ZZ ARMVfpOp op;
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg argL;
|
|
+//ZZ HReg argR;
|
|
+//ZZ } VAluS;
|
|
+//ZZ /* 64-bit FP mov src to dst on the given condition, which may
|
|
+//ZZ not be ARMcc_AL. */
|
|
+//ZZ struct {
|
|
+//ZZ ARMCondCode cond;
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg src;
|
|
+//ZZ } VCMovD;
|
|
+//ZZ /* 32-bit FP mov src to dst on the given condition, which may
|
|
+//ZZ not be ARMcc_AL. */
|
|
+//ZZ struct {
|
|
+//ZZ ARMCondCode cond;
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg src;
|
|
+//ZZ } VCMovS;
|
|
+//ZZ /* Transfer a VFP D reg to/from two integer registers (VMOV) */
|
|
+//ZZ struct {
|
|
+//ZZ Bool toD;
|
|
+//ZZ HReg dD;
|
|
+//ZZ HReg rHi;
|
|
+//ZZ HReg rLo;
|
|
+//ZZ } VXferD;
|
|
+//ZZ /* Transfer a VFP S reg to/from an integer register (VMOV) */
|
|
+//ZZ struct {
|
|
+//ZZ Bool toS;
|
|
+//ZZ HReg fD;
|
|
+//ZZ HReg rLo;
|
|
+//ZZ } VXferS;
|
|
+//ZZ /* Convert between 32-bit ints and 64-bit FP values (both ways
|
|
+//ZZ and both signednesses). (FSITOD, FUITOD, FTOSID, FTOUID) */
|
|
+//ZZ struct {
|
|
+//ZZ Bool iToD; /* True: I32->F64. False: F64->I32 */
|
|
+//ZZ Bool syned; /* True: I32 is signed. False: I32 is unsigned */
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg src;
|
|
+//ZZ } VCvtID;
|
|
+//ZZ /* Mem fence. An insn which fences all loads and stores as
|
|
+//ZZ much as possible before continuing. On ARM we emit the
|
|
+//ZZ sequence
|
|
+//ZZ mcr 15,0,r0,c7,c10,4 (DSB)
|
|
+//ZZ mcr 15,0,r0,c7,c10,5 (DMB)
|
|
+//ZZ mcr 15,0,r0,c7,c5,4 (ISB)
|
|
+//ZZ which is probably total overkill, but better safe than
|
|
+//ZZ sorry.
|
|
+//ZZ */
|
|
+//ZZ struct {
|
|
+//ZZ } MFence;
|
|
+//ZZ /* A CLREX instruction. */
|
|
+//ZZ struct {
|
|
+//ZZ } CLREX;
|
|
+//ZZ /* Neon data processing instruction: 3 registers of the same
|
|
+//ZZ length */
|
|
+//ZZ struct {
|
|
+//ZZ ARMNeonBinOp op;
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg argL;
|
|
+//ZZ HReg argR;
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool Q;
|
|
+//ZZ } NBinary;
|
|
+//ZZ struct {
|
|
+//ZZ ARMNeonBinOp op;
|
|
+//ZZ ARMNRS* dst;
|
|
+//ZZ ARMNRS* argL;
|
|
+//ZZ ARMNRS* argR;
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool Q;
|
|
+//ZZ } NBinaryS;
|
|
+//ZZ struct {
|
|
+//ZZ ARMNeonShiftOp op;
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg argL;
|
|
+//ZZ HReg argR;
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool Q;
|
|
+//ZZ } NShift;
|
|
+//ZZ struct {
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg src;
|
|
+//ZZ UInt amt; /* 1..63 only */
|
|
+//ZZ } NShl64;
|
|
+//ZZ struct {
|
|
+//ZZ Bool isLoad;
|
|
+//ZZ HReg dD;
|
|
+//ZZ ARMAModeN *amode;
|
|
+//ZZ } NLdStD
|
|
+//ZZ struct {
|
|
+//ZZ ARMNeonUnOpS op;
|
|
+//ZZ ARMNRS* dst;
|
|
+//ZZ ARMNRS* src;
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool Q;
|
|
+//ZZ } NUnaryS;
|
|
+//ZZ struct {
|
|
+//ZZ ARMNeonUnOp op;
|
|
+//ZZ HReg dst;
|
|
+//ZZ HReg src;
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool Q;
|
|
+//ZZ } NUnary;
|
|
+//ZZ /* Takes two arguments and modifies them both. */
|
|
+//ZZ struct {
|
|
+//ZZ ARMNeonDualOp op;
|
|
+//ZZ HReg arg1;
|
|
+//ZZ HReg arg2;
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool Q;
|
|
+//ZZ } NDual;
|
|
+ struct {
|
|
+ HReg rQ;
|
|
+ UShort imm; /* Same 1-bit-per-byte encoding as IR */
|
|
+ } VImmQ;
|
|
+ struct {
|
|
+ HReg rD;
|
|
+ HReg rX;
|
|
+ } VDfromX;
|
|
+ struct {
|
|
+ HReg rQ;
|
|
+ HReg rXhi;
|
|
+ HReg rXlo;
|
|
+ } VQfromXX;
|
|
+ struct {
|
|
+ HReg rX;
|
|
+ HReg rQ;
|
|
+ UInt laneNo; /* either 0 or 1 */
|
|
+ } VXfromQ;
|
|
+ /* MOV dst, src -- reg-reg move for vector registers */
|
|
+ struct {
|
|
+ UInt szB; // 16=mov qD,qS; 8=mov dD,dS; 4=mov sD,sS
|
|
+ HReg dst;
|
|
+ HReg src;
|
|
+ } VMov;
|
|
+ struct {
|
|
+ ARM64AMode* amCounter;
|
|
+ ARM64AMode* amFailAddr;
|
|
+ } EvCheck;
|
|
+//ZZ struct {
|
|
+//ZZ /* No fields. The address of the counter to inc is
|
|
+//ZZ installed later, post-translation, by patching it in,
|
|
+//ZZ as it is not known at translation time. */
|
|
+//ZZ } ProfInc;
|
|
+ } ARM64in;
|
|
+ }
|
|
+ ARM64Instr;
|
|
+
|
|
+//ZZ
|
|
+extern ARM64Instr* ARM64Instr_Arith ( HReg, HReg, ARM64RIA*, Bool isAdd );
|
|
+extern ARM64Instr* ARM64Instr_Cmp ( HReg, ARM64RIA*, Bool is64 );
|
|
+extern ARM64Instr* ARM64Instr_Logic ( HReg, HReg, ARM64RIL*, ARM64LogicOp );
|
|
+extern ARM64Instr* ARM64Instr_Test ( HReg, ARM64RIL* );
|
|
+extern ARM64Instr* ARM64Instr_Shift ( HReg, HReg, ARM64RI6*, ARM64ShiftOp );
|
|
+extern ARM64Instr* ARM64Instr_Unary ( HReg, HReg, ARM64UnaryOp );
|
|
+//ZZ extern ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg, ARMRI84* );
|
|
+extern ARM64Instr* ARM64Instr_MovI ( HReg, HReg );
|
|
+extern ARM64Instr* ARM64Instr_Imm64 ( HReg, ULong );
|
|
+extern ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg, ARM64AMode* );
|
|
+extern ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg, ARM64AMode* );
|
|
+extern ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg, ARM64AMode* );
|
|
+extern ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg, ARM64AMode* );
|
|
+//ZZ extern ARMInstr* ARMInstr_Ld8S ( ARMCondCode, HReg, ARMAMode2* );
|
|
+extern ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
|
|
+ ARM64CondCode cond, Bool toFastEP );
|
|
+extern ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
|
|
+ ARM64CondCode cond );
|
|
+extern ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
|
|
+ ARM64CondCode cond, IRJumpKind jk );
|
|
+extern ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
|
|
+ ARM64CondCode cond );
|
|
+extern ARM64Instr* ARM64Instr_Call ( ARM64CondCode, HWord, Int nArgRegs,
|
|
+ RetLoc rloc );
|
|
+extern ARM64Instr* ARM64Instr_AddToSP ( Int simm );
|
|
+extern ARM64Instr* ARM64Instr_FromSP ( HReg dst );
|
|
+extern ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
|
|
+ ARM64MulOp op );
|
|
+
|
|
+//ZZ extern ARMInstr* ARMInstr_Mul ( ARMMulOp op );
|
|
+//ZZ extern ARMInstr* ARMInstr_LdrEX ( Int szB );
|
|
+//ZZ extern ARMInstr* ARMInstr_StrEX ( Int szB );
|
|
+extern ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN,
|
|
+ UInt uimm12 /* 0 .. 16380, 0 % 4 */ );
|
|
+extern ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN,
|
|
+ UInt uimm12 /* 0 .. 32760, 0 % 8 */ );
|
|
+extern ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN );
|
|
+extern ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS );
|
|
+extern ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
|
|
+ UChar armRM );
|
|
+extern ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src );
|
|
+extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src );
|
|
+extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src );
|
|
+extern ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg, HReg, HReg );
|
|
+extern ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, HReg, HReg, HReg );
|
|
+extern ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR );
|
|
+extern ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR );
|
|
+extern ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg );
|
|
+//ZZ extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg );
|
|
+//ZZ extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
|
|
+//ZZ extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
|
|
+//ZZ extern ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo );
|
|
+//ZZ extern ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo );
|
|
+//ZZ extern ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
|
|
+//ZZ HReg dst, HReg src );
|
|
+//ZZ extern ARMInstr* ARMInstr_MFence ( void );
|
|
+//ZZ extern ARMInstr* ARMInstr_CLREX ( void );
|
|
+//ZZ extern ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg, ARMAModeN* );
|
|
+//ZZ extern ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp, HReg, HReg, UInt, Bool );
|
|
+//ZZ extern ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS, ARMNRS*, ARMNRS*,
|
|
+//ZZ UInt, Bool );
|
|
+//ZZ extern ARMInstr* ARMInstr_NDual ( ARMNeonDualOp, HReg, HReg, UInt, Bool );
|
|
+//ZZ extern ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp, HReg, HReg, HReg,
|
|
+//ZZ UInt, Bool );
|
|
+//ZZ extern ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp, HReg, HReg, HReg,
|
|
+//ZZ UInt, Bool );
|
|
+//ZZ extern ARMInstr* ARMInstr_NShl64 ( HReg, HReg, UInt );
|
|
+extern ARM64Instr* ARM64Instr_VImmQ ( HReg, UShort );
|
|
+extern ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX );
|
|
+extern ARM64Instr* ARM64Instr_VQfromXX( HReg rQ, HReg rXhi, HReg rXlo );
|
|
+extern ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo );
|
|
+extern ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src );
|
|
+
|
|
+extern ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
|
|
+ ARM64AMode* amFailAddr );
|
|
+//ZZ extern ARMInstr* ARMInstr_ProfInc ( void );
|
|
+
|
|
+extern void ppARM64Instr ( ARM64Instr* );
|
|
+
|
|
+
|
|
+/* Some functions that insulate the register allocator from details
|
|
+ of the underlying instruction set. */
|
|
+extern void getRegUsage_ARM64Instr ( HRegUsage*, ARM64Instr*, Bool );
|
|
+extern void mapRegs_ARM64Instr ( HRegRemap*, ARM64Instr*, Bool );
|
|
+extern Bool isMove_ARM64Instr ( ARM64Instr*, HReg*, HReg* );
|
|
+extern Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
|
|
+ UChar* buf, Int nbuf, ARM64Instr* i,
|
|
+ Bool mode64,
|
|
+ void* disp_cp_chain_me_to_slowEP,
|
|
+ void* disp_cp_chain_me_to_fastEP,
|
|
+ void* disp_cp_xindir,
|
|
+ void* disp_cp_xassisted );
|
|
+
|
|
+extern void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
|
|
+ HReg rreg, Int offset, Bool );
|
|
+extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
|
|
+ HReg rreg, Int offset, Bool );
|
|
+
|
|
+extern void getAllocableRegs_ARM64 ( Int*, HReg** );
|
|
+extern HInstrArray* iselSB_ARM64 ( IRSB*,
|
|
+ VexArch,
|
|
+ VexArchInfo*,
|
|
+ VexAbiInfo*,
|
|
+ Int offs_Host_EvC_Counter,
|
|
+ Int offs_Host_EvC_FailAddr,
|
|
+ Bool chainingAllowed,
|
|
+ Bool addProfInc,
|
|
+ Addr64 max_ga );
|
|
+
|
|
+/* How big is an event check? This is kind of a kludge because it
|
|
+ depends on the offsets of host_EvC_FAILADDR and
|
|
+ host_EvC_COUNTER. */
|
|
+extern Int evCheckSzB_ARM64 ( void );
|
|
+
|
|
+/* Perform a chaining and unchaining of an XDirect jump. */
|
|
+extern VexInvalRange chainXDirect_ARM64 ( void* place_to_chain,
|
|
+ void* disp_cp_chain_me_EXPECTED,
|
|
+ void* place_to_jump_to );
|
|
+
|
|
+//ZZ extern VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
|
|
+//ZZ void* place_to_jump_to_EXPECTED,
|
|
+//ZZ void* disp_cp_chain_me );
|
|
+//ZZ
|
|
+//ZZ /* Patch the counter location into an existing ProfInc point. */
|
|
+//ZZ extern VexInvalRange patchProfInc_ARM ( void* place_to_patch,
|
|
+//ZZ ULong* location_of_counter );
|
|
+
|
|
+
|
|
+#endif /* ndef __VEX_HOST_ARM64_DEFS_H */
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- end host_arm64_defs.h ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
Index: VEX/priv/ir_opt.c
|
|
===================================================================
|
|
--- VEX/priv/ir_opt.c.orig
|
|
+++ VEX/priv/ir_opt.c
|
|
@@ -1178,6 +1178,14 @@ static Bool isZeroU32 ( IRExpr* e )
|
|
&& e->Iex.Const.con->Ico.U32 == 0);
|
|
}
|
|
|
|
+/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
|
|
+static Bool isZeroU64 ( IRExpr* e )
|
|
+{
|
|
+ return toBool( e->tag == Iex_Const
|
|
+ && e->Iex.Const.con->tag == Ico_U64
|
|
+ && e->Iex.Const.con->Ico.U64 == 0);
|
|
+}
|
|
+
|
|
/* Is this an integer constant with value 0 ? */
|
|
static Bool isZeroU ( IRExpr* e )
|
|
{
|
|
@@ -1224,6 +1232,7 @@ static IRExpr* mkZeroOfPrimopResultType
|
|
case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
|
|
case Iop_Sub32:
|
|
case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
|
|
+ case Iop_And64:
|
|
case Iop_Sub64:
|
|
case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
|
|
case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
|
|
@@ -2149,6 +2158,14 @@ static IRExpr* fold_Expr ( IRExpr** env,
|
|
e2 = e->Iex.Binop.arg1;
|
|
break;
|
|
}
|
|
+ if (/* could handle other And cases here too, but so
|
|
+ far not */
|
|
+ e->Iex.Binop.op == Iop_And64
|
|
+ && (isZeroU64(e->Iex.Binop.arg1)
|
|
+ || isZeroU64(e->Iex.Binop.arg2))) {
|
|
+ e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
|
|
+ break;
|
|
+ }
|
|
break;
|
|
|
|
case Iop_OrV128:
|
|
Index: VEX/priv/host_arm64_isel.c
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ VEX/priv/host_arm64_isel.c
|
|
@@ -0,0 +1,6928 @@
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- begin host_arm64_isel.c ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+/*
|
|
+ This file is part of Valgrind, a dynamic binary instrumentation
|
|
+ framework.
|
|
+
|
|
+ Copyright (C) 2013-2013 OpenWorks
|
|
+ info@open-works.net
|
|
+
|
|
+ This program is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU General Public License as
|
|
+ published by the Free Software Foundation; either version 2 of the
|
|
+ License, or (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful, but
|
|
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ 02110-1301, USA.
|
|
+
|
|
+ The GNU General Public License is contained in the file COPYING.
|
|
+*/
|
|
+
|
|
+#include "libvex_basictypes.h"
|
|
+#include "libvex_ir.h"
|
|
+#include "libvex.h"
|
|
+#include "ir_match.h"
|
|
+
|
|
+#include "main_util.h"
|
|
+#include "main_globals.h"
|
|
+#include "host_generic_regs.h"
|
|
+#include "host_generic_simd64.h" // for 32-bit SIMD helpers
|
|
+#include "host_arm64_defs.h"
|
|
+
|
|
+
|
|
+//ZZ /*---------------------------------------------------------*/
|
|
+//ZZ /*--- ARMvfp control word stuff ---*/
|
|
+//ZZ /*---------------------------------------------------------*/
|
|
+//ZZ
|
|
+//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
|
|
+//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
|
|
+//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
|
|
+//ZZ this corresponds to a FPSCR value of zero.
|
|
+//ZZ
|
|
+//ZZ fpscr should therefore be zero on entry to Vex-generated code, and
|
|
+//ZZ should be unchanged at exit. (Or at least the bottom 28 bits
|
|
+//ZZ should be zero).
|
|
+//ZZ */
|
|
+//ZZ
|
|
+//ZZ #define DEFAULT_FPSCR 0
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISelEnv ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* This carries around:
|
|
+
|
|
+ - A mapping from IRTemp to IRType, giving the type of any IRTemp we
|
|
+ might encounter. This is computed before insn selection starts,
|
|
+ and does not change.
|
|
+
|
|
+ - A mapping from IRTemp to HReg. This tells the insn selector
|
|
+ which virtual register is associated with each IRTemp temporary.
|
|
+ This is computed before insn selection starts, and does not
|
|
+ change. We expect this mapping to map precisely the same set of
|
|
+ IRTemps as the type mapping does.
|
|
+
|
|
+ |vregmap| holds the primary register for the IRTemp.
|
|
+ |vregmapHI| is only used for 128-bit integer-typed
|
|
+ IRTemps. It holds the identity of a second
|
|
+ 64-bit virtual HReg, which holds the high half
|
|
+ of the value.
|
|
+
|
|
+ - The code array, that is, the insns selected so far.
|
|
+
|
|
+ - A counter, for generating new virtual registers.
|
|
+
|
|
+ - The host hardware capabilities word. This is set at the start
|
|
+ and does not change.
|
|
+
|
|
+ - A Bool for indicating whether we may generate chain-me
|
|
+ instructions for control flow transfers, or whether we must use
|
|
+ XAssisted.
|
|
+
|
|
+ - The maximum guest address of any guest insn in this block.
|
|
+ Actually, the address of the highest-addressed byte from any insn
|
|
+ in this block. Is set at the start and does not change. This is
|
|
+ used for detecting jumps which are definitely forward-edges from
|
|
+ this block, and therefore can be made (chained) to the fast entry
|
|
+ point of the destination, thereby avoiding the destination's
|
|
+ event check.
|
|
+
|
|
+ - An IRExpr*, which may be NULL, holding the IR expression (an
|
|
+ IRRoundingMode-encoded value) to which the FPU's rounding mode
|
|
+ was most recently set. Setting to NULL is always safe. Used to
|
|
+ avoid redundant settings of the FPU's rounding mode, as
|
|
+ described in set_FPCR_rounding_mode below.
|
|
+
|
|
+ Note, this is all (well, mostly) host-independent.
|
|
+*/
|
|
+
|
|
+typedef
|
|
+ struct {
|
|
+ /* Constant -- are set at the start and do not change. */
|
|
+ IRTypeEnv* type_env;
|
|
+
|
|
+ HReg* vregmap;
|
|
+ HReg* vregmapHI;
|
|
+ Int n_vregmap;
|
|
+
|
|
+ UInt hwcaps;
|
|
+
|
|
+ Bool chainingAllowed;
|
|
+ Addr64 max_ga;
|
|
+
|
|
+ /* These are modified as we go along. */
|
|
+ HInstrArray* code;
|
|
+ Int vreg_ctr;
|
|
+
|
|
+ IRExpr* previous_rm;
|
|
+ }
|
|
+ ISelEnv;
|
|
+
|
|
+static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
|
|
+{
|
|
+ vassert(tmp >= 0);
|
|
+ vassert(tmp < env->n_vregmap);
|
|
+ return env->vregmap[tmp];
|
|
+}
|
|
+
|
|
+static void addInstr ( ISelEnv* env, ARM64Instr* instr )
|
|
+{
|
|
+ addHInstr(env->code, instr);
|
|
+ if (vex_traceflags & VEX_TRACE_VCODE) {
|
|
+ ppARM64Instr(instr);
|
|
+ vex_printf("\n");
|
|
+ }
|
|
+}
|
|
+
|
|
+static HReg newVRegI ( ISelEnv* env )
|
|
+{
|
|
+ HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
|
|
+ env->vreg_ctr++;
|
|
+ return reg;
|
|
+}
|
|
+
|
|
+static HReg newVRegD ( ISelEnv* env )
|
|
+{
|
|
+ HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
|
|
+ env->vreg_ctr++;
|
|
+ return reg;
|
|
+}
|
|
+
|
|
+//ZZ static HReg newVRegF ( ISelEnv* env )
|
|
+//ZZ {
|
|
+//ZZ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
|
|
+//ZZ env->vreg_ctr++;
|
|
+//ZZ return reg;
|
|
+//ZZ }
|
|
+
|
|
+static HReg newVRegV ( ISelEnv* env )
|
|
+{
|
|
+ HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
|
|
+ env->vreg_ctr++;
|
|
+ return reg;
|
|
+}
|
|
+
|
|
+//ZZ /* These are duplicated in guest_arm_toIR.c */
|
|
+//ZZ static IRExpr* unop ( IROp op, IRExpr* a )
|
|
+//ZZ {
|
|
+//ZZ return IRExpr_Unop(op, a);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
|
|
+//ZZ {
|
|
+//ZZ return IRExpr_Binop(op, a1, a2);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static IRExpr* bind ( Int binder )
|
|
+//ZZ {
|
|
+//ZZ return IRExpr_Binder(binder);
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Forward declarations ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* These are organised as iselXXX and iselXXX_wrk pairs. The
|
|
+ iselXXX_wrk do the real work, but are not to be called directly.
|
|
+ For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
|
|
+ checks that all returned registers are virtual. You should not
|
|
+ call the _wrk version directly.
|
|
+
|
|
+ Because some forms of ARM64 memory amodes are implicitly scaled by
|
|
+ the access size, iselIntExpr_AMode takes an IRType which tells it
|
|
+ the type of the access for which the amode is to be used. This
|
|
+ type needs to be correct, else you'll get incorrect code.
|
|
+*/
|
|
+static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
|
|
+ IRExpr* e, IRType dty );
|
|
+static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
|
|
+ IRExpr* e, IRType dty );
|
|
+
|
|
+static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
|
|
+ ISelEnv* env, IRExpr* e );
|
|
+static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
|
|
+ ISelEnv* env, IRExpr* e );
|
|
+
|
|
+
|
|
+//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
|
|
+//ZZ ISelEnv* env, IRExpr* e );
|
|
+//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo,
|
|
+//ZZ ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
|
|
+//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
|
|
+static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
|
|
+
|
|
+static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Misc helpers ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Generate an amode suitable for a 64-bit sized access relative to
|
|
+ the baseblock register (X21). This generates an RI12 amode, which
|
|
+ means it's scaled by the access size, which is why the access size
|
|
+ -- 64 bit -- is stated explicitly here. Consequently |off| needs
|
|
+ to be divisible by 8. */
|
|
+static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
|
|
+{
|
|
+ vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
|
|
+ vassert((off & 7) == 0); /* ditto */
|
|
+ return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
|
|
+}
|
|
+
|
|
+/* Ditto, for 32 bit accesses. */
|
|
+static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
|
|
+{
|
|
+ vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
|
|
+ vassert((off & 3) == 0); /* ditto */
|
|
+ return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
|
|
+}
|
|
+
|
|
+/* Ditto, for 16 bit accesses. */
|
|
+static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
|
|
+{
|
|
+ vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
|
|
+ vassert((off & 1) == 0); /* ditto */
|
|
+ return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
|
|
+}
|
|
+
|
|
+/* Ditto, for 8 bit accesses. */
|
|
+static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
|
|
+{
|
|
+ vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
|
|
+ return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
|
|
+}
|
|
+
|
|
+static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
|
|
+{
|
|
+ vassert(off < (1<<12));
|
|
+ HReg r = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
|
|
+ ARM64RIA_I12(off,0), True/*isAdd*/));
|
|
+ return r;
|
|
+}
|
|
+
|
|
+static HReg get_baseblock_register ( void )
|
|
+{
|
|
+ return hregARM64_X21();
|
|
+}
|
|
+
|
|
+/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
|
|
+ a new register, and return the new register. */
|
|
+static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
|
|
+{
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
|
|
+ addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
|
|
+ return dst;
|
|
+}
|
|
+
|
|
+/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
|
|
+ a new register, and return the new register. */
|
|
+static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
|
|
+{
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64RI6* n48 = ARM64RI6_I6(48);
|
|
+ addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
|
|
+ return dst;
|
|
+}
|
|
+
|
|
+/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
|
|
+ a new register, and return the new register. */
|
|
+static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
|
|
+{
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64RI6* n48 = ARM64RI6_I6(48);
|
|
+ addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
|
|
+ return dst;
|
|
+}
|
|
+
|
|
+/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
|
|
+ a new register, and return the new register. */
|
|
+static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
|
|
+{
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64RI6* n32 = ARM64RI6_I6(32);
|
|
+ addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
|
|
+ return dst;
|
|
+}
|
|
+
|
|
+/* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
|
|
+ a new register, and return the new register. */
|
|
+static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
|
|
+{
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64RI6* n56 = ARM64RI6_I6(56);
|
|
+ addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
|
|
+ return dst;
|
|
+}
|
|
+
|
|
+static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
|
|
+{ /* Generate code to zero extend an 8 bit value in 'src' to 64 bits, in a new register, and return the new register. */
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64RI6* n56 = ARM64RI6_I6(56); /* 64 - 8 */
|
|
+ addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); /* push bits 63:8 out of the register */
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR)); /* shift back down, filling with zeroes */
|
|
+ return dst;
|
|
+}
|
|
+
|
|
+/* Is this IRExpr_Const(IRConst_U64(0)) ? Non-constant expressions give False; a constant of any type other than U64 fails the assertion below, so only pass I64-typed expressions. */
|
|
+static Bool isZeroU64 ( IRExpr* e ) {
|
|
+ if (e->tag != Iex_Const) return False;
|
|
+ IRConst* con = e->Iex.Const.con;
|
|
+ vassert(con->tag == Ico_U64); /* caller contract, not a "return False" case */
|
|
+ return con->Ico.U64 == 0;
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: FP rounding mode helpers ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Set the FP rounding mode: 'mode' is an I32-typed expression
|
|
+ denoting a value in the range 0 .. 3, indicating a round mode
|
|
+ encoded as per type IRRoundingMode -- the first four values only
|
|
+ (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
|
|
+ FPCR to have the same rounding.
|
|
+
|
|
+ For speed & simplicity, we're setting the *entire* FPCR here.
|
|
+
|
|
+ Setting the rounding mode is expensive. So this function tries to
|
|
+ avoid repeatedly setting the rounding mode to the same thing by
|
|
+ first comparing 'mode' to the 'mode' tree supplied in the previous
|
|
+ call to this function, if any. (The previous value is stored in
|
|
+ env->previous_rm.) If 'mode' is a single IR temporary 't' and
|
|
+ env->previous_rm is also just 't', then the setting is skipped.
|
|
+
|
|
+ This is safe because of the SSA property of IR: an IR temporary can
|
|
+ only be defined once and so will have the same value regardless of
|
|
+ where it appears in the block. Cool stuff, SSA.
|
|
+
|
|
+ A safety condition: all attempts to set the RM must be aware of
|
|
+ this mechanism - by being routed through the functions here.
|
|
+
|
|
+ Of course this only helps in blocks where the RM is set more than
|
|
+ once and it is set to the same value each time, *and* that value is
|
|
+ held in the same IR temporary each time. In order to assure the
|
|
+ latter as much as possible, the IR optimiser takes care to do CSE
|
|
+ on any block with any sign of floating point activity.
|
|
+*/
|
|
+static
|
|
+void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
|
|
+{
|
|
+ vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
|
|
+
|
|
+ /* Do we need to do anything? */
|
|
+ if (env->previous_rm
|
|
+ && env->previous_rm->tag == Iex_RdTmp
|
|
+ && mode->tag == Iex_RdTmp
|
|
+ && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
|
|
+ /* no - setting it to what it was before. */
|
|
+ vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* No luck - we better set it, and remember what we set it to. */
|
|
+ env->previous_rm = mode;
|
|
+
|
|
+ /* Only supporting the rounding-mode bits - the rest of FPCR is set
|
|
+ to zero - so we can set the whole register at once (faster). */
|
|
+
|
|
+ /* This isn't simple, because 'mode' carries an IR rounding
|
|
+ encoding, and we need to translate that to an ARM64 FP one:
|
|
+ The IR encoding:
|
|
+ 00 to nearest (the default)
|
|
+ 10 to +infinity
|
|
+ 01 to -infinity
|
|
+ 11 to zero
|
|
+ The ARM64 FP encoding:
|
|
+ 00 to nearest
|
|
+ 01 to +infinity
|
|
+ 10 to -infinity
|
|
+ 11 to zero
|
|
+ Easy enough to do; just swap the two bits.
|
|
+ */
|
|
+ HReg irrm = iselIntExpr_R(env, mode);
|
|
+ HReg tL = newVRegI(env);
|
|
+ HReg tR = newVRegI(env);
|
|
+ HReg t3 = newVRegI(env);
|
|
+ /* tL = irrm << 1;
|
|
+ tR = irrm >> 1; if we're lucky, these will issue together
|
|
+ tL &= 2;
|
|
+ tR &= 1; ditto
|
|
+ t3 = tL | tR;
|
|
+ t3 <<= 22;
|
|
+ write t3 to FPCR (the ARM64Instr_FPCR insn below, toFPCR direction)
|
|
+ */
|
|
+ ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
|
|
+ ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
|
|
+ vassert(ril_one && ril_two); /* NOTE(review): presumably mb_mkARM64RIL_I yields NULL for unencodable values -- confirm */
|
|
+ addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
|
|
+ addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
|
|
+ addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND)); /* old bit 0, now at bit 1 */
|
|
+ addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND)); /* old bit 1, now at bit 0 */
|
|
+ addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
|
|
+ addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL)); /* place the 2-bit mode at bits 23:22; all other bits are zero */
|
|
+ addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Function call helpers ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Used only in doHelperCall. See big comment in doHelperCall re
|
|
+ handling of register-parameter args. This function figures out
|
|
+ whether evaluation of an expression might require use of a fixed
|
|
+ register. If in doubt return True (safe but suboptimal).
|
|
+*/
|
|
+static
|
|
+Bool mightRequireFixedRegs ( IRExpr* e )
|
|
+{ /* True means: assume evaluating |e| may need a fixed (real) register, which makes doHelperCall pick its slow scheme. */
|
|
+ if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
|
|
+ // These are always "safe" -- either a copy of SP in some
|
|
+ // arbitrary vreg, or a copy of x21, respectively.
|
|
+ return False;
|
|
+ }
|
|
+ /* Else it's a "normal" expression. */
|
|
+ switch (e->tag) {
|
|
+ case Iex_RdTmp: case Iex_Const: case Iex_Get: /* the only forms regarded as safe */
|
|
+ return False;
|
|
+ default: /* anything else: in doubt, so answer True (safe but suboptimal) */
|
|
+ return True;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* Do a complete function call. |guard| is a Ity_Bit expression
|
|
+ indicating whether or not the call happens. If guard==NULL, the
|
|
+ call is unconditional. |retloc| is set to indicate where the
|
|
+ return value is after the call. The caller (of this fn) must
|
|
+ generate code to add |stackAdjustAfterCall| to the stack pointer
|
|
+ after the call is done. Returns True iff it managed to handle this
|
|
+ combination of arg/return types, else returns False. */
|
|
+
|
|
+static
|
|
+Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
|
|
+ /*OUT*/RetLoc* retloc,
|
|
+ ISelEnv* env,
|
|
+ IRExpr* guard,
|
|
+ IRCallee* cee, IRType retTy, IRExpr** args )
|
|
+{
|
|
+ ARM64CondCode cc;
|
|
+ HReg argregs[ARM64_N_ARGREGS];
|
|
+ HReg tmpregs[ARM64_N_ARGREGS];
|
|
+ Bool go_fast;
|
|
+ Int n_args, i, nextArgReg;
|
|
+ ULong target;
|
|
+
|
|
+ vassert(ARM64_N_ARGREGS == 8);
|
|
+
|
|
+ /* Set default returns. We'll update them later if needed. */
|
|
+ *stackAdjustAfterCall = 0;
|
|
+ *retloc = mk_RetLoc_INVALID();
|
|
+
|
|
+ /* These are used for cross-checking that IR-level constraints on
|
|
+ the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
|
|
+ UInt nVECRETs = 0;
|
|
+ UInt nBBPTRs = 0;
|
|
+
|
|
+ /* Marshal args for a call and do the call.
|
|
+
|
|
+ This function only deals with a tiny set of possibilities, which
|
|
+ cover all helpers in practice. The restrictions are that only
|
|
+ arguments in registers are supported, hence only
|
|
+ ARM64_N_ARGREGS x 64 integer bits in total can be passed. In
|
|
+ fact the only supported arg type is I64.
|
|
+
|
|
+ The return type can be I{64,32} or V128. In the V128 case, it
|
|
+ is expected that |args| will contain the special node
|
|
+ IRExpr_VECRET(), in which case this routine generates code to
|
|
+ allocate space on the stack for the vector return value. Since
|
|
+ we are not passing any scalars on the stack, it is enough to
|
|
+ preallocate the return space before marshalling any arguments,
|
|
+ in this case.
|
|
+
|
|
+ |args| may also contain IRExpr_BBPTR(), in which case the
|
|
+ value in x21 is passed as the corresponding argument.
|
|
+
|
|
+ Generating code which is both efficient and correct when
|
|
+ parameters are to be passed in registers is difficult, for the
|
|
+ reasons elaborated in detail in comments attached to
|
|
+ doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
|
|
+ of the method described in those comments.
|
|
+
|
|
+ The problem is split into two cases: the fast scheme and the
|
|
+ slow scheme. In the fast scheme, arguments are computed
|
|
+ directly into the target (real) registers. This is only safe
|
|
+ when we can be sure that computation of each argument will not
|
|
+ trash any real registers set by computation of any other
|
|
+ argument.
|
|
+
|
|
+ In the slow scheme, all args are first computed into vregs, and
|
|
+ once they are all done, they are moved to the relevant real
|
|
+ regs. This always gives correct code, but it also gives a bunch
|
|
+ of vreg-to-rreg moves which are usually redundant but are hard
|
|
+ for the register allocator to get rid of.
|
|
+
|
|
+ To decide which scheme to use, all argument expressions are
|
|
+ first examined. If they are all so simple that it is clear they
|
|
+ will be evaluated without use of any fixed registers, use the
|
|
+ fast scheme, else use the slow scheme. Note also that only
|
|
+ unconditional calls may use the fast scheme, since having to
|
|
+ compute a condition expression could itself trash real
|
|
+ registers.
|
|
+
|
|
+ Note this requires being able to examine an expression and
|
|
+ determine whether or not evaluation of it might use a fixed
|
|
+ register. That requires knowledge of how the rest of this insn
|
|
+ selector works. Currently just the following 3 are regarded as
|
|
+ safe -- hopefully they cover the majority of arguments in
|
|
+ practice: IRExpr_RdTmp IRExpr_Const IRExpr_Get.
|
|
+ */
|
|
+
|
|
+ /* Note that the cee->regparms field is meaningless on ARM64 hosts
|
|
+ (since there is only one calling convention) and so we always
|
|
+ ignore it. */
|
|
+
|
|
+ n_args = 0;
|
|
+ for (i = 0; args[i]; i++) {
|
|
+ IRExpr* arg = args[i];
|
|
+ if (UNLIKELY(arg->tag == Iex_VECRET)) {
|
|
+ nVECRETs++;
|
|
+ } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
|
|
+ nBBPTRs++;
|
|
+ }
|
|
+ n_args++;
|
|
+ }
|
|
+
|
|
+ /* If this fails, the IR is ill-formed */
|
|
+ vassert(nBBPTRs == 0 || nBBPTRs == 1);
|
|
+
|
|
+ /* If we have a VECRET, allocate space on the stack for the return
|
|
+ value, and record the stack pointer after that. */
|
|
+ HReg r_vecRetAddr = INVALID_HREG;
|
|
+ if (nVECRETs == 1) {
|
|
+ vassert(retTy == Ity_V128 || retTy == Ity_V256);
|
|
+ vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
|
|
+ r_vecRetAddr = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_AddToSP(-16));
|
|
+ addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
|
|
+ } else {
|
|
+ // If either of these fail, the IR is ill-formed
|
|
+ vassert(retTy != Ity_V128 && retTy != Ity_V256);
|
|
+ vassert(nVECRETs == 0);
|
|
+ }
|
|
+
|
|
+ argregs[0] = hregARM64_X0();
|
|
+ argregs[1] = hregARM64_X1();
|
|
+ argregs[2] = hregARM64_X2();
|
|
+ argregs[3] = hregARM64_X3();
|
|
+ argregs[4] = hregARM64_X4();
|
|
+ argregs[5] = hregARM64_X5();
|
|
+ argregs[6] = hregARM64_X6();
|
|
+ argregs[7] = hregARM64_X7();
|
|
+
|
|
+ tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
|
|
+ tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
|
|
+
|
|
+ /* First decide which scheme (slow or fast) is to be used. First
|
|
+ assume the fast scheme, and select slow if any contraindications
|
|
+ (wow) appear. */
|
|
+
|
|
+ go_fast = True;
|
|
+
|
|
+ if (guard) {
|
|
+ if (guard->tag == Iex_Const
|
|
+ && guard->Iex.Const.con->tag == Ico_U1
|
|
+ && guard->Iex.Const.con->Ico.U1 == True) {
|
|
+ /* unconditional */
|
|
+ } else {
|
|
+ /* Not manifestly unconditional -- be conservative. */
|
|
+ go_fast = False;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (go_fast) {
|
|
+ for (i = 0; i < n_args; i++) {
|
|
+ if (mightRequireFixedRegs(args[i])) {
|
|
+ go_fast = False;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (go_fast) {
|
|
+ if (retTy == Ity_V128 || retTy == Ity_V256)
|
|
+ go_fast = False;
|
|
+ }
|
|
+
|
|
+ /* At this point the scheme to use has been established. Generate
|
|
+ code to get the arg values into the argument rregs. If we run
|
|
+ out of arg regs, give up. */
|
|
+
|
|
+ if (go_fast) {
|
|
+
|
|
+ /* FAST SCHEME */
|
|
+ nextArgReg = 0;
|
|
+
|
|
+ for (i = 0; i < n_args; i++) {
|
|
+ IRExpr* arg = args[i];
|
|
+
|
|
+ IRType aTy = Ity_INVALID;
|
|
+ if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
|
|
+ aTy = typeOfIRExpr(env->type_env, args[i]);
|
|
+
|
|
+ if (nextArgReg >= ARM64_N_ARGREGS)
|
|
+ return False; /* out of argregs */
|
|
+
|
|
+ if (aTy == Ity_I64) {
|
|
+ addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
|
|
+ iselIntExpr_R(env, args[i]) ));
|
|
+ nextArgReg++;
|
|
+ }
|
|
+ else if (arg->tag == Iex_BBPTR) {
|
|
+ vassert(0); //ATC
|
|
+ addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
|
|
+ hregARM64_X21() ));
|
|
+ nextArgReg++;
|
|
+ }
|
|
+ else if (arg->tag == Iex_VECRET) {
|
|
+ // because of the go_fast logic above, we can't get here,
|
|
+ // since vector return values makes us use the slow path
|
|
+ // instead.
|
|
+ vassert(0);
|
|
+ }
|
|
+ else
|
|
+ return False; /* unhandled arg type */
|
|
+ }
|
|
+
|
|
+ /* Fast scheme only applies for unconditional calls. Hence: */
|
|
+ cc = ARM64cc_AL;
|
|
+
|
|
+ } else {
|
|
+
|
|
+ /* SLOW SCHEME; move via temporaries */
|
|
+ nextArgReg = 0;
|
|
+
|
|
+ for (i = 0; i < n_args; i++) {
|
|
+ IRExpr* arg = args[i];
|
|
+
|
|
+ IRType aTy = Ity_INVALID;
|
|
+ if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
|
|
+ aTy = typeOfIRExpr(env->type_env, args[i]);
|
|
+
|
|
+ if (nextArgReg >= ARM64_N_ARGREGS)
|
|
+ return False; /* out of argregs */
|
|
+
|
|
+ if (aTy == Ity_I64) {
|
|
+ tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
|
|
+ nextArgReg++;
|
|
+ }
|
|
+ else if (arg->tag == Iex_BBPTR) {
|
|
+ vassert(0); //ATC
|
|
+ tmpregs[nextArgReg] = hregARM64_X21();
|
|
+ nextArgReg++;
|
|
+ }
|
|
+ else if (arg->tag == Iex_VECRET) {
|
|
+ vassert(!hregIsInvalid(r_vecRetAddr));
|
|
+ tmpregs[nextArgReg] = r_vecRetAddr;
|
|
+ nextArgReg++;
|
|
+ }
|
|
+ else
|
|
+ return False; /* unhandled arg type */
|
|
+ }
|
|
+
|
|
+ /* Now we can compute the condition. We can't do it earlier
|
|
+ because the argument computations could trash the condition
|
|
+ codes. Be a bit clever to handle the common case where the
|
|
+ guard is 1:Bit. */
|
|
+ cc = ARM64cc_AL;
|
|
+ if (guard) {
|
|
+ if (guard->tag == Iex_Const
|
|
+ && guard->Iex.Const.con->tag == Ico_U1
|
|
+ && guard->Iex.Const.con->Ico.U1 == True) {
|
|
+ /* unconditional -- do nothing */
|
|
+ } else {
|
|
+ cc = iselCondCode( env, guard );
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Move the args to their final destinations. */
|
|
+ for (i = 0; i < nextArgReg; i++) {
|
|
+ vassert(!(hregIsInvalid(tmpregs[i])));
|
|
+ /* None of these insns, including any spill code that might
|
|
+ be generated, may alter the condition codes. */
|
|
+ addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
|
|
+ }
|
|
+
|
|
+ }
|
|
+
|
|
+ /* Should be assured by checks above */
|
|
+ vassert(nextArgReg <= ARM64_N_ARGREGS);
|
|
+
|
|
+ /* Do final checks, set the return values, and generate the call
|
|
+ instruction proper. */
|
|
+ vassert(nBBPTRs == 0 || nBBPTRs == 1);
|
|
+ vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0)); /* inner parens needed: == binds tighter than ?: */
|
|
+ vassert(*stackAdjustAfterCall == 0);
|
|
+ vassert(is_RetLoc_INVALID(*retloc));
|
|
+ switch (retTy) {
|
|
+ case Ity_INVALID:
|
|
+ /* Function doesn't return a value. */
|
|
+ *retloc = mk_RetLoc_simple(RLPri_None);
|
|
+ break;
|
|
+ case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
|
|
+ *retloc = mk_RetLoc_simple(RLPri_Int);
|
|
+ break;
|
|
+ case Ity_V128:
|
|
+ *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
|
|
+ *stackAdjustAfterCall = 16;
|
|
+ break;
|
|
+ case Ity_V256:
|
|
+ vassert(0); // ATC
|
|
+ *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
|
|
+ *stackAdjustAfterCall = 32;
|
|
+ break;
|
|
+ default:
|
|
+ /* IR can denote other possible return types, but we don't
|
|
+ handle those here. */
|
|
+ vassert(0);
|
|
+ }
|
|
+
|
|
+ /* Finally, generate the call itself. This needs the *retloc value
|
|
+ set in the switch above, which is why it's at the end. */
|
|
+
|
|
+ /* nextArgReg doles out argument registers. Since these are
|
|
+ assigned in the order x0 .. x7, its numeric value at this point,
|
|
+ which must be between 0 and 8 inclusive, is going to be equal to
|
|
+ the number of arg regs in use for the call. Hence bake that
|
|
+ number into the call (we'll need to know it when doing register
|
|
+ allocation, to know what regs the call reads.) */
|
|
+
|
|
+ target = (HWord)Ptr_to_ULong(cee->addr);
|
|
+ addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
|
|
+
|
|
+ return True; /* success */
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Integer expressions (64/32 bit) ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Select insns for an integer-typed expression, and add them to the
|
|
+ code list. Return a reg holding the result. This reg will be a
|
|
+ virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
|
|
+ want to modify it, ask for a new vreg, copy it in there, and modify
|
|
+ the copy. The register allocator will do its best to map both
|
|
+ vregs to the same real register, so the copies will often disappear
|
|
+ later in the game.
|
|
+
|
|
+ This should handle expressions of 64- and 32-bit type. All results
|
|
+ are returned in a 64-bit register. For 32-bit expressions, the
|
|
+ upper 32 bits are arbitrary, so you should mask or sign extend
|
|
+ partial values if necessary.
|
|
+*/
|
|
+
|
|
+/* --------------------- AMode --------------------- */
|
|
+
|
|
+/* Return an AMode which computes the value of the specified
|
|
+ expression, possibly also adding insns to the code list as a
|
|
+ result. The expression may only be a 64-bit one.
|
|
+*/
|
|
+
|
|
+static Bool isValidScale ( UChar scale )
|
|
+{ /* Is |scale| an accepted transfer size in bytes? sane_AMode applies this to the szB field of RI12 amodes. */
|
|
+ switch (scale) {
|
|
+ case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
|
|
+ default: return False;
|
|
+ }
|
|
+}
|
|
+
|
|
+static Bool sane_AMode ( ARM64AMode* am )
|
|
+{ /* Sanity-check an amode: every register must be a virtual 64-bit integer register and every immediate must be in the range checked below. Panics on an unknown tag. */
|
|
+ switch (am->tag) {
|
|
+ case ARM64am_RI9: /* register + signed 9-bit offset */
|
|
+ return
|
|
+ toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
|
|
+ && (hregIsVirtual(am->ARM64am.RI9.reg)
|
|
+ /* || sameHReg(am->ARM64am.RI9.reg,
|
|
+ hregARM64_X21()) */ )
|
|
+ && am->ARM64am.RI9.simm9 >= -256
|
|
+ && am->ARM64am.RI9.simm9 <= 255 );
|
|
+ case ARM64am_RI12: /* register + unsigned 12-bit offset, with transfer size szB */
|
|
+ return
|
|
+ toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
|
|
+ && (hregIsVirtual(am->ARM64am.RI12.reg)
|
|
+ /* || sameHReg(am->ARM64am.RI12.reg,
|
|
+ hregARM64_X21()) */ )
|
|
+ && am->ARM64am.RI12.uimm12 < 4096
|
|
+ && isValidScale(am->ARM64am.RI12.szB) );
|
|
+ case ARM64am_RR: /* base register + index register */
|
|
+ return
|
|
+ toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
|
|
+ && hregIsVirtual(am->ARM64am.RR.base)
|
|
+ && hregClass(am->ARM64am.RR.index) == HRcInt64
|
|
+ && hregIsVirtual(am->ARM64am.RR.index) );
|
|
+ default:
|
|
+ vpanic("sane_AMode: unknown ARM64 AMode1 tag");
|
|
+ }
|
|
+}
|
|
+
|
|
+static
|
|
+ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
|
|
+{ /* Checked entry point: builds the amode with iselIntExpr_AMode_wrk (|dty| is forwarded unchanged) and asserts that it passes sane_AMode. */
|
|
+ ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
|
|
+ vassert(sane_AMode(am));
|
|
+ return am;
|
|
+}
|
|
+
|
|
+static
|
|
+ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
|
|
+{ /* Worker for iselIntExpr_AMode: |e| must be I64-typed; |dty| (I8/I16/I32/I64) selects the scale used for the uimm12 form. */
|
|
+ IRType ty = typeOfIRExpr(env->type_env,e);
|
|
+ vassert(ty == Ity_I64);
|
|
+
|
|
+ ULong szBbits = 0;
|
|
+ switch (dty) {
|
|
+ case Ity_I64: szBbits = 3; break;
|
|
+ case Ity_I32: szBbits = 2; break;
|
|
+ case Ity_I16: szBbits = 1; break;
|
|
+ case Ity_I8: szBbits = 0; break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+
|
|
+ /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
|
|
+ we're going to create an amode suitable for LDU* or STU*
|
|
+ instructions, which use unscaled immediate offsets. For Sub64 the offset is negated, so the gate is -255 .. 255: that way the negated value is always a valid simm9. */
|
|
+ if (e->tag == Iex_Binop
|
|
+ && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
|
|
+ && e->Iex.Binop.arg2->tag == Iex_Const
|
|
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
|
|
+ Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
|
|
+ if (simm >= -255 && simm <= 255) {
|
|
+ HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ return ARM64AMode_RI9(reg, (Int)(e->Iex.Binop.op == Iop_Sub64 ? -simm : simm));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Add64(expr, uimm12 * transfer-size) */
|
|
+ if (e->tag == Iex_Binop
|
|
+ && e->Iex.Binop.op == Iop_Add64
|
|
+ && e->Iex.Binop.arg2->tag == Iex_Const
|
|
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
|
|
+ ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
|
|
+ ULong szB = 1 << szBbits;
|
|
+ if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
|
|
+ && (uimm >> szBbits) < 4096) {
|
|
+ HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Add64(expr1, expr2) */
|
|
+ if (e->tag == Iex_Binop
|
|
+ && e->Iex.Binop.op == Iop_Add64) {
|
|
+ HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ return ARM64AMode_RR(reg1, reg2);
|
|
+ }
|
|
+
|
|
+ /* Doesn't match anything in particular. Generate it into
|
|
+ a register and use that. */
|
|
+ HReg reg = iselIntExpr_R(env, e);
|
|
+ return ARM64AMode_RI9(reg, 0);
|
|
+}
|
|
+
|
|
+//ZZ /* --------------------- AModeV --------------------- */
|
|
+//ZZ
|
|
+//ZZ /* Return an AModeV which computes the value of the specified
|
|
+//ZZ expression, possibly also adding insns to the code list as a
|
|
+//ZZ result. The expression may only be a 32-bit one.
|
|
+//ZZ */
|
|
+//ZZ
|
|
+//ZZ static Bool sane_AModeV ( ARMAModeV* am )
|
|
+//ZZ {
|
|
+//ZZ return toBool( hregClass(am->reg) == HRcInt32
|
|
+//ZZ && hregIsVirtual(am->reg)
|
|
+//ZZ && am->simm11 >= -1020 && am->simm11 <= 1020
|
|
+//ZZ && 0 == (am->simm11 & 3) );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
|
|
+//ZZ vassert(sane_AModeV(am));
|
|
+//ZZ return am;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
|
|
+//ZZ vassert(ty == Ity_I32);
|
|
+//ZZ
|
|
+//ZZ /* {Add32,Sub32}(expr, simm8 << 2) */
|
|
+//ZZ if (e->tag == Iex_Binop
|
|
+//ZZ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
|
|
+//ZZ && e->Iex.Binop.arg2->tag == Iex_Const
|
|
+//ZZ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
|
|
+//ZZ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
|
|
+//ZZ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
|
|
+//ZZ HReg reg;
|
|
+//ZZ if (e->Iex.Binop.op == Iop_Sub32)
|
|
+//ZZ simm = -simm;
|
|
+//ZZ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+//ZZ return mkARMAModeV(reg, simm);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* Doesn't match anything in particular. Generate it into
|
|
+//ZZ a register and use that. */
|
|
+//ZZ {
|
|
+//ZZ HReg reg = iselIntExpr_R(env, e);
|
|
+//ZZ return mkARMAModeV(reg, 0);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* -------------------- AModeN -------------------- */
|
|
+//ZZ
|
|
+//ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ return iselIntExpr_AModeN_wrk(env, e);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ HReg reg = iselIntExpr_R(env, e);
|
|
+//ZZ return mkARMAModeN_R(reg);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /* --------------------- RI84 --------------------- */
|
|
+//ZZ
|
|
+//ZZ /* Select instructions to generate 'e' into a RI84. If mayInv is
|
|
+//ZZ true, then the caller will also accept an I84 form that denotes
|
|
+//ZZ 'not e'. In this case didInv may not be NULL, and *didInv is set
|
|
+//ZZ to True. This complication is so as to allow generation of an RI84
|
|
+//ZZ which is suitable for use in either an AND or BIC instruction,
|
|
+//ZZ without knowing (before this call) which one.
|
|
+//ZZ */
|
|
+//ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
|
|
+//ZZ ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ ARMRI84* ri;
|
|
+//ZZ if (mayInv)
|
|
+//ZZ vassert(didInv != NULL);
|
|
+//ZZ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
|
|
+//ZZ /* sanity checks ... */
|
|
+//ZZ switch (ri->tag) {
|
|
+//ZZ case ARMri84_I84:
|
|
+//ZZ return ri;
|
|
+//ZZ case ARMri84_R:
|
|
+//ZZ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
|
|
+//ZZ vassert(hregIsVirtual(ri->ARMri84.R.reg));
|
|
+//ZZ return ri;
|
|
+//ZZ default:
|
|
+//ZZ vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* DO NOT CALL THIS DIRECTLY ! */
|
|
+//ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
|
|
+//ZZ ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
|
|
+//ZZ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
|
|
+//ZZ
|
|
+//ZZ if (didInv) *didInv = False;
|
|
+//ZZ
|
|
+//ZZ /* special case: immediate */
|
|
+//ZZ if (e->tag == Iex_Const) {
|
|
+//ZZ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
|
|
+//ZZ switch (e->Iex.Const.con->tag) {
|
|
+//ZZ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
|
|
+//ZZ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
|
|
+//ZZ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
|
|
+//ZZ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
|
|
+//ZZ }
|
|
+//ZZ if (fitsIn8x4(&u8, &u4, u)) {
|
|
+//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
|
|
+//ZZ }
|
|
+//ZZ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
|
|
+//ZZ vassert(didInv);
|
|
+//ZZ *didInv = True;
|
|
+//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
|
|
+//ZZ }
|
|
+//ZZ /* else fail, fall through to default case */
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* default case: calculate into a register and return that */
|
|
+//ZZ {
|
|
+//ZZ HReg r = iselIntExpr_R ( env, e );
|
|
+//ZZ return ARMRI84_R(r);
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/* --------------------- RIA --------------------- */
|
|
+
|
|
+/* Select instructions to generate 'e' into a RIA. */
|
|
+
|
|
+static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
|
|
+{ /* Checked entry point: gets the RIA from iselIntExpr_RIA_wrk and asserts it is either an imm12 (< 4096) with shift 0 or 12, or a virtual 64-bit integer register. */
|
|
+ ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
|
|
+ /* sanity checks ... */
|
|
+ switch (ri->tag) {
|
|
+ case ARM64riA_I12:
|
|
+ vassert(ri->ARM64riA.I12.imm12 < 4096);
|
|
+ vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
|
|
+ return ri;
|
|
+ case ARM64riA_R:
|
|
+ vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
|
|
+ vassert(hregIsVirtual(ri->ARM64riA.R.reg));
|
|
+ return ri;
|
|
+ default:
|
|
+ vpanic("iselIntExpr_RIA: unknown arm RIA tag");
|
|
+ }
|
|
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY ! Go through iselIntExpr_RIA, which sanity-checks the result. */
|
|
+static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
|
|
+{ /* U64/U32 constants lying wholly in bits 11:0 or bits 23:12 become an I12 immediate (shift 0 or 12); anything else is computed into a register. */
|
|
+ IRType ty = typeOfIRExpr(env->type_env,e);
|
|
+ vassert(ty == Ity_I64 || ty == Ity_I32);
|
|
+
|
|
+ /* special case: immediate */
|
|
+ if (e->tag == Iex_Const) {
|
|
+ ULong u = 0xF000000ULL; /* invalid */
|
|
+ switch (e->Iex.Const.con->tag) {
|
|
+ case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
|
|
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
|
|
+ default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
|
|
+ }
|
|
+ if (0 == (u & ~(0xFFFULL << 0))) /* no bits set outside 11:0 */
|
|
+ return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
|
|
+ if (0 == (u & ~(0xFFFULL << 12))) /* no bits set outside 23:12 */
|
|
+ return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
|
|
+ /* else fail, fall through to default case */
|
|
+ }
|
|
+
|
|
+ /* default case: calculate into a register and return that */
|
|
+ {
|
|
+ HReg r = iselIntExpr_R ( env, e );
|
|
+ return ARM64RIA_R(r);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* --------------------- RIL --------------------- */
|
|
+
|
|
+/* Select instructions to generate 'e' into a RIL. At this point we
|
|
+ have to deal with the strange bitfield-immediate encoding for logic
|
|
+ instructions. */
|
|
+
|
|
+
|
|
+// The following four functions
|
|
+// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
|
|
+// are copied, with modifications, from
|
|
+// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
|
|
+// which has the following copyright notice:
|
|
+/*
|
|
+ Copyright 2013, ARM Limited
|
|
+ All rights reserved.
|
|
+
|
|
+ Redistribution and use in source and binary forms, with or without
|
|
+ modification, are permitted provided that the following conditions are met:
|
|
+
|
|
+ * Redistributions of source code must retain the above copyright notice,
|
|
+ this list of conditions and the following disclaimer.
|
|
+ * Redistributions in binary form must reproduce the above copyright notice,
|
|
+ this list of conditions and the following disclaimer in the documentation
|
|
+ and/or other materials provided with the distribution.
|
|
+ * Neither the name of ARM Limited nor the names of its contributors may be
|
|
+ used to endorse or promote products derived from this software without
|
|
+ specific prior written permission.
|
|
+
|
|
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
|
|
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
|
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+
|
|
+static Int CountLeadingZeros(ULong value, Int width)
|
|
+{ /* Number of consecutive zero bits in |value| counting down from bit |width|-1; gives |width| when the low |width| bits are all zero. */
|
|
+ vassert(width == 32 || width == 64);
|
|
+ Int count = 0;
|
|
+ ULong bit_test = 1ULL << (width - 1); /* start at the top bit of the |width|-bit field */
|
|
+ while ((count < width) && ((bit_test & value) == 0)) {
|
|
+ count++;
|
|
+ bit_test >>= 1;
|
|
+ }
|
|
+ return count;
|
|
+}
|
|
+
|
|
+static Int CountTrailingZeros(ULong value, Int width)
|
|
+{ /* Number of consecutive zero bits in |value| counting up from bit 0; gives |width| when bits 0 .. |width|-1 are all zero. */
|
|
+ vassert(width == 32 || width == 64);
|
|
+ Int count = 0;
|
|
+ while ((count < width) && (((value >> count) & 1) == 0)) { /* count < width is tested first, so the shift amount never reaches 64 */
|
|
+ count++;
|
|
+ }
|
|
+ return count;
|
|
+}
|
|
+
|
|
+static Int CountSetBits(ULong value, Int width)
|
|
+{ /* Number of 1 bits among the low |width| bits of |value|. */
|
|
+ // TODO: Other widths could be added here, as the implementation already
|
|
+ // supports them.
|
|
+ vassert(width == 32 || width == 64);
|
|
+
|
|
+ // Mask out unused bits to ensure that they are not counted.
|
|
+ value &= (0xffffffffffffffffULL >> (64-width));
|
|
+
|
|
+ // Add up the set bits.
|
|
+ // The algorithm works by adding pairs of bit fields together iteratively,
|
|
+ // where the size of each bit field doubles each time.
|
|
+ // An example for an 8-bit value:
|
|
+ // Bits: h g f e d c b a
|
|
+ // \ | \ | \ | \ |
|
|
+ // value = h+g f+e d+c b+a
|
|
+ // \ | \ |
|
|
+ // value = h+g+f+e d+c+b+a
|
|
+ // \ |
|
|
+ // value = h+g+f+e+d+c+b+a
|
|
+ value = ((value >> 1) & 0x5555555555555555) + (value & 0x5555555555555555);
|
|
+ value = ((value >> 2) & 0x3333333333333333) + (value & 0x3333333333333333);
|
|
+ value = ((value >> 4) & 0x0f0f0f0f0f0f0f0f) + (value & 0x0f0f0f0f0f0f0f0f);
|
|
+ value = ((value >> 8) & 0x00ff00ff00ff00ff) + (value & 0x00ff00ff00ff00ff);
|
|
+ value = ((value >> 16) & 0x0000ffff0000ffff) + (value & 0x0000ffff0000ffff);
|
|
+ value = ((value >> 32) & 0x00000000ffffffff) + (value & 0x00000000ffffffff);
|
|
+
|
|
+ return value; /* at most 64, so narrowing ULong to Int loses nothing */
|
|
+}
|
|
+
|
|
+static Bool isImmLogical ( /*OUT*/UInt* n,
|
|
+ /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
|
|
+ ULong value, UInt width ) /* width must be 32 or 64 (asserted below) */
|
|
+{
|
|
+ // Test if a given value can be encoded in the immediate field of a
|
|
+ // logical instruction.
|
|
+
|
|
+ // If it can be encoded, the function returns True, and the values
|
|
+ // pointed to by n, imm_s and imm_r are updated with immediates
|
|
+ // encoded in the format required by the corresponding fields in the
|
|
+ // logical instruction. If it cannot be encoded, the function
|
|
+ // returns False, and the values pointed to by n, imm_s and imm_r
|
|
+ // are undefined (they may already have been overwritten).
|
|
+ vassert(n != NULL && imm_s != NULL && imm_r != NULL);
|
|
+ vassert(width == 32 || width == 64);
|
|
+
|
|
+ // Logical immediates are encoded using parameters n, imm_s and imm_r using
|
|
+ // the following table:
|
|
+ //
|
|
+ //    N   imms    immr    size        S             R
|
|
+ //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
|
|
+ //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
|
|
+ //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
|
|
+ //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
|
|
+ //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
|
|
+ //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
|
|
+ // (s bits must not be all set)
|
|
+ //
|
|
+ // A pattern is constructed of size bits, where the least significant S+1
|
|
+ // bits are set. The pattern is rotated right by R, and repeated across a
|
|
+ // 32 or 64-bit value, depending on destination register width.
|
|
+ //
|
|
+ // To test if an arbitrary immediate can be encoded using this scheme, an
|
|
+ // iterative algorithm is used.
|
|
+ //
|
|
+ // TODO: This code does not consider using X/W register overlap to support
|
|
+ // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
|
|
+ // are an encodable logical immediate.
|
|
+
|
|
+ // 1. If the value has all set or all clear bits, it can't be encoded.
|
|
+ if ((value == 0) || (value == 0xffffffffffffffffULL) ||
|
|
+ ((width == 32) && (value == 0xffffffff))) {
|
|
+ return False;
|
|
+ }
|
|
+
|
|
+ UInt lead_zero = CountLeadingZeros(value, width);
|
|
+ UInt lead_one = CountLeadingZeros(~value, width);
|
|
+ UInt trail_zero = CountTrailingZeros(value, width);
|
|
+ UInt trail_one = CountTrailingZeros(~value, width);
|
|
+ UInt set_bits = CountSetBits(value, width);
|
|
+
|
|
+ // The fixed bits in the immediate s field.
|
|
+ // If width == 64 (X reg), start at 0xFFFFFF80.
|
|
+ // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
|
|
+ // widths won't be executed.
|
|
+ Int imm_s_fixed = (width == 64) ? -128 : -64;
|
|
+ Int imm_s_mask = 0x3F; // keeps only the low six bits when forming *imm_s
|
|
+
|
|
+ for (;;) {
|
|
+ // 2. If the value is two bits wide, it can be encoded.
|
|
+ if (width == 2) {
|
|
+ *n = 0;
|
|
+ *imm_s = 0x3C;
|
|
+ *imm_r = (value & 3) - 1; // low two bits are expected to be 01 or 10 here (step 1 rejected all-0/all-1), giving 0 or 1
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ *n = (width == 64) ? 1 : 0;
|
|
+ *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
|
|
+ if ((lead_zero + set_bits) == width) {
|
|
+ *imm_r = 0; // the set bits already sit at the bottom: no rotation
|
|
+ } else {
|
|
+ *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
|
|
+ }
|
|
+
|
|
+ // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
|
|
+ // the bit width of the value, it can be encoded.
|
|
+ if (lead_zero + trail_zero + set_bits == width) {
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ // 4. If the sum of leading ones, trailing ones and unset bits in the
|
|
+ // value is equal to the bit width of the value, it can be encoded.
|
|
+ if (lead_one + trail_one + (width - set_bits) == width) {
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ // 5. If the most-significant half of the bitwise value is equal to the
|
|
+ // least-significant half, return to step 2 using the least-significant
|
|
+ // half of the value.
|
|
+ ULong mask = (1ULL << (width >> 1)) - 1; // selects the low width/2 bits
|
|
+ if ((value & mask) == ((value >> (width >> 1)) & mask)) {
|
|
+ width >>= 1; // NOTE: lead_*/trail_* are not recomputed for the new width
|
|
+ set_bits >>= 1; // two equal halves: each holds half of the set bits
|
|
+ imm_s_fixed >>= 1; // NOTE(review): relies on >> of a negative Int being an arithmetic shift
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ // 6. Otherwise, the value can't be encoded.
|
|
+ return False;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* Try to express |imm64| as a 64-bit logical immediate.  Returns an
|
|
+   I13-form RIL built from the encoding on success, or NULL if
|
|
+   |imm64| has no such encoding. */
|
|
+
|
|
+static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
|
|
+{
|
|
+   UInt n = 0, imm_s = 0, imm_r = 0;
|
|
+   if (isImmLogical(&n, &imm_s, &imm_r, imm64, 64)) {
|
|
+      /* The three fields must fit their 1-, 6- and 6-bit slots. */
|
|
+      vassert(n < 2 && imm_s < 64 && imm_r < 64);
|
|
+      return ARM64RIL_I13(n, imm_r, imm_s);
|
|
+   }
|
|
+   return NULL;
|
|
+}
|
|
+
|
|
+/* So, finally .. */
|
|
+
|
|
+/* Select 'e' into a RIL (logical immediate or register), checking
|
|
+   that what the worker produced is well formed before returning it. */
|
|
+static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+   ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
|
|
+
|
|
+   /* Immediate form: every field must fit its encoding slot. */
|
|
+   if (ri->tag == ARM64riL_I13) {
|
|
+      vassert(ri->ARM64riL.I13.bitN < 2);
|
|
+      vassert(ri->ARM64riL.I13.immR < 64);
|
|
+      vassert(ri->ARM64riL.I13.immS < 64);
|
|
+      return ri;
|
|
+   }
|
|
+
|
|
+   /* Register form: must be a virtual 64-bit integer register. */
|
|
+   if (ri->tag == ARM64riL_R) {
|
|
+      vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
|
|
+      vassert(hregIsVirtual(ri->ARM64riL.R.reg));
|
|
+      return ri;
|
|
+   }
|
|
+
|
|
+   vpanic("iselIntExpr_RIL: unknown arm RIL tag");
|
|
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY ! */
|
|
+static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+   IRType ty = typeOfIRExpr(env->type_env,e);
|
|
+   vassert(ty == Ity_I64 || ty == Ity_I32);
|
|
+
|
|
+   /* A constant may be usable directly as a logical immediate. */
|
|
+   if (e->tag == Iex_Const) {
|
|
+      ARM64RIL* maybe;
|
|
+      if (ty != Ity_I64) {
|
|
+         vassert(ty == Ity_I32);
|
|
+         vassert(e->Iex.Const.con->tag == Ico_U32);
|
|
+         ULong lo32 = (ULong)e->Iex.Const.con->Ico.U32;
|
|
+         /* Try the zero-extended value first ... */
|
|
+         maybe = mb_mkARM64RIL_I(lo32);
|
|
+         /* ... and then the value copied into both halves, since it
|
|
+            doesn't matter what winds up in the upper 32 bits. */
|
|
+         if (maybe == NULL)
|
|
+            maybe = mb_mkARM64RIL_I((lo32 << 32) | lo32);
|
|
+      } else {
|
|
+         vassert(e->Iex.Const.con->tag == Ico_U64);
|
|
+         maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
|
|
+      }
|
|
+      if (maybe != NULL)
|
|
+         return maybe;
|
|
+      /* no immediate form exists; use the register path below */
|
|
+   }
|
|
+
|
|
+   /* General case: compute the value into a register. */
|
|
+   return ARM64RIL_R(iselIntExpr_R(env, e));
|
|
+}
|
|
+
|
|
+
|
|
+/* --------------------- RI6 --------------------- */
|
|
+
|
|
+/* Select instructions to generate 'e' into a RI6. */
|
|
+
|
|
+/* Checked front end for iselIntExpr_RI6_wrk: the result is either an
|
|
+   immediate in the range 1 .. 63 or a virtual 64-bit integer
|
|
+   register. */
|
|
+static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+   ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
|
|
+
|
|
+   /* Immediate form: zero and anything above 63 are not allowed. */
|
|
+   if (ri->tag == ARM64ri6_I6) {
|
|
+      vassert(ri->ARM64ri6.I6.imm6 < 64);
|
|
+      vassert(ri->ARM64ri6.I6.imm6 > 0);
|
|
+      return ri;
|
|
+   }
|
|
+
|
|
+   /* Register form. */
|
|
+   if (ri->tag == ARM64ri6_R) {
|
|
+      vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
|
|
+      vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
|
|
+      return ri;
|
|
+   }
|
|
+
|
|
+   vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
|
|
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY ! */
|
|
+static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+   IRType ty = typeOfIRExpr(env->type_env,e);
|
|
+   vassert(ty == Ity_I64 || ty == Ity_I8);
|
|
+
|
|
+   /* An 8-bit constant in the range 1 .. 63 becomes an immediate. */
|
|
+   if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U8) {
|
|
+      UInt amt = e->Iex.Const.con->Ico.U8;
|
|
+      if (amt >= 1 && amt <= 63)
|
|
+         return ARM64RI6_I6(amt);
|
|
+      /* out of range; use the register path below */
|
|
+   }
|
|
+
|
|
+   /* Everything else is computed into a register. */
|
|
+   return ARM64RI6_R(iselIntExpr_R(env, e));
|
|
+}
|
|
+
|
|
+
|
|
+/* ------------------- CondCode ------------------- */
|
|
+
|
|
+/* Generate code to evaluate a bit-typed expression.  The value
|
|
+   returned is the condition code that corresponds to the expression
|
|
+   notionally having returned 1. */
|
|
+
|
|
+static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+   ARM64CondCode cc;
|
|
+   cc = iselCondCode_wrk(env,e);
|
|
+   /* The worker must never hand back NV. */
|
|
+   vassert(cc != ARM64cc_NV);
|
|
+   return cc;
|
|
+}
|
|
+
|
|
+/* Worker for iselCondCode.  |e| must have type Ity_I1.  For each
|
|
+   pattern recognised below this adds a Test or Cmp instruction to
|
|
+   |env| and returns the condition code under which |e| is true.  An
|
|
+   expression matching no pattern is printed and causes a vpanic.
|
|
+   iselCondCode wraps this function and checks its result. */
|
|
+static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+   vassert(e);
|
|
+   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
|
|
+
|
|
+   /* var */
|
|
+   if (e->tag == Iex_RdTmp) {
|
|
+      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
|
|
+      /* Cmp doesn't modify rTmp; so this is OK. */
|
|
+      ARM64RIL* one = mb_mkARM64RIL_I(1);
|
|
+      vassert(one);
|
|
+      addInstr(env, ARM64Instr_Test(rTmp, one));
|
|
+      return ARM64cc_NE;
|
|
+   }
|
|
+
|
|
+   /* Not1(e) */
|
|
+   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
|
|
+      /* Generate code for the arg, and negate the test condition */
|
|
+      ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
|
|
+      /* NOTE(review): iselCondCode asserts that it never returns NV,
|
|
+         and no pattern visible in this function returns AL, so this
|
|
+         first branch looks unreachable -- confirm. */
|
|
+      if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
|
|
+         return ARM64cc_AL;
|
|
+      } else {
|
|
+         /* Flipping bit 0 gives the opposite condition
|
|
+            (presumably the usual ARM cc numbering -- confirm). */
|
|
+         return 1 ^ cc;
|
|
+      }
|
|
+   }
|
|
+
|
|
+   /* --- patterns rooted at: 64to1 --- */
|
|
+
|
|
+   if (e->tag == Iex_Unop
|
|
+       && e->Iex.Unop.op == Iop_64to1) {
|
|
+      HReg      rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+      ARM64RIL* one  = mb_mkARM64RIL_I(1);
|
|
+      vassert(one); /* '1' must be representable */
|
|
+      addInstr(env, ARM64Instr_Test(rTmp, one));
|
|
+      return ARM64cc_NE;
|
|
+   }
|
|
+
|
|
+   /* --- patterns rooted at: CmpNEZ8 --- */
|
|
+
|
|
+   if (e->tag == Iex_Unop
|
|
+       && e->Iex.Unop.op == Iop_CmpNEZ8) {
|
|
+      HReg      r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+      ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
|
|
+      /* mb_mkARM64RIL_I returns NULL for unencodable values; check
|
|
+         the result here as the other call sites in this function do,
|
|
+         rather than handing a possible NULL to ARM64Instr_Test. */
|
|
+      vassert(xFF); /* '0xFF' must be representable */
|
|
+      addInstr(env, ARM64Instr_Test(r1, xFF));
|
|
+      return ARM64cc_NE;
|
|
+   }
|
|
+
|
|
+   /* --- patterns rooted at: CmpNEZ64 --- */
|
|
+
|
|
+   if (e->tag == Iex_Unop
|
|
+       && e->Iex.Unop.op == Iop_CmpNEZ64) {
|
|
+      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+      ARM64RIA* zero = ARM64RIA_I12(0,0);
|
|
+      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
|
|
+      return ARM64cc_NE;
|
|
+   }
|
|
+
|
|
+   /* --- patterns rooted at: CmpNEZ32 --- */
|
|
+
|
|
+   if (e->tag == Iex_Unop
|
|
+       && e->Iex.Unop.op == Iop_CmpNEZ32) {
|
|
+      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+      ARM64RIA* zero = ARM64RIA_I12(0,0);
|
|
+      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
|
|
+      return ARM64cc_NE;
|
|
+   }
|
|
+
|
|
+   /* --- Cmp*64*(x,y) --- */
|
|
+   if (e->tag == Iex_Binop
|
|
+       && (e->Iex.Binop.op == Iop_CmpEQ64
|
|
+           || e->Iex.Binop.op == Iop_CmpNE64
|
|
+           || e->Iex.Binop.op == Iop_CmpLT64S
|
|
+           || e->Iex.Binop.op == Iop_CmpLT64U
|
|
+           || e->Iex.Binop.op == Iop_CmpLE64S
|
|
+           || e->Iex.Binop.op == Iop_CmpLE64U)) {
|
|
+      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
|
|
+      addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
|
|
+      switch (e->Iex.Binop.op) {
|
|
+         case Iop_CmpEQ64:  return ARM64cc_EQ;
|
|
+         case Iop_CmpNE64:  return ARM64cc_NE;
|
|
+         case Iop_CmpLT64S: return ARM64cc_LT;
|
|
+         case Iop_CmpLT64U: return ARM64cc_CC;
|
|
+         case Iop_CmpLE64S: return ARM64cc_LE;
|
|
+         case Iop_CmpLE64U: return ARM64cc_LS;
|
|
+         default: vpanic("iselCondCode(arm64): CmpXX64");
|
|
+      }
|
|
+   }
|
|
+
|
|
+   /* --- Cmp*32*(x,y) --- */
|
|
+   if (e->tag == Iex_Binop
|
|
+       && (e->Iex.Binop.op == Iop_CmpEQ32
|
|
+           || e->Iex.Binop.op == Iop_CmpNE32
|
|
+           || e->Iex.Binop.op == Iop_CmpLT32S
|
|
+           || e->Iex.Binop.op == Iop_CmpLT32U
|
|
+           || e->Iex.Binop.op == Iop_CmpLE32S
|
|
+           || e->Iex.Binop.op == Iop_CmpLE32U)) {
|
|
+      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
|
|
+      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
|
|
+      switch (e->Iex.Binop.op) {
|
|
+         case Iop_CmpEQ32:  return ARM64cc_EQ;
|
|
+         case Iop_CmpNE32:  return ARM64cc_NE;
|
|
+         case Iop_CmpLT32S: return ARM64cc_LT;
|
|
+         case Iop_CmpLT32U: return ARM64cc_CC;
|
|
+         case Iop_CmpLE32S: return ARM64cc_LE;
|
|
+         case Iop_CmpLE32U: return ARM64cc_LS;
|
|
+         default: vpanic("iselCondCode(arm64): CmpXX32");
|
|
+      }
|
|
+   }
|
|
+
|
|
+//ZZ    /* const */
|
|
+//ZZ    /* Constant 1:Bit */
|
|
+//ZZ    if (e->tag == Iex_Const) {
|
|
+//ZZ       HReg r;
|
|
+//ZZ       vassert(e->Iex.Const.con->tag == Ico_U1);
|
|
+//ZZ       vassert(e->Iex.Const.con->Ico.U1 == True
|
|
+//ZZ               || e->Iex.Const.con->Ico.U1 == False);
|
|
+//ZZ       r = newVRegI(env);
|
|
+//ZZ       addInstr(env, ARMInstr_Imm32(r, 0));
|
|
+//ZZ       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
|
|
+//ZZ       return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
|
|
+//ZZ    }
|
|
+//ZZ
|
|
+//ZZ    // JRS 2013-Jan-03: this seems completely nonsensical
|
|
+//ZZ    /* --- CasCmpEQ* --- */
|
|
+//ZZ    /* Ist_Cas has a dummy argument to compare with, so comparison is
|
|
+//ZZ       always true. */
|
|
+//ZZ    //if (e->tag == Iex_Binop
|
|
+//ZZ    //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
|
|
+//ZZ    //        || e->Iex.Binop.op == Iop_CasCmpEQ16
|
|
+//ZZ    //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
|
|
+//ZZ    //   return ARMcc_AL;
|
|
+//ZZ    //}
|
|
+
|
|
+   /* No pattern matched: show the offending expression and give up. */
|
|
+   ppIRExpr(e);
|
|
+   vpanic("iselCondCode");
|
|
+}
|
|
+
|
|
+
|
|
+/* --------------------- Reg --------------------- */
|
|
+
|
|
+/* Select 'e' into an integer register.  The real work is done by
|
|
+   iselIntExpr_R_wrk; this wrapper only checks that the register it
|
|
+   hands back is a virtual one of class HRcInt64. */
|
|
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+   HReg r;
|
|
+   r = iselIntExpr_R_wrk(env, e);
|
|
+   /* Debugging aid, normally compiled out: print the expression. */
|
|
+#  if 0
|
|
+   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
|
|
+#  endif
|
|
+   /* sanity checks on the result */
|
|
+   vassert(hregClass(r) == HRcInt64);
|
|
+   vassert(hregIsVirtual(r));
|
|
+   return r;
|
|
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY ! */
|
|
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+ IRType ty = typeOfIRExpr(env->type_env,e);
|
|
+ vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
|
|
+
|
|
+ switch (e->tag) {
|
|
+
|
|
+ /* --------- TEMP --------- */
|
|
+ case Iex_RdTmp: {
|
|
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
|
|
+ }
|
|
+
|
|
+ /* --------- LOAD --------- */
|
|
+ case Iex_Load: {
|
|
+ HReg dst = newVRegI(env);
|
|
+
|
|
+ if (e->Iex.Load.end != Iend_LE)
|
|
+ goto irreducible;
|
|
+
|
|
+ if (ty == Ity_I64) {
|
|
+ ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
|
|
+ addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
|
|
+ return dst;
|
|
+ }
|
|
+ if (ty == Ity_I32) {
|
|
+ ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
|
|
+ addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
|
|
+ return dst;
|
|
+ }
|
|
+ if (ty == Ity_I16) {
|
|
+ ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
|
|
+ addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
|
|
+ return dst;
|
|
+ }
|
|
+ if (ty == Ity_I8) {
|
|
+ ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
|
|
+ addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
|
|
+ return dst;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* --------- BINARY OP --------- */
|
|
+ case Iex_Binop: {
|
|
+
|
|
+ ARM64LogicOp lop = 0; /* invalid */
|
|
+ ARM64ShiftOp sop = 0; /* invalid */
|
|
+
|
|
+ /* Special-case 0-x into a Neg instruction. Not because it's
|
|
+ particularly useful but more so as to give value flow using
|
|
+ this instruction, so as to check its assembly correctness for
|
|
+ implementation of Left32/Left64. */
|
|
+ switch (e->Iex.Binop.op) {
|
|
+ case Iop_Sub64:
|
|
+ if (isZeroU64(e->Iex.Binop.arg1)) {
|
|
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ HReg dst = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
|
|
+ return dst;
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* ADD/SUB */
|
|
+ switch (e->Iex.Binop.op) {
|
|
+ case Iop_Add64: case Iop_Add32:
|
|
+ case Iop_Sub64: case Iop_Sub32: {
|
|
+ Bool isAdd = e->Iex.Binop.op == Iop_Add64
|
|
+ || e->Iex.Binop.op == Iop_Add32;
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
|
|
+ addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
|
|
+ return dst;
|
|
+ }
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* AND/OR/XOR */
|
|
+ switch (e->Iex.Binop.op) {
|
|
+ case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
|
|
+ case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
|
|
+ case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
|
|
+ log_binop: {
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
|
|
+ addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
|
|
+ return dst;
|
|
+ }
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* SHL/SHR/SAR */
|
|
+ switch (e->Iex.Binop.op) {
|
|
+ case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
|
|
+ case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
|
|
+ case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
|
|
+ sh_binop: {
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
|
|
+ addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_Shr32:
|
|
+ case Iop_Sar32: {
|
|
+ Bool zx = e->Iex.Binop.op == Iop_Shr32;
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
|
|
+ HReg dst = zx ? widen_z_32_to_64(env, argL)
|
|
+ : widen_s_32_to_64(env, argL);
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
|
|
+ return dst;
|
|
+ }
|
|
+ default: break;
|
|
+ }
|
|
+
|
|
+ /* MUL */
|
|
+ if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ HReg dst = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
|
|
+ return dst;
|
|
+ }
|
|
+
|
|
+ /* MULL */
|
|
+ if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
|
|
+ Bool isS = e->Iex.Binop.op == Iop_MullS32;
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
|
|
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
|
|
+ HReg dst = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
|
|
+ return dst;
|
|
+ }
|
|
+
|
|
+ /* Handle misc other ops. */
|
|
+
|
|
+//ZZ if (e->Iex.Binop.op == Iop_Max32U) {
|
|
+//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg dst = newVRegI(env);
|
|
+//ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
|
|
+//ZZ ARMRI84_R(argR)));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(dst, argL));
|
|
+//ZZ addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+
|
|
+ if (e->Iex.Binop.op == Iop_32HLto64) {
|
|
+ HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ HReg lo32 = widen_z_32_to_64(env, lo32s);
|
|
+ HReg hi32 = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
|
|
+ ARM64sh_SHL));
|
|
+ addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
|
|
+ ARM64lo_OR));
|
|
+ return hi32;
|
|
+ }
|
|
+
|
|
+ if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
|
|
+ Bool isD = e->Iex.Binop.op == Iop_CmpF64;
|
|
+ HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
|
|
+ HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg imm = newVRegI(env);
|
|
+ /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
|
|
+ create in dst, the IRCmpF64Result encoded result. */
|
|
+ addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
|
|
+ addInstr(env, ARM64Instr_Imm64(dst, 0));
|
|
+ addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
|
|
+ addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
|
|
+ addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
|
|
+ addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
|
|
+ addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
|
|
+ addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
|
|
+ addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
|
|
+ addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
|
|
+ return dst;
|
|
+ }
|
|
+
|
|
+ { /* local scope */
|
|
+ ARM64CvtOp cvt_op = ARM64cvt_INVALID;
|
|
+ Bool srcIsD = False;
|
|
+ switch (e->Iex.Binop.op) {
|
|
+ case Iop_F64toI64S:
|
|
+ cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
|
|
+ case Iop_F64toI64U:
|
|
+ cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
|
|
+ case Iop_F64toI32S:
|
|
+ cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
|
|
+ case Iop_F64toI32U:
|
|
+ cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
|
|
+ case Iop_F32toI32S:
|
|
+ cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
|
|
+ case Iop_F32toI64U:
|
|
+ cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ if (cvt_op != ARM64cvt_INVALID) {
|
|
+ /* This is all a bit dodgy, because we can't handle a
|
|
+ non-constant (not-known-at-JIT-time) rounding mode
|
|
+ indication. That's because there's no instruction
|
|
+ AFAICS that does this conversion but rounds according to
|
|
+ FPCR.RM, so we have to bake the rounding mode into the
|
|
+ instruction right now. But that should be OK because
|
|
+ (1) the front end attaches a literal Irrm_ value to the
|
|
+ conversion binop, and (2) iropt will never float that
|
|
+ off via CSE, into a literal. Hence we should always
|
|
+ have an Irrm_ value as the first arg. */
|
|
+ IRExpr* arg1 = e->Iex.Binop.arg1;
|
|
+ if (arg1->tag != Iex_Const) goto irreducible;
|
|
+ IRConst* arg1con = arg1->Iex.Const.con;
|
|
+ vassert(arg1con->tag == Ico_U32); // else ill-typed IR
|
|
+ UInt irrm = arg1con->Ico.U32;
|
|
+ /* Find the ARM-encoded equivalent for |irrm|. */
|
|
+ UInt armrm = 4; /* impossible */
|
|
+ switch (irrm) {
|
|
+ case Irrm_NEAREST: armrm = 0; break;
|
|
+ case Irrm_NegINF: armrm = 2; break;
|
|
+ case Irrm_PosINF: armrm = 1; break;
|
|
+ case Irrm_ZERO: armrm = 3; break;
|
|
+ default: goto irreducible;
|
|
+ }
|
|
+ HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
|
|
+ (env, e->Iex.Binop.arg2);
|
|
+ HReg dst = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
|
|
+ return dst;
|
|
+ }
|
|
+ } /* local scope */
|
|
+
|
|
+//ZZ if (e->Iex.Binop.op == Iop_GetElem8x8
|
|
+//ZZ || e->Iex.Binop.op == Iop_GetElem16x4
|
|
+//ZZ || e->Iex.Binop.op == Iop_GetElem32x2) {
|
|
+//ZZ HReg res = newVRegI(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt index, size;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports GetElem with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
|
|
+//ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
|
|
+//ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (e->Iex.Binop.op == Iop_GetElem8x16
|
|
+//ZZ || e->Iex.Binop.op == Iop_GetElem16x8
|
|
+//ZZ || e->Iex.Binop.op == Iop_GetElem32x4) {
|
|
+//ZZ HReg res = newVRegI(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt index, size;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports GetElem with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
|
|
+//ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
|
|
+//ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+
|
|
+ /* All cases involving host-side helper calls. */
|
|
+ void* fn = NULL;
|
|
+ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Add16x2:
|
|
+//ZZ fn = &h_generic_calc_Add16x2; break;
|
|
+//ZZ case Iop_Sub16x2:
|
|
+//ZZ fn = &h_generic_calc_Sub16x2; break;
|
|
+//ZZ case Iop_HAdd16Ux2:
|
|
+//ZZ fn = &h_generic_calc_HAdd16Ux2; break;
|
|
+//ZZ case Iop_HAdd16Sx2:
|
|
+//ZZ fn = &h_generic_calc_HAdd16Sx2; break;
|
|
+//ZZ case Iop_HSub16Ux2:
|
|
+//ZZ fn = &h_generic_calc_HSub16Ux2; break;
|
|
+//ZZ case Iop_HSub16Sx2:
|
|
+//ZZ fn = &h_generic_calc_HSub16Sx2; break;
|
|
+//ZZ case Iop_QAdd16Sx2:
|
|
+//ZZ fn = &h_generic_calc_QAdd16Sx2; break;
|
|
+//ZZ case Iop_QAdd16Ux2:
|
|
+//ZZ fn = &h_generic_calc_QAdd16Ux2; break;
|
|
+//ZZ case Iop_QSub16Sx2:
|
|
+//ZZ fn = &h_generic_calc_QSub16Sx2; break;
|
|
+//ZZ case Iop_Add8x4:
|
|
+//ZZ fn = &h_generic_calc_Add8x4; break;
|
|
+//ZZ case Iop_Sub8x4:
|
|
+//ZZ fn = &h_generic_calc_Sub8x4; break;
|
|
+//ZZ case Iop_HAdd8Ux4:
|
|
+//ZZ fn = &h_generic_calc_HAdd8Ux4; break;
|
|
+//ZZ case Iop_HAdd8Sx4:
|
|
+//ZZ fn = &h_generic_calc_HAdd8Sx4; break;
|
|
+//ZZ case Iop_HSub8Ux4:
|
|
+//ZZ fn = &h_generic_calc_HSub8Ux4; break;
|
|
+//ZZ case Iop_HSub8Sx4:
|
|
+//ZZ fn = &h_generic_calc_HSub8Sx4; break;
|
|
+//ZZ case Iop_QAdd8Sx4:
|
|
+//ZZ fn = &h_generic_calc_QAdd8Sx4; break;
|
|
+//ZZ case Iop_QAdd8Ux4:
|
|
+//ZZ fn = &h_generic_calc_QAdd8Ux4; break;
|
|
+//ZZ case Iop_QSub8Sx4:
|
|
+//ZZ fn = &h_generic_calc_QSub8Sx4; break;
|
|
+//ZZ case Iop_QSub8Ux4:
|
|
+//ZZ fn = &h_generic_calc_QSub8Ux4; break;
|
|
+//ZZ case Iop_Sad8Ux4:
|
|
+//ZZ fn = &h_generic_calc_Sad8Ux4; break;
|
|
+//ZZ case Iop_QAdd32S:
|
|
+//ZZ fn = &h_generic_calc_QAdd32S; break;
|
|
+//ZZ case Iop_QSub32S:
|
|
+//ZZ fn = &h_generic_calc_QSub32S; break;
|
|
+//ZZ case Iop_QSub16Ux2:
|
|
+//ZZ fn = &h_generic_calc_QSub16Ux2; break;
|
|
+ case Iop_DivU32:
|
|
+ fn = &h_calc_udiv32_w_arm_semantics; break;
|
|
+ case Iop_DivS32:
|
|
+ fn = &h_calc_sdiv32_w_arm_semantics; break;
|
|
+ case Iop_DivU64:
|
|
+ fn = &h_calc_udiv64_w_arm_semantics; break;
|
|
+ case Iop_DivS64:
|
|
+ fn = &h_calc_sdiv64_w_arm_semantics; break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (fn) {
|
|
+ HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ HReg res = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
|
|
+ addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
|
|
+ addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn),
|
|
+ 2, mk_RetLoc_simple(RLPri_Int) ));
|
|
+ addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
|
|
+ return res;
|
|
+ }
|
|
+
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* --------- UNARY OP --------- */
|
|
+ case Iex_Unop: {
|
|
+
|
|
+ switch (e->Iex.Unop.op) {
|
|
+ case Iop_16Uto64: {
|
|
+ /* This probably doesn't occur often enough to be worth
|
|
+ rolling the extension into the load. */
|
|
+ IRExpr* arg = e->Iex.Unop.arg;
|
|
+ HReg src = iselIntExpr_R(env, arg);
|
|
+ HReg dst = widen_z_16_to_64(env, src);
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_32Uto64: {
|
|
+ IRExpr* arg = e->Iex.Unop.arg;
|
|
+ if (arg->tag == Iex_Load) {
|
|
+ /* This correctly zero extends because _LdSt32 is
|
|
+ defined to do a zero extending load. */
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64AMode* am
|
|
+ = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
|
|
+ addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
|
|
+ return dst;
|
|
+ }
|
|
+ /* else be lame and mask it */
|
|
+ HReg src = iselIntExpr_R(env, arg);
|
|
+ HReg dst = widen_z_32_to_64(env, src);
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
|
|
+ case Iop_8Uto64: {
|
|
+ IRExpr* arg = e->Iex.Unop.arg;
|
|
+ if (arg->tag == Iex_Load) {
|
|
+ /* This correctly zero extends because _LdSt8 is
|
|
+ defined to do a zero extending load. */
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64AMode* am
|
|
+ = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
|
|
+ addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
|
|
+ return dst;
|
|
+ }
|
|
+ /* else be lame and mask it */
|
|
+ HReg src = iselIntExpr_R(env, arg);
|
|
+ HReg dst = widen_z_8_to_64(env, src);
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_128HIto64: {
|
|
+ HReg rHi, rLo;
|
|
+ iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
|
|
+ return rHi; /* and abandon rLo */
|
|
+ }
|
|
+ case Iop_8Sto32: case Iop_8Sto64: {
|
|
+ IRExpr* arg = e->Iex.Unop.arg;
|
|
+ HReg src = iselIntExpr_R(env, arg);
|
|
+ HReg dst = widen_s_8_to_64(env, src);
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_16Sto32: case Iop_16Sto64: {
|
|
+ IRExpr* arg = e->Iex.Unop.arg;
|
|
+ HReg src = iselIntExpr_R(env, arg);
|
|
+ HReg dst = widen_s_16_to_64(env, src);
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_32Sto64: {
|
|
+ IRExpr* arg = e->Iex.Unop.arg;
|
|
+ HReg src = iselIntExpr_R(env, arg);
|
|
+ HReg dst = widen_s_32_to_64(env, src);
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_Not32:
|
|
+ case Iop_Not64: {
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+ addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_Clz64: {
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+ addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_Left32:
|
|
+ case Iop_Left64: {
|
|
+ /* Left64(src) = src | -src. Left32 can use the same
|
|
+ implementation since in that case we don't care what
|
|
+ the upper 32 bits become. */
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+ addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
|
|
+ addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
|
|
+ ARM64lo_OR));
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_CmpwNEZ64: {
|
|
+ /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
|
|
+ = Left64(src) >>s 63 */
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+ addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
|
|
+ addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
|
|
+ ARM64lo_OR));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
|
|
+ ARM64sh_SAR));
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_CmpwNEZ32: {
|
|
+ /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
|
|
+ = Left64(src & 0xFFFFFFFF) >>s 63 */
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+ HReg src = widen_z_32_to_64(env, pre);
|
|
+ addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
|
|
+ addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
|
|
+ ARM64lo_OR));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
|
|
+ ARM64sh_SAR));
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_V128to64: case Iop_V128HIto64: {
|
|
+ HReg dst = newVRegI(env);
|
|
+ HReg src = iselV128Expr(env, e->Iex.Unop.arg);
|
|
+ UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
|
|
+ addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
|
|
+ return dst;
|
|
+ }
|
|
+ case Iop_1Sto32:
|
|
+ case Iop_1Sto64: {
|
|
+ /* As with the iselStmt case for 'tmp:I1 = expr', we could
|
|
+ do a lot better here if it ever became necessary. */
|
|
+ HReg zero = newVRegI(env);
|
|
+ HReg one = newVRegI(env);
|
|
+ HReg dst = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_Imm64(zero, 0));
|
|
+ addInstr(env, ARM64Instr_Imm64(one, 1));
|
|
+ ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
|
|
+ addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
|
|
+ ARM64sh_SHL));
|
|
+ addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
|
|
+ ARM64sh_SAR));
|
|
+ return dst;
|
|
+ }
|
|
+
|
|
+//ZZ case Iop_64HIto32: {
|
|
+//ZZ HReg rHi, rLo;
|
|
+//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
|
|
+//ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */
|
|
+//ZZ }
|
|
+//ZZ case Iop_64to32: {
|
|
+//ZZ HReg rHi, rLo;
|
|
+//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
|
|
+//ZZ return rLo; /* similar stupid comment to the above ... */
|
|
+//ZZ }
|
|
+//ZZ case Iop_64to8: {
|
|
+//ZZ HReg rHi, rLo;
|
|
+//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
|
|
+//ZZ rHi = tHi;
|
|
+//ZZ rLo = tLo;
|
|
+//ZZ } else {
|
|
+//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
|
|
+//ZZ }
|
|
+//ZZ return rLo;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_1Uto32:
|
|
+//ZZ /* 1Uto32(tmp). Since I1 values generated into registers
|
|
+//ZZ are guaranteed to have value either only zero or one,
|
|
+//ZZ we can simply return the value of the register in this
|
|
+//ZZ case. */
|
|
+//ZZ if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
|
|
+//ZZ HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+//ZZ /* else fall through */
|
|
+//ZZ case Iop_1Uto8: {
|
|
+//ZZ HReg dst = newVRegI(env);
|
|
+//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
|
|
+//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_1Sto32: {
|
|
+//ZZ HReg dst = newVRegI(env);
|
|
+//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
|
|
+//ZZ ARMRI5* amt = ARMRI5_I5(31);
|
|
+//ZZ /* This is really rough. We could do much better here;
|
|
+//ZZ perhaps mvn{cond} dst, #0 as the second insn?
|
|
+//ZZ (same applies to 1Sto64) */
|
|
+//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
|
|
+//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
|
|
+//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
|
|
+//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_Clz32: {
|
|
+//ZZ /* Count leading zeroes; easy on ARM. */
|
|
+//ZZ HReg dst = newVRegI(env);
|
|
+//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_CmpwNEZ32: {
|
|
+//ZZ HReg dst = newVRegI(env);
|
|
+//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
|
|
+//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_ReinterpF32asI32: {
|
|
+//ZZ HReg dst = newVRegI(env);
|
|
+//ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+
|
|
+ case Iop_64to32:
|
|
+ case Iop_64to16:
|
|
+ case Iop_64to8:
|
|
+ /* These are no-ops. */
|
|
+ return iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+//ZZ /* All Unop cases involving host-side helper calls. */
|
|
+//ZZ void* fn = NULL;
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_CmpNEZ16x2:
|
|
+//ZZ fn = &h_generic_calc_CmpNEZ16x2; break;
|
|
+//ZZ case Iop_CmpNEZ8x4:
|
|
+//ZZ fn = &h_generic_calc_CmpNEZ8x4; break;
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (fn) {
|
|
+//ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+//ZZ HReg res = newVRegI(env);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
|
|
+//ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
|
|
+//ZZ 1, RetLocInt ));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* --------- GET --------- */
|
|
+ case Iex_Get: {
|
|
+ if (ty == Ity_I64
|
|
+ && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < 8192-8) {
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64AMode* am
|
|
+ = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
|
|
+ addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
|
|
+ return dst;
|
|
+ }
|
|
+ if (ty == Ity_I32
|
|
+ && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < 4096-4) {
|
|
+ HReg dst = newVRegI(env);
|
|
+ ARM64AMode* am
|
|
+ = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
|
|
+ addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
|
|
+ return dst;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* --------- CCALL --------- */
|
|
+ case Iex_CCall: {
|
|
+ HReg dst = newVRegI(env);
|
|
+ vassert(ty == e->Iex.CCall.retty);
|
|
+
|
|
+ /* be very restrictive for now. Only 64-bit ints allowed for
|
|
+ args, and 64 bits for return type. Don't forget to change
|
|
+ the RetLoc if more types are allowed in future. */
|
|
+ if (e->Iex.CCall.retty != Ity_I64)
|
|
+ goto irreducible;
|
|
+
|
|
+ /* Marshal args, do the call, clear stack. */
|
|
+ UInt addToSp = 0;
|
|
+ RetLoc rloc = mk_RetLoc_INVALID();
|
|
+ Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
|
|
+ e->Iex.CCall.cee, e->Iex.CCall.retty,
|
|
+ e->Iex.CCall.args );
|
|
+ /* */
|
|
+ if (ok) {
|
|
+ vassert(is_sane_RetLoc(rloc));
|
|
+ vassert(rloc.pri == RLPri_Int);
|
|
+ vassert(addToSp == 0);
|
|
+ addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
|
|
+ return dst;
|
|
+ }
|
|
+ /* else fall through; will hit the irreducible: label */
|
|
+ }
|
|
+
|
|
+ /* --------- LITERAL --------- */
|
|
+ /* 64-bit literals */
|
|
+ case Iex_Const: {
|
|
+ ULong u = 0;
|
|
+ HReg dst = newVRegI(env);
|
|
+ switch (e->Iex.Const.con->tag) {
|
|
+ case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
|
|
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
|
|
+ case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
|
|
+ case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
|
|
+ default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
|
|
+ }
|
|
+ addInstr(env, ARM64Instr_Imm64(dst, u));
|
|
+ return dst;
|
|
+ }
|
|
+
|
|
+ /* --------- MULTIPLEX --------- */
|
|
+ case Iex_ITE: {
|
|
+ /* ITE(ccexpr, iftrue, iffalse) */
|
|
+ if (ty == Ity_I64 || ty == Ity_I32) {
|
|
+ ARM64CondCode cc;
|
|
+ HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
|
|
+ HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
|
|
+ HReg dst = newVRegI(env);
|
|
+ cc = iselCondCode(env, e->Iex.ITE.cond);
|
|
+ addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
|
|
+ return dst;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ default:
|
|
+ break;
|
|
+ } /* switch (e->tag) */
|
|
+
|
|
+ /* We get here if no pattern matched. */
|
|
+ irreducible:
|
|
+ ppIRExpr(e);
|
|
+ vpanic("iselIntExpr_R: cannot reduce tree");
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Integer expressions (128 bit) ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Compute a 128-bit value into a register pair, which is returned via
|
|
+ the first two parameters (*rHi and *rLo). Both are asserted below to
|
|
+ be virtual registers of class HRcInt64; in any case they must not be
|
|
+ changed by subsequent code emitted by the caller. */
|
|
+
|
|
+static void iselInt128Expr ( HReg* rHi, HReg* rLo,
|
|
+ ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+ iselInt128Expr_wrk(rHi, rLo, env, e); /* the worker fills in *rHi, *rLo */
|
|
+# if 0
|
|
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); /* disabled debug output */
|
|
+# endif
|
|
+ vassert(hregClass(*rHi) == HRcInt64);
|
|
+ vassert(hregIsVirtual(*rHi));
|
|
+ vassert(hregClass(*rLo) == HRcInt64);
|
|
+ vassert(hregIsVirtual(*rLo));
|
|
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY ! Call iselInt128Expr, which checks the results. */
|
|
+static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
|
|
+ ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+ vassert(e);
|
|
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
|
|
+ /* Only Iop_MullU64 and Iop_64HLto128 are handled; all else hits the vpanic. */
|
|
+ /* --------- BINARY ops --------- */
|
|
+ if (e->tag == Iex_Binop) {
|
|
+ switch (e->Iex.Binop.op) {
|
|
+ /* 64 x 64 -> 128 multiply. MullS64 is commented out, so syned is False. */
|
|
+ case Iop_MullU64:
|
|
+ /*case Iop_MullS64:*/ {
|
|
+ Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ HReg dstLo = newVRegI(env);
|
|
+ HReg dstHi = newVRegI(env);
|
|
+ addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
|
|
+ ARM64mul_PLAIN));
|
|
+ addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
|
|
+ syned ? ARM64mul_SX : ARM64mul_ZX));
|
|
+ *rHi = dstHi;
|
|
+ *rLo = dstLo;
|
|
+ return;
|
|
+ }
|
|
+ /* 64HLto128(e1,e2): e1 is selected into *rHi, e2 into *rLo */
|
|
+ case Iop_64HLto128:
|
|
+ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ return;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ } /* if (e->tag == Iex_Binop) */
|
|
+ /* We get here if no pattern matched: show the expression, then vpanic. */
|
|
+ ppIRExpr(e);
|
|
+ vpanic("iselInt128Expr(arm64)");
|
|
+}
|
|
+
|
|
+
|
|
+//ZZ /* -------------------- 64-bit -------------------- */
|
|
+//ZZ
|
|
+//ZZ /* Compute a 64-bit value into a register pair, which is returned as
|
|
+//ZZ the first two parameters. As with iselIntExpr_R, these may be
|
|
+//ZZ either real or virtual regs; in any case they must not be changed
|
|
+//ZZ by subsequent code emitted by the caller. */
|
|
+//ZZ
|
|
+//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ iselInt64Expr_wrk(rHi, rLo, env, e);
|
|
+//ZZ # if 0
|
|
+//ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
|
|
+//ZZ # endif
|
|
+//ZZ vassert(hregClass(*rHi) == HRcInt32);
|
|
+//ZZ vassert(hregIsVirtual(*rHi));
|
|
+//ZZ vassert(hregClass(*rLo) == HRcInt32);
|
|
+//ZZ vassert(hregIsVirtual(*rLo));
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* DO NOT CALL THIS DIRECTLY ! */
|
|
+//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ vassert(e);
|
|
+//ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
|
|
+//ZZ
|
|
+//ZZ /* 64-bit literal */
|
|
+//ZZ if (e->tag == Iex_Const) {
|
|
+//ZZ ULong w64 = e->Iex.Const.con->Ico.U64;
|
|
+//ZZ UInt wHi = toUInt(w64 >> 32);
|
|
+//ZZ UInt wLo = toUInt(w64);
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ vassert(e->Iex.Const.con->tag == Ico_U64);
|
|
+//ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi));
|
|
+//ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* read 64-bit IRTemp */
|
|
+//ZZ if (e->tag == Iex_RdTmp) {
|
|
+//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ HReg tmp = iselNeon64Expr(env, e);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ } else {
|
|
+//ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* 64-bit load */
|
|
+//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
|
|
+//ZZ HReg tLo, tHi, rA;
|
|
+//ZZ vassert(e->Iex.Load.ty == Ity_I64);
|
|
+//ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr);
|
|
+//ZZ tHi = newVRegI(env);
|
|
+//ZZ tLo = newVRegI(env);
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
|
|
+//ZZ tHi, ARMAMode1_RI(rA, 4)));
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
|
|
+//ZZ tLo, ARMAMode1_RI(rA, 0)));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* 64-bit GET */
|
|
+//ZZ if (e->tag == Iex_Get) {
|
|
+//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
|
|
+//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- BINARY ops --------- */
|
|
+//ZZ if (e->tag == Iex_Binop) {
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ
|
|
+//ZZ /* 32 x 32 -> 64 multiply */
|
|
+//ZZ case Iop_MullS32:
|
|
+//ZZ case Iop_MullU32: {
|
|
+//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
|
|
+//ZZ ? ARMmul_SX : ARMmul_ZX;
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
|
|
+//ZZ addInstr(env, ARMInstr_Mul(mop));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_Or64: {
|
|
+//ZZ HReg xLo, xHi, yLo, yHi;
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
|
|
+//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_Add64: {
|
|
+//ZZ HReg xLo, xHi, yLo, yHi;
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
|
|
+//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* 32HLto64(e1,e2) */
|
|
+//ZZ case Iop_32HLto64: {
|
|
+//ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+//ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- UNARY ops --------- */
|
|
+//ZZ if (e->tag == Iex_Unop) {
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ
|
|
+//ZZ /* ReinterpF64asI64 */
|
|
+//ZZ case Iop_ReinterpF64asI64: {
|
|
+//ZZ HReg dstHi = newVRegI(env);
|
|
+//ZZ HReg dstLo = newVRegI(env);
|
|
+//ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
|
|
+//ZZ *rHi = dstHi;
|
|
+//ZZ *rLo = dstLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* Left64(e) */
|
|
+//ZZ case Iop_Left64: {
|
|
+//ZZ HReg yLo, yHi;
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ HReg zero = newVRegI(env);
|
|
+//ZZ /* yHi:yLo = arg */
|
|
+//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
|
|
+//ZZ /* zero = 0 */
|
|
+//ZZ addInstr(env, ARMInstr_Imm32(zero, 0));
|
|
+//ZZ /* tLo = 0 - yLo, and set carry */
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
|
|
+//ZZ tLo, zero, ARMRI84_R(yLo)));
|
|
+//ZZ /* tHi = 0 - yHi - carry */
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC,
|
|
+//ZZ tHi, zero, ARMRI84_R(yHi)));
|
|
+//ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
|
|
+//ZZ back in, so as to give the final result
|
|
+//ZZ tHi:tLo = arg | -arg. */
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* CmpwNEZ64(e) */
|
|
+//ZZ case Iop_CmpwNEZ64: {
|
|
+//ZZ HReg srcLo, srcHi;
|
|
+//ZZ HReg tmp1 = newVRegI(env);
|
|
+//ZZ HReg tmp2 = newVRegI(env);
|
|
+//ZZ /* srcHi:srcLo = arg */
|
|
+//ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
|
|
+//ZZ /* tmp1 = srcHi | srcLo */
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
|
|
+//ZZ tmp1, srcHi, ARMRI84_R(srcLo)));
|
|
+//ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
|
|
+//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
|
|
+//ZZ tmp2, tmp2, ARMRI84_R(tmp1)));
|
|
+//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR,
|
|
+//ZZ tmp2, tmp2, ARMRI5_I5(31)));
|
|
+//ZZ *rHi = tmp2;
|
|
+//ZZ *rLo = tmp2;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_1Sto64: {
|
|
+//ZZ HReg dst = newVRegI(env);
|
|
+//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
|
|
+//ZZ ARMRI5* amt = ARMRI5_I5(31);
|
|
+//ZZ /* This is really rough. We could do much better here;
|
|
+//ZZ perhaps mvn{cond} dst, #0 as the second insn?
|
|
+//ZZ (same applies to 1Sto32) */
|
|
+//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
|
|
+//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
|
|
+//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
|
|
+//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
|
|
+//ZZ *rHi = dst;
|
|
+//ZZ *rLo = dst;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ } /* if (e->tag == Iex_Unop) */
|
|
+//ZZ
|
|
+//ZZ /* --------- MULTIPLEX --------- */
|
|
+//ZZ if (e->tag == Iex_ITE) { // VFD
|
|
+//ZZ IRType tyC;
|
|
+//ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
|
|
+//ZZ ARMCondCode cc;
|
|
+//ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
|
|
+//ZZ vassert(tyC == Ity_I1);
|
|
+//ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
|
|
+//ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
|
|
+//ZZ dstHi = newVRegI(env);
|
|
+//ZZ dstLo = newVRegI(env);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
|
|
+//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
|
|
+//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
|
|
+//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
|
|
+//ZZ *rHi = dstHi;
|
|
+//ZZ *rLo = dstLo;
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* It is convenient sometimes to call iselInt64Expr even when we
|
|
+//ZZ have NEON support (e.g. in do_helper_call we need 64-bit
|
|
+//ZZ arguments as 2 x 32 regs). */
|
|
+//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
|
|
+//ZZ HReg tHi = newVRegI(env);
|
|
+//ZZ HReg tLo = newVRegI(env);
|
|
+//ZZ HReg tmp = iselNeon64Expr(env, e);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
|
|
+//ZZ *rHi = tHi;
|
|
+//ZZ *rLo = tLo;
|
|
+//ZZ return ;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ppIRExpr(e);
|
|
+//ZZ vpanic("iselInt64Expr");
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /*---------------------------------------------------------*/
|
|
+//ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/
|
|
+//ZZ /*---------------------------------------------------------*/
|
|
+//ZZ
|
|
+//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ HReg r = iselNeon64Expr_wrk( env, e );
|
|
+//ZZ vassert(hregClass(r) == HRcFlt64);
|
|
+//ZZ vassert(hregIsVirtual(r));
|
|
+//ZZ return r;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* DO NOT CALL THIS DIRECTLY */
|
|
+//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
|
|
+//ZZ {
|
|
+//ZZ IRType ty = typeOfIRExpr(env->type_env, e);
|
|
+//ZZ MatchInfo mi;
|
|
+//ZZ vassert(e);
|
|
+//ZZ vassert(ty == Ity_I64);
|
|
+//ZZ
|
|
+//ZZ if (e->tag == Iex_RdTmp) {
|
|
+//ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (e->tag == Iex_Const) {
|
|
+//ZZ HReg rLo, rHi;
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ iselInt64Expr(&rHi, &rLo, env, e);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* 64-bit load */
|
|
+//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
|
|
+//ZZ vassert(ty == Ity_I64);
|
|
+//ZZ addInstr(env, ARMInstr_NLdStD(True, res, am));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* 64-bit GET */
|
|
+//ZZ if (e->tag == Iex_Get) {
|
|
+//ZZ HReg addr = newVRegI(env);
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ vassert(ty == Ity_I64);
|
|
+//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
|
|
+//ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- BINARY ops --------- */
|
|
+//ZZ if (e->tag == Iex_Binop) {
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ
|
|
+//ZZ /* 32 x 32 -> 64 multiply */
|
|
+//ZZ case Iop_MullS32:
|
|
+//ZZ case Iop_MullU32: {
|
|
+//ZZ HReg rLo, rHi;
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ iselInt64Expr(&rHi, &rLo, env, e);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_And64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
|
|
+//ZZ res, argL, argR, 4, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Or64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
|
|
+//ZZ res, argL, argR, 4, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Xor64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
|
|
+//ZZ res, argL, argR, 4, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* 32HLto64(e1,e2) */
|
|
+//ZZ case Iop_32HLto64: {
|
|
+//ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_Add8x8:
|
|
+//ZZ case Iop_Add16x4:
|
|
+//ZZ case Iop_Add32x2:
|
|
+//ZZ case Iop_Add64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Add8x8: size = 0; break;
|
|
+//ZZ case Iop_Add16x4: size = 1; break;
|
|
+//ZZ case Iop_Add32x2: size = 2; break;
|
|
+//ZZ case Iop_Add64: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Add32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Recps32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Rsqrts32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ // These 6 verified 18 Apr 2013
|
|
+//ZZ case Iop_InterleaveHI32x2:
|
|
+//ZZ case Iop_InterleaveLO32x2:
|
|
+//ZZ case Iop_InterleaveOddLanes8x8:
|
|
+//ZZ case Iop_InterleaveEvenLanes8x8:
|
|
+//ZZ case Iop_InterleaveOddLanes16x4:
|
|
+//ZZ case Iop_InterleaveEvenLanes16x4: {
|
|
+//ZZ HReg rD = newVRegD(env);
|
|
+//ZZ HReg rM = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool resRd; // is the result in rD or rM ?
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
|
|
+//ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
|
|
+//ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
|
|
+//ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
|
|
+//ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
|
|
+//ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
|
|
+//ZZ return resRd ? rD : rM;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ // These 4 verified 18 Apr 2013
|
|
+//ZZ case Iop_InterleaveHI8x8:
|
|
+//ZZ case Iop_InterleaveLO8x8:
|
|
+//ZZ case Iop_InterleaveHI16x4:
|
|
+//ZZ case Iop_InterleaveLO16x4: {
|
|
+//ZZ HReg rD = newVRegD(env);
|
|
+//ZZ HReg rM = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool resRd; // is the result in rD or rM ?
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
|
|
+//ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
|
|
+//ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
|
|
+//ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
|
|
+//ZZ return resRd ? rD : rM;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ // These 4 verified 18 Apr 2013
|
|
+//ZZ case Iop_CatOddLanes8x8:
|
|
+//ZZ case Iop_CatEvenLanes8x8:
|
|
+//ZZ case Iop_CatOddLanes16x4:
|
|
+//ZZ case Iop_CatEvenLanes16x4: {
|
|
+//ZZ HReg rD = newVRegD(env);
|
|
+//ZZ HReg rM = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool resRd; // is the result in rD or rM ?
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
|
|
+//ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
|
|
+//ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
|
|
+//ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
|
|
+//ZZ return resRd ? rD : rM;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_QAdd8Ux8:
|
|
+//ZZ case Iop_QAdd16Ux4:
|
|
+//ZZ case Iop_QAdd32Ux2:
|
|
+//ZZ case Iop_QAdd64Ux1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QAdd8Ux8: size = 0; break;
|
|
+//ZZ case Iop_QAdd16Ux4: size = 1; break;
|
|
+//ZZ case Iop_QAdd32Ux2: size = 2; break;
|
|
+//ZZ case Iop_QAdd64Ux1: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QAdd8Sx8:
|
|
+//ZZ case Iop_QAdd16Sx4:
|
|
+//ZZ case Iop_QAdd32Sx2:
|
|
+//ZZ case Iop_QAdd64Sx1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QAdd8Sx8: size = 0; break;
|
|
+//ZZ case Iop_QAdd16Sx4: size = 1; break;
|
|
+//ZZ case Iop_QAdd32Sx2: size = 2; break;
|
|
+//ZZ case Iop_QAdd64Sx1: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sub8x8:
|
|
+//ZZ case Iop_Sub16x4:
|
|
+//ZZ case Iop_Sub32x2:
|
|
+//ZZ case Iop_Sub64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Sub8x8: size = 0; break;
|
|
+//ZZ case Iop_Sub16x4: size = 1; break;
|
|
+//ZZ case Iop_Sub32x2: size = 2; break;
|
|
+//ZZ case Iop_Sub64: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sub32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSub8Ux8:
|
|
+//ZZ case Iop_QSub16Ux4:
|
|
+//ZZ case Iop_QSub32Ux2:
|
|
+//ZZ case Iop_QSub64Ux1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSub8Ux8: size = 0; break;
|
|
+//ZZ case Iop_QSub16Ux4: size = 1; break;
|
|
+//ZZ case Iop_QSub32Ux2: size = 2; break;
|
|
+//ZZ case Iop_QSub64Ux1: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSub8Sx8:
|
|
+//ZZ case Iop_QSub16Sx4:
|
|
+//ZZ case Iop_QSub32Sx2:
|
|
+//ZZ case Iop_QSub64Sx1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSub8Sx8: size = 0; break;
|
|
+//ZZ case Iop_QSub16Sx4: size = 1; break;
|
|
+//ZZ case Iop_QSub32Sx2: size = 2; break;
|
|
+//ZZ case Iop_QSub64Sx1: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Max8Ux8:
|
|
+//ZZ case Iop_Max16Ux4:
|
|
+//ZZ case Iop_Max32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Max8Ux8: size = 0; break;
|
|
+//ZZ case Iop_Max16Ux4: size = 1; break;
|
|
+//ZZ case Iop_Max32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Max8Sx8:
|
|
+//ZZ case Iop_Max16Sx4:
|
|
+//ZZ case Iop_Max32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Max8Sx8: size = 0; break;
|
|
+//ZZ case Iop_Max16Sx4: size = 1; break;
|
|
+//ZZ case Iop_Max32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Min8Ux8:
|
|
+//ZZ case Iop_Min16Ux4:
|
|
+//ZZ case Iop_Min32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Min8Ux8: size = 0; break;
|
|
+//ZZ case Iop_Min16Ux4: size = 1; break;
|
|
+//ZZ case Iop_Min32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Min8Sx8:
|
|
+//ZZ case Iop_Min16Sx4:
|
|
+//ZZ case Iop_Min32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Min8Sx8: size = 0; break;
|
|
+//ZZ case Iop_Min16Sx4: size = 1; break;
|
|
+//ZZ case Iop_Min32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sar8x8:
|
|
+//ZZ case Iop_Sar16x4:
|
|
+//ZZ case Iop_Sar32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegD(env);
|
|
+//ZZ HReg zero = newVRegD(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Sar8x8: size = 0; break;
|
|
+//ZZ case Iop_Sar16x4: size = 1; break;
|
|
+//ZZ case Iop_Sar32x2: size = 2; break;
|
|
+//ZZ case Iop_Sar64: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ argR2, zero, argR, size, False));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
|
|
+//ZZ res, argL, argR2, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sal8x8:
|
|
+//ZZ case Iop_Sal16x4:
|
|
+//ZZ case Iop_Sal32x2:
|
|
+//ZZ case Iop_Sal64x1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Sal8x8: size = 0; break;
|
|
+//ZZ case Iop_Sal16x4: size = 1; break;
|
|
+//ZZ case Iop_Sal32x2: size = 2; break;
|
|
+//ZZ case Iop_Sal64x1: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Shr8x8:
|
|
+//ZZ case Iop_Shr16x4:
|
|
+//ZZ case Iop_Shr32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegD(env);
|
|
+//ZZ HReg zero = newVRegD(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Shr8x8: size = 0; break;
|
|
+//ZZ case Iop_Shr16x4: size = 1; break;
|
|
+//ZZ case Iop_Shr32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ argR2, zero, argR, size, False));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, argR2, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Shl8x8:
|
|
+//ZZ case Iop_Shl16x4:
|
|
+//ZZ case Iop_Shl32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Shl8x8: size = 0; break;
|
|
+//ZZ case Iop_Shl16x4: size = 1; break;
|
|
+//ZZ case Iop_Shl32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QShl8x8:
|
|
+//ZZ case Iop_QShl16x4:
|
|
+//ZZ case Iop_QShl32x2:
|
|
+//ZZ case Iop_QShl64x1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QShl8x8: size = 0; break;
|
|
+//ZZ case Iop_QShl16x4: size = 1; break;
|
|
+//ZZ case Iop_QShl32x2: size = 2; break;
|
|
+//ZZ case Iop_QShl64x1: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSal8x8:
|
|
+//ZZ case Iop_QSal16x4:
|
|
+//ZZ case Iop_QSal32x2:
|
|
+//ZZ case Iop_QSal64x1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSal8x8: size = 0; break;
|
|
+//ZZ case Iop_QSal16x4: size = 1; break;
|
|
+//ZZ case Iop_QSal32x2: size = 2; break;
|
|
+//ZZ case Iop_QSal64x1: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QShlN8x8:
|
|
+//ZZ case Iop_QShlN16x4:
|
|
+//ZZ case Iop_QShlN32x2:
|
|
+//ZZ case Iop_QShlN64x1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt size, imm;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QShlN8x8: size = 8 | imm; break;
|
|
+//ZZ case Iop_QShlN16x4: size = 16 | imm; break;
|
|
+//ZZ case Iop_QShlN32x2: size = 32 | imm; break;
|
|
+//ZZ case Iop_QShlN64x1: size = 64 | imm; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
|
|
+//ZZ res, argL, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QShlN8Sx8:
|
|
+//ZZ case Iop_QShlN16Sx4:
|
|
+//ZZ case Iop_QShlN32Sx2:
|
|
+//ZZ case Iop_QShlN64Sx1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt size, imm;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QShlN8Sx8: size = 8 | imm; break;
|
|
+//ZZ case Iop_QShlN16Sx4: size = 16 | imm; break;
|
|
+//ZZ case Iop_QShlN32Sx2: size = 32 | imm; break;
|
|
+//ZZ case Iop_QShlN64Sx1: size = 64 | imm; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
|
|
+//ZZ res, argL, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSalN8x8:
|
|
+//ZZ case Iop_QSalN16x4:
|
|
+//ZZ case Iop_QSalN32x2:
|
|
+//ZZ case Iop_QSalN64x1: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt size, imm;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSalN8x8: size = 8 | imm; break;
|
|
+//ZZ case Iop_QSalN16x4: size = 16 | imm; break;
|
|
+//ZZ case Iop_QSalN32x2: size = 32 | imm; break;
|
|
+//ZZ case Iop_QSalN64x1: size = 64 | imm; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
|
|
+//ZZ res, argL, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_ShrN8x8:
|
|
+//ZZ case Iop_ShrN16x4:
|
|
+//ZZ case Iop_ShrN32x2:
|
|
+//ZZ case Iop_Shr64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg tmp = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegI(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_ShrN8x8: size = 0; break;
|
|
+//ZZ case Iop_ShrN16x4: size = 1; break;
|
|
+//ZZ case Iop_ShrN32x2: size = 2; break;
|
|
+//ZZ case Iop_Shr64: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, tmp, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_ShlN8x8:
|
|
+//ZZ case Iop_ShlN16x4:
|
|
+//ZZ case Iop_ShlN32x2:
|
|
+//ZZ case Iop_Shl64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg tmp = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ /* special-case Shl64(x, imm8) since the Neon front
|
|
+//ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}. */
|
|
+//ZZ if (e->Iex.Binop.op == Iop_Shl64
|
|
+//ZZ && e->Iex.Binop.arg2->tag == Iex_Const) {
|
|
+//ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
|
|
+//ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ if (nshift >= 1 && nshift <= 63) {
|
|
+//ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ /* else fall through to general case */
|
|
+//ZZ }
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_ShlN8x8: size = 0; break;
|
|
+//ZZ case Iop_ShlN16x4: size = 1; break;
|
|
+//ZZ case Iop_ShlN32x2: size = 2; break;
|
|
+//ZZ case Iop_Shl64: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
|
|
+//ZZ tmp, argR, 0, False));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, tmp, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_SarN8x8:
|
|
+//ZZ case Iop_SarN16x4:
|
|
+//ZZ case Iop_SarN32x2:
|
|
+//ZZ case Iop_Sar64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg tmp = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegI(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_SarN8x8: size = 0; break;
|
|
+//ZZ case Iop_SarN16x4: size = 1; break;
|
|
+//ZZ case Iop_SarN32x2: size = 2; break;
|
|
+//ZZ case Iop_Sar64: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
|
|
+//ZZ res, argL, tmp, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGT8Ux8:
|
|
+//ZZ case Iop_CmpGT16Ux4:
|
|
+//ZZ case Iop_CmpGT32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CmpGT8Ux8: size = 0; break;
|
|
+//ZZ case Iop_CmpGT16Ux4: size = 1; break;
|
|
+//ZZ case Iop_CmpGT32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGT8Sx8:
|
|
+//ZZ case Iop_CmpGT16Sx4:
|
|
+//ZZ case Iop_CmpGT32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CmpGT8Sx8: size = 0; break;
|
|
+//ZZ case Iop_CmpGT16Sx4: size = 1; break;
|
|
+//ZZ case Iop_CmpGT32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpEQ8x8:
|
|
+//ZZ case Iop_CmpEQ16x4:
|
|
+//ZZ case Iop_CmpEQ32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CmpEQ8x8: size = 0; break;
|
|
+//ZZ case Iop_CmpEQ16x4: size = 1; break;
|
|
+//ZZ case Iop_CmpEQ32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Mul8x8:
|
|
+//ZZ case Iop_Mul16x4:
|
|
+//ZZ case Iop_Mul32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Mul8x8: size = 0; break;
|
|
+//ZZ case Iop_Mul16x4: size = 1; break;
|
|
+//ZZ case Iop_Mul32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Mul32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QDMulHi16Sx4:
|
|
+//ZZ case Iop_QDMulHi32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QDMulHi16Sx4: size = 1; break;
|
|
+//ZZ case Iop_QDMulHi32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_QRDMulHi16Sx4:
|
|
+//ZZ case Iop_QRDMulHi32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QRDMulHi16Sx4: size = 1; break;
|
|
+//ZZ case Iop_QRDMulHi32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_PwAdd8x8:
|
|
+//ZZ case Iop_PwAdd16x4:
|
|
+//ZZ case Iop_PwAdd32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwAdd8x8: size = 0; break;
|
|
+//ZZ case Iop_PwAdd16x4: size = 1; break;
|
|
+//ZZ case Iop_PwAdd32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwAdd32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMin8Ux8:
|
|
+//ZZ case Iop_PwMin16Ux4:
|
|
+//ZZ case Iop_PwMin32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwMin8Ux8: size = 0; break;
|
|
+//ZZ case Iop_PwMin16Ux4: size = 1; break;
|
|
+//ZZ case Iop_PwMin32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMin8Sx8:
|
|
+//ZZ case Iop_PwMin16Sx4:
|
|
+//ZZ case Iop_PwMin32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwMin8Sx8: size = 0; break;
|
|
+//ZZ case Iop_PwMin16Sx4: size = 1; break;
|
|
+//ZZ case Iop_PwMin32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMax8Ux8:
|
|
+//ZZ case Iop_PwMax16Ux4:
|
|
+//ZZ case Iop_PwMax32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwMax8Ux8: size = 0; break;
|
|
+//ZZ case Iop_PwMax16Ux4: size = 1; break;
|
|
+//ZZ case Iop_PwMax32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMax8Sx8:
|
|
+//ZZ case Iop_PwMax16Sx4:
|
|
+//ZZ case Iop_PwMax32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwMax8Sx8: size = 0; break;
|
|
+//ZZ case Iop_PwMax16Sx4: size = 1; break;
|
|
+//ZZ case Iop_PwMax32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Perm8x8: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
|
|
+//ZZ res, argL, argR, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PolynomialMul8x8: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
|
|
+//ZZ res, argL, argR, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Max32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Min32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMax32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMin32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGT32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGE32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpEQ32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_F32ToFixed32Ux2_RZ:
|
|
+//ZZ case Iop_F32ToFixed32Sx2_RZ:
|
|
+//ZZ case Iop_Fixed32UToF32x2_RN:
|
|
+//ZZ case Iop_Fixed32SToF32x2_RN: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ ARMNeonUnOp op;
|
|
+//ZZ UInt imm6;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
|
|
+//ZZ "second argument less than 33 only\n");
|
|
+//ZZ }
|
|
+//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ vassert(imm6 <= 32 && imm6 > 0);
|
|
+//ZZ imm6 = 64 - imm6;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
|
|
+//ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
|
|
+//ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
|
|
+//ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ /*
|
|
+//ZZ FIXME: is this here or not?
|
|
+//ZZ case Iop_VDup8x8:
|
|
+//ZZ case Iop_VDup16x4:
|
|
+//ZZ case Iop_VDup32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt index;
|
|
+//ZZ UInt imm4;
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM supports Iop_VDup with constant "
|
|
+//ZZ "second argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
|
|
+//ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
|
|
+//ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ if (imm4 >= 16) {
|
|
+//ZZ vpanic("ARM supports Iop_VDup with constant "
|
|
+//ZZ "second argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
|
|
+//ZZ res, argL, imm4, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ */
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- UNARY ops --------- */
|
|
+//ZZ if (e->tag == Iex_Unop) {
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ
|
|
+//ZZ /* 32Uto64 */
|
|
+//ZZ case Iop_32Uto64: {
|
|
+//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+//ZZ HReg rHi = newVRegI(env);
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ addInstr(env, ARMInstr_Imm32(rHi, 0));
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* 32Sto64 */
|
|
+//ZZ case Iop_32Sto64: {
|
|
+//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+//ZZ HReg rHi = newVRegI(env);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo));
|
|
+//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* The next 3 are pass-throughs */
|
|
+//ZZ /* ReinterpF64asI64 */
|
|
+//ZZ case Iop_ReinterpF64asI64:
|
|
+//ZZ /* Left64(e) */
|
|
+//ZZ case Iop_Left64:
|
|
+//ZZ /* CmpwNEZ64(e) */
|
|
+//ZZ case Iop_1Sto64: {
|
|
+//ZZ HReg rLo, rHi;
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ iselInt64Expr(&rHi, &rLo, env, e);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_Not64: {
|
|
+//ZZ DECLARE_PATTERN(p_veqz_8x8);
|
|
+//ZZ DECLARE_PATTERN(p_veqz_16x4);
|
|
+//ZZ DECLARE_PATTERN(p_veqz_32x2);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_8sx8);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_16sx4);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_32sx2);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_8ux8);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_16ux4);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_32ux2);
|
|
+//ZZ DEFINE_PATTERN(p_veqz_8x8,
|
|
+//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_veqz_16x4,
|
|
+//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_veqz_32x2,
|
|
+//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_8sx8,
|
|
+//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_16sx4,
|
|
+//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_32sx2,
|
|
+//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_8ux8,
|
|
+//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_16ux4,
|
|
+//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_32ux2,
|
|
+//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
|
|
+//ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
|
|
+//ZZ res, argL, argR, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
|
|
+//ZZ res, argL, argR, 1, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
|
|
+//ZZ res, argL, argR, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
|
|
+//ZZ res, argL, argR, 1, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
|
|
+//ZZ res, argL, argR, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ case Iop_Dup8x8:
|
|
+//ZZ case Iop_Dup16x4:
|
|
+//ZZ case Iop_Dup32x2: {
|
|
+//ZZ HReg res, arg;
|
|
+//ZZ UInt size;
|
|
+//ZZ DECLARE_PATTERN(p_vdup_8x8);
|
|
+//ZZ DECLARE_PATTERN(p_vdup_16x4);
|
|
+//ZZ DECLARE_PATTERN(p_vdup_32x2);
|
|
+//ZZ DEFINE_PATTERN(p_vdup_8x8,
|
|
+//ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
|
|
+//ZZ DEFINE_PATTERN(p_vdup_16x4,
|
|
+//ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
|
|
+//ZZ DEFINE_PATTERN(p_vdup_32x2,
|
|
+//ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
|
|
+//ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) {
|
|
+//ZZ UInt index;
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (mi.bindee[1]->tag == Iex_Const &&
|
|
+//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
|
|
+//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
|
|
+//ZZ imm4 = (index << 1) + 1;
|
|
+//ZZ if (index < 8) {
|
|
+//ZZ res = newVRegD(env);
|
|
+//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(
|
|
+//ZZ ARMneon_VDUP,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ imm4, False
|
|
+//ZZ ));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
|
|
+//ZZ UInt index;
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (mi.bindee[1]->tag == Iex_Const &&
|
|
+//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
|
|
+//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
|
|
+//ZZ imm4 = (index << 2) + 2;
|
|
+//ZZ if (index < 4) {
|
|
+//ZZ res = newVRegD(env);
|
|
+//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(
|
|
+//ZZ ARMneon_VDUP,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ imm4, False
|
|
+//ZZ ));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
|
|
+//ZZ UInt index;
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (mi.bindee[1]->tag == Iex_Const &&
|
|
+//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
|
|
+//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
|
|
+//ZZ imm4 = (index << 3) + 4;
|
|
+//ZZ if (index < 2) {
|
|
+//ZZ res = newVRegD(env);
|
|
+//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(
|
|
+//ZZ ARMneon_VDUP,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ imm4, False
|
|
+//ZZ ));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+//ZZ res = newVRegD(env);
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_Dup8x8: size = 0; break;
|
|
+//ZZ case Iop_Dup16x4: size = 1; break;
|
|
+//ZZ case Iop_Dup32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Abs8x8:
|
|
+//ZZ case Iop_Abs16x4:
|
|
+//ZZ case Iop_Abs32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Abs8x8: size = 0; break;
|
|
+//ZZ case Iop_Abs16x4: size = 1; break;
|
|
+//ZZ case Iop_Abs32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Reverse64_8x8:
|
|
+//ZZ case Iop_Reverse64_16x4:
|
|
+//ZZ case Iop_Reverse64_32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Reverse64_8x8: size = 0; break;
|
|
+//ZZ case Iop_Reverse64_16x4: size = 1; break;
|
|
+//ZZ case Iop_Reverse64_32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Reverse32_8x8:
|
|
+//ZZ case Iop_Reverse32_16x4: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Reverse32_8x8: size = 0; break;
|
|
+//ZZ case Iop_Reverse32_16x4: size = 1; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Reverse16_8x8: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpwNEZ64: {
|
|
+//ZZ HReg x_lsh = newVRegD(env);
|
|
+//ZZ HReg x_rsh = newVRegD(env);
|
|
+//ZZ HReg lsh_amt = newVRegD(env);
|
|
+//ZZ HReg rsh_amt = newVRegD(env);
|
|
+//ZZ HReg zero = newVRegD(env);
|
|
+//ZZ HReg tmp = newVRegD(env);
|
|
+//ZZ HReg tmp2 = newVRegD(env);
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg x = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ rsh_amt, zero, lsh_amt, 2, False));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ x_lsh, x, lsh_amt, 3, False));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ x_rsh, x, rsh_amt, 3, False));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
|
|
+//ZZ tmp, x_lsh, x_rsh, 0, False));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
|
|
+//ZZ res, tmp, x, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpNEZ8x8:
|
|
+//ZZ case Iop_CmpNEZ16x4:
|
|
+//ZZ case Iop_CmpNEZ32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg tmp = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_CmpNEZ8x8: size = 0; break;
|
|
+//ZZ case Iop_CmpNEZ16x4: size = 1; break;
|
|
+//ZZ case Iop_CmpNEZ32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_NarrowUn16to8x8:
|
|
+//ZZ case Iop_NarrowUn32to16x4:
|
|
+//ZZ case Iop_NarrowUn64to32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_NarrowUn16to8x8: size = 0; break;
|
|
+//ZZ case Iop_NarrowUn32to16x4: size = 1; break;
|
|
+//ZZ case Iop_NarrowUn64to32x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QNarrowUn16Sto8Sx8:
|
|
+//ZZ case Iop_QNarrowUn32Sto16Sx4:
|
|
+//ZZ case Iop_QNarrowUn64Sto32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
|
|
+//ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
|
|
+//ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QNarrowUn16Sto8Ux8:
|
|
+//ZZ case Iop_QNarrowUn32Sto16Ux4:
|
|
+//ZZ case Iop_QNarrowUn64Sto32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
|
|
+//ZZ case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
|
|
+//ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QNarrowUn16Uto8Ux8:
|
|
+//ZZ case Iop_QNarrowUn32Uto16Ux4:
|
|
+//ZZ case Iop_QNarrowUn64Uto32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
|
|
+//ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
|
|
+//ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwAddL8Sx8:
|
|
+//ZZ case Iop_PwAddL16Sx4:
|
|
+//ZZ case Iop_PwAddL32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwAddL8Sx8: size = 0; break;
|
|
+//ZZ case Iop_PwAddL16Sx4: size = 1; break;
|
|
+//ZZ case Iop_PwAddL32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwAddL8Ux8:
|
|
+//ZZ case Iop_PwAddL16Ux4:
|
|
+//ZZ case Iop_PwAddL32Ux2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwAddL8Ux8: size = 0; break;
|
|
+//ZZ case Iop_PwAddL16Ux4: size = 1; break;
|
|
+//ZZ case Iop_PwAddL32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Cnt8x8: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Clz8Sx8:
|
|
+//ZZ case Iop_Clz16Sx4:
|
|
+//ZZ case Iop_Clz32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Clz8Sx8: size = 0; break;
|
|
+//ZZ case Iop_Clz16Sx4: size = 1; break;
|
|
+//ZZ case Iop_Clz32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Cls8Sx8:
|
|
+//ZZ case Iop_Cls16Sx4:
|
|
+//ZZ case Iop_Cls32Sx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Cls8Sx8: size = 0; break;
|
|
+//ZZ case Iop_Cls16Sx4: size = 1; break;
|
|
+//ZZ case Iop_Cls32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
|
|
+//ZZ res, arg, size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_FtoI32Sx2_RZ: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
|
|
+//ZZ res, arg, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_FtoI32Ux2_RZ: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
|
|
+//ZZ res, arg, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_I32StoFx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
|
|
+//ZZ res, arg, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_I32UtoFx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
|
|
+//ZZ res, arg, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_F32toF16x4: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
|
|
+//ZZ res, arg, 2, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Recip32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
|
|
+//ZZ res, argL, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Recip32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
|
|
+//ZZ res, argL, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Abs32Fx2: {
|
|
+//ZZ DECLARE_PATTERN(p_vabd_32fx2);
|
|
+//ZZ DEFINE_PATTERN(p_vabd_32fx2,
|
|
+//ZZ unop(Iop_Abs32Fx2,
|
|
+//ZZ binop(Iop_Sub32Fx2,
|
|
+//ZZ bind(0),
|
|
+//ZZ bind(1))));
|
|
+//ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
|
|
+//ZZ res, argL, argR, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ } else {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
|
|
+//ZZ res, arg, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ case Iop_Rsqrte32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
|
|
+//ZZ res, arg, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Rsqrte32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
|
|
+//ZZ res, arg, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Neg32Fx2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
|
|
+//ZZ res, arg, 0, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ } /* if (e->tag == Iex_Unop) */
|
|
+//ZZ
|
|
+//ZZ if (e->tag == Iex_Triop) {
|
|
+//ZZ IRTriop *triop = e->Iex.Triop.details;
|
|
+//ZZ
|
|
+//ZZ switch (triop->op) {
|
|
+//ZZ case Iop_Extract64: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, triop->arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, triop->arg2);
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (triop->arg3->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
|
|
+//ZZ "third argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
|
|
+//ZZ if (imm4 >= 8) {
|
|
+//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
|
|
+//ZZ "third argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
|
|
+//ZZ res, argL, argR, imm4, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_SetElem8x8:
|
|
+//ZZ case Iop_SetElem16x4:
|
|
+//ZZ case Iop_SetElem32x2: {
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ HReg dreg = iselNeon64Expr(env, triop->arg1);
|
|
+//ZZ HReg arg = iselIntExpr_R(env, triop->arg3);
|
|
+//ZZ UInt index, size;
|
|
+//ZZ if (triop->arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports SetElem with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ index = triop->arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (triop->op) {
|
|
+//ZZ case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
|
|
+//ZZ case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
|
|
+//ZZ case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, res, index),
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, arg, 0),
|
|
+//ZZ size, False));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- MULTIPLEX --------- */
|
|
+//ZZ if (e->tag == Iex_ITE) { // VFD
|
|
+//ZZ HReg rLo, rHi;
|
|
+//ZZ HReg res = newVRegD(env);
|
|
+//ZZ iselInt64Expr(&rHi, &rLo, env, e);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ ppIRExpr(e);
|
|
+//ZZ vpanic("iselNeon64Expr");
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Vector (NEON) expressions (128 bit) ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
|
|
+{  /* Checked wrapper around iselV128Expr_wrk: returns the register it picks for e. */
|
|
+ HReg r = iselV128Expr_wrk( env, e );
|
|
+ vassert(hregClass(r) == HRcVec128);  /* sanity: worker must hand back a Vec128-class register */
|
|
+ vassert(hregIsVirtual(r));  /* sanity: worker must hand back a virtual register */
|
|
+ return r;
|
|
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY */
|
|
+static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
|
|
+{
|
|
+ IRType ty = typeOfIRExpr(env->type_env, e);
|
|
+ vassert(e);
|
|
+ vassert(ty == Ity_V128);
|
|
+
|
|
+ if (e->tag == Iex_RdTmp) {
|
|
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
|
|
+ }
|
|
+
|
|
+ if (e->tag == Iex_Const) {
|
|
+ /* Only a very limited range of constants is handled. */
|
|
+ vassert(e->Iex.Const.con->tag == Ico_V128);
|
|
+ UShort con = e->Iex.Const.con->Ico.V128;
|
|
+ if (con == 0x0000) {
|
|
+ HReg res = newVRegV(env);
|
|
+ addInstr(env, ARM64Instr_VImmQ(res, con));
|
|
+ return res;
|
|
+ }
|
|
+ /* Unhandled */
|
|
+ goto v128_expr_bad;
|
|
+ }
|
|
+
|
|
+ if (e->tag == Iex_Load) {
|
|
+ HReg res = newVRegV(env);
|
|
+ HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
|
|
+ vassert(ty == Ity_V128);
|
|
+ addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
|
|
+ return res;
|
|
+ }
|
|
+
|
|
+ if (e->tag == Iex_Get) {
|
|
+ UInt offs = (UInt)e->Iex.Get.offset;
|
|
+ if (offs < (1<<12)) {
|
|
+ HReg addr = mk_baseblock_128bit_access_addr(env, offs);
|
|
+ HReg res = newVRegV(env);
|
|
+ vassert(ty == Ity_V128);
|
|
+ addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
|
|
+ return res;
|
|
+ }
|
|
+ goto v128_expr_bad;
|
|
+ }
|
|
+
|
|
+//ZZ if (e->tag == Iex_Unop) {
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_NotV128: {
|
|
+//ZZ DECLARE_PATTERN(p_veqz_8x16);
|
|
+//ZZ DECLARE_PATTERN(p_veqz_16x8);
|
|
+//ZZ DECLARE_PATTERN(p_veqz_32x4);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_8sx16);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_16sx8);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_32sx4);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_8ux16);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_16ux8);
|
|
+//ZZ DECLARE_PATTERN(p_vcge_32ux4);
|
|
+//ZZ DEFINE_PATTERN(p_veqz_8x16,
|
|
+//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_veqz_16x8,
|
|
+//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_veqz_32x4,
|
|
+//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_8sx16,
|
|
+//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_16sx8,
|
|
+//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_32sx4,
|
|
+//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_8ux16,
|
|
+//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_16ux8,
|
|
+//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
|
|
+//ZZ DEFINE_PATTERN(p_vcge_32ux4,
|
|
+//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
|
|
+//ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
|
|
+//ZZ res, argL, argR, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
|
|
+//ZZ res, argL, argR, 1, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
|
|
+//ZZ res, argL, argR, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
|
|
+//ZZ res, argL, argR, 1, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ case Iop_Dup8x16:
|
|
+//ZZ case Iop_Dup16x8:
|
|
+//ZZ case Iop_Dup32x4: {
|
|
+//ZZ HReg res, arg;
|
|
+//ZZ UInt size;
|
|
+//ZZ DECLARE_PATTERN(p_vdup_8x16);
|
|
+//ZZ DECLARE_PATTERN(p_vdup_16x8);
|
|
+//ZZ DECLARE_PATTERN(p_vdup_32x4);
|
|
+//ZZ DEFINE_PATTERN(p_vdup_8x16,
|
|
+//ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
|
|
+//ZZ DEFINE_PATTERN(p_vdup_16x8,
|
|
+//ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
|
|
+//ZZ DEFINE_PATTERN(p_vdup_32x4,
|
|
+//ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
|
|
+//ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) {
|
|
+//ZZ UInt index;
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (mi.bindee[1]->tag == Iex_Const &&
|
|
+//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
|
|
+//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
|
|
+//ZZ imm4 = (index << 1) + 1;
|
|
+//ZZ if (index < 8) {
|
|
+//ZZ res = newVRegV(env);
|
|
+//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(
|
|
+//ZZ ARMneon_VDUP,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ imm4, True
|
|
+//ZZ ));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
|
|
+//ZZ UInt index;
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (mi.bindee[1]->tag == Iex_Const &&
|
|
+//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
|
|
+//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
|
|
+//ZZ imm4 = (index << 2) + 2;
|
|
+//ZZ if (index < 4) {
|
|
+//ZZ res = newVRegV(env);
|
|
+//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(
|
|
+//ZZ ARMneon_VDUP,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ imm4, True
|
|
+//ZZ ));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
|
|
+//ZZ UInt index;
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (mi.bindee[1]->tag == Iex_Const &&
|
|
+//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
|
|
+//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
|
|
+//ZZ imm4 = (index << 3) + 4;
|
|
+//ZZ if (index < 2) {
|
|
+//ZZ res = newVRegV(env);
|
|
+//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
|
|
+//ZZ addInstr(env, ARMInstr_NUnaryS(
|
|
+//ZZ ARMneon_VDUP,
|
|
+//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
|
|
+//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
|
|
+//ZZ imm4, True
|
|
+//ZZ ));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
|
|
+//ZZ res = newVRegV(env);
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_Dup8x16: size = 0; break;
|
|
+//ZZ case Iop_Dup16x8: size = 1; break;
|
|
+//ZZ case Iop_Dup32x4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Abs8x16:
|
|
+//ZZ case Iop_Abs16x8:
|
|
+//ZZ case Iop_Abs32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Abs8x16: size = 0; break;
|
|
+//ZZ case Iop_Abs16x8: size = 1; break;
|
|
+//ZZ case Iop_Abs32x4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Reverse64_8x16:
|
|
+//ZZ case Iop_Reverse64_16x8:
|
|
+//ZZ case Iop_Reverse64_32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Reverse64_8x16: size = 0; break;
|
|
+//ZZ case Iop_Reverse64_16x8: size = 1; break;
|
|
+//ZZ case Iop_Reverse64_32x4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
|
|
+//ZZ res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Reverse32_8x16:
|
|
+//ZZ case Iop_Reverse32_16x8: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Reverse32_8x16: size = 0; break;
|
|
+//ZZ case Iop_Reverse32_16x8: size = 1; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
|
|
+//ZZ res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Reverse16_8x16: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
|
|
+//ZZ res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpNEZ64x2: {
|
|
+//ZZ HReg x_lsh = newVRegV(env);
|
|
+//ZZ HReg x_rsh = newVRegV(env);
|
|
+//ZZ HReg lsh_amt = newVRegV(env);
|
|
+//ZZ HReg rsh_amt = newVRegV(env);
|
|
+//ZZ HReg zero = newVRegV(env);
|
|
+//ZZ HReg tmp = newVRegV(env);
|
|
+//ZZ HReg tmp2 = newVRegV(env);
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg x = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ rsh_amt, zero, lsh_amt, 2, True));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ x_lsh, x, lsh_amt, 3, True));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ x_rsh, x, rsh_amt, 3, True));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
|
|
+//ZZ tmp, x_lsh, x_rsh, 0, True));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
|
|
+//ZZ res, tmp, x, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpNEZ8x16:
|
|
+//ZZ case Iop_CmpNEZ16x8:
|
|
+//ZZ case Iop_CmpNEZ32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg tmp = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_CmpNEZ8x16: size = 0; break;
|
|
+//ZZ case Iop_CmpNEZ16x8: size = 1; break;
|
|
+//ZZ case Iop_CmpNEZ32x4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Widen8Uto16x8:
|
|
+//ZZ case Iop_Widen16Uto32x4:
|
|
+//ZZ case Iop_Widen32Uto64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_Widen8Uto16x8: size = 0; break;
|
|
+//ZZ case Iop_Widen16Uto32x4: size = 1; break;
|
|
+//ZZ case Iop_Widen32Uto64x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
|
|
+//ZZ res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Widen8Sto16x8:
|
|
+//ZZ case Iop_Widen16Sto32x4:
|
|
+//ZZ case Iop_Widen32Sto64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Unop.op) {
|
|
+//ZZ case Iop_Widen8Sto16x8: size = 0; break;
|
|
+//ZZ case Iop_Widen16Sto32x4: size = 1; break;
|
|
+//ZZ case Iop_Widen32Sto64x2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
|
|
+//ZZ res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwAddL8Sx16:
|
|
+//ZZ case Iop_PwAddL16Sx8:
|
|
+//ZZ case Iop_PwAddL32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwAddL8Sx16: size = 0; break;
|
|
+//ZZ case Iop_PwAddL16Sx8: size = 1; break;
|
|
+//ZZ case Iop_PwAddL32Sx4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
|
|
+//ZZ res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwAddL8Ux16:
|
|
+//ZZ case Iop_PwAddL16Ux8:
|
|
+//ZZ case Iop_PwAddL32Ux4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwAddL8Ux16: size = 0; break;
|
|
+//ZZ case Iop_PwAddL16Ux8: size = 1; break;
|
|
+//ZZ case Iop_PwAddL32Ux4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
|
|
+//ZZ res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Cnt8x16: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Clz8Sx16:
|
|
+//ZZ case Iop_Clz16Sx8:
|
|
+//ZZ case Iop_Clz32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Clz8Sx16: size = 0; break;
|
|
+//ZZ case Iop_Clz16Sx8: size = 1; break;
|
|
+//ZZ case Iop_Clz32Sx4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Cls8Sx16:
|
|
+//ZZ case Iop_Cls16Sx8:
|
|
+//ZZ case Iop_Cls32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Cls8Sx16: size = 0; break;
|
|
+//ZZ case Iop_Cls16Sx8: size = 1; break;
|
|
+//ZZ case Iop_Cls32Sx4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_FtoI32Sx4_RZ: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
|
|
+//ZZ res, arg, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_FtoI32Ux4_RZ: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
|
|
+//ZZ res, arg, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_I32StoFx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
|
|
+//ZZ res, arg, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_I32UtoFx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
|
|
+//ZZ res, arg, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_F16toF32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
|
|
+//ZZ res, arg, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Recip32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
|
|
+//ZZ res, argL, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Recip32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
|
|
+//ZZ res, argL, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Abs32Fx4: {
|
|
+//ZZ DECLARE_PATTERN(p_vabd_32fx4);
|
|
+//ZZ DEFINE_PATTERN(p_vabd_32fx4,
|
|
+//ZZ unop(Iop_Abs32Fx4,
|
|
+//ZZ binop(Iop_Sub32Fx4,
|
|
+//ZZ bind(0),
|
|
+//ZZ bind(1))));
|
|
+//ZZ if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
|
|
+//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
|
|
+//ZZ res, argL, argR, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ } else {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
|
|
+//ZZ res, argL, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ case Iop_Rsqrte32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
|
|
+//ZZ res, argL, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Rsqrte32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
|
|
+//ZZ res, argL, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Neg32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
|
|
+//ZZ res, arg, 0, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ /* ... */
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+
|
|
+ if (e->tag == Iex_Binop) {
|
|
+ switch (e->Iex.Binop.op) {
|
|
+ case Iop_64HLtoV128: {
|
|
+ HReg res = newVRegV(env);
|
|
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+ addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
|
|
+ return res;
|
|
+ }
|
|
+//ZZ case Iop_AndV128: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
|
|
+//ZZ res, argL, argR, 4, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_OrV128: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
|
|
+//ZZ res, argL, argR, 4, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_XorV128: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
|
|
+//ZZ res, argL, argR, 4, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Add8x16:
|
|
+//ZZ case Iop_Add16x8:
|
|
+//ZZ case Iop_Add32x4:
|
|
+//ZZ case Iop_Add64x2: {
|
|
+//ZZ /*
|
|
+//ZZ FIXME: remove this if not used
|
|
+//ZZ DECLARE_PATTERN(p_vrhadd_32sx4);
|
|
+//ZZ ULong one = (1LL << 32) | 1LL;
|
|
+//ZZ DEFINE_PATTERN(p_vrhadd_32sx4,
|
|
+//ZZ binop(Iop_Add32x4,
|
|
+//ZZ binop(Iop_Add32x4,
|
|
+//ZZ binop(Iop_SarN32x4,
|
|
+//ZZ bind(0),
|
|
+//ZZ mkU8(1)),
|
|
+//ZZ binop(Iop_SarN32x4,
|
|
+//ZZ bind(1),
|
|
+//ZZ mkU8(1))),
|
|
+//ZZ binop(Iop_SarN32x4,
|
|
+//ZZ binop(Iop_Add32x4,
|
|
+//ZZ binop(Iop_Add32x4,
|
|
+//ZZ binop(Iop_AndV128,
|
|
+//ZZ bind(0),
|
|
+//ZZ mkU128(one)),
|
|
+//ZZ binop(Iop_AndV128,
|
|
+//ZZ bind(1),
|
|
+//ZZ mkU128(one))),
|
|
+//ZZ mkU128(one)),
|
|
+//ZZ mkU8(1))));
|
|
+//ZZ */
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Add8x16: size = 0; break;
|
|
+//ZZ case Iop_Add16x8: size = 1; break;
|
|
+//ZZ case Iop_Add32x4: size = 2; break;
|
|
+//ZZ case Iop_Add64x2: size = 3; break;
|
|
+//ZZ default:
|
|
+//ZZ ppIROp(e->Iex.Binop.op);
|
|
+//ZZ vpanic("Illegal element size in VADD");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Add32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Recps32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Rsqrts32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ // These 6 verified 18 Apr 2013
|
|
+//ZZ case Iop_InterleaveEvenLanes8x16:
|
|
+//ZZ case Iop_InterleaveOddLanes8x16:
|
|
+//ZZ case Iop_InterleaveEvenLanes16x8:
|
|
+//ZZ case Iop_InterleaveOddLanes16x8:
|
|
+//ZZ case Iop_InterleaveEvenLanes32x4:
|
|
+//ZZ case Iop_InterleaveOddLanes32x4: {
|
|
+//ZZ HReg rD = newVRegV(env);
|
|
+//ZZ HReg rM = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool resRd; // is the result in rD or rM ?
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
|
|
+//ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
|
|
+//ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
|
|
+//ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
|
|
+//ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
|
|
+//ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
|
|
+//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
|
|
+//ZZ return resRd ? rD : rM;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ // These 6 verified 18 Apr 2013
|
|
+//ZZ case Iop_InterleaveHI8x16:
|
|
+//ZZ case Iop_InterleaveLO8x16:
|
|
+//ZZ case Iop_InterleaveHI16x8:
|
|
+//ZZ case Iop_InterleaveLO16x8:
|
|
+//ZZ case Iop_InterleaveHI32x4:
|
|
+//ZZ case Iop_InterleaveLO32x4: {
|
|
+//ZZ HReg rD = newVRegV(env);
|
|
+//ZZ HReg rM = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool resRd; // is the result in rD or rM ?
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
|
|
+//ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
|
|
+//ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
|
|
+//ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
|
|
+//ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
|
|
+//ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
|
|
+//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
|
|
+//ZZ return resRd ? rD : rM;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ // These 6 verified 18 Apr 2013
|
|
+//ZZ case Iop_CatOddLanes8x16:
|
|
+//ZZ case Iop_CatEvenLanes8x16:
|
|
+//ZZ case Iop_CatOddLanes16x8:
|
|
+//ZZ case Iop_CatEvenLanes16x8:
|
|
+//ZZ case Iop_CatOddLanes32x4:
|
|
+//ZZ case Iop_CatEvenLanes32x4: {
|
|
+//ZZ HReg rD = newVRegV(env);
|
|
+//ZZ HReg rM = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ Bool resRd; // is the result in rD or rM ?
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
|
|
+//ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
|
|
+//ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
|
|
+//ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
|
|
+//ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
|
|
+//ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
|
|
+//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
|
|
+//ZZ return resRd ? rD : rM;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_QAdd8Ux16:
|
|
+//ZZ case Iop_QAdd16Ux8:
|
|
+//ZZ case Iop_QAdd32Ux4:
|
|
+//ZZ case Iop_QAdd64Ux2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QAdd8Ux16: size = 0; break;
|
|
+//ZZ case Iop_QAdd16Ux8: size = 1; break;
|
|
+//ZZ case Iop_QAdd32Ux4: size = 2; break;
|
|
+//ZZ case Iop_QAdd64Ux2: size = 3; break;
|
|
+//ZZ default:
|
|
+//ZZ ppIROp(e->Iex.Binop.op);
|
|
+//ZZ vpanic("Illegal element size in VQADDU");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QAdd8Sx16:
|
|
+//ZZ case Iop_QAdd16Sx8:
|
|
+//ZZ case Iop_QAdd32Sx4:
|
|
+//ZZ case Iop_QAdd64Sx2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QAdd8Sx16: size = 0; break;
|
|
+//ZZ case Iop_QAdd16Sx8: size = 1; break;
|
|
+//ZZ case Iop_QAdd32Sx4: size = 2; break;
|
|
+//ZZ case Iop_QAdd64Sx2: size = 3; break;
|
|
+//ZZ default:
|
|
+//ZZ ppIROp(e->Iex.Binop.op);
|
|
+//ZZ vpanic("Illegal element size in VQADDS");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sub8x16:
|
|
+//ZZ case Iop_Sub16x8:
|
|
+//ZZ case Iop_Sub32x4:
|
|
+//ZZ case Iop_Sub64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Sub8x16: size = 0; break;
|
|
+//ZZ case Iop_Sub16x8: size = 1; break;
|
|
+//ZZ case Iop_Sub32x4: size = 2; break;
|
|
+//ZZ case Iop_Sub64x2: size = 3; break;
|
|
+//ZZ default:
|
|
+//ZZ ppIROp(e->Iex.Binop.op);
|
|
+//ZZ vpanic("Illegal element size in VSUB");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sub32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSub8Ux16:
|
|
+//ZZ case Iop_QSub16Ux8:
|
|
+//ZZ case Iop_QSub32Ux4:
|
|
+//ZZ case Iop_QSub64Ux2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSub8Ux16: size = 0; break;
|
|
+//ZZ case Iop_QSub16Ux8: size = 1; break;
|
|
+//ZZ case Iop_QSub32Ux4: size = 2; break;
|
|
+//ZZ case Iop_QSub64Ux2: size = 3; break;
|
|
+//ZZ default:
|
|
+//ZZ ppIROp(e->Iex.Binop.op);
|
|
+//ZZ vpanic("Illegal element size in VQSUBU");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSub8Sx16:
|
|
+//ZZ case Iop_QSub16Sx8:
|
|
+//ZZ case Iop_QSub32Sx4:
|
|
+//ZZ case Iop_QSub64Sx2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSub8Sx16: size = 0; break;
|
|
+//ZZ case Iop_QSub16Sx8: size = 1; break;
|
|
+//ZZ case Iop_QSub32Sx4: size = 2; break;
|
|
+//ZZ case Iop_QSub64Sx2: size = 3; break;
|
|
+//ZZ default:
|
|
+//ZZ ppIROp(e->Iex.Binop.op);
|
|
+//ZZ vpanic("Illegal element size in VQSUBS");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Max8Ux16:
|
|
+//ZZ case Iop_Max16Ux8:
|
|
+//ZZ case Iop_Max32Ux4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Max8Ux16: size = 0; break;
|
|
+//ZZ case Iop_Max16Ux8: size = 1; break;
|
|
+//ZZ case Iop_Max32Ux4: size = 2; break;
|
|
+//ZZ default: vpanic("Illegal element size in VMAXU");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Max8Sx16:
|
|
+//ZZ case Iop_Max16Sx8:
|
|
+//ZZ case Iop_Max32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Max8Sx16: size = 0; break;
|
|
+//ZZ case Iop_Max16Sx8: size = 1; break;
|
|
+//ZZ case Iop_Max32Sx4: size = 2; break;
|
|
+//ZZ default: vpanic("Illegal element size in VMAXS");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Min8Ux16:
|
|
+//ZZ case Iop_Min16Ux8:
|
|
+//ZZ case Iop_Min32Ux4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Min8Ux16: size = 0; break;
|
|
+//ZZ case Iop_Min16Ux8: size = 1; break;
|
|
+//ZZ case Iop_Min32Ux4: size = 2; break;
|
|
+//ZZ default: vpanic("Illegal element size in VMINU");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Min8Sx16:
|
|
+//ZZ case Iop_Min16Sx8:
|
|
+//ZZ case Iop_Min32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Min8Sx16: size = 0; break;
|
|
+//ZZ case Iop_Min16Sx8: size = 1; break;
|
|
+//ZZ case Iop_Min32Sx4: size = 2; break;
|
|
+//ZZ default: vpanic("Illegal element size in VMINS");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sar8x16:
|
|
+//ZZ case Iop_Sar16x8:
|
|
+//ZZ case Iop_Sar32x4:
|
|
+//ZZ case Iop_Sar64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegV(env);
|
|
+//ZZ HReg zero = newVRegV(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Sar8x16: size = 0; break;
|
|
+//ZZ case Iop_Sar16x8: size = 1; break;
|
|
+//ZZ case Iop_Sar32x4: size = 2; break;
|
|
+//ZZ case Iop_Sar64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ argR2, zero, argR, size, True));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
|
|
+//ZZ res, argL, argR2, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Sal8x16:
|
|
+//ZZ case Iop_Sal16x8:
|
|
+//ZZ case Iop_Sal32x4:
|
|
+//ZZ case Iop_Sal64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Sal8x16: size = 0; break;
|
|
+//ZZ case Iop_Sal16x8: size = 1; break;
|
|
+//ZZ case Iop_Sal32x4: size = 2; break;
|
|
+//ZZ case Iop_Sal64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Shr8x16:
|
|
+//ZZ case Iop_Shr16x8:
|
|
+//ZZ case Iop_Shr32x4:
|
|
+//ZZ case Iop_Shr64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegV(env);
|
|
+//ZZ HReg zero = newVRegV(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Shr8x16: size = 0; break;
|
|
+//ZZ case Iop_Shr16x8: size = 1; break;
|
|
+//ZZ case Iop_Shr32x4: size = 2; break;
|
|
+//ZZ case Iop_Shr64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
|
|
+//ZZ argR2, zero, argR, size, True));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, argR2, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Shl8x16:
|
|
+//ZZ case Iop_Shl16x8:
|
|
+//ZZ case Iop_Shl32x4:
|
|
+//ZZ case Iop_Shl64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Shl8x16: size = 0; break;
|
|
+//ZZ case Iop_Shl16x8: size = 1; break;
|
|
+//ZZ case Iop_Shl32x4: size = 2; break;
|
|
+//ZZ case Iop_Shl64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QShl8x16:
|
|
+//ZZ case Iop_QShl16x8:
|
|
+//ZZ case Iop_QShl32x4:
|
|
+//ZZ case Iop_QShl64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QShl8x16: size = 0; break;
|
|
+//ZZ case Iop_QShl16x8: size = 1; break;
|
|
+//ZZ case Iop_QShl32x4: size = 2; break;
|
|
+//ZZ case Iop_QShl64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSal8x16:
|
|
+//ZZ case Iop_QSal16x8:
|
|
+//ZZ case Iop_QSal32x4:
|
|
+//ZZ case Iop_QSal64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSal8x16: size = 0; break;
|
|
+//ZZ case Iop_QSal16x8: size = 1; break;
|
|
+//ZZ case Iop_QSal32x4: size = 2; break;
|
|
+//ZZ case Iop_QSal64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QShlN8x16:
|
|
+//ZZ case Iop_QShlN16x8:
|
|
+//ZZ case Iop_QShlN32x4:
|
|
+//ZZ case Iop_QShlN64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt size, imm;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports Iop_QShlNAxB with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QShlN8x16: size = 8 | imm; break;
|
|
+//ZZ case Iop_QShlN16x8: size = 16 | imm; break;
|
|
+//ZZ case Iop_QShlN32x4: size = 32 | imm; break;
|
|
+//ZZ case Iop_QShlN64x2: size = 64 | imm; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
|
|
+//ZZ res, argL, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QShlN8Sx16:
|
|
+//ZZ case Iop_QShlN16Sx8:
|
|
+//ZZ case Iop_QShlN32Sx4:
|
|
+//ZZ case Iop_QShlN64Sx2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt size, imm;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports Iop_QShlNASxB with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QShlN8Sx16: size = 8 | imm; break;
|
|
+//ZZ case Iop_QShlN16Sx8: size = 16 | imm; break;
|
|
+//ZZ case Iop_QShlN32Sx4: size = 32 | imm; break;
|
|
+//ZZ case Iop_QShlN64Sx2: size = 64 | imm; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
|
|
+//ZZ res, argL, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_QSalN8x16:
|
|
+//ZZ case Iop_QSalN16x8:
|
|
+//ZZ case Iop_QSalN32x4:
|
|
+//ZZ case Iop_QSalN64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt size, imm;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports Iop_QSalNAxB with constant "
|
|
+//ZZ "second argument only\n");
|
|
+//ZZ }
|
|
+//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QSalN8x16: size = 8 | imm; break;
|
|
+//ZZ case Iop_QSalN16x8: size = 16 | imm; break;
|
|
+//ZZ case Iop_QSalN32x4: size = 32 | imm; break;
|
|
+//ZZ case Iop_QSalN64x2: size = 64 | imm; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
|
|
+//ZZ res, argL, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_ShrN8x16:
|
|
+//ZZ case Iop_ShrN16x8:
|
|
+//ZZ case Iop_ShrN32x4:
|
|
+//ZZ case Iop_ShrN64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg tmp = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegI(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_ShrN8x16: size = 0; break;
|
|
+//ZZ case Iop_ShrN16x8: size = 1; break;
|
|
+//ZZ case Iop_ShrN32x4: size = 2; break;
|
|
+//ZZ case Iop_ShrN64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
|
|
+//ZZ tmp, argR2, 0, True));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, tmp, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_ShlN8x16:
|
|
+//ZZ case Iop_ShlN16x8:
|
|
+//ZZ case Iop_ShlN32x4:
|
|
+//ZZ case Iop_ShlN64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg tmp = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_ShlN8x16: size = 0; break;
|
|
+//ZZ case Iop_ShlN16x8: size = 1; break;
|
|
+//ZZ case Iop_ShlN32x4: size = 2; break;
|
|
+//ZZ case Iop_ShlN64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
|
|
+//ZZ res, argL, tmp, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_SarN8x16:
|
|
+//ZZ case Iop_SarN16x8:
|
|
+//ZZ case Iop_SarN32x4:
|
|
+//ZZ case Iop_SarN64x2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg tmp = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
+//ZZ HReg argR2 = newVRegI(env);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_SarN8x16: size = 0; break;
|
|
+//ZZ case Iop_SarN16x8: size = 1; break;
|
|
+//ZZ case Iop_SarN32x4: size = 2; break;
|
|
+//ZZ case Iop_SarN64x2: size = 3; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
|
|
+//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
|
|
+//ZZ res, argL, tmp, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGT8Ux16:
|
|
+//ZZ case Iop_CmpGT16Ux8:
|
|
+//ZZ case Iop_CmpGT32Ux4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CmpGT8Ux16: size = 0; break;
|
|
+//ZZ case Iop_CmpGT16Ux8: size = 1; break;
|
|
+//ZZ case Iop_CmpGT32Ux4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGT8Sx16:
|
|
+//ZZ case Iop_CmpGT16Sx8:
|
|
+//ZZ case Iop_CmpGT32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CmpGT8Sx16: size = 0; break;
|
|
+//ZZ case Iop_CmpGT16Sx8: size = 1; break;
|
|
+//ZZ case Iop_CmpGT32Sx4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpEQ8x16:
|
|
+//ZZ case Iop_CmpEQ16x8:
|
|
+//ZZ case Iop_CmpEQ32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size;
|
|
+//ZZ switch (e->Iex.Binop.op) {
|
|
+//ZZ case Iop_CmpEQ8x16: size = 0; break;
|
|
+//ZZ case Iop_CmpEQ16x8: size = 1; break;
|
|
+//ZZ case Iop_CmpEQ32x4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Mul8x16:
|
|
+//ZZ case Iop_Mul16x8:
|
|
+//ZZ case Iop_Mul32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Mul8x16: size = 0; break;
|
|
+//ZZ case Iop_Mul16x8: size = 1; break;
|
|
+//ZZ case Iop_Mul32x4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Mul32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Mull8Ux8:
|
|
+//ZZ case Iop_Mull16Ux4:
|
|
+//ZZ case Iop_Mull32Ux2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Mull8Ux8: size = 0; break;
|
|
+//ZZ case Iop_Mull16Ux4: size = 1; break;
|
|
+//ZZ case Iop_Mull32Ux2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_Mull8Sx8:
|
|
+//ZZ case Iop_Mull16Sx4:
|
|
+//ZZ case Iop_Mull32Sx2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_Mull8Sx8: size = 0; break;
|
|
+//ZZ case Iop_Mull16Sx4: size = 1; break;
|
|
+//ZZ case Iop_Mull32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_QDMulHi16Sx8:
|
|
+//ZZ case Iop_QDMulHi32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QDMulHi16Sx8: size = 1; break;
|
|
+//ZZ case Iop_QDMulHi32Sx4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_QRDMulHi16Sx8:
|
|
+//ZZ case Iop_QRDMulHi32Sx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QRDMulHi16Sx8: size = 1; break;
|
|
+//ZZ case Iop_QRDMulHi32Sx4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_QDMulLong16Sx4:
|
|
+//ZZ case Iop_QDMulLong32Sx2: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_QDMulLong16Sx4: size = 1; break;
|
|
+//ZZ case Iop_QDMulLong32Sx2: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PolynomialMul8x16: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Max32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_Min32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMax32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_PwMin32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGT32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpGE32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_CmpEQ32Fx4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
|
|
+//ZZ res, argL, argR, 2, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ case Iop_PolynomialMull8x8: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ case Iop_F32ToFixed32Ux4_RZ:
|
|
+//ZZ case Iop_F32ToFixed32Sx4_RZ:
|
|
+//ZZ case Iop_Fixed32UToF32x4_RN:
|
|
+//ZZ case Iop_Fixed32SToF32x4_RN: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ ARMNeonUnOp op;
|
|
+//ZZ UInt imm6;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
|
|
+//ZZ "second argument less than 33 only\n");
|
|
+//ZZ }
|
|
+//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ vassert(imm6 <= 32 && imm6 > 0);
|
|
+//ZZ imm6 = 64 - imm6;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
|
|
+//ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
|
|
+//ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
|
|
+//ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ /*
|
|
+//ZZ FIXME remove if not used
|
|
+//ZZ case Iop_VDup8x16:
|
|
+//ZZ case Iop_VDup16x8:
|
|
+//ZZ case Iop_VDup32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
|
|
+//ZZ UInt imm4;
|
|
+//ZZ UInt index;
|
|
+//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
|
|
+//ZZ vpanic("ARM supports Iop_VDup with constant "
|
|
+//ZZ "second argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
|
|
+//ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
|
|
+//ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ if (imm4 >= 16) {
|
|
+//ZZ vpanic("ARM supports Iop_VDup with constant "
|
|
+//ZZ "second argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
|
|
+//ZZ res, argL, imm4, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ */
|
|
+//ZZ case Iop_PwAdd8x16:
|
|
+//ZZ case Iop_PwAdd16x8:
|
|
+//ZZ case Iop_PwAdd32x4: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
|
|
+//ZZ UInt size = 0;
|
|
+//ZZ switch(e->Iex.Binop.op) {
|
|
+//ZZ case Iop_PwAdd8x16: size = 0; break;
|
|
+//ZZ case Iop_PwAdd16x8: size = 1; break;
|
|
+//ZZ case Iop_PwAdd32x4: size = 2; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
|
|
+//ZZ res, argL, argR, size, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+ /* ... */
|
|
+ default:
|
|
+ break;
|
|
+ } /* switch on the binop */
|
|
+ } /* if (e->tag == Iex_Binop) */
|
|
+
|
|
+//ZZ if (e->tag == Iex_Triop) {
|
|
+//ZZ IRTriop *triop = e->Iex.Triop.details;
|
|
+//ZZ
|
|
+//ZZ switch (triop->op) {
|
|
+//ZZ case Iop_ExtractV128: {
|
|
+//ZZ HReg res = newVRegV(env);
|
|
+//ZZ HReg argL = iselNeonExpr(env, triop->arg1);
|
|
+//ZZ HReg argR = iselNeonExpr(env, triop->arg2);
|
|
+//ZZ UInt imm4;
|
|
+//ZZ if (triop->arg3->tag != Iex_Const ||
|
|
+//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
|
|
+//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
|
|
+//ZZ "third argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
|
|
+//ZZ if (imm4 >= 16) {
|
|
+//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
|
|
+//ZZ "third argument less than 16 only\n");
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
|
|
+//ZZ res, argL, argR, imm4, True));
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (e->tag == Iex_ITE) { // VFD
|
|
+//ZZ ARMCondCode cc;
|
|
+//ZZ HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
|
|
+//ZZ HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
|
|
+//ZZ HReg dst = newVRegV(env);
|
|
+//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
|
|
+//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
|
|
+//ZZ addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
|
|
+//ZZ return dst;
|
|
+//ZZ }
|
|
+
|
|
+ v128_expr_bad:
|
|
+ ppIRExpr(e);
|
|
+ vpanic("iselV128Expr_wrk");
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Floating point expressions (64 bit) ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Compute the 64-bit floating point value of |e| into a register and
+   return that register.  This is the checked entry point: the real
+   work is done by iselDblExpr_wrk, and the result is then asserted
+   to be a virtual register of class HRcFlt64.  The returned register
+   must not be changed by subsequent code emitted by the caller. */
+
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+{
+   HReg res = iselDblExpr_wrk( env, e );
+#  if 0
+   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+#  endif
+   /* Sanity-check what the worker handed back. */
+   vassert(hregClass(res) == HRcFlt64);
+   vassert(hregIsVirtual(res));
+   return res;
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY -- go through iselDblExpr, which also
+   asserts the class and virtual-ness of the returned register.
+
+   Worker for iselDblExpr.  |e| must be non-NULL and of type Ity_F64
+   (both asserted).  The forms handled are:
+
+     * Iex_RdTmp
+     * Iex_Const with an Ico_F64i constant
+     * little-endian Iex_Load
+     * Iex_Get at an 8-aligned offset in the range 0 .. 32767
+     * Iex_Unop:  NegF64, AbsF64, F32toF64, I32UtoF64, I32StoF64
+     * Iex_Binop: RoundF64toInt, SqrtF64, I64StoF64, I64UtoF64
+                  (arg1 is handed to set_FPCR_rounding_mode, arg2 is
+                  the operand)
+     * Iex_Triop: DivF64, MulF64, SubF64, AddF64
+                  (arg1 is handed to set_FPCR_rounding_mode, arg2 and
+                  arg3 are the operands)
+
+   Any other expression is printed with ppIRExpr and then causes a
+   vpanic. */
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+   /* Check |e| before it is handed to typeOfIRExpr, not after. */
+   vassert(e);
+   IRType ty = typeOfIRExpr(env->type_env,e);
+   vassert(ty == Ity_F64);
+
+   if (e->tag == Iex_RdTmp) {
+      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+   }
+
+   if (e->tag == Iex_Const) {
+      IRConst* con = e->Iex.Const.con;
+      if (con->tag == Ico_F64i) {
+         /* Put the constant's 64-bit image in an integer vreg with
+            Imm64, then transfer it to a D vreg with VDfromX. */
+         HReg src = newVRegI(env);
+         HReg dst = newVRegD(env);
+         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
+         addInstr(env, ARM64Instr_VDfromX(dst, src));
+         return dst;
+      }
+   }
+
+   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+      vassert(e->Iex.Load.ty == Ity_F64);
+      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
+      HReg res  = newVRegD(env);
+      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
+      return res;
+   }
+
+   if (e->tag == Iex_Get) {
+      Int offs = e->Iex.Get.offset;
+      /* Only 8-aligned, non-negative offsets below 32768 are handled
+         here; anything else falls through to the panic below. */
+      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
+         HReg rD = newVRegD(env);
+         HReg rN = get_baseblock_register();
+         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
+         return rD;
+      }
+   }
+
+   if (e->tag == Iex_Unop) {
+      switch (e->Iex.Unop.op) {
+//ZZ          case Iop_ReinterpI64asF64: {
+//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
+//ZZ                return iselNeon64Expr(env, e->Iex.Unop.arg);
+//ZZ             } else {
+//ZZ                HReg srcHi, srcLo;
+//ZZ                HReg dst = newVRegD(env);
+//ZZ                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
+//ZZ                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
+//ZZ                return dst;
+//ZZ             }
+//ZZ          }
+         case Iop_NegF64: {
+            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
+            return dst;
+         }
+         case Iop_AbsF64: {
+            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
+            return dst;
+         }
+         case Iop_F32toF64: {
+            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
+            return dst;
+         }
+         case Iop_I32UtoF64:
+         case Iop_I32StoF64: {
+            /* Rounding mode is not involved here, since the
+               conversion can always be done without loss of
+               precision. */
+            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
+            HReg dst   = newVRegD(env);
+            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
+            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
+            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
+            return dst;
+         }
+         default:
+            break;
+      }
+   }
+
+   if (e->tag == Iex_Binop) {
+      switch (e->Iex.Binop.op) {
+         case Iop_RoundF64toInt: {
+            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
+            HReg dst = newVRegD(env);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
+            return dst;
+         }
+         case Iop_SqrtF64: {
+            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
+            HReg dst = newVRegD(env);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
+            return dst;
+         }
+         case Iop_I64StoF64:
+         case Iop_I64UtoF64: {
+            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
+                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
+            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            HReg dstD = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstD, srcI));
+            return dstD;
+         }
+         default:
+            break;
+      }
+   }
+
+   if (e->tag == Iex_Triop) {
+      IRTriop*     triop = e->Iex.Triop.details;
+      ARM64FpBinOp dblop = ARM64fpb_INVALID;
+      switch (triop->op) {
+         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
+         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
+         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
+         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
+         default: break;
+      }
+      /* dblop stays INVALID for any triop not listed above, in which
+         case we fall through to the panic. */
+      if (dblop != ARM64fpb_INVALID) {
+         HReg argL = iselDblExpr(env, triop->arg2);
+         HReg argR = iselDblExpr(env, triop->arg3);
+         HReg dst  = newVRegD(env);
+         set_FPCR_rounding_mode(env, triop->arg1);
+         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
+         return dst;
+      }
+   }
+
+//ZZ    if (e->tag == Iex_ITE) { // VFD
+//ZZ       if (ty == Ity_F64
+//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
+//ZZ          HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
+//ZZ          HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
+//ZZ          HReg dst = newVRegD(env);
+//ZZ          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
+//ZZ          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
+//ZZ          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
+//ZZ          return dst;
+//ZZ       }
+//ZZ    }
+
+   /* No case matched: show the offending expression and give up. */
+   ppIRExpr(e);
+   vpanic("iselDblExpr_wrk");
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Floating point expressions (32 bit) ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Compute the 32-bit floating point value of |e| into a register and
+   return that register.  This is the checked entry point: the real
+   work is done by iselFltExpr_wrk, and the result is then asserted
+   to be a virtual register of class HRcFlt64.  Note the class:
+   values are generated into HRcFlt64 registers despite the values
+   themselves being Ity_F32s.  The returned register must not be
+   changed by subsequent code emitted by the caller. */
+
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
+{
+   HReg res = iselFltExpr_wrk( env, e );
+#  if 0
+   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+#  endif
+   /* Sanity-check what the worker handed back. */
+   vassert(hregClass(res) == HRcFlt64);
+   vassert(hregIsVirtual(res));
+   return res;
+}
|
|
+
|
|
+/* DO NOT CALL THIS DIRECTLY -- go through iselFltExpr, which also
+   asserts the class and virtual-ness of the returned register.
+
+   Worker for iselFltExpr.  |e| must be non-NULL and of type Ity_F32
+   (both asserted).  Results are placed in registers obtained from
+   newVRegD.  The forms handled are:
+
+     * Iex_RdTmp
+     * Iex_Const, but only an Ico_F32i constant equal to zero
+     * Iex_Get at a 4-aligned offset in the range 0 .. 16383
+     * Iex_Unop:  NegF32, AbsF32
+     * Iex_Binop: RoundF32toInt, SqrtF32, F64toF32, I32StoF32,
+                  I64UtoF32, I64StoF32
+                  (arg1 is handed to set_FPCR_rounding_mode, arg2 is
+                  the operand)
+     * Iex_Triop: DivF32, MulF32, SubF32, AddF32
+                  (arg1 is handed to set_FPCR_rounding_mode, arg2 and
+                  arg3 are the operands)
+
+   Any other expression is printed with ppIRExpr and then causes a
+   vpanic. */
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+   /* Check |e| before it is handed to typeOfIRExpr, not after. */
+   vassert(e);
+   IRType ty = typeOfIRExpr(env->type_env,e);
+   vassert(ty == Ity_F32);
+
+   if (e->tag == Iex_RdTmp) {
+      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+   }
+
+   if (e->tag == Iex_Const) {
+      /* This is something of a kludge.  Since a 32 bit floating point
+         zero is just .. all zeroes, just create a 64 bit zero word
+         and transfer it.  This avoids having to create a SfromW
+         instruction for this specific case.  Non-zero constants are
+         not handled and fall through to the panic below. */
+      IRConst* con = e->Iex.Const.con;
+      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
+         HReg src = newVRegI(env);
+         HReg dst = newVRegD(env);
+         addInstr(env, ARM64Instr_Imm64(src, 0));
+         addInstr(env, ARM64Instr_VDfromX(dst, src));
+         return dst;
+      }
+   }
+
+//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+//ZZ       ARMAModeV* am;
+//ZZ       HReg res = newVRegF(env);
+//ZZ       vassert(e->Iex.Load.ty == Ity_F32);
+//ZZ       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
+//ZZ       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
+//ZZ       return res;
+//ZZ    }
+
+   if (e->tag == Iex_Get) {
+      Int offs = e->Iex.Get.offset;
+      /* Only 4-aligned, non-negative offsets below 16384 are handled
+         here; anything else falls through to the panic below. */
+      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
+         HReg rD = newVRegD(env);
+         HReg rN = get_baseblock_register();
+         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
+         return rD;
+      }
+   }
+
+   if (e->tag == Iex_Unop) {
+      switch (e->Iex.Unop.op) {
+//ZZ          case Iop_ReinterpI32asF32: {
+//ZZ             HReg dst = newVRegF(env);
+//ZZ             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+//ZZ             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
+//ZZ             return dst;
+//ZZ          }
+         case Iop_NegF32: {
+            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
+            return dst;
+         }
+         case Iop_AbsF32: {
+            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
+            return dst;
+         }
+         default:
+            break;
+      }
+   }
+
+   if (e->tag == Iex_Binop) {
+      switch (e->Iex.Binop.op) {
+         case Iop_RoundF32toInt: {
+            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+            HReg dst = newVRegD(env);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
+            return dst;
+         }
+         case Iop_SqrtF32: {
+            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+            HReg dst = newVRegD(env);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
+            return dst;
+         }
+         case Iop_F64toF32: {
+            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            HReg dstS = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
+            return dstS;
+         }
+         case Iop_I32StoF32:
+         case Iop_I64UtoF32:
+         case Iop_I64StoF32: {
+            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
+            switch (e->Iex.Binop.op) {
+               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
+               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
+               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
+               default: vassert(0);
+            }
+            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            HReg dstS = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
+            return dstS;
+         }
+         default:
+            break;
+      }
+   }
+
+   if (e->tag == Iex_Triop) {
+      IRTriop*     triop = e->Iex.Triop.details;
+      ARM64FpBinOp sglop = ARM64fpb_INVALID;
+      switch (triop->op) {
+         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
+         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
+         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
+         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
+         default: break;
+      }
+      /* sglop stays INVALID for any triop not listed above, in which
+         case we fall through to the panic. */
+      if (sglop != ARM64fpb_INVALID) {
+         HReg argL = iselFltExpr(env, triop->arg2);
+         HReg argR = iselFltExpr(env, triop->arg3);
+         HReg dst  = newVRegD(env);
+         set_FPCR_rounding_mode(env, triop->arg1);
+         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
+         return dst;
+      }
+   }
+
+//ZZ
+//ZZ    if (e->tag == Iex_ITE) { // VFD
+//ZZ       if (ty == Ity_F32
+//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
+//ZZ          ARMCondCode cc;
+//ZZ          HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
+//ZZ          HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
+//ZZ          HReg dst = newVRegF(env);
+//ZZ          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
+//ZZ          cc = iselCondCode(env, e->Iex.ITE.cond);
+//ZZ          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
+//ZZ          return dst;
+//ZZ       }
+//ZZ    }
+
+   /* No case matched: show the offending expression and give up. */
+   ppIRExpr(e);
+   vpanic("iselFltExpr_wrk");
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Statements ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+static void iselStmt ( ISelEnv* env, IRStmt* stmt )
|
|
+{
|
|
+ if (vex_traceflags & VEX_TRACE_VCODE) {
|
|
+ vex_printf("\n-- ");
|
|
+ ppIRStmt(stmt);
|
|
+ vex_printf("\n");
|
|
+ }
|
|
+ switch (stmt->tag) {
|
|
+
|
|
+ /* --------- STORE --------- */
|
|
+ /* little-endian write to memory */
|
|
+ case Ist_Store: {
|
|
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
|
|
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
|
|
+ IREndness end = stmt->Ist.Store.end;
|
|
+
|
|
+ if (tya != Ity_I64 || end != Iend_LE)
|
|
+ goto stmt_fail;
|
|
+
|
|
+ if (tyd == Ity_I64) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
|
|
+ ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
|
|
+ addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_I32) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
|
|
+ ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
|
|
+ addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_I16) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
|
|
+ ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
|
|
+ addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_I8) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
|
|
+ ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
|
|
+ addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_V128) {
|
|
+ HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
|
|
+ HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
|
|
+ addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_F64) {
|
|
+ HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
|
|
+ HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
|
|
+ addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
|
|
+ return;
|
|
+ }
|
|
+
|
|
+//ZZ if (tyd == Ity_I16) {
|
|
+//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
|
|
+//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
|
|
+//ZZ addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
|
|
+//ZZ False/*!isLoad*/,
|
|
+//ZZ False/*!isSignedLoad*/, rD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (tyd == Ity_I8) {
|
|
+//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
|
|
+//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
|
|
+//ZZ addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (tyd == Ity_I64) {
|
|
+//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
|
|
+//ZZ HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
|
|
+//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
|
|
+//ZZ addInstr(env, ARMInstr_NLdStD(False, dD, am));
|
|
+//ZZ } else {
|
|
+//ZZ HReg rDhi, rDlo, rA;
|
|
+//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
|
|
+//ZZ rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
|
|
+//ZZ ARMAMode1_RI(rA,4)));
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
|
|
+//ZZ ARMAMode1_RI(rA,0)));
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (tyd == Ity_F64) {
|
|
+//ZZ HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
|
|
+//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
|
|
+//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (tyd == Ity_F32) {
|
|
+//ZZ HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
|
|
+//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
|
|
+//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (tyd == Ity_V128) {
|
|
+//ZZ HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
|
|
+//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
|
|
+//ZZ addInstr(env, ARMInstr_NLdStQ(False, qD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+
|
|
+ break;
|
|
+ }
|
|
+
|
|
+//ZZ /* --------- CONDITIONAL STORE --------- */
|
|
+//ZZ /* conditional little-endian write to memory */
|
|
+//ZZ case Ist_StoreG: {
|
|
+//ZZ IRStoreG* sg = stmt->Ist.StoreG.details;
|
|
+//ZZ IRType tya = typeOfIRExpr(env->type_env, sg->addr);
|
|
+//ZZ IRType tyd = typeOfIRExpr(env->type_env, sg->data);
|
|
+//ZZ IREndness end = sg->end;
|
|
+//ZZ
|
|
+//ZZ if (tya != Ity_I32 || end != Iend_LE)
|
|
+//ZZ goto stmt_fail;
|
|
+//ZZ
|
|
+//ZZ switch (tyd) {
|
|
+//ZZ case Ity_I8:
|
|
+//ZZ case Ity_I32: {
|
|
+//ZZ HReg rD = iselIntExpr_R(env, sg->data);
|
|
+//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
|
|
+//ZZ ARMCondCode cc = iselCondCode(env, sg->guard);
|
|
+//ZZ addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
|
|
+//ZZ (cc, False/*!isLoad*/, rD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ case Ity_I16: {
|
|
+//ZZ HReg rD = iselIntExpr_R(env, sg->data);
|
|
+//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
|
|
+//ZZ ARMCondCode cc = iselCondCode(env, sg->guard);
|
|
+//ZZ addInstr(env, ARMInstr_LdSt16(cc,
|
|
+//ZZ False/*!isLoad*/,
|
|
+//ZZ False/*!isSignedLoad*/, rD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- CONDITIONAL LOAD --------- */
|
|
+//ZZ /* conditional little-endian load from memory */
|
|
+//ZZ case Ist_LoadG: {
|
|
+//ZZ IRLoadG* lg = stmt->Ist.LoadG.details;
|
|
+//ZZ IRType tya = typeOfIRExpr(env->type_env, lg->addr);
|
|
+//ZZ IREndness end = lg->end;
|
|
+//ZZ
|
|
+//ZZ if (tya != Ity_I32 || end != Iend_LE)
|
|
+//ZZ goto stmt_fail;
|
|
+//ZZ
|
|
+//ZZ switch (lg->cvt) {
|
|
+//ZZ case ILGop_8Uto32:
|
|
+//ZZ case ILGop_Ident32: {
|
|
+//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt);
|
|
+//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
|
|
+//ZZ HReg rD = lookupIRTemp(env, lg->dst);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt));
|
|
+//ZZ ARMCondCode cc = iselCondCode(env, lg->guard);
|
|
+//ZZ addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
|
|
+//ZZ : ARMInstr_LdSt8U)
|
|
+//ZZ (cc, True/*isLoad*/, rD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ case ILGop_16Sto32:
|
|
+//ZZ case ILGop_16Uto32:
|
|
+//ZZ case ILGop_8Sto32: {
|
|
+//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt);
|
|
+//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
|
|
+//ZZ HReg rD = lookupIRTemp(env, lg->dst);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt));
|
|
+//ZZ ARMCondCode cc = iselCondCode(env, lg->guard);
|
|
+//ZZ if (lg->cvt == ILGop_8Sto32) {
|
|
+//ZZ addInstr(env, ARMInstr_Ld8S(cc, rD, am));
|
|
+//ZZ } else {
|
|
+//ZZ vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
|
|
+//ZZ Bool sx = lg->cvt == ILGop_16Sto32;
|
|
+//ZZ addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+
|
|
+ /* --------- PUT --------- */
|
|
+ /* write guest state, fixed offset */
|
|
+ case Ist_Put: {
|
|
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
|
|
+ UInt offs = (UInt)stmt->Ist.Put.offset;
|
|
+ if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
|
|
+ ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
|
|
+ addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
|
|
+ ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
|
|
+ addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
|
|
+ ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
|
|
+ addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_I8 && offs < (1<<12)) {
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
|
|
+ ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
|
|
+ addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_V128 && offs < (1<<12)) {
|
|
+ HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
|
|
+ HReg addr = mk_baseblock_128bit_access_addr(env, offs);
|
|
+ addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
|
|
+ HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
|
|
+ HReg bbp = get_baseblock_register();
|
|
+ addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
|
|
+ return;
|
|
+ }
|
|
+ if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
|
|
+ HReg dD = iselFltExpr(env, stmt->Ist.Put.data);
|
|
+ HReg bbp = get_baseblock_register();
|
|
+ addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
|
|
+ return;
|
|
+ }
|
|
+
|
|
+//ZZ if (tyd == Ity_I64) {
|
|
+//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
|
|
+//ZZ HReg addr = newVRegI(env);
|
|
+//ZZ HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
|
|
+//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
|
|
+//ZZ stmt->Ist.Put.offset));
|
|
+//ZZ addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
|
|
+//ZZ } else {
|
|
+//ZZ HReg rDhi, rDlo;
|
|
+//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
|
|
+//ZZ stmt->Ist.Put.offset + 0);
|
|
+//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
|
|
+//ZZ stmt->Ist.Put.offset + 4);
|
|
+//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
|
|
+//ZZ rDhi, am4));
|
|
+//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
|
|
+//ZZ rDlo, am0));
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (tyd == Ity_F64) {
|
|
+//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4.
|
|
+//ZZ // In which case we'll have to generate more longwinded code.
|
|
+//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
|
|
+//ZZ HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
|
|
+//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (tyd == Ity_F32) {
|
|
+//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4.
|
|
+//ZZ // In which case we'll have to generate more longwinded code.
|
|
+//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
|
|
+//ZZ HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
|
|
+//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* --------- TMP --------- */
|
|
+ /* assign value to temporary */
|
|
+ case Ist_WrTmp: {
|
|
+ IRTemp tmp = stmt->Ist.WrTmp.tmp;
|
|
+ IRType ty = typeOfIRTemp(env->type_env, tmp);
|
|
+
|
|
+ if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
|
|
+ /* We could do a lot better here. But for the time being: */
|
|
+ HReg dst = lookupIRTemp(env, tmp);
|
|
+ HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
|
|
+ addInstr(env, ARM64Instr_MovI(dst, rD));
|
|
+ return;
|
|
+ }
|
|
+ if (ty == Ity_I1) {
|
|
+ /* Here, we are generating an I1 value into a 64 bit register.
|
|
+ Make sure the value in the register is only zero or one,
|
|
+ but no other. This allows optimisation of the
|
|
+ 1Uto64(tmp:I1) case, by making it simply a copy of the
|
|
+ register holding 'tmp'. The point being that the value in
|
|
+ the register holding 'tmp' can only have been created
|
|
+ here. LATER: that seems dangerous; safer to do 'tmp & 1'
|
|
+ in that case. Also, could do this just with a single CINC
|
|
+ insn. */
|
|
+ HReg zero = newVRegI(env);
|
|
+ HReg one = newVRegI(env);
|
|
+ HReg dst = lookupIRTemp(env, tmp);
|
|
+ addInstr(env, ARM64Instr_Imm64(zero, 0));
|
|
+ addInstr(env, ARM64Instr_Imm64(one, 1));
|
|
+ ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
|
|
+ addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
|
|
+ return;
|
|
+ }
|
|
+ if (ty == Ity_F64) {
|
|
+ HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
|
|
+ HReg dst = lookupIRTemp(env, tmp);
|
|
+ addInstr(env, ARM64Instr_VMov(8, dst, src));
|
|
+ return;
|
|
+ }
|
|
+ if (ty == Ity_F32) {
|
|
+ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
|
|
+ HReg dst = lookupIRTemp(env, tmp);
|
|
+ addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
|
|
+ return;
|
|
+ }
|
|
+ if (ty == Ity_V128) {
|
|
+ HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
|
|
+ HReg dst = lookupIRTemp(env, tmp);
|
|
+ addInstr(env, ARM64Instr_VMov(16, dst, src));
|
|
+ return;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* --------- Call to DIRTY helper --------- */
|
|
+ /* call complex ("dirty") helper function */
|
|
+ case Ist_Dirty: {
|
|
+ IRDirty* d = stmt->Ist.Dirty.details;
|
|
+
|
|
+ /* Figure out the return type, if any. */
|
|
+ IRType retty = Ity_INVALID;
|
|
+ if (d->tmp != IRTemp_INVALID)
|
|
+ retty = typeOfIRTemp(env->type_env, d->tmp);
|
|
+
|
|
+ Bool retty_ok = False;
|
|
+ switch (retty) {
|
|
+ case Ity_INVALID: /* function doesn't return anything */
|
|
+ case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
|
|
+ case Ity_V128:
|
|
+ retty_ok = True; break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ if (!retty_ok)
|
|
+ break; /* will go to stmt_fail: */
|
|
+
|
|
+ /* Marshal args, do the call, and set the return value to 0x555..555
|
|
+ if this is a conditional call that returns a value and the
|
|
+ call is skipped. */
|
|
+ UInt addToSp = 0;
|
|
+ RetLoc rloc = mk_RetLoc_INVALID();
|
|
+ doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
|
|
+ vassert(is_sane_RetLoc(rloc));
|
|
+
|
|
+ /* Now figure out what to do with the returned value, if any. */
|
|
+ switch (retty) {
|
|
+ case Ity_INVALID: {
|
|
+ /* No return value. Nothing to do. */
|
|
+ vassert(d->tmp == IRTemp_INVALID);
|
|
+ vassert(rloc.pri == RLPri_None);
|
|
+ vassert(addToSp == 0);
|
|
+ return;
|
|
+ }
|
|
+ case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
|
|
+ vassert(rloc.pri == RLPri_Int);
|
|
+ vassert(addToSp == 0);
|
|
+ /* The returned value is in x0. Park it in the register
|
|
+ associated with tmp. */
|
|
+ HReg dst = lookupIRTemp(env, d->tmp);
|
|
+ addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
|
|
+ return;
|
|
+ }
|
|
+ case Ity_V128: {
|
|
+ /* The returned value is on the stack, and rloc tells
|
|
+ us where. Fish it off the stack and then move the
|
|
+ stack pointer upwards to clear it, as directed by
|
|
+ doHelperCall. */
|
|
+ vassert(rloc.pri == RLPri_V128SpRel);
|
|
+ vassert(rloc.spOff < 256); // stay sane
|
|
+ vassert(addToSp >= 16); // ditto
|
|
+ vassert(addToSp < 256); // ditto
|
|
+ HReg dst = lookupIRTemp(env, d->tmp);
|
|
+ HReg tmp = newVRegI(env); // the address of the returned value
|
|
+ addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
|
|
+ addInstr(env, ARM64Instr_Arith(tmp, tmp,
|
|
+ ARM64RIA_I12((UShort)rloc.spOff, 0),
|
|
+ True/*isAdd*/ ));
|
|
+ addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
|
|
+ addInstr(env, ARM64Instr_AddToSP(addToSp));
|
|
+ return;
|
|
+ }
|
|
+ default:
|
|
+ /*NOTREACHED*/
|
|
+ vassert(0);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+//ZZ /* --------- Load Linked and Store Conditional --------- */
|
|
+//ZZ case Ist_LLSC: {
|
|
+//ZZ if (stmt->Ist.LLSC.storedata == NULL) {
|
|
+//ZZ /* LL */
|
|
+//ZZ IRTemp res = stmt->Ist.LLSC.result;
|
|
+//ZZ IRType ty = typeOfIRTemp(env->type_env, res);
|
|
+//ZZ if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
|
|
+//ZZ Int szB = 0;
|
|
+//ZZ HReg r_dst = lookupIRTemp(env, res);
|
|
+//ZZ HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
|
|
+//ZZ switch (ty) {
|
|
+//ZZ case Ity_I8: szB = 1; break;
|
|
+//ZZ case Ity_I16: szB = 2; break;
|
|
+//ZZ case Ity_I32: szB = 4; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
|
|
+//ZZ addInstr(env, ARMInstr_LdrEX(szB));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ if (ty == Ity_I64) {
|
|
+//ZZ HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
|
|
+//ZZ addInstr(env, ARMInstr_LdrEX(8));
|
|
+//ZZ /* Result is in r3:r2. On a non-NEON capable CPU, we must
|
|
+//ZZ move it into a result register pair. On a NEON capable
|
|
+//ZZ CPU, the result register will be a 64 bit NEON
|
|
+//ZZ register, so we must move it there instead. */
|
|
+//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
|
|
+//ZZ HReg dst = lookupIRTemp(env, res);
|
|
+//ZZ addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
|
|
+//ZZ hregARM_R2()));
|
|
+//ZZ } else {
|
|
+//ZZ HReg r_dst_hi, r_dst_lo;
|
|
+//ZZ lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
|
|
+//ZZ }
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ /*NOTREACHED*/
|
|
+//ZZ vassert(0);
|
|
+//ZZ } else {
|
|
+//ZZ /* SC */
|
|
+//ZZ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
|
|
+//ZZ if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
|
|
+//ZZ Int szB = 0;
|
|
+//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
|
|
+//ZZ HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
|
|
+//ZZ switch (tyd) {
|
|
+//ZZ case Ity_I8: szB = 1; break;
|
|
+//ZZ case Ity_I16: szB = 2; break;
|
|
+//ZZ case Ity_I32: szB = 4; break;
|
|
+//ZZ default: vassert(0);
|
|
+//ZZ }
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
|
|
+//ZZ addInstr(env, ARMInstr_StrEX(szB));
|
|
+//ZZ } else {
|
|
+//ZZ vassert(tyd == Ity_I64);
|
|
+//ZZ /* This is really ugly. There is no is/is-not NEON
|
|
+//ZZ decision akin to the case for LL, because iselInt64Expr
|
|
+//ZZ fudges this for us, and always gets the result into two
|
|
+//ZZ GPRs even if this means moving it from a NEON
|
|
+//ZZ register. */
|
|
+//ZZ HReg rDhi, rDlo;
|
|
+//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
|
|
+//ZZ HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
|
|
+//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
|
|
+//ZZ addInstr(env, ARMInstr_StrEX(8));
|
|
+//ZZ }
|
|
+//ZZ /* now r0 is 1 if failed, 0 if success. Change to IR
|
|
+//ZZ conventions (0 is fail, 1 is success). Also transfer
|
|
+//ZZ result to r_res. */
|
|
+//ZZ IRTemp res = stmt->Ist.LLSC.result;
|
|
+//ZZ IRType ty = typeOfIRTemp(env->type_env, res);
|
|
+//ZZ HReg r_res = lookupIRTemp(env, res);
|
|
+//ZZ ARMRI84* one = ARMRI84_I84(1,0);
|
|
+//ZZ vassert(ty == Ity_I1);
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
|
|
+//ZZ /* And be conservative -- mask off all but the lowest bit */
|
|
+//ZZ addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- MEM FENCE --------- */
|
|
+//ZZ case Ist_MBE:
|
|
+//ZZ switch (stmt->Ist.MBE.event) {
|
|
+//ZZ case Imbe_Fence:
|
|
+//ZZ addInstr(env, ARMInstr_MFence());
|
|
+//ZZ return;
|
|
+//ZZ case Imbe_CancelReservation:
|
|
+//ZZ addInstr(env, ARMInstr_CLREX());
|
|
+//ZZ return;
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+//ZZ break;
|
|
+
|
|
+ /* --------- INSTR MARK --------- */
|
|
+ /* Doesn't generate any executable code ... */
|
|
+ case Ist_IMark:
|
|
+ return;
|
|
+
|
|
+ /* --------- NO-OP --------- */
|
|
+ case Ist_NoOp:
|
|
+ return;
|
|
+
|
|
+ /* --------- EXIT --------- */
|
|
+ case Ist_Exit: {
|
|
+ if (stmt->Ist.Exit.dst->tag != Ico_U64)
|
|
+ vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
|
|
+
|
|
+ ARM64CondCode cc
|
|
+ = iselCondCode(env, stmt->Ist.Exit.guard);
|
|
+ ARM64AMode* amPC
|
|
+ = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
|
|
+
|
|
+
|
|
+ /* Case: boring transfer to known address */
|
|
+ if (stmt->Ist.Exit.jk == Ijk_Boring
|
|
+ /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
|
|
+ /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
|
|
+ if (env->chainingAllowed) {
|
|
+ /* .. almost always true .. */
|
|
+ /* Skip the event check at the dst if this is a forwards
|
|
+ edge. */
|
|
+ Bool toFastEP
|
|
+ = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
|
|
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
|
|
+ addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
|
|
+ amPC, cc, toFastEP));
|
|
+ } else {
|
|
+ /* .. very occasionally .. */
|
|
+ /* We can't use chaining, so ask for an assisted transfer,
|
|
+ as that's the only alternative that is allowable. */
|
|
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
|
|
+ addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+
|
|
+//ZZ /* Case: assisted transfer to arbitrary address */
|
|
+//ZZ switch (stmt->Ist.Exit.jk) {
|
|
+//ZZ /* Keep this list in sync with that in iselNext below */
|
|
+//ZZ case Ijk_ClientReq:
|
|
+//ZZ case Ijk_NoDecode:
|
|
+//ZZ case Ijk_NoRedir:
|
|
+//ZZ case Ijk_Sys_syscall:
|
|
+//ZZ case Ijk_TInval:
|
|
+//ZZ case Ijk_Yield:
|
|
+//ZZ {
|
|
+//ZZ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
|
|
+//ZZ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
|
|
+//ZZ stmt->Ist.Exit.jk));
|
|
+//ZZ return;
|
|
+//ZZ }
|
|
+//ZZ default:
|
|
+//ZZ break;
|
|
+//ZZ }
|
|
+
|
|
+ /* Do we ever expect to see any other kind? */
|
|
+ goto stmt_fail;
|
|
+ }
|
|
+
|
|
+ default: break;
|
|
+ }
|
|
+ stmt_fail:
|
|
+ ppIRStmt(stmt);
|
|
+ vpanic("iselStmt");
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- ISEL: Basic block terminators (Nexts) ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+static void iselNext ( ISelEnv* env,
|
|
+ IRExpr* next, IRJumpKind jk, Int offsIP )
|
|
+{
|
|
+ if (vex_traceflags & VEX_TRACE_VCODE) {
|
|
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
|
|
+ ppIRExpr( next );
|
|
+ vex_printf( "; exit-");
|
|
+ ppIRJumpKind(jk);
|
|
+ vex_printf( "\n");
|
|
+ }
|
|
+
|
|
+ /* Case: boring transfer to known address */
|
|
+ if (next->tag == Iex_Const) {
|
|
+ IRConst* cdst = next->Iex.Const.con;
|
|
+ vassert(cdst->tag == Ico_U64);
|
|
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
|
|
+ /* Boring transfer to known address */
|
|
+ ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
|
|
+ if (env->chainingAllowed) {
|
|
+ /* .. almost always true .. */
|
|
+ /* Skip the event check at the dst if this is a forwards
|
|
+ edge. */
|
|
+ Bool toFastEP
|
|
+ = ((Addr64)cdst->Ico.U64) > env->max_ga;
|
|
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
|
|
+ addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
|
|
+ amPC, ARM64cc_AL,
|
|
+ toFastEP));
|
|
+ } else {
|
|
+ /* .. very occasionally .. */
|
|
+ /* We can't use chaining, so ask for an assisted transfer,
|
|
+ as that's the only alternative that is allowable. */
|
|
+ HReg r = iselIntExpr_R(env, next);
|
|
+ addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
|
|
+ Ijk_Boring));
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Case: call/return (==boring) transfer to any address */
|
|
+ switch (jk) {
|
|
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
|
|
+ HReg r = iselIntExpr_R(env, next);
|
|
+ ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
|
|
+ if (env->chainingAllowed) {
|
|
+ addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
|
|
+ } else {
|
|
+ addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
|
|
+ Ijk_Boring));
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* Case: assisted transfer to arbitrary address */
|
|
+ switch (jk) {
|
|
+ /* Keep this list in sync with that for Ist_Exit above */
|
|
+ case Ijk_ClientReq:
|
|
+ case Ijk_NoDecode:
|
|
+//ZZ case Ijk_NoRedir:
|
|
+ case Ijk_Sys_syscall:
|
|
+//ZZ case Ijk_TInval:
|
|
+//ZZ case Ijk_Yield:
|
|
+ {
|
|
+ HReg r = iselIntExpr_R(env, next);
|
|
+ ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
|
|
+ addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
|
|
+ return;
|
|
+ }
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
|
|
+ ppIRExpr( next );
|
|
+ vex_printf( "; exit-");
|
|
+ ppIRJumpKind(jk);
|
|
+ vex_printf( "\n");
|
|
+ vassert(0); // are we expecting any other kind?
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- Insn selector top-level ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Translate an entire SB to arm64 code. */
|
|
+
|
|
+HInstrArray* iselSB_ARM64 ( IRSB* bb,
|
|
+ VexArch arch_host,
|
|
+ VexArchInfo* archinfo_host,
|
|
+ VexAbiInfo* vbi/*UNUSED*/,
|
|
+ Int offs_Host_EvC_Counter,
|
|
+ Int offs_Host_EvC_FailAddr,
|
|
+ Bool chainingAllowed,
|
|
+ Bool addProfInc,
|
|
+ Addr64 max_ga )
|
|
+{
|
|
+ Int i, j;
|
|
+ HReg hreg, hregHI;
|
|
+ ISelEnv* env;
|
|
+ UInt hwcaps_host = archinfo_host->hwcaps;
|
|
+ ARM64AMode *amCounter, *amFailAddr;
|
|
+
|
|
+ /* sanity ... */
|
|
+ vassert(arch_host == VexArchARM64);
|
|
+
|
|
+ /* guard against unexpected space regressions */
|
|
+ vassert(sizeof(ARM64Instr) <= 32);
|
|
+
|
|
+ /* Make up an initial environment to use. */
|
|
+ env = LibVEX_Alloc(sizeof(ISelEnv));
|
|
+ env->vreg_ctr = 0;
|
|
+
|
|
+ /* Set up output code array. */
|
|
+ env->code = newHInstrArray();
|
|
+
|
|
+ /* Copy BB's type env. */
|
|
+ env->type_env = bb->tyenv;
|
|
+
|
|
+ /* Make up an IRTemp -> virtual HReg mapping. This doesn't
|
|
+ change as we go along. */
|
|
+ env->n_vregmap = bb->tyenv->types_used;
|
|
+ env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
|
|
+ env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
|
|
+
|
|
+ /* and finally ... */
|
|
+ env->chainingAllowed = chainingAllowed;
|
|
+ env->hwcaps = hwcaps_host;
|
|
+ env->previous_rm = NULL;
|
|
+ env->max_ga = max_ga;
|
|
+
|
|
+ /* For each IR temporary, allocate a suitably-kinded virtual
|
|
+ register. */
|
|
+ j = 0;
|
|
+ for (i = 0; i < env->n_vregmap; i++) {
|
|
+ hregHI = hreg = INVALID_HREG;
|
|
+ switch (bb->tyenv->types[i]) {
|
|
+ case Ity_I1:
|
|
+ case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
|
|
+ hreg = mkHReg(j++, HRcInt64, True);
|
|
+ break;
|
|
+ case Ity_I128:
|
|
+ hreg = mkHReg(j++, HRcInt64, True);
|
|
+ hregHI = mkHReg(j++, HRcInt64, True);
|
|
+ break;
|
|
+ case Ity_F32: // we'll use HRcFlt64 regs for F32 too
|
|
+ case Ity_F64:
|
|
+ hreg = mkHReg(j++, HRcFlt64, True);
|
|
+ break;
|
|
+ case Ity_V128:
|
|
+ hreg = mkHReg(j++, HRcVec128, True);
|
|
+ break;
|
|
+ default:
|
|
+ ppIRType(bb->tyenv->types[i]);
|
|
+ vpanic("iselBB(arm64): IRTemp type");
|
|
+ }
|
|
+ env->vregmap[i] = hreg;
|
|
+ env->vregmapHI[i] = hregHI;
|
|
+ }
|
|
+ env->vreg_ctr = j;
|
|
+
|
|
+ /* The very first instruction must be an event check. */
|
|
+ amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
|
|
+ amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
|
|
+ addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
|
|
+
|
|
+ /* Possibly a block counter increment (for profiling). At this
|
|
+ point we don't know the address of the counter, so just pretend
|
|
+ it is zero. It will have to be patched later, but before this
|
|
+ translation is used, by a call to LibVEX_patchProfCtr. */
|
|
+ if (addProfInc) {
|
|
+ vassert(0);
|
|
+ //addInstr(env, ARM64Instr_ProfInc());
|
|
+ }
|
|
+
|
|
+ /* Ok, finally we can iterate over the statements. */
|
|
+ for (i = 0; i < bb->stmts_used; i++)
|
|
+ iselStmt(env, bb->stmts[i]);
|
|
+
|
|
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
|
|
+
|
|
+ /* record the number of vregs we used. */
|
|
+ env->code->n_vregs = env->vreg_ctr;
|
|
+ return env->code;
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- end host_arm64_isel.c ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
Index: VEX/priv/guest_arm64_toIR.c
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ VEX/priv/guest_arm64_toIR.c
|
|
@@ -0,0 +1,5087 @@
|
|
+/* -*- mode: C; c-basic-offset: 3; -*- */
|
|
+
|
|
+/*--------------------------------------------------------------------*/
|
|
+/*--- begin guest_arm64_toIR.c ---*/
|
|
+/*--------------------------------------------------------------------*/
|
|
+
|
|
+/*
|
|
+ This file is part of Valgrind, a dynamic binary instrumentation
|
|
+ framework.
|
|
+
|
|
+ Copyright (C) 2013-2013 OpenWorks
|
|
+ info@open-works.net
|
|
+
|
|
+ This program is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU General Public License as
|
|
+ published by the Free Software Foundation; either version 2 of the
|
|
+ License, or (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful, but
|
|
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ 02110-1301, USA.
|
|
+
|
|
+ The GNU General Public License is contained in the file COPYING.
|
|
+*/
|
|
+
|
|
+//ZZ /* XXXX thumb to check:
|
|
+//ZZ that all cases where putIRegT writes r15, we generate a jump.
|
|
+//ZZ
|
|
+//ZZ All uses of newTemp assign to an IRTemp and not a UInt
|
|
+//ZZ
|
|
+//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
|
|
+//ZZ backed out before the memory op, and restored afterwards. This
|
|
+//ZZ needs to happen even after we go uncond. (and for sure it doesn't
|
|
+//ZZ happen for VFP loads/stores right now).
|
|
+//ZZ
|
|
+//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
|
|
+//ZZ should.
|
|
+//ZZ
|
|
+//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
|
|
+//ZZ taking into account the number of insns guarded by an IT.
|
|
+//ZZ
|
|
+//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
|
|
+//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
|
|
+//ZZ use Slice44 as specified in comments in the spechelper.
|
|
+//ZZ
|
|
+//ZZ add specialisations for armg_calculate_flag_c and _v, as they
|
|
+//ZZ are moderately often needed in Thumb code.
|
|
+//ZZ
|
|
+//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
|
|
+//ZZ
|
|
+//ZZ Correctness (obscure): in m_transtab, when invalidating code
|
|
+//ZZ address ranges, invalidate up to 18 bytes after the end of the
|
|
+//ZZ range. This is because the ITSTATE optimisation at the top of
|
|
+//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
|
|
+//ZZ given instruction, and so might depend on the invalidated area.
|
|
+//ZZ */
|
|
+//ZZ
|
|
+//ZZ /* Limitations, etc
|
|
+//ZZ
|
|
+//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
|
|
+//ZZ These instructions are non-restartable in the case where the
|
|
+//ZZ transfer(s) fault.
|
|
+//ZZ
|
|
+//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
|
|
+//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
|
|
+//ZZ guest_x86_toIR.c.
|
|
+//ZZ */
|
|
+
|
|
+/* "Special" instructions.
|
|
+
|
|
+ This instruction decoder can decode four special instructions
|
|
+ which mean nothing natively (are no-ops as far as regs/mem are
|
|
+ concerned) but have meaning for supporting Valgrind. A special
|
|
+ instruction is flagged by a 16-byte preamble:
|
|
+
|
|
+ 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
|
|
+ (ror x12, x12, #3; ror x12, x12, #13
|
|
+ ror x12, x12, #51; ror x12, x12, #61)
|
|
+
|
|
+ Following that, one of the following 4 is allowed
|
|
+ (standard interpretation in parentheses):
|
|
+
|
|
+ AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
|
|
+ AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
|
|
+ AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
|
|
+ AA090129 (orr x9,x9,x9) IR injection
|
|
+
|
|
+ Any other bytes following the 16-byte preamble are illegal and
|
|
+ constitute a failure in instruction decoding. This all assumes
|
|
+ that the preamble will never occur except in specific code
|
|
+ fragments designed for Valgrind to catch.
|
|
+*/
|
|
+
|
|
+/* Translates ARM64 code to IR. */
|
|
+
|
|
+#include "libvex_basictypes.h"
|
|
+#include "libvex_ir.h"
|
|
+#include "libvex.h"
|
|
+#include "libvex_guest_arm64.h"
|
|
+
|
|
+#include "main_util.h"
|
|
+#include "main_globals.h"
|
|
+#include "guest_generic_bb_to_IR.h"
|
|
+#include "guest_arm64_defs.h"
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Globals ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* These are set at the start of the translation of an instruction, so
|
|
+ that we don't have to pass them around endlessly. CONST means does
|
|
+ not change during translation of the instruction.
|
|
+*/
|
|
+
|
|
+/* CONST: is the host bigendian? We need to know this in order to do
|
|
+ sub-register accesses to the SIMD/FP registers correctly. */
|
|
+static Bool host_is_bigendian;
|
|
+
|
|
+/* CONST: The guest address for the instruction currently being
|
|
+ translated. */
|
|
+static Addr64 guest_PC_curr_instr;
|
|
+
|
|
+/* MOD: The IRSB* into which we're generating code. */
|
|
+static IRSB* irsb;
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Debugging output ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+#define DIP(format, args...) \
|
|
+ if (vex_traceflags & VEX_TRACE_FE) \
|
|
+ vex_printf(format, ## args)
|
|
+
|
|
+#define DIS(buf, format, args...) \
|
|
+ if (vex_traceflags & VEX_TRACE_FE) \
|
|
+ vex_sprintf(buf, format, ## args)
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Helper bits and pieces for deconstructing the ---*/
|
|
+/*--- arm insn stream. ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Do a little-endian load of a 32-bit word, regardless of the
|
|
+ endianness of the underlying host. */
|
|
+static inline UInt getUIntLittleEndianly ( UChar* p )
|
|
+{
|
|
+ UInt w = 0;
|
|
+ w = (w << 8) | p[3];
|
|
+ w = (w << 8) | p[2];
|
|
+ w = (w << 8) | p[1];
|
|
+ w = (w << 8) | p[0];
|
|
+ return w;
|
|
+}
|
|
+
|
|
+/* Sign extend an N-bit value up to 64 bits, by copying
|
|
+ bit N-1 into all higher positions. */
|
|
+static ULong sx_to_64 ( ULong x, UInt n )
|
|
+{
|
|
+ vassert(n > 1 && n < 64);
|
|
+ Long r = (Long)x;
|
|
+ r = (r << (64-n)) >> (64-n);
|
|
+ return (ULong)r;
|
|
+}
|
|
+
|
|
+//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
|
|
+//ZZ endianness of the underlying host. */
|
|
+//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
|
|
+//ZZ {
|
|
+//ZZ UShort w = 0;
|
|
+//ZZ w = (w << 8) | p[1];
|
|
+//ZZ w = (w << 8) | p[0];
|
|
+//ZZ return w;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
|
|
+//ZZ vassert(sh >= 0 && sh < 32);
|
|
+//ZZ if (sh == 0)
|
|
+//ZZ return x;
|
|
+//ZZ else
|
|
+//ZZ return (x << (32-sh)) | (x >> sh);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static Int popcount32 ( UInt x )
|
|
+//ZZ {
|
|
+//ZZ Int res = 0, i;
|
|
+//ZZ for (i = 0; i < 32; i++) {
|
|
+//ZZ res += (x & 1);
|
|
+//ZZ x >>= 1;
|
|
+//ZZ }
|
|
+//ZZ return res;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
|
|
+//ZZ {
|
|
+//ZZ UInt mask = 1 << ix;
|
|
+//ZZ x &= ~mask;
|
|
+//ZZ x |= ((b << ix) & mask);
|
|
+//ZZ return x;
|
|
+//ZZ }
|
|
+
|
|
+#define BITS2(_b1,_b0) \
|
|
+ (((_b1) << 1) | (_b0))
|
|
+
|
|
+#define BITS3(_b2,_b1,_b0) \
|
|
+ (((_b2) << 2) | ((_b1) << 1) | (_b0))
|
|
+
|
|
+#define BITS4(_b3,_b2,_b1,_b0) \
|
|
+ (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
|
|
+
|
|
+#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
|
|
+ ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
|
|
+ | BITS4((_b3),(_b2),(_b1),(_b0)))
|
|
+
|
|
+#define BITS5(_b4,_b3,_b2,_b1,_b0) \
|
|
+ (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
|
|
+#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
|
|
+ (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
|
|
+#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
|
|
+ (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
|
|
+
|
|
+#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
|
|
+ (((_b8) << 8) \
|
|
+ | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
|
|
+
|
|
+#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
|
|
+ (((_b9) << 9) | ((_b8) << 8) \
|
|
+ | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
|
|
+
|
|
+#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
|
|
+ (((_b10) << 10) \
|
|
+ | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
|
|
+
|
|
+// produces _uint[_bMax:_bMin]
|
|
+#define SLICE_UInt(_uint,_bMax,_bMin) \
|
|
+ (( ((UInt)(_uint)) >> (_bMin)) \
|
|
+ & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Helper bits and pieces for creating IR fragments. ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+static IRExpr* mkV128 ( UShort w )
|
|
+{
|
|
+ return IRExpr_Const(IRConst_V128(w));
|
|
+}
|
|
+
|
|
+static IRExpr* mkU64 ( ULong i )
|
|
+{
|
|
+ return IRExpr_Const(IRConst_U64(i));
|
|
+}
|
|
+
|
|
+static IRExpr* mkU32 ( UInt i )
|
|
+{
|
|
+ return IRExpr_Const(IRConst_U32(i));
|
|
+}
|
|
+
|
|
+static IRExpr* mkU8 ( UInt i )
|
|
+{
|
|
+ vassert(i < 256);
|
|
+ return IRExpr_Const(IRConst_U8( (UChar)i ));
|
|
+}
|
|
+
|
|
+static IRExpr* mkexpr ( IRTemp tmp )
|
|
+{
|
|
+ return IRExpr_RdTmp(tmp);
|
|
+}
|
|
+
|
|
+static IRExpr* unop ( IROp op, IRExpr* a )
|
|
+{
|
|
+ return IRExpr_Unop(op, a);
|
|
+}
|
|
+
|
|
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
|
|
+{
|
|
+ return IRExpr_Binop(op, a1, a2);
|
|
+}
|
|
+
|
|
+static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
|
|
+{
|
|
+ return IRExpr_Triop(op, a1, a2, a3);
|
|
+}
|
|
+
|
|
+static IRExpr* loadLE ( IRType ty, IRExpr* addr )
|
|
+{
|
|
+ return IRExpr_Load(Iend_LE, ty, addr);
|
|
+}
|
|
+
|
|
+/* Add a statement to the list held by "irsb". */
|
|
+static void stmt ( IRStmt* st )
|
|
+{
|
|
+ addStmtToIRSB( irsb, st );
|
|
+}
|
|
+
|
|
+static void assign ( IRTemp dst, IRExpr* e )
|
|
+{
|
|
+ stmt( IRStmt_WrTmp(dst, e) );
|
|
+}
|
|
+
|
|
+static void storeLE ( IRExpr* addr, IRExpr* data )
|
|
+{
|
|
+ stmt( IRStmt_Store(Iend_LE, addr, data) );
|
|
+}
|
|
+
|
|
+//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
|
|
+//ZZ {
|
|
+//ZZ if (guardT == IRTemp_INVALID) {
|
|
+//ZZ /* unconditional */
|
|
+//ZZ storeLE(addr, data);
|
|
+//ZZ } else {
|
|
+//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
|
|
+//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
|
|
+//ZZ IRExpr* addr, IRExpr* alt,
|
|
+//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
|
|
+//ZZ {
|
|
+//ZZ if (guardT == IRTemp_INVALID) {
|
|
+//ZZ /* unconditional */
|
|
+//ZZ IRExpr* loaded = NULL;
|
|
+//ZZ switch (cvt) {
|
|
+//ZZ case ILGop_Ident32:
|
|
+//ZZ loaded = loadLE(Ity_I32, addr); break;
|
|
+//ZZ case ILGop_8Uto32:
|
|
+//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
|
|
+//ZZ case ILGop_8Sto32:
|
|
+//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
|
|
+//ZZ case ILGop_16Uto32:
|
|
+//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
|
|
+//ZZ case ILGop_16Sto32:
|
|
+//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
|
|
+//ZZ default:
|
|
+//ZZ vassert(0);
|
|
+//ZZ }
|
|
+//ZZ vassert(loaded != NULL);
|
|
+//ZZ assign(dst, loaded);
|
|
+//ZZ } else {
|
|
+//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
|
|
+//ZZ loaded data before putting the data in 'dst'. If the load
|
|
+//ZZ does not take place, 'alt' is placed directly in 'dst'. */
|
|
+//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
|
|
+//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+
|
|
+/* Generate a new temporary of the given type. */
|
|
+static IRTemp newTemp ( IRType ty )
|
|
+{
|
|
+ vassert(isPlausibleIRType(ty));
|
|
+ return newIRTemp( irsb->tyenv, ty );
|
|
+}
|
|
+
|
|
+//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
|
|
+//ZZ IRRoundingMode. */
|
|
+//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
|
|
+//ZZ {
|
|
+//ZZ return mkU32(Irrm_NEAREST);
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* Generate an expression for SRC rotated right by ROT. */
|
|
+//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
|
|
+//ZZ {
|
|
+//ZZ vassert(rot >= 0 && rot < 32);
|
|
+//ZZ if (rot == 0)
|
|
+//ZZ return mkexpr(src);
|
|
+//ZZ return
|
|
+//ZZ binop(Iop_Or32,
|
|
+//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
|
|
+//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static IRExpr* mkU128 ( ULong i )
|
|
+//ZZ {
|
|
+//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* Generate a 4-aligned version of the given expression if
|
|
+//ZZ the given condition is true. Else return it unchanged. */
|
|
+//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
|
|
+//ZZ {
|
|
+//ZZ if (b)
|
|
+//ZZ return binop(Iop_And32, e, mkU32(~3));
|
|
+//ZZ else
|
|
+//ZZ return e;
|
|
+//ZZ }
|
|
+
|
|
+/* Other IR construction helpers. */
|
|
+static IROp mkAND ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_And32;
|
|
+ case Ity_I64: return Iop_And64;
|
|
+ default: vpanic("mkAND");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkOR ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Or32;
|
|
+ case Ity_I64: return Iop_Or64;
|
|
+ default: vpanic("mkOR");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkXOR ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Xor32;
|
|
+ case Ity_I64: return Iop_Xor64;
|
|
+ default: vpanic("mkXOR");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkSHL ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Shl32;
|
|
+ case Ity_I64: return Iop_Shl64;
|
|
+ default: vpanic("mkSHL");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkSHR ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Shr32;
|
|
+ case Ity_I64: return Iop_Shr64;
|
|
+ default: vpanic("mkSHR");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkSAR ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Sar32;
|
|
+ case Ity_I64: return Iop_Sar64;
|
|
+ default: vpanic("mkSAR");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkNOT ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Not32;
|
|
+ case Ity_I64: return Iop_Not64;
|
|
+ default: vpanic("mkNOT");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkADD ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Add32;
|
|
+ case Ity_I64: return Iop_Add64;
|
|
+ default: vpanic("mkADD");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkSUB ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return Iop_Sub32;
|
|
+ case Ity_I64: return Iop_Sub64;
|
|
+ default: vpanic("mkSUB");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkADDF ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_AddF32;
|
|
+ case Ity_F64: return Iop_AddF64;
|
|
+ default: vpanic("mkADDF");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkSUBF ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_SubF32;
|
|
+ case Ity_F64: return Iop_SubF64;
|
|
+ default: vpanic("mkSUBF");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkMULF ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_MulF32;
|
|
+ case Ity_F64: return Iop_MulF64;
|
|
+ default: vpanic("mkMULF");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkDIVF ( IRType ty ) { /* FP divide op for Ity_F32/Ity_F64; panics otherwise */
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_DivF32;
|
|
+ case Ity_F64: return Iop_DivF64;
|
|
+ default: vpanic("mkDIVF");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkNEGF ( IRType ty ) {
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_NegF32;
|
|
+ case Ity_F64: return Iop_NegF64;
|
|
+ default: vpanic("mkNEGF");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkABSF ( IRType ty ) { /* FP absolute-value op for Ity_F32/Ity_F64; panics otherwise */
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_AbsF32;
|
|
+ case Ity_F64: return Iop_AbsF64;
|
|
+ default: vpanic("mkABSF");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkSQRTF ( IRType ty ) { /* FP square-root op for Ity_F32/Ity_F64; panics otherwise */
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_SqrtF32;
|
|
+ case Ity_F64: return Iop_SqrtF64;
|
|
+ default: vpanic("mkSQRTF");
|
|
+ }
|
|
+}
|
|
+
|
|
+static IRExpr* mkU ( IRType ty, ULong imm ) {
|
|
+ switch (ty) {
|
|
+ case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
|
|
+ case Ity_I64: return mkU64(imm);
|
|
+ default: vpanic("mkU");
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Generate IR to create 'arg rotated right by imm', for sane values
|
|
+ of 'ty' and 'imm'. */
|
|
+static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
|
|
+{
|
|
+ UInt w = 0;
|
|
+ if (ty == Ity_I64) {
|
|
+ w = 64;
|
|
+ } else {
|
|
+ vassert(ty == Ity_I32);
|
|
+ w = 32;
|
|
+ }
|
|
+ vassert(w != 0);
|
|
+ vassert(imm < w);
|
|
+ if (imm == 0) {
|
|
+ return arg;
|
|
+ }
|
|
+ IRTemp res = newTemp(ty);
|
|
+ assign(res, binop(mkOR(ty),
|
|
+ binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
|
|
+ binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
|
|
+ return res;
|
|
+}
|
|
+
|
|
+/* Generate IR to set the returned temp to either all-zeroes or
|
|
+ all ones, as a copy of arg<imm>. */
|
|
+static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
|
|
+{
|
|
+ UInt w = 0;
|
|
+ if (ty == Ity_I64) {
|
|
+ w = 64;
|
|
+ } else {
|
|
+ vassert(ty == Ity_I32);
|
|
+ w = 32;
|
|
+ }
|
|
+ vassert(w != 0);
|
|
+ vassert(imm < w);
|
|
+ IRTemp res = newTemp(ty);
|
|
+ assign(res, binop(mkSAR(ty),
|
|
+ binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
|
|
+ mkU8(w - 1)));
|
|
+ return res;
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Helpers for accessing guest registers. ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+#define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
|
|
+#define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
|
|
+#define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
|
|
+#define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
|
|
+#define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
|
|
+#define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
|
|
+#define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
|
|
+#define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
|
|
+#define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
|
|
+#define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
|
|
+#define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
|
|
+#define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
|
|
+#define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
|
|
+#define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
|
|
+#define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
|
|
+#define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
|
|
+#define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
|
|
+#define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
|
|
+#define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
|
|
+#define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
|
|
+#define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
|
|
+#define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
|
|
+#define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
|
|
+#define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
|
|
+#define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
|
|
+#define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
|
|
+#define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
|
|
+#define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
|
|
+#define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
|
|
+#define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
|
|
+#define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
|
|
+
|
|
+#define OFFB_SP offsetof(VexGuestARM64State,guest_SP)
|
|
+#define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
|
|
+
|
|
+#define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
|
|
+#define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
|
|
+#define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
|
|
+#define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
|
|
+
|
|
+#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
|
|
+#define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
|
|
+
|
|
+#define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
|
|
+#define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
|
|
+#define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
|
|
+#define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
|
|
+#define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
|
|
+#define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
|
|
+#define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
|
|
+#define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
|
|
+#define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
|
|
+#define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
|
|
+#define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
|
|
+#define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
|
|
+#define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
|
|
+#define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
|
|
+#define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
|
|
+#define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
|
|
+#define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
|
|
+#define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
|
|
+#define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
|
|
+#define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
|
|
+#define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
|
|
+#define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
|
|
+#define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
|
|
+#define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
|
|
+#define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
|
|
+#define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
|
|
+#define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
|
|
+#define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
|
|
+#define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
|
|
+#define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
|
|
+#define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
|
|
+#define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
|
|
+
|
|
+#define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
|
|
+#define OFFB_FPSR offsetof(VexGuestARM64State,guest_FPSR)
|
|
+//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
|
|
+//ZZ #define OFFB_ITSTATE offsetof(VexGuestARMState,guest_ITSTATE)
|
|
+//ZZ #define OFFB_QFLAG32 offsetof(VexGuestARMState,guest_QFLAG32)
|
|
+//ZZ #define OFFB_GEFLAG0 offsetof(VexGuestARMState,guest_GEFLAG0)
|
|
+//ZZ #define OFFB_GEFLAG1 offsetof(VexGuestARMState,guest_GEFLAG1)
|
|
+//ZZ #define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2)
|
|
+//ZZ #define OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3)
|
|
+
|
|
+#define OFFB_TISTART offsetof(VexGuestARM64State,guest_TISTART)
|
|
+#define OFFB_TILEN offsetof(VexGuestARM64State,guest_TILEN)
|
|
+
|
|
+
|
|
+/* ---------------- Integer registers ---------------- */
|
|
+
|
|
+static Int offsetIReg64 ( UInt iregNo )
|
|
+{
|
|
+ /* Do we care about endianness here? We do if sub-parts of integer
|
|
+ registers are accessed. */
|
|
+ switch (iregNo) {
|
|
+ case 0: return OFFB_X0;
|
|
+ case 1: return OFFB_X1;
|
|
+ case 2: return OFFB_X2;
|
|
+ case 3: return OFFB_X3;
|
|
+ case 4: return OFFB_X4;
|
|
+ case 5: return OFFB_X5;
|
|
+ case 6: return OFFB_X6;
|
|
+ case 7: return OFFB_X7;
|
|
+ case 8: return OFFB_X8;
|
|
+ case 9: return OFFB_X9;
|
|
+ case 10: return OFFB_X10;
|
|
+ case 11: return OFFB_X11;
|
|
+ case 12: return OFFB_X12;
|
|
+ case 13: return OFFB_X13;
|
|
+ case 14: return OFFB_X14;
|
|
+ case 15: return OFFB_X15;
|
|
+ case 16: return OFFB_X16;
|
|
+ case 17: return OFFB_X17;
|
|
+ case 18: return OFFB_X18;
|
|
+ case 19: return OFFB_X19;
|
|
+ case 20: return OFFB_X20;
|
|
+ case 21: return OFFB_X21;
|
|
+ case 22: return OFFB_X22;
|
|
+ case 23: return OFFB_X23;
|
|
+ case 24: return OFFB_X24;
|
|
+ case 25: return OFFB_X25;
|
|
+ case 26: return OFFB_X26;
|
|
+ case 27: return OFFB_X27;
|
|
+ case 28: return OFFB_X28;
|
|
+ case 29: return OFFB_X29;
|
|
+ case 30: return OFFB_X30;
|
|
+ /* but not 31 */
|
|
+ default: vassert(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+static Int offsetIReg64orSP ( UInt iregNo )
|
|
+{
|
|
+ return iregNo == 31 ? OFFB_SP : offsetIReg64(iregNo);
|
|
+}
|
|
+
|
|
+static const HChar* nameIReg64orZR ( UInt iregNo )
|
|
+{
|
|
+ vassert(iregNo < 32);
|
|
+ static const HChar* names[32]
|
|
+ = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
|
|
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
|
|
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
|
|
+ "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
|
|
+ return names[iregNo];
|
|
+}
|
|
+
|
|
+static const HChar* nameIReg64orSP ( UInt iregNo )
|
|
+{
|
|
+ if (iregNo == 31) {
|
|
+ return "sp";
|
|
+ }
|
|
+ vassert(iregNo < 31);
|
|
+ return nameIReg64orZR(iregNo);
|
|
+}
|
|
+
|
|
+static IRExpr* getIReg64orSP ( UInt iregNo )
|
|
+{
|
|
+ vassert(iregNo < 32);
|
|
+ return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
|
|
+}
|
|
+
|
|
+static IRExpr* getIReg64orZR ( UInt iregNo )
|
|
+{
|
|
+ if (iregNo == 31) {
|
|
+ return mkU64(0);
|
|
+ }
|
|
+ vassert(iregNo < 31);
|
|
+ return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
|
|
+}
|
|
+
|
|
+static void putIReg64orSP ( UInt iregNo, IRExpr* e )
|
|
+{
|
|
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
|
|
+ stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
|
|
+}
|
|
+
|
|
+static void putIReg64orZR ( UInt iregNo, IRExpr* e )
|
|
+{
|
|
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
|
|
+ if (iregNo == 31) {
|
|
+ return;
|
|
+ }
|
|
+ vassert(iregNo < 31);
|
|
+ stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
|
|
+}
|
|
+
|
|
+static const HChar* nameIReg32orZR ( UInt iregNo )
|
|
+{
|
|
+ vassert(iregNo < 32);
|
|
+ static const HChar* names[32]
|
|
+ = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
|
|
+ "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
|
|
+ "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
|
|
+ "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
|
|
+ return names[iregNo];
|
|
+}
|
|
+
|
|
+static const HChar* nameIReg32orSP ( UInt iregNo )
|
|
+{
|
|
+ if (iregNo == 31) {
|
|
+ return "wsp";
|
|
+ }
|
|
+ vassert(iregNo < 31);
|
|
+ return nameIReg32orZR(iregNo);
|
|
+}
|
|
+
|
|
+static IRExpr* getIReg32orSP ( UInt iregNo )
|
|
+{
|
|
+ vassert(iregNo < 32);
|
|
+ return unop(Iop_64to32,
|
|
+ IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
|
|
+}
|
|
+
|
|
+static IRExpr* getIReg32orZR ( UInt iregNo )
|
|
+{
|
|
+ if (iregNo == 31) {
|
|
+ return mkU32(0);
|
|
+ }
|
|
+ vassert(iregNo < 31);
|
|
+ return unop(Iop_64to32,
|
|
+ IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
|
|
+}
|
|
+
|
|
+static void putIReg32orSP ( UInt iregNo, IRExpr* e )
|
|
+{
|
|
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
|
|
+ stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
|
|
+}
|
|
+
|
|
+static void putIReg32orZR ( UInt iregNo, IRExpr* e )
|
|
+{
|
|
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
|
|
+ if (iregNo == 31) {
|
|
+ return;
|
|
+ }
|
|
+ vassert(iregNo < 31);
|
|
+ stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
|
|
+}
|
|
+
|
|
+static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
|
|
+{
|
|
+ vassert(is64 == True || is64 == False);
|
|
+ return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
|
|
+}
|
|
+
|
|
+static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
|
|
+{
|
|
+ vassert(is64 == True || is64 == False);
|
|
+ return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
|
|
+}
|
|
+
|
|
+static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
|
|
+{
|
|
+ vassert(is64 == True || is64 == False);
|
|
+ return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
|
|
+}
|
|
+
|
|
+static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
|
|
+{
|
|
+ vassert(is64 == True || is64 == False);
|
|
+ if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
|
|
+}
|
|
+
|
|
+static void putPC ( IRExpr* e )
|
|
+{
|
|
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
|
|
+ stmt( IRStmt_Put(OFFB_PC, e) );
|
|
+}
|
|
+
|
|
+
|
|
+/* ---------------- Vector (Q) registers ---------------- */
|
|
+
|
|
+static Int offsetQReg128 ( UInt qregNo )
|
|
+{
|
|
+ /* We don't care about endianness at this point. It only becomes
|
|
+ relevant when dealing with sections of these registers.*/
|
|
+ switch (qregNo) {
|
|
+ case 0: return OFFB_Q0;
|
|
+ case 1: return OFFB_Q1;
|
|
+ case 2: return OFFB_Q2;
|
|
+ case 3: return OFFB_Q3;
|
|
+ case 4: return OFFB_Q4;
|
|
+ case 5: return OFFB_Q5;
|
|
+ case 6: return OFFB_Q6;
|
|
+ case 7: return OFFB_Q7;
|
|
+ case 8: return OFFB_Q8;
|
|
+ case 9: return OFFB_Q9;
|
|
+ case 10: return OFFB_Q10;
|
|
+ case 11: return OFFB_Q11;
|
|
+ case 12: return OFFB_Q12;
|
|
+ case 13: return OFFB_Q13;
|
|
+ case 14: return OFFB_Q14;
|
|
+ case 15: return OFFB_Q15;
|
|
+ case 16: return OFFB_Q16;
|
|
+ case 17: return OFFB_Q17;
|
|
+ case 18: return OFFB_Q18;
|
|
+ case 19: return OFFB_Q19;
|
|
+ case 20: return OFFB_Q20;
|
|
+ case 21: return OFFB_Q21;
|
|
+ case 22: return OFFB_Q22;
|
|
+ case 23: return OFFB_Q23;
|
|
+ case 24: return OFFB_Q24;
|
|
+ case 25: return OFFB_Q25;
|
|
+ case 26: return OFFB_Q26;
|
|
+ case 27: return OFFB_Q27;
|
|
+ case 28: return OFFB_Q28;
|
|
+ case 29: return OFFB_Q29;
|
|
+ case 30: return OFFB_Q30;
|
|
+ case 31: return OFFB_Q31;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* Write to a complete Qreg. */
|
|
+static void putQReg128 ( UInt qregNo, IRExpr* e )
|
|
+{
|
|
+ vassert(qregNo < 32);
|
|
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
|
|
+ stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
|
|
+}
|
|
+
|
|
+/* Read a complete Qreg. */
|
|
+static IRExpr* getQReg128 ( UInt qregNo )
|
|
+{
|
|
+ vassert(qregNo < 32);
|
|
+ return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
|
|
+}
|
|
+
|
|
+/* Produce the IR type for some sub-part of a vector. For 32- and 64-
|
|
+ bit sub-parts we can choose either integer or float types. For the
|
|
+ 64-bit case we choose float, on the basis that that is the common
|
|
+ use case and so will give least interference with Put-to-Get
|
|
+ forwarding later on; the 32-bit case currently yields Ity_I32. */
|
|
+static IRType preferredVectorSubTypeFromSize ( UInt szB )
|
|
+{
|
|
+ switch (szB) {
|
|
+ case 1: return Ity_I8;
|
|
+ case 2: return Ity_I16;
|
|
+ case 4: return Ity_I32; //Ity_F32;
|
|
+ case 8: return Ity_F64;
|
|
+ case 16: return Ity_V128;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Find the offset of the szB least significant bytes of the given
|
|
+ Qreg. This requires knowing the endianness of the host. */
|
|
+static Int offsetQReg ( UInt szB, UInt qregNo )
|
|
+{
|
|
+ vassert(!host_is_bigendian);
|
|
+ Int base = offsetQReg128(qregNo);
|
|
+ /* Since we're dealing with a little-endian host, all of the
|
|
+ sub-parts will have the same offset as the base register. But
|
|
+ we still need to check that szB is valid. */
|
|
+ switch (szB) {
|
|
+ case 1: case 2: case 4: case 8: case 16: break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ return base;
|
|
+}
|
|
+
|
|
+static void putQReg ( UInt qregNo, IRExpr* e )
|
|
+{
|
|
+ IRType ty = typeOfIRExpr(irsb->tyenv, e);
|
|
+ Int off = offsetQReg(sizeofIRType(ty), qregNo);
|
|
+ switch (ty) {
|
|
+ case Ity_I8: break;
|
|
+ case Ity_I16: break;
|
|
+ case Ity_I32: break;
|
|
+ case Ity_F32: break;
|
|
+ case Ity_I64: break;
|
|
+ case Ity_F64: break;
|
|
+ case Ity_V128: break;
|
|
+ default: vassert(0); // Other cases are ATC
|
|
+ }
|
|
+ stmt(IRStmt_Put(off, e));
|
|
+}
|
|
+
|
|
+static IRExpr* getQReg ( IRType ty, UInt qregNo )
|
|
+{
|
|
+ Int off = offsetQReg(sizeofIRType(ty), qregNo);
|
|
+ switch (ty) {
|
|
+ case Ity_I32: break;
|
|
+ case Ity_F32: break;
|
|
+ case Ity_I64: break;
|
|
+ case Ity_F64: break;
|
|
+ case Ity_V128: break;
|
|
+ default: vassert(0); // Other cases are ATC
|
|
+ }
|
|
+ return IRExpr_Get(off, ty);
|
|
+}
|
|
+
|
|
+static const HChar* nameQReg ( UInt szB, UInt qregNo )
|
|
+{
|
|
+ static const HChar* namesQ[32]
|
|
+ = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
|
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
|
|
+ "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
|
|
+ "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
|
|
+ static const HChar* namesD[32]
|
|
+ = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
|
|
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
|
|
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
|
|
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
|
|
+ static const HChar* namesS[32]
|
|
+ = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
|
|
+ "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
|
|
+ "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
|
|
+ "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
|
|
+ static const HChar* namesH[32]
|
|
+ = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
|
|
+ "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
|
|
+ "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
|
|
+ "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
|
|
+ static const HChar* namesB[32]
|
|
+ = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
|
|
+ "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
|
|
+ "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
|
|
+ "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
|
|
+ vassert(qregNo < 32);
|
|
+ switch (szB) {
|
|
+ case 1: return namesB[qregNo];
|
|
+ case 2: return namesH[qregNo];
|
|
+ case 4: return namesS[qregNo];
|
|
+ case 8: return namesD[qregNo];
|
|
+ case 16: return namesQ[qregNo];
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ /*NOTREACHED*/
|
|
+}
|
|
+
|
|
+/* Find the offset of the most significant half (8 bytes) of the given
|
|
+ Qreg. This requires knowing the endianness of the host. */
|
|
+static Int offsetQReg64HI ( UInt qregNo )
|
|
+{
|
|
+ vassert(!host_is_bigendian);
|
|
+ Int base = offsetQReg128(qregNo);
|
|
+ /* Since the host is little endian, the least significant half is
|
|
+ at the lower offset. So add 8 to get the MS half offset. */
|
|
+ return base+8;
|
|
+}
|
|
+
|
|
+static IRExpr* getQReg64HI ( UInt qregNo )
|
|
+{
|
|
+ return IRExpr_Get(offsetQReg64HI(qregNo), Ity_I64);
|
|
+}
|
|
+
|
|
+static void putQReg64HI ( UInt qregNo, IRExpr* e )
|
|
+{
|
|
+ IRType ty = typeOfIRExpr(irsb->tyenv, e);
|
|
+ Int off = offsetQReg64HI(qregNo);
|
|
+ switch (ty) {
|
|
+ case Ity_I64: break;
|
|
+ case Ity_F64: break;
|
|
+ default: vassert(0); // Other cases are plain wrong
|
|
+ }
|
|
+ stmt(IRStmt_Put(off, e));
|
|
+}
|
|
+
|
|
+//ZZ /* ---------------- Misc registers ---------------- */
|
|
+//ZZ
|
|
+//ZZ static void putMiscReg32 ( UInt gsoffset,
|
|
+//ZZ IRExpr* e, /* :: Ity_I32 */
|
|
+//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
|
|
+//ZZ {
|
|
+//ZZ switch (gsoffset) {
|
|
+//ZZ case OFFB_FPSCR: break;
|
|
+//ZZ case OFFB_QFLAG32: break;
|
|
+//ZZ case OFFB_GEFLAG0: break;
|
|
+//ZZ case OFFB_GEFLAG1: break;
|
|
+//ZZ case OFFB_GEFLAG2: break;
|
|
+//ZZ case OFFB_GEFLAG3: break;
|
|
+//ZZ default: vassert(0); /* awaiting more cases */
|
|
+//ZZ }
|
|
+//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
|
|
+//ZZ
|
|
+//ZZ if (guardT == IRTemp_INVALID) {
|
|
+//ZZ /* unconditional write */
|
|
+//ZZ stmt(IRStmt_Put(gsoffset, e));
|
|
+//ZZ } else {
|
|
+//ZZ stmt(IRStmt_Put(
|
|
+//ZZ gsoffset,
|
|
+//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
|
|
+//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
|
|
+//ZZ ));
|
|
+//ZZ }
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static IRTemp get_ITSTATE ( void )
|
|
+//ZZ {
|
|
+//ZZ ASSERT_IS_THUMB;
|
|
+//ZZ IRTemp t = newTemp(Ity_I32);
|
|
+//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
|
|
+//ZZ return t;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void put_ITSTATE ( IRTemp t )
|
|
+//ZZ {
|
|
+//ZZ ASSERT_IS_THUMB;
|
|
+//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static IRTemp get_QFLAG32 ( void )
|
|
+//ZZ {
|
|
+//ZZ IRTemp t = newTemp(Ity_I32);
|
|
+//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
|
|
+//ZZ return t;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
|
|
+//ZZ {
|
|
+//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
|
|
+//ZZ Status Register) to indicate that overflow or saturation occurred.
|
|
+//ZZ Nb: t must be zero to denote no saturation, and any nonzero
|
|
+//ZZ value to indicate saturation. */
|
|
+//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
|
|
+//ZZ {
|
|
+//ZZ IRTemp old = get_QFLAG32();
|
|
+//ZZ IRTemp nyu = newTemp(Ity_I32);
|
|
+//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
|
|
+//ZZ put_QFLAG32(nyu, condT);
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/* ---------------- FPCR stuff ---------------- */
|
|
+
|
|
+/* Generate IR to get hold of the rounding mode bits in FPCR, and
|
|
+ convert them to IR format. Bind the final result to the
|
|
+ returned temp. */
|
|
+static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
|
|
+{
|
|
+ /* The ARMvfp encoding for rounding mode bits is:
|
|
+ 00 to nearest
|
|
+ 01 to +infinity
|
|
+ 10 to -infinity
|
|
+ 11 to zero
|
|
+ We need to convert that to the IR encoding:
|
|
+ 00 to nearest (the default)
|
|
+ 10 to +infinity
|
|
+ 01 to -infinity
|
|
+ 11 to zero
|
|
+ Which can be done by swapping bits 0 and 1.
|
|
+ The rmode bits are at 23:22 in FPCR.
|
|
+ */
|
|
+ IRTemp armEncd = newTemp(Ity_I32);
|
|
+ IRTemp swapped = newTemp(Ity_I32);
|
|
+ /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
|
|
+ we don't zero out bits 24 and above, since the assignment to
|
|
+ 'swapped' will mask them out anyway. */
|
|
+ assign(armEncd,
|
|
+ binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
|
|
+ /* Now swap them. */
|
|
+ assign(swapped,
|
|
+ binop(Iop_Or32,
|
|
+ binop(Iop_And32,
|
|
+ binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
|
|
+ mkU32(2)),
|
|
+ binop(Iop_And32,
|
|
+ binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
|
|
+ mkU32(1))
|
|
+ ));
|
|
+ return swapped;
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Helpers for flag handling and conditional insns ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+static const HChar* nameARM64Condcode ( ARM64Condcode cond )
|
|
+{
|
|
+ switch (cond) {
|
|
+ case ARM64CondEQ: return "eq";
|
|
+ case ARM64CondNE: return "ne";
|
|
+ case ARM64CondCS: return "cs"; // or 'hs'
|
|
+ case ARM64CondCC: return "cc"; // or 'lo'
|
|
+ case ARM64CondMI: return "mi";
|
|
+ case ARM64CondPL: return "pl";
|
|
+ case ARM64CondVS: return "vs";
|
|
+ case ARM64CondVC: return "vc";
|
|
+ case ARM64CondHI: return "hi";
|
|
+ case ARM64CondLS: return "ls";
|
|
+ case ARM64CondGE: return "ge";
|
|
+ case ARM64CondLT: return "lt";
|
|
+ case ARM64CondGT: return "gt";
|
|
+ case ARM64CondLE: return "le";
|
|
+ case ARM64CondAL: return "al";
|
|
+ case ARM64CondNV: return "nv";
|
|
+ default: vpanic("name_ARM64Condcode");
|
|
+ }
|
|
+}
|
|
+
|
|
+/* and a handy shorthand for it */
|
|
+static const HChar* nameCC ( ARM64Condcode cond ) {
|
|
+ return nameARM64Condcode(cond);
|
|
+}
|
|
+
|
|
+
|
|
+/* Build IR to calculate some particular condition from stored
|
|
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
|
|
+ Ity_I64, suitable for narrowing. Although the return type is
|
|
+ Ity_I64, the returned value is either 0 or 1. 'cond' must be
|
|
+ :: Ity_I64 and must denote the condition to compute in
|
|
+ bits 7:4, and be zero everywhere else.
|
|
+*/
|
|
+static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
|
|
+{
|
|
+ vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
|
|
+ /* And 'cond' had better produce a value in which only bits 7:4 are
|
|
+ nonzero. However, obviously we can't assert for that. */
|
|
+
|
|
+ /* So what we're constructing for the first argument is
|
|
+ "(cond << 4) | stored-operation".
|
|
+ However, as per comments above, 'cond' must be supplied
|
|
+ pre-shifted to this function.
|
|
+
|
|
+ This pairing scheme requires that the ARM64_CC_OP_ values all fit
|
|
+ in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
|
|
+ 8 bits of the first argument. */
|
|
+ IRExpr** args
|
|
+ = mkIRExprVec_4(
|
|
+ binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
|
|
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
|
|
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
|
|
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
|
|
+ );
|
|
+ IRExpr* call
|
|
+ = mkIRExprCCall(
|
|
+ Ity_I64,
|
|
+ 0/*regparm*/,
|
|
+ "arm64g_calculate_condition", &arm64g_calculate_condition,
|
|
+ args
|
|
+ );
|
|
+
|
|
+ /* Exclude the requested condition, OP and NDEP from definedness
|
|
+ checking. We're only interested in DEP1 and DEP2. */
|
|
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
|
|
+ return call;
|
|
+}
|
|
+
|
|
+
|
|
+/* Build IR to calculate some particular condition from stored
|
|
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
|
|
+ Ity_I64, suitable for narrowing. Although the return type is
|
|
+ Ity_I64, the returned value is either 0 or 1.
|
|
+*/
|
|
+static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
|
|
+{
|
|
+ /* First arg is "(cond << 4) | stored-operation". This requires that
|
|
+ the ARM64G_CC_OP_ values all fit in 4 bits. Hence we are passing a
|
|
+ (COND, OP) pair in the lowest 8 bits of the first argument. */
|
|
+ vassert(cond >= 0 && cond <= 15);
|
|
+ return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
|
|
+}
|
|
+
|
|
+
|
|
+//ZZ /* Build IR to calculate just the carry flag from stored
|
|
+//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
|
|
+//ZZ Ity_I32. */
|
|
+//ZZ static IRExpr* mk_armg_calculate_flag_c ( void )
|
|
+//ZZ {
|
|
+//ZZ IRExpr** args
|
|
+//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
|
|
+//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
|
|
+//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
|
|
+//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
|
|
+//ZZ IRExpr* call
|
|
+//ZZ = mkIRExprCCall(
|
|
+//ZZ Ity_I32,
|
|
+//ZZ 0/*regparm*/,
|
|
+//ZZ "armg_calculate_flag_c", &armg_calculate_flag_c,
|
|
+//ZZ args
|
|
+//ZZ );
|
|
+//ZZ /* Exclude OP and NDEP from definedness checking. We're only
|
|
+//ZZ interested in DEP1 and DEP2. */
|
|
+//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
|
|
+//ZZ return call;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /* Build IR to calculate just the overflow flag from stored
|
|
+//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
|
|
+//ZZ Ity_I32. */
|
|
+//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
|
|
+//ZZ {
|
|
+//ZZ IRExpr** args
|
|
+//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
|
|
+//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
|
|
+//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
|
|
+//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
|
|
+//ZZ IRExpr* call
|
|
+//ZZ = mkIRExprCCall(
|
|
+//ZZ Ity_I32,
|
|
+//ZZ 0/*regparm*/,
|
|
+//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
|
|
+//ZZ args
|
|
+//ZZ );
|
|
+//ZZ /* Exclude OP and NDEP from definedness checking. We're only
|
|
+//ZZ interested in DEP1 and DEP2. */
|
|
+//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
|
|
+//ZZ return call;
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/* Build IR to calculate N Z C V in bits 31:28 of the
|
|
+ returned word. */
|
|
+static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
|
|
+{
|
|
+ IRExpr** args
|
|
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
|
|
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
|
|
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
|
|
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
|
|
+ IRExpr* call
|
|
+ = mkIRExprCCall(
|
|
+ Ity_I64,
|
|
+ 0/*regparm*/,
|
|
+ "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
|
|
+ args
|
|
+ );
|
|
+ /* Exclude OP and NDEP from definedness checking. We're only
|
|
+ interested in DEP1 and DEP2. */
|
|
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
|
|
+ return call;
|
|
+}
|
|
+
|
|
+
|
|
+/* Build IR to set the flags thunk, in the most general case. cc_op must be a valid ARM64G_CC_OP_ value and all three temps must be :: Ity_I64. */
|
|
+static
|
|
+void setFlags_D1_D2_ND ( UInt cc_op,
|
|
+ IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
|
|
+{
|
|
+ vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
|
|
+ vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
|
|
+ vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
|
|
+ vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
|
|
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
|
|
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
|
|
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
|
|
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
|
|
+}
|
|
+
|
|
+/* Build IR to set the flags thunk after ADD or SUB.  argL and argR
|
|
+   are the two operands; when is64 is False they are zero-extended
|
|
+   from 32 to 64 bits before being placed in the thunk.  NDEP is
|
|
+   set to zero. */
|
|
+static
|
|
+void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
|
|
+{
|
|
+   IRTemp depL = argL;
|
|
+   IRTemp depR = argR;
|
|
+   IRTemp zero = newTemp(Ity_I64);
|
|
+   if (!is64) {
|
|
+      /* Widen the 32-bit operands. */
|
|
+      depL = newTemp(Ity_I64);
|
|
+      depR = newTemp(Ity_I64);
|
|
+      assign(depL, unop(Iop_32Uto64, mkexpr(argL)));
|
|
+      assign(depR, unop(Iop_32Uto64, mkexpr(argR)));
|
|
+   }
|
|
+   assign(zero, mkU64(0));
|
|
+   /* Select the thunk operation from the (isSUB, is64) pair. */
|
|
+   UInt cc_op;
|
|
+   if (isSUB) {
|
|
+      cc_op = is64 ? ARM64G_CC_OP_SUB64 : ARM64G_CC_OP_SUB32;
|
|
+   } else {
|
|
+      cc_op = is64 ? ARM64G_CC_OP_ADD64 : ARM64G_CC_OP_ADD32;
|
|
+   }
|
|
+   setFlags_D1_D2_ND(cc_op, depL, depR, zero);
|
|
+}
|
|
+
|
|
+/* Build IR to set the flags thunk after ADD or SUB, but only when
|
|
+   'cond' evaluates to True at run time.  When it evaluates to False,
|
|
+   the thunk is instead set up so that the flags take the specified
|
|
+   NZCV value. */
|
|
+static
|
|
+void setFlags_ADD_SUB_conditionally (
|
|
+        Bool is64, Bool isSUB,
|
|
+        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
|
|
+     )
|
|
+{
|
|
+   /* The IR produced is:
|
|
+        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
|
|
+        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
|
|
+        CC_DEP2 = ITE(cond, argR64, 0)
|
|
+        CC_NDEP = 0
|
|
+   */
|
|
+
|
|
+   IRTemp zero = newTemp(Ity_I64);
|
|
+   assign(zero, mkU64(0));
|
|
+
|
|
+   /* Condition holds: which thunk operation to record. */
|
|
+   UInt opTaken;
|
|
+   if (isSUB) {
|
|
+      opTaken = is64 ? ARM64G_CC_OP_SUB64 : ARM64G_CC_OP_SUB32;
|
|
+   } else {
|
|
+      opTaken = is64 ? ARM64G_CC_OP_ADD64 : ARM64G_CC_OP_ADD32;
|
|
+   }
|
|
+
|
|
+   /* Condition holds: the operands, zero-extended to 64 bits when
|
|
+      the operation is a 32-bit one. */
|
|
+   IRTemp dep1Taken = argL;
|
|
+   IRTemp dep2Taken = argR;
|
|
+   if (!is64) {
|
|
+      dep1Taken = newTemp(Ity_I64);
|
|
+      dep2Taken = newTemp(Ity_I64);
|
|
+      assign(dep1Taken, unop(Iop_32Uto64, mkexpr(argL)));
|
|
+      assign(dep2Taken, unop(Iop_32Uto64, mkexpr(argR)));
|
|
+   }
|
|
+
|
|
+   /* Condition fails: a COPY thunk, with DEP1 = nzcv << 28 and
|
|
+      DEP2 = 0. */
|
|
+   IRTemp dep1Skipped = newTemp(Ity_I64);
|
|
+   assign(dep1Skipped, mkU64(nzcv << 28));
|
|
+
|
|
+   /* Choose between the two cases at run time. */
|
|
+   IRTemp dep1 = newTemp(Ity_I64);
|
|
+   IRTemp dep2 = newTemp(Ity_I64);
|
|
+   IRTemp op   = newTemp(Ity_I64);
|
|
+
|
|
+   assign(op,   IRExpr_ITE(mkexpr(cond),
|
|
+                           mkU64(opTaken), mkU64(ARM64G_CC_OP_COPY)));
|
|
+   assign(dep1, IRExpr_ITE(mkexpr(cond),
|
|
+                           mkexpr(dep1Taken), mkexpr(dep1Skipped)));
|
|
+   assign(dep2, IRExpr_ITE(mkexpr(cond),
|
|
+                           mkexpr(dep2Taken), mkexpr(zero)));
|
|
+
|
|
+   /* Write the selected values to the thunk. */
|
|
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
|
|
+   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
|
|
+   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
|
|
+   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(zero) ));
|
|
+}
|
|
+
|
|
+/* Build IR to set the flags thunk after AND/OR/XOR or variants
|
|
+   thereof.  'res' is the result of the operation; when is64 is False
|
|
+   it is zero-extended from 32 to 64 bits first.  DEP2 and NDEP are
|
|
+   set to zero. */
|
|
+static
|
|
+void setFlags_LOGIC ( Bool is64, IRTemp res )
|
|
+{
|
|
+   IRTemp zero = newTemp(Ity_I64);
|
|
+   IRTemp dep1 = res;
|
|
+   if (!is64) {
|
|
+      dep1 = newTemp(Ity_I64);
|
|
+      assign(dep1, unop(Iop_32Uto64, mkexpr(res)));
|
|
+   }
|
|
+   assign(zero, mkU64(0));
|
|
+   setFlags_D1_D2_ND(is64 ? ARM64G_CC_OP_LOGIC64 : ARM64G_CC_OP_LOGIC32,
|
|
+                     dep1, zero, zero);
|
|
+}
|
|
+
|
|
+/* Build IR to set the flags thunk to a given NZCV value, which is
|
|
+   expected in bits 31:28 of nzcv_28x0.  The thunk operation is COPY,
|
|
+   with DEP2 and NDEP both zero. */
|
|
+static
|
|
+void setFlags_COPY ( IRTemp nzcv_28x0 )
|
|
+{
|
|
+   IRTemp zero = newTemp(Ity_I64);
|
|
+   assign(zero, mkU64(0));
|
|
+   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, zero, zero);
|
|
+}
|
|
+
|
|
+
|
|
+//ZZ /* Minor variant of the above that sets NDEP to zero (if it
|
|
+//ZZ sets it at all) */
|
|
+//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
|
|
+//ZZ IRTemp t_dep2,
|
|
+//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
|
|
+//ZZ {
|
|
+//ZZ IRTemp z32 = newTemp(Ity_I32);
|
|
+//ZZ assign( z32, mkU32(0) );
|
|
+//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
|
|
+//ZZ sets it at all) */
|
|
+//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
|
|
+//ZZ IRTemp t_ndep,
|
|
+//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
|
|
+//ZZ {
|
|
+//ZZ IRTemp z32 = newTemp(Ity_I32);
|
|
+//ZZ assign( z32, mkU32(0) );
|
|
+//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ
|
|
+//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
|
|
+//ZZ sets them at all) */
|
|
+//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
|
|
+//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
|
|
+//ZZ {
|
|
+//ZZ IRTemp z32 = newTemp(Ity_I32);
|
|
+//ZZ assign( z32, mkU32(0) );
|
|
+//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
|
|
+//ZZ }
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Misc math helpers ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Generates a 64-bit byte swap of t1.  The swap is done in three
|
|
+   stages: adjacent bytes are exchanged, then adjacent 16-bit units,
|
|
+   then the two 32-bit halves.  Returns a new temp holding the
|
|
+   result. */
|
|
+static IRTemp math_BSWAP64 ( IRTemp t1 )
|
|
+{
|
|
+   IRTemp result    = newTemp(Ity_I64);
|
|
+   IRTemp mask8     = newTemp(Ity_I64);
|
|
+   IRTemp swapped8  = newTemp(Ity_I64);
|
|
+   IRTemp mask16    = newTemp(Ity_I64);
|
|
+   IRTemp swapped16 = newTemp(Ity_I64);
|
|
+   IRTemp mask32    = newTemp(Ity_I64);
|
|
+   IRExpr* movedDown;  /* upper units, masked then shifted right */
|
|
+   IRExpr* movedUp;    /* lower units, shifted left then masked */
|
|
+
|
|
+   /* Stage 1: exchange the two bytes of every 16-bit unit. */
|
|
+   assign( mask8, mkU64(0xFF00FF00FF00FF00ULL) );
|
|
+   movedDown = binop(Iop_Shr64,
|
|
+                     binop(Iop_And64, mkexpr(t1), mkexpr(mask8)),
|
|
+                     mkU8(8));
|
|
+   movedUp   = binop(Iop_And64,
|
|
+                     binop(Iop_Shl64, mkexpr(t1), mkU8(8)),
|
|
+                     mkexpr(mask8));
|
|
+   assign( swapped8, binop(Iop_Or64, movedDown, movedUp) );
|
|
+
|
|
+   /* Stage 2: exchange the two 16-bit units of every 32-bit unit. */
|
|
+   assign( mask16, mkU64(0xFFFF0000FFFF0000ULL) );
|
|
+   movedDown = binop(Iop_Shr64,
|
|
+                     binop(Iop_And64, mkexpr(swapped8), mkexpr(mask16)),
|
|
+                     mkU8(16));
|
|
+   movedUp   = binop(Iop_And64,
|
|
+                     binop(Iop_Shl64, mkexpr(swapped8), mkU8(16)),
|
|
+                     mkexpr(mask16));
|
|
+   assign( swapped16, binop(Iop_Or64, movedDown, movedUp) );
|
|
+
|
|
+   /* Stage 3: exchange the two 32-bit halves. */
|
|
+   assign( mask32, mkU64(0xFFFFFFFF00000000ULL) );
|
|
+   movedDown = binop(Iop_Shr64,
|
|
+                     binop(Iop_And64, mkexpr(swapped16), mkexpr(mask32)),
|
|
+                     mkU8(32));
|
|
+   movedUp   = binop(Iop_And64,
|
|
+                     binop(Iop_Shl64, mkexpr(swapped16), mkU8(32)),
|
|
+                     mkexpr(mask32));
|
|
+   assign( result, binop(Iop_Or64, movedDown, movedUp) );
|
|
+
|
|
+   return result;
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- FP comparison helpers ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* irRes32 :: Ity_I32 holds a floating point comparison result encoded
|
|
+ as an IRCmpF64Result. Generate code to convert it to an
|
|
+ ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
|
|
+ Assign a new temp to hold that value, and return the temp. */
|
|
+static
|
|
+IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
|
|
+{
|
|
+ IRTemp ix = newTemp(Ity_I64);
|
|
+ IRTemp termL = newTemp(Ity_I64);
|
|
+ IRTemp termR = newTemp(Ity_I64);
|
|
+ IRTemp nzcv = newTemp(Ity_I64);
|
|
+ IRTemp irRes = newTemp(Ity_I64);
|
|
+
|
|
+ /* This is where the fun starts. We have to convert 'irRes' from
|
|
+ an IR-convention return result (IRCmpF64Result) to an
|
|
+ ARM-encoded (N,Z,C,V) group. The final result is in the bottom
|
|
+ 4 bits of 'nzcv'. */
|
|
+ /* Map compare result from IR to ARM(nzcv) */
|
|
+ /*
|
|
+ FP cmp result | IR | ARM(nzcv)
|
|
+ --------------------------------
|
|
+ UN 0x45 0011
|
|
+ LT 0x01 1000
|
|
+ GT 0x00 0010
|
|
+ EQ 0x40 0110
|
|
+ */
|
|
+ /* Now since you're probably wondering WTF ..
|
|
+
|
|
+ ix fishes the useful bits out of the IR value, bits 6 and 0, and
|
|
+ places them side by side, giving a number which is 0, 1, 2 or 3.
|
|
+
|
|
+ termL is a sequence cooked up by GNU superopt. It converts ix
|
|
+ into an almost correct NZCV value (incredibly), except
|
|
+ for the case of UN, where it produces 0100 instead of the
|
|
+ required 0011.
|
|
+
|
|
+ termR is therefore a correction term, also computed from ix. It
|
|
+ is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
|
|
+ the final correct value, we subtract termR from termL.
|
|
+
|
|
+ Don't take my word for it. There's a test program at the bottom
|
|
+ of guest_arm_toIR.c, to try this out with.
|
|
+ */
|
|
+ /* Zero-extend the 32-bit comparison result to 64 bits. */
|
|
+ assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
|
|
+
|
|
+ /* ix = ((irRes >> 5) & 3) | (irRes & 1): GT=0, LT=1, EQ=2, UN=3 */
|
|
+ assign(
|
|
+ ix,
|
|
+ binop(Iop_Or64,
|
|
+ binop(Iop_And64,
|
|
+ binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
|
|
+ mkU64(3)),
|
|
+ binop(Iop_And64, mkexpr(irRes), mkU64(1))));
|
|
+
|
|
+ /* termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1 */
|
|
+ assign(
|
|
+ termL,
|
|
+ binop(Iop_Add64,
|
|
+ binop(Iop_Shr64,
|
|
+ binop(Iop_Sub64,
|
|
+ binop(Iop_Shl64,
|
|
+ binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
|
|
+ mkU8(62)),
|
|
+ mkU64(1)),
|
|
+ mkU8(61)),
|
|
+ mkU64(1)));
|
|
+
|
|
+ /* termR = (ix & (ix >> 1)) & 1, which is 1 only when ix == 3 */
|
|
+ assign(
|
|
+ termR,
|
|
+ binop(Iop_And64,
|
|
+ binop(Iop_And64,
|
|
+ mkexpr(ix),
|
|
+ binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
|
|
+ mkU64(1)));
|
|
+
|
|
+ assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
|
|
+ return nzcv;
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Data processing (immediate) ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Helper functions for supporting "DecodeBitMasks" */
|
|
+
|
|
+/* Rotate x right by 'rot' bit positions within a field of 'width'
|
|
+   bits, and return the result.  width must be in 1 .. 64 and rot in
|
|
+   0 .. width-1.  For a nonzero rot and width < 64 the result is
|
|
+   masked to 'width' bits; for rot == 0, x is returned as is. */
|
|
+static ULong dbm_ROR ( Int width, ULong x, Int rot )
|
|
+{
|
|
+   vassert(width > 0 && width <= 64);
|
|
+   vassert(rot >= 0 && rot < width);
|
|
+   if (rot != 0) {
|
|
+      ULong lowPart  = x >> rot;
|
|
+      ULong highPart = x << (width - rot);
|
|
+      x = lowPart | highPart;
|
|
+      if (width < 64) {
|
|
+         x &= ((1ULL << width) - 1);
|
|
+      }
|
|
+   }
|
|
+   return x;
|
|
+}
|
|
+
|
|
+/* Replicate the low 'esize' bits of x across all 64 bits and return
|
|
+   the result.  esize must be 2, 4, 8, 16, 32 or 64; any other value
|
|
+   causes a panic.  For esize == 64, x is returned unchanged. */
|
|
+static ULong dbm_RepTo64( Int esize, ULong x )
|
|
+{
|
|
+   Int filled;
|
|
+   switch (esize) {
|
|
+      case 64:
|
|
+         return x;
|
|
+      case 32: case 16: case 8: case 4: case 2:
|
|
+         break;
|
|
+      default:
|
|
+         vpanic("dbm_RepTo64");
|
|
+         /*NOTREACHED*/
|
|
+         return 0;
|
|
+   }
|
|
+   /* Keep only the element, then double the number of populated
|
|
+      bits until all 64 are filled. */
|
|
+   x &= ((1ULL << esize) - 1);
|
|
+   for (filled = esize; filled < 64; filled *= 2) {
|
|
+      x |= (x << filled);
|
|
+   }
|
|
+   return x;
|
|
+}
|
|
+
|
|
+/* Return the index (0 .. 63) of the most significant set bit of x,
|
|
+   or -1 if x is zero. */
|
|
+static Int dbm_highestSetBit ( ULong x )
|
|
+{
|
|
+   Int bitNo = 63;
|
|
+   /* Scan downwards from bit 63 until a set bit is found. */
|
|
+   while (bitNo >= 0 && (x & (1ULL << bitNo)) == 0) {
|
|
+      bitNo--;
|
|
+   }
|
|
+   if (bitNo < 0) {
|
|
+      vassert(x == 0);
|
|
+   }
|
|
+   return bitNo;
|
|
+}
|
|
+
|
|
+/* Decode the (immN, imms, immr) fields of a logical-immediate or
+ bitfield instruction into a pair of 64-bit masks.
+
+ Returns False, without writing either mask, if the encoding is
+ rejected: that is, if the highest set bit of immN:NOT(imms) is
+ below bit 1, or if 'immediate' is True and the low 'len' bits of
+ imms are all ones. Otherwise returns True, having written
+ *wmask and *tmask; either pointer may be NULL, in which case that
+ mask is not written. M is the operand width, 32 or 64.
+
+ NOTE(review): presumably this mirrors the ARMv8 ARM pseudocode
+ function DecodeBitMasks -- confirm against the manual. */
|
|
+static
|
|
+Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
|
|
+ ULong immN, ULong imms, ULong immr, Bool immediate,
|
|
+ UInt M /*32 or 64*/)
|
|
+{
|
|
+ vassert(immN < (1ULL << 1));
|
|
+ vassert(imms < (1ULL << 6));
|
|
+ vassert(immr < (1ULL << 6));
|
|
+ vassert(immediate == False || immediate == True);
|
|
+ vassert(M == 32 || M == 64);
|
|
+
|
|
+ /* len is the log2 of the element size; the element size is encoded
+ by the position of the top set bit of immN:NOT(imms). */
+ Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
|
|
+ if (len < 1) { /* printf("fail1\n"); */ return False; }
|
|
+ vassert(len <= 6);
|
|
+ vassert(M >= (1 << len));
|
|
+
|
|
+ vassert(len >= 1 && len <= 6);
|
|
+ ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
|
|
+ (1 << len) - 1;
|
|
+ vassert(levels >= 1 && levels <= 63);
|
|
+
|
|
+ if (immediate && ((imms & levels) == levels)) {
|
|
+ /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
|
|
+ return False;
|
|
+ }
|
|
+
|
|
+ /* S and R are imms and immr restricted to the low 'len' bits. */
+ ULong S = imms & levels;
|
|
+ ULong R = immr & levels;
|
|
+ Int diff = S - R;
|
|
+ diff &= 63;
|
|
+ Int esize = 1 << len;
|
|
+ vassert(2 <= esize && esize <= 64);
|
|
+
|
|
+ /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
|
|
+ same below with d. S can be 63 in which case we have an out of
|
|
+ range and hence undefined shift. */
|
|
+ vassert(S >= 0 && S <= 63);
|
|
+ vassert(esize >= (S+1));
|
|
+ /* elem_s has its low S+1 bits set; written as a sum of two terms
+ so that no shift amount exceeds 63. */
+ ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
|
|
+ //(1ULL << (S+1)) - 1;
|
|
+ ((1ULL << S) - 1) + (1ULL << S);
|
|
+
|
|
+ Int d = // diff<len-1:0>
|
|
+ diff & ((1 << len)-1);
|
|
+ vassert(esize >= (d+1));
|
|
+ vassert(d >= 0 && d <= 63);
|
|
+
|
|
+ /* elem_d has its low d+1 bits set, computed the same way. */
+ ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
|
|
+ //(1ULL << (d+1)) - 1;
|
|
+ ((1ULL << d) - 1) + (1ULL << d);
|
|
+
|
|
+ if (esize != 64) vassert(elem_s < (1ULL << esize));
|
|
+ if (esize != 64) vassert(elem_d < (1ULL << esize));
|
|
+
|
|
+ /* wmask: elem_s rotated right by R within the element, then
+ replicated to 64 bits. tmask: elem_d replicated to 64 bits. */
+ if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
|
|
+ if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
|
|
+
|
|
+ return True;
|
|
+}
|
|
+
|
|
+
|
|
+/* Disassemble one instruction from the "data processing (immediate)"
|
|
+   group -- ADD/SUB immediate, ADR/ADRP, logical immediate,
|
|
+   MOVZ/MOVN/MOVK, SBFM/BFM/UBFM and EXTR -- and generate IR for it.
|
|
+   Returns True if insn was recognised and translated.  Otherwise
|
|
+   prints a diagnostic with vex_printf and returns False.  dres is
|
|
+   not referenced here. */
|
|
+static
|
|
+Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
|
|
+                                         UInt insn)
|
|
+{
|
|
+#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
|
|
+
|
|
+   /* insn[28:23]
|
|
+      10000x PC-rel addressing
|
|
+      10001x Add/subtract (immediate)
|
|
+      100100 Logical (immediate)
|
|
+      100101 Move Wide (immediate)
|
|
+      100110 Bitfield
|
|
+      100111 Extract
|
|
+   */
|
|
+
|
|
+   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
|
|
+   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
|
|
+      Bool is64   = INSN(31,31) == 1;
|
|
+      Bool isSub  = INSN(30,30) == 1;
|
|
+      Bool setCC  = INSN(29,29) == 1;
|
|
+      UInt sh     = INSN(23,22);
|
|
+      UInt uimm12 = INSN(21,10);
|
|
+      UInt nn     = INSN(9,5);
|
|
+      UInt dd     = INSN(4,0);
|
|
+      const HChar* nm = isSub ? "sub" : "add";
|
|
+      if (sh >= 2) {
|
|
+         /* Invalid; fall through */
|
|
+      } else {
|
|
+         vassert(sh <= 1);
|
|
+         uimm12 <<= (12 * sh);
|
|
+         if (is64) {
|
|
+            IRTemp argL = newTemp(Ity_I64);
|
|
+            IRTemp argR = newTemp(Ity_I64);
|
|
+            IRTemp res  = newTemp(Ity_I64);
|
|
+            assign(argL, getIReg64orSP(nn));
|
|
+            assign(argR, mkU64(uimm12));
|
|
+            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
|
|
+                               mkexpr(argL), mkexpr(argR)));
|
|
+            if (setCC) {
|
|
+               putIReg64orZR(dd, mkexpr(res));
|
|
+               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
|
|
+               DIP("%ss %s, %s, 0x%x\n",
|
|
+                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
|
|
+            } else {
|
|
+               putIReg64orSP(dd, mkexpr(res));
|
|
+               DIP("%s %s, %s, 0x%x\n",
|
|
+                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
|
|
+            }
|
|
+         } else {
|
|
+            IRTemp argL = newTemp(Ity_I32);
|
|
+            IRTemp argR = newTemp(Ity_I32);
|
|
+            IRTemp res  = newTemp(Ity_I32);
|
|
+            assign(argL, getIReg32orSP(nn));
|
|
+            assign(argR, mkU32(uimm12));
|
|
+            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
|
|
+                               mkexpr(argL), mkexpr(argR)));
|
|
+            if (setCC) {
|
|
+               putIReg32orZR(dd, mkexpr(res));
|
|
+               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
|
|
+               DIP("%ss %s, %s, 0x%x\n",
|
|
+                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
|
|
+            } else {
|
|
+               putIReg32orSP(dd, mkexpr(res));
|
|
+               DIP("%s %s, %s, 0x%x\n",
|
|
+                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
|
|
+            }
|
|
+         }
|
|
+         return True;
|
|
+      }
|
|
+   }
|
|
+
|
|
+   /* -------------------- ADR/ADRP -------------------- */
|
|
+   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
|
|
+      UInt  bP    = INSN(31,31);
|
|
+      UInt  immLo = INSN(30,29);
|
|
+      UInt  immHi = INSN(23,5);
|
|
+      UInt  rD    = INSN(4,0);
|
|
+      ULong uimm  = (immHi << 2) | immLo;
|
|
+      ULong simm  = sx_to_64(uimm, 21);
|
|
+      ULong val;
|
|
+      if (bP) {
|
|
+         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
|
|
+      } else {
|
|
+         val = guest_PC_curr_instr + simm;
|
|
+      }
|
|
+      putIReg64orZR(rD, mkU64(val));
|
|
+      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
|
|
+      return True;
|
|
+   }
|
|
+
|
|
+   /* -------------------- LOGIC(imm) -------------------- */
|
|
+   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
|
|
+      /* 31 30 28     22 21   15   9  4
|
|
+         sf op 100100 N  immr imms Rn Rd
|
|
+           op=00: AND  Rd|SP, Rn, #imm
|
|
+           op=01: ORR  Rd|SP, Rn, #imm
|
|
+           op=10: EOR  Rd|SP, Rn, #imm
|
|
+           op=11: ANDS Rd|ZR, Rn, #imm
|
|
+      */
|
|
+      Bool  is64 = INSN(31,31) == 1;
|
|
+      UInt  op   = INSN(30,29);
|
|
+      UInt  N    = INSN(22,22);
|
|
+      UInt  immR = INSN(21,16);
|
|
+      UInt  immS = INSN(15,10);
|
|
+      UInt  nn   = INSN(9,5);
|
|
+      UInt  dd   = INSN(4,0);
|
|
+      ULong imm  = 0;
|
|
+      Bool  ok;
|
|
+      if (N == 1 && !is64)
|
|
+         goto after_logic_imm; /* not allowed; fall through */
|
|
+      ok = dbm_DecodeBitMasks(&imm, NULL,
|
|
+                              N, immS, immR, True, is64 ? 64 : 32);
|
|
+      if (!ok)
|
|
+         goto after_logic_imm;
|
|
+
|
|
+      const HChar* names[4] = { "and", "orr", "eor", "ands" };
|
|
+      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
|
|
+      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
|
|
+
|
|
+      vassert(op < 4);
|
|
+      if (is64) {
|
|
+         IRExpr* argL = getIReg64orZR(nn);
|
|
+         IRExpr* argR = mkU64(imm);
|
|
+         IRTemp  res  = newTemp(Ity_I64);
|
|
+         assign(res, binop(ops64[op], argL, argR));
|
|
+         if (op < 3) {
|
|
+            putIReg64orSP(dd, mkexpr(res));
|
|
+            DIP("%s %s, %s, 0x%llx\n", names[op],
|
|
+                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
|
|
+         } else {
|
|
+            putIReg64orZR(dd, mkexpr(res));
|
|
+            setFlags_LOGIC(True/*is64*/, res);
|
|
+            DIP("%s %s, %s, 0x%llx\n", names[op],
|
|
+                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
|
|
+         }
|
|
+      } else {
|
|
+         IRExpr* argL = getIReg32orZR(nn);
|
|
+         IRExpr* argR = mkU32((UInt)imm);
|
|
+         IRTemp  res  = newTemp(Ity_I32);
|
|
+         assign(res, binop(ops32[op], argL, argR));
|
|
+         if (op < 3) {
|
|
+            putIReg32orSP(dd, mkexpr(res));
|
|
+            DIP("%s %s, %s, 0x%x\n", names[op],
|
|
+                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
|
|
+         } else {
|
|
+            putIReg32orZR(dd, mkexpr(res));
|
|
+            setFlags_LOGIC(False/*!is64*/, res);
|
|
+            DIP("%s %s, %s, 0x%x\n", names[op],
|
|
+                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
|
|
+         }
|
|
+      }
|
|
+      return True;
|
|
+   }
|
|
+   after_logic_imm:
|
|
+
|
|
+   /* -------------------- MOV{Z,N,K} -------------------- */
|
|
+   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
|
|
+      /* 31 30 28      22 20    4
|
|
+         |  |  |       |  |     |
|
|
+         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
|
|
+         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
|
|
+         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
|
|
+      */
|
|
+      Bool is64   = INSN(31,31) == 1;
|
|
+      UInt subopc = INSN(30,29);
|
|
+      UInt hw     = INSN(22,21);
|
|
+      UInt imm16  = INSN(20,5);
|
|
+      UInt dd     = INSN(4,0);
|
|
+      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
|
|
+         /* invalid; fall through */
|
|
+      } else {
|
|
+         ULong imm64 = ((ULong)imm16) << (16 * hw);
|
|
+         if (!is64)
|
|
+            vassert(imm64 < 0x100000000ULL);
|
|
+         switch (subopc) {
|
|
+            case BITS2(1,0): // MOVZ
|
|
+               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
|
|
+               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
|
|
+               break;
|
|
+            case BITS2(0,0): // MOVN
|
|
+               imm64 = ~imm64;
|
|
+               if (!is64)
|
|
+                  imm64 &= 0xFFFFFFFFULL;
|
|
+               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
|
|
+               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
|
|
+               break;
|
|
+            case BITS2(1,1): // MOVK
|
|
+               /* This is more complex.  We are inserting a slice into
|
|
+                  the destination register, so we need to have the old
|
|
+                  value of it. */
|
|
+               if (is64) {
|
|
+                  IRTemp old = newTemp(Ity_I64);
|
|
+                  assign(old, getIReg64orZR(dd));
|
|
+                  ULong mask = 0xFFFFULL << (16 * hw);
|
|
+                  IRExpr* res
|
|
+                     = binop(Iop_Or64,
|
|
+                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
|
|
+                             mkU64(imm64));
|
|
+                  putIReg64orZR(dd, res);
|
|
+                  DIP("movk %s, 0x%x, lsl %u\n",
|
|
+                      nameIReg64orZR(dd), imm16, 16*hw);
|
|
+               } else {
|
|
+                  IRTemp old = newTemp(Ity_I32);
|
|
+                  assign(old, getIReg32orZR(dd));
|
|
+                  vassert(hw <= 1);
|
|
+                  /* The constant must be unsigned: 0xFFFF << 16 is not
|
|
+                     representable in a signed int, so shifting a plain
|
|
+                     int constant here would be undefined behaviour. */
|
|
+                  UInt mask = 0xFFFFU << (16 * hw);
|
|
+                  IRExpr* res
|
|
+                     = binop(Iop_Or32,
|
|
+                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
|
|
+                             mkU32((UInt)imm64));
|
|
+                  putIReg32orZR(dd, res);
|
|
+                  DIP("movk %s, 0x%x, lsl %u\n",
|
|
+                      nameIReg32orZR(dd), imm16, 16*hw);
|
|
+               }
|
|
+               break;
|
|
+            default:
|
|
+               vassert(0);
|
|
+         }
|
|
+         return True;
|
|
+      }
|
|
+   }
|
|
+
|
|
+   /* -------------------- {U,S,}BFM -------------------- */
|
|
+   /*    30 28     22 21   15   9  4
|
|
+
|
|
+      sf 10 100110 N  immr imms nn dd
|
|
+         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
|
|
+         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1
|
|
+
|
|
+      sf 00 100110 N  immr imms nn dd
|
|
+         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
|
|
+         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1
|
|
+
|
|
+      sf 01 100110 N  immr imms nn dd
|
|
+         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
|
|
+         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
|
|
+   */
|
|
+   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
|
|
+      UInt sf     = INSN(31,31);
|
|
+      UInt opc    = INSN(30,29);
|
|
+      UInt N      = INSN(22,22);
|
|
+      UInt immR   = INSN(21,16);
|
|
+      UInt immS   = INSN(15,10);
|
|
+      UInt nn     = INSN(9,5);
|
|
+      UInt dd     = INSN(4,0);
|
|
+      Bool inZero = False;
|
|
+      Bool extend = False;
|
|
+      const HChar* nm = "???";
|
|
+      /* skip invalid combinations */
|
|
+      switch (opc) {
|
|
+         case BITS2(0,0):
|
|
+            inZero = True; extend = True; nm = "sbfm"; break;
|
|
+         case BITS2(0,1):
|
|
+            inZero = False; extend = False; nm = "bfm"; break;
|
|
+         case BITS2(1,0):
|
|
+            inZero = True; extend = False; nm = "ubfm"; break;
|
|
+         case BITS2(1,1):
|
|
+            goto after_bfm; /* invalid */
|
|
+         default:
|
|
+            vassert(0);
|
|
+      }
|
|
+      if (sf == 1 && N != 1) goto after_bfm;
|
|
+      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
|
|
+                             || ((immS >> 5) & 1) != 0)) goto after_bfm;
|
|
+      ULong wmask = 0, tmask = 0;
|
|
+      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
|
|
+                                   N, immS, immR, False, sf == 1 ? 64 : 32);
|
|
+      if (!ok) goto after_bfm; /* hmmm */
|
|
+
|
|
+      Bool   is64 = sf == 1;
|
|
+      IRType ty   = is64 ? Ity_I64 : Ity_I32;
|
|
+
|
|
+      IRTemp dst = newTemp(ty);
|
|
+      IRTemp src = newTemp(ty);
|
|
+      IRTemp bot = newTemp(ty);
|
|
+      IRTemp top = newTemp(ty);
|
|
+      IRTemp res = newTemp(ty);
|
|
+      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
|
|
+      assign(src, getIRegOrZR(is64, nn));
|
|
+      /* perform bitfield move on low bits */
|
|
+      assign(bot, binop(mkOR(ty),
|
|
+                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
|
|
+                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
|
|
+                                         mkU(ty, wmask))));
|
|
+      /* determine extension bits (sign, zero or dest register) */
|
|
+      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
|
|
+      /* combine extension bits and result bits */
|
|
+      assign(res, binop(mkOR(ty),
|
|
+                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
|
|
+                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
|
|
+      putIRegOrZR(is64, dd, mkexpr(res));
|
|
+      DIP("%s %s, %s, immR=%u, immS=%u\n",
|
|
+          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
|
|
+      return True;
|
|
+   }
|
|
+   after_bfm:
|
|
+
|
|
+   /* ---------------------- EXTR ---------------------- */
|
|
+   /*   30 28     22 20 15   9 4
|
|
+      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
|
|
+      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
|
|
+   */
|
|
+   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
|
|
+      Bool is64  = INSN(31,31) == 1;
|
|
+      UInt mm    = INSN(20,16);
|
|
+      UInt imm6  = INSN(15,10);
|
|
+      UInt nn    = INSN(9,5);
|
|
+      UInt dd    = INSN(4,0);
|
|
+      Bool valid = True;
|
|
+      if (INSN(31,31) != INSN(22,22))
|
|
+         valid = False;
|
|
+      if (!is64 && imm6 >= 32)
|
|
+         valid = False;
|
|
+      if (!valid) goto after_extr;
|
|
+      IRType ty    = is64 ? Ity_I64 : Ity_I32;
|
|
+      IRTemp srcHi = newTemp(ty);
|
|
+      IRTemp srcLo = newTemp(ty);
|
|
+      IRTemp res   = newTemp(ty);
|
|
+      assign(srcHi, getIRegOrZR(is64, nn));
|
|
+      assign(srcLo, getIRegOrZR(is64, mm));
|
|
+      if (imm6 == 0) {
|
|
+         assign(res, mkexpr(srcLo));
|
|
+      } else {
|
|
+         UInt szBits = 8 * sizeofIRType(ty);
|
|
+         vassert(imm6 > 0 && imm6 < szBits);
|
|
+         assign(res, binop(mkOR(ty),
|
|
+                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
|
|
+                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
|
|
+      }
|
|
+      putIRegOrZR(is64, dd, mkexpr(res));
|
|
+      DIP("extr %s, %s, %s, #%u\n",
|
|
+          nameIRegOrZR(is64,dd),
|
|
+          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
|
|
+      return True;
|
|
+   }
|
|
+   after_extr:
|
|
+
|
|
+   vex_printf("ARM64 front end: data_processing_immediate\n");
|
|
+   return False;
|
|
+#  undef INSN
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Data processing (register) instructions ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Return the mnemonic for a 2-bit shift-kind field: 0 is "lsl",
|
|
+   1 is "lsr", 2 is "asr" and 3 is "ror".  Any other value fails an
|
|
+   assertion. */
|
|
+static const HChar* nameSH ( UInt sh ) {
|
|
+   static const HChar* const shiftNames[4]
|
|
+      = { "lsl", "lsr", "asr", "ror" };
|
|
+   if (sh >= 4) {
|
|
+      vassert(0);
|
|
+   }
|
|
+   return shiftNames[sh];
|
|
+}
|
|
+
|
|
+/* Generate IR that reads integer register regNo (or the zero
|
|
+   register), applies a shift or rotate by the immediate sh_amt, and
|
|
+   optionally bitwise-inverts the outcome.  Returns a new temporary,
|
|
+   64 bits wide if is64 and 32 bits wide otherwise, holding the
|
|
+   result.
|
|
+
|
|
+   sh_how is coded 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt must lie
|
|
+   in the range 0 to (is64 ? 64 : 32)-1.  Some instructions do not
|
|
+   permit ROR; rejecting it in those cases is the caller's job.
|
|
+*/
|
|
+static IRTemp getShiftedIRegOrZR ( Bool is64,
|
|
+                                   UInt sh_how, UInt sh_amt, UInt regNo,
|
|
+                                   Bool invert )
|
|
+{
|
|
+   vassert(sh_how < 4);
|
|
+   vassert(sh_amt < (is64 ? 64 : 32));
|
|
+   IRType ty = is64 ? Ity_I64 : Ity_I32;
|
|
+
|
|
+   IRTemp regVal = newTemp(ty);
|
|
+   assign(regVal, getIRegOrZR(is64, regNo));
|
|
+
|
|
+   IRTemp  shifted = newTemp(ty);
|
|
+   IRExpr* shiftedE;
|
|
+   if (sh_how == BITS2(1,1)) {
|
|
+      /* Rotates are built by a helper rather than a single binop. */
|
|
+      shiftedE = mkexpr(mathROR(ty, regVal, sh_amt));
|
|
+   } else {
|
|
+      IROp shiftOp = Iop_INVALID;
|
|
+      switch (sh_how) {
|
|
+         case BITS2(0,0): shiftOp = mkSHL(ty); break;
|
|
+         case BITS2(0,1): shiftOp = mkSHR(ty); break;
|
|
+         case BITS2(1,0): shiftOp = mkSAR(ty); break;
|
|
+         default: vassert(0);
|
|
+      }
|
|
+      shiftedE = binop(shiftOp, mkexpr(regVal), mkU8(sh_amt));
|
|
+   }
|
|
+   assign(shifted, shiftedE);
|
|
+
|
|
+   if (!invert) {
|
|
+      return shifted;
|
|
+   }
|
|
+   IRTemp inverted = newTemp(ty);
|
|
+   assign(inverted, unop(mkNOT(ty), mkexpr(shifted)));
|
|
+   return inverted;
|
|
+}
|
|
+
|
|
+
|
|
+static
|
|
+Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
|
|
+ UInt insn)
|
|
+{
|
|
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
|
|
+
|
|
+ /* ------------------- ADD/SUB(reg) ------------------- */
|
|
+ /* x==0 => 32 bit op x==1 => 64 bit op
|
|
+ sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
|
|
+
|
|
+ 31 30 29 28 23 21 20 15 9 4
|
|
+ | | | | | | | | | |
|
|
+ x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
|
|
+ x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
|
|
+ x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
|
|
+ x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
|
|
+ */
|
|
+ if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
|
|
+ UInt bX = INSN(31,31);
|
|
+ UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
|
|
+ UInt bS = INSN(29, 29); /* set flags? */
|
|
+ UInt sh = INSN(23,22);
|
|
+ UInt rM = INSN(20,16);
|
|
+ UInt imm6 = INSN(15,10);
|
|
+ UInt rN = INSN(9,5);
|
|
+ UInt rD = INSN(4,0);
|
|
+ Bool isSUB = bOP == 1;
|
|
+ Bool is64 = bX == 1;
|
|
+ IRType ty = is64 ? Ity_I64 : Ity_I32;
|
|
+ if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
|
|
+ /* invalid; fall through */
|
|
+ } else {
|
|
+ IRTemp argL = newTemp(ty);
|
|
+ assign(argL, getIRegOrZR(is64, rN));
|
|
+ IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
|
|
+ IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
|
|
+ IRTemp res = newTemp(ty);
|
|
+ assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
|
|
+ if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
|
|
+ if (bS) {
|
|
+ setFlags_ADD_SUB(is64, isSUB, argL, argR);
|
|
+ }
|
|
+ DIP("%s%s %s, %s, %s, %s #%u\n",
|
|
+ bOP ? "sub" : "add", bS ? "s" : "",
|
|
+ nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
|
|
+ nameIRegOrZR(is64, rM), nameSH(sh), imm6);
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------------------- LOGIC(reg) -------------------- */
|
|
+ /* x==0 => 32 bit op x==1 => 64 bit op
|
|
+ N==0 => inv? is no-op (no inversion)
|
|
+ N==1 => inv? is NOT
|
|
+ sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
|
|
+
|
|
+ 31 30 28 23 21 20 15 9 4
|
|
+ | | | | | | | | |
|
|
+ x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
|
|
+ x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
|
|
+ x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
|
|
+ x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
|
|
+ With N=1, the names are: BIC ORN EON BICS
|
|
+ */
|
|
+ if (INSN(28,24) == BITS5(0,1,0,1,0)) {
|
|
+ UInt bX = INSN(31,31);
|
|
+ UInt sh = INSN(23,22);
|
|
+ UInt bN = INSN(21,21);
|
|
+ UInt rM = INSN(20,16);
|
|
+ UInt imm6 = INSN(15,10);
|
|
+ UInt rN = INSN(9,5);
|
|
+ UInt rD = INSN(4,0);
|
|
+ Bool is64 = bX == 1;
|
|
+ IRType ty = is64 ? Ity_I64 : Ity_I32;
|
|
+ if (!is64 && imm6 > 31) {
|
|
+ /* invalid; fall through */
|
|
+ } else {
|
|
+ IRTemp argL = newTemp(ty);
|
|
+ assign(argL, getIRegOrZR(is64, rN));
|
|
+ IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
|
|
+ IROp op = Iop_INVALID;
|
|
+ switch (INSN(30,29)) {
|
|
+ case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
|
|
+ case BITS2(0,1): op = mkOR(ty); break;
|
|
+ case BITS2(1,0): op = mkXOR(ty); break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ IRTemp res = newTemp(ty);
|
|
+ assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
|
|
+ if (INSN(30,29) == BITS2(1,1)) {
|
|
+ setFlags_LOGIC(is64, res);
|
|
+ }
|
|
+ putIRegOrZR(is64, rD, mkexpr(res));
|
|
+
|
|
+ static const HChar* names_op[8]
|
|
+ = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
|
|
+ vassert(((bN << 2) | INSN(30,29)) < 8);
|
|
+ const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
|
|
+ /* Special-case the printing of "MOV" */
|
|
+ if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
|
|
+ DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
|
|
+ nameIRegOrZR(is64, rM));
|
|
+ } else {
|
|
+ DIP("%s %s, %s, %s, %s #%u\n", nm_op,
|
|
+ nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
|
|
+ nameIRegOrZR(is64, rM), nameSH(sh), imm6);
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------------------- {U,S}MULH -------------------- */
|
|
+ /* 31 23 22 20 15 9 4
|
|
+ 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
|
|
+ 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
|
|
+ */
|
|
+ if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
|
|
+ && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)
|
|
+ && INSN(23,23) == 1/*ATC*/) {
|
|
+ Bool isU = INSN(23,23) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ putIReg64orZR(dd, unop(Iop_128HIto64,
|
|
+ binop(isU ? Iop_MullU64 : Iop_MullS64,
|
|
+ getIReg64orZR(nn), getIReg64orZR(mm))));
|
|
+ DIP("%cmulh %s, %s, %s\n",
|
|
+ isU ? 'u' : 's',
|
|
+ nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- M{ADD,SUB} -------------------- */
|
|
+ /* 31 30 20 15 14 9 4
|
|
+ sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
|
|
+ sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
|
|
+ */
|
|
+ if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ Bool isAdd = INSN(15,15) == 0;
|
|
+ UInt aa = INSN(14,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ if (is64) {
|
|
+ putIReg64orZR(
|
|
+ dd,
|
|
+ binop(isAdd ? Iop_Add64 : Iop_Sub64,
|
|
+ getIReg64orZR(aa),
|
|
+ binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
|
|
+ } else {
|
|
+ putIReg32orZR(
|
|
+ dd,
|
|
+ binop(isAdd ? Iop_Add32 : Iop_Sub32,
|
|
+ getIReg32orZR(aa),
|
|
+ binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
|
|
+ }
|
|
+ DIP("%s %s, %s, %s, %s\n",
|
|
+ isAdd ? "madd" : "msub",
|
|
+ nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
|
|
+ nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
|
|
+ /* 31 30 28 20 15 11 9 4
|
|
+ sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
|
|
+ sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
|
|
+ sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
|
|
+ sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
|
|
+ In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
|
|
+ */
|
|
+ if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ UInt b30 = INSN(30,30);
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt cond = INSN(15,12);
|
|
+ UInt b10 = INSN(10,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
|
|
+ IRType ty = is64 ? Ity_I64 : Ity_I32;
|
|
+ IRExpr* argL = getIRegOrZR(is64, nn);
|
|
+ IRExpr* argR = getIRegOrZR(is64, mm);
|
|
+ switch (op) {
|
|
+ case BITS2(0,0):
|
|
+ break;
|
|
+ case BITS2(0,1):
|
|
+ argR = binop(mkADD(ty), argR, mkU(ty,1));
|
|
+ break;
|
|
+ case BITS2(1,0):
|
|
+ argR = unop(mkNOT(ty), argR);
|
|
+ break;
|
|
+ case BITS2(1,1):
|
|
+ argR = binop(mkSUB(ty), mkU(ty,0), argR);
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ putIRegOrZR(
|
|
+ is64, dd,
|
|
+ IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
|
|
+ argL, argR)
|
|
+ );
|
|
+ const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
|
|
+ DIP("%s %s, %s, %s, %s\n", op_nm[op],
|
|
+ nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
|
|
+ nameIRegOrZR(is64, mm), nameCC(cond));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------- ADD/SUB(extended reg) -------------- */
|
|
+ /* 28 20 15 12 9 4
|
|
+ 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
|
|
+ 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
|
|
+
|
|
+ 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
|
|
+ 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
|
|
+
|
|
+ 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
|
|
+ 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
|
|
+
|
|
+ 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
|
|
+ 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
|
|
+
|
|
+ The 'm' operand is extended per opt, thusly:
|
|
+
|
|
+ 000 Xm & 0xFF UXTB
|
|
+ 001 Xm & 0xFFFF UXTH
|
|
+ 010 Xm & (2^32)-1 UXTW
|
|
+ 011 Xm UXTX
|
|
+
|
|
+ 100 Xm sx from bit 7 SXTB
|
|
+ 101 Xm sx from bit 15 SXTH
|
|
+ 110 Xm sx from bit 31 SXTW
|
|
+ 111 Xm SXTX
|
|
+
|
|
+ In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
|
|
+ operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
|
|
+ are the identity operation on Wm.
|
|
+
|
|
+ After extension, the value is shifted left by imm3 bits, which
|
|
+ may only be in the range 0 .. 4 inclusive.
|
|
+ */
|
|
+ if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ Bool isSub = INSN(30,30) == 1;
|
|
+ Bool setCC = INSN(29,29) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt opt = INSN(15,13);
|
|
+ UInt imm3 = INSN(12,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
|
|
+ "sxtb", "sxth", "sxtw", "sxtx" };
|
|
+ /* Do almost the same thing in the 32- and 64-bit cases. */
|
|
+ IRTemp xN = newTemp(Ity_I64);
|
|
+ IRTemp xM = newTemp(Ity_I64);
|
|
+ assign(xN, getIReg64orSP(nn));
|
|
+ assign(xM, getIReg64orZR(mm));
|
|
+ IRExpr* xMw = mkexpr(xM); /* "xM widened" */
|
|
+ Int shSX = 0;
|
|
+ /* widen Xm .. */
|
|
+ switch (opt) {
|
|
+ case BITS3(0,0,0): // UXTB
|
|
+ xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
|
|
+ case BITS3(0,0,1): // UXTH
|
|
+ xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
|
|
+ case BITS3(0,1,0): // UXTW -- noop for the 32bit case
|
|
+ if (is64) {
|
|
+ xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
|
|
+ }
|
|
+ break;
|
|
+ case BITS3(0,1,1): // UXTX -- always a noop
|
|
+ break;
|
|
+ case BITS3(1,0,0): // SXTB
|
|
+ shSX = 56; goto sxTo64;
|
|
+ case BITS3(1,0,1): // SXTH
|
|
+ shSX = 48; goto sxTo64;
|
|
+ case BITS3(1,1,0): // SXTW -- noop for the 32bit case
|
|
+ if (is64) {
|
|
+ shSX = 32; goto sxTo64;
|
|
+ }
|
|
+ break;
|
|
+ case BITS3(1,1,1): // SXTX -- always a noop
|
|
+ break;
|
|
+ sxTo64:
|
|
+ vassert(shSX >= 32);
|
|
+ xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
|
|
+ mkU8(shSX));
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ /* and now shift */
|
|
+ IRTemp argL = xN;
|
|
+ IRTemp argR = newTemp(Ity_I64);
|
|
+ assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
|
|
+ IRTemp res = newTemp(Ity_I64);
|
|
+ assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
|
|
+ mkexpr(argL), mkexpr(argR)));
|
|
+ if (is64) {
|
|
+ if (setCC) {
|
|
+ putIReg64orZR(dd, mkexpr(res));
|
|
+ setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
|
|
+ } else {
|
|
+ putIReg64orSP(dd, mkexpr(res));
|
|
+ }
|
|
+ } else {
|
|
+ if (setCC) {
|
|
+ IRTemp argL32 = newTemp(Ity_I32);
|
|
+ IRTemp argR32 = newTemp(Ity_I32);
|
|
+ putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
|
|
+ assign(argL32, unop(Iop_64to32, mkexpr(argL)));
|
|
+ assign(argR32, unop(Iop_64to32, mkexpr(argR)));
|
|
+ setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
|
|
+ } else {
|
|
+ putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
|
|
+ }
|
|
+ }
|
|
+ DIP("%s%s %s, %s, %s %s lsl %u\n",
|
|
+ isSub ? "sub" : "add", setCC ? "s" : "",
|
|
+ setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
|
|
+ nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
|
|
+ nameExt[opt], imm3);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ---------------- CCMP/CCMN(imm) ---------------- */
|
|
+ /* Bizarrely, these appear in the "data processing register"
|
|
+ category, even though they are operations against an
|
|
+ immediate. */
|
|
+ /* 31 29 20 15 11 9 3
|
|
+ sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
|
|
+ sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
|
|
+
|
|
+ Operation is:
|
|
+ (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
|
|
+ (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
|
|
+ */
|
|
+ if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
|
|
+ && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ Bool isSUB = INSN(30,30) == 1;
|
|
+ UInt imm5 = INSN(20,16);
|
|
+ UInt cond = INSN(15,12);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt nzcv = INSN(3,0);
|
|
+
|
|
+ IRTemp condT = newTemp(Ity_I1);
|
|
+ assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
|
|
+
|
|
+ IRType ty = is64 ? Ity_I64 : Ity_I32;
|
|
+ IRTemp argL = newTemp(ty);
|
|
+ IRTemp argR = newTemp(ty);
|
|
+
|
|
+ if (is64) {
|
|
+ assign(argL, getIReg64orZR(nn));
|
|
+ assign(argR, mkU64(imm5));
|
|
+ } else {
|
|
+ assign(argL, getIReg32orZR(nn));
|
|
+ assign(argR, mkU32(imm5));
|
|
+ }
|
|
+ setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
|
|
+
|
|
+ DIP("ccm%c %s, #%u, #%u, %s\n",
|
|
+ isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
|
|
+ imm5, nzcv, nameCC(cond));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ---------------- CCMP/CCMN(reg) ---------------- */
|
|
+ /* 31 29 20 15 11 9 3
|
|
+ sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
|
|
+ sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
|
|
+ Operation is:
|
|
+ (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
|
|
+ (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
|
|
+ */
|
|
+ if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
|
|
+ && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ Bool isSUB = INSN(30,30) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt cond = INSN(15,12);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt nzcv = INSN(3,0);
|
|
+
|
|
+ IRTemp condT = newTemp(Ity_I1);
|
|
+ assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
|
|
+
|
|
+ IRType ty = is64 ? Ity_I64 : Ity_I32;
|
|
+ IRTemp argL = newTemp(ty);
|
|
+ IRTemp argR = newTemp(ty);
|
|
+
|
|
+ if (is64) {
|
|
+ assign(argL, getIReg64orZR(nn));
|
|
+ assign(argR, getIReg64orZR(mm));
|
|
+ } else {
|
|
+ assign(argL, getIReg32orZR(nn));
|
|
+ assign(argR, getIReg32orZR(mm));
|
|
+ }
|
|
+ setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
|
|
+
|
|
+ DIP("ccm%c %s, %s, #%u, %s\n",
|
|
+ isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
|
|
+ nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+
|
|
+ /* -------------- REV/REV16/REV32/RBIT -------------- */
|
|
+ /* 31 30 28 20 15 11 9 4
|
|
+
|
|
+ 1 10 11010110 00000 0000 11 n d REV Xd, Xn
|
|
+ 0 10 11010110 00000 0000 10 n d REV Wd, Wn
|
|
+
|
|
+ 1 10 11010110 00000 0000 01 n d REV16 Xd, Xn
|
|
+ 0 10 11010110 00000 0000 01 n d REV16 Wd, Wn
|
|
+
|
|
+ 1 10 11010110 00000 0000 10 n d REV32 Xd, Xn
|
|
+
|
|
+ 1 10 11010110 00000 0000 00 n d RBIT Xd, Xn
|
|
+ 0 10 11010110 00000 0000 00 n d RBIT Wd, Wn
|
|
+ */
|
|
+ /* Only REV is currently implemented. */
|
|
+ if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
|
|
+ && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,0,1)
|
|
+ && INSN(31,31) == INSN(10,10)) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRTemp src = newTemp(Ity_I64);
|
|
+ IRTemp dst = IRTemp_INVALID;
|
|
+ if (is64) {
|
|
+ assign(src, getIReg64orZR(nn));
|
|
+ dst = math_BSWAP64(src);
|
|
+ putIReg64orZR(dd, mkexpr(dst));
|
|
+ } else {
|
|
+ assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
|
|
+ dst = math_BSWAP64(src);
|
|
+ putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
|
|
+ }
|
|
+ DIP("rev %s, %s\n", nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- CLZ/CLS -------------------- */
|
|
+ /* 30 28 24 20 15 9 4
|
|
+ sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
|
|
+ sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
|
|
+ */
|
|
+ if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
|
|
+ && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ Bool isCLS = INSN(10,10) == 1;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRTemp src = newTemp(Ity_I64);
|
|
+ IRTemp dst = newTemp(Ity_I64);
|
|
+ if (!isCLS) { // CLS not yet supported
|
|
+ if (is64) {
|
|
+ assign(src, getIReg64orZR(nn));
|
|
+ assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
|
|
+ mkU64(64),
|
|
+ unop(Iop_Clz64, mkexpr(src))));
|
|
+ putIReg64orZR(dd, mkexpr(dst));
|
|
+ } else {
|
|
+ assign(src, binop(Iop_Shl64,
|
|
+ unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
|
|
+ assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
|
|
+ mkU64(32),
|
|
+ unop(Iop_Clz64, mkexpr(src))));
|
|
+ putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
|
|
+ }
|
|
+ DIP("cl%c %s, %s\n",
|
|
+ isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------------------- LSLV/LSRV/ASRV -------------------- */
|
|
+ /* 30 28 20 15 11 9 4
|
|
+ sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
|
|
+ sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
|
|
+ sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
|
|
+ */
|
|
+ if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
|
|
+ && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt op = INSN(11,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRType ty = is64 ? Ity_I64 : Ity_I32;
|
|
+ IRTemp srcL = newTemp(ty);
|
|
+ IRTemp srcR = newTemp(Ity_I8);
|
|
+ IRTemp res = newTemp(ty);
|
|
+ IROp iop = Iop_INVALID;
|
|
+ assign(srcL, getIRegOrZR(is64, nn));
|
|
+ assign(srcR,
|
|
+ unop(Iop_64to8,
|
|
+ binop(Iop_And64,
|
|
+ getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
|
|
+ switch (op) {
|
|
+ case BITS2(0,0): iop = mkSHL(ty); break;
|
|
+ case BITS2(0,1): iop = mkSHR(ty); break;
|
|
+ case BITS2(1,0): iop = mkSAR(ty); break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
|
|
+ putIRegOrZR(is64, dd, mkexpr(res));
|
|
+ vassert(op < 3);
|
|
+ const HChar* names[3] = { "lslv", "lsrv", "asrv" };
|
|
+ DIP("%s %s, %s, %s\n",
|
|
+ names[op], nameIRegOrZR(is64,dd),
|
|
+ nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- SDIV/UDIV -------------------- */
|
|
+ /* 30 28 20 15 10 9 4
|
|
+ sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
|
|
+ sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
|
|
+ */
|
|
+ if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
|
|
+ && INSN(15,11) == BITS5(0,0,0,0,1)) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ Bool isS = INSN(10,10) == 1;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ if (isS) {
|
|
+ putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
|
|
+ getIRegOrZR(is64, nn),
|
|
+ getIRegOrZR(is64, mm)));
|
|
+ } else {
|
|
+ putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
|
|
+ getIRegOrZR(is64, nn),
|
|
+ getIRegOrZR(is64, mm)));
|
|
+ }
|
|
+ DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
|
|
+ nameIRegOrZR(is64, dd),
|
|
+ nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
|
|
+ /* 31 23 20 15 14 9 4
|
|
+ 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
|
|
+ 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
|
|
+ 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
|
|
+ 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
|
|
+ with operation
|
|
+ Xd = Xa +/- (Wn *u/s Wm)
|
|
+ */
|
|
+ if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
|
|
+ Bool isU = INSN(23,23) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ Bool isAdd = INSN(15,15) == 0;
|
|
+ UInt aa = INSN(14,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRTemp wN = newTemp(Ity_I32);
|
|
+ IRTemp wM = newTemp(Ity_I32);
|
|
+ IRTemp xA = newTemp(Ity_I64);
|
|
+ IRTemp muld = newTemp(Ity_I64);
|
|
+ IRTemp res = newTemp(Ity_I64);
|
|
+ assign(wN, getIReg32orZR(nn));
|
|
+ assign(wM, getIReg32orZR(mm));
|
|
+ assign(xA, getIReg64orZR(aa));
|
|
+ assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
|
|
+ mkexpr(wN), mkexpr(wM)));
|
|
+ assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
|
|
+ mkexpr(xA), mkexpr(muld)));
|
|
+ putIReg64orZR(dd, mkexpr(res));
|
|
+ DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
|
|
+ nameIReg64orZR(dd), nameIReg32orZR(nn),
|
|
+ nameIReg32orZR(mm), nameIReg64orZR(aa));
|
|
+ return True;
|
|
+ }
|
|
+ vex_printf("ARM64 front end: data_processing_register\n");
|
|
+ return False;
|
|
+# undef INSN
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Load and Store instructions ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Generate the EA for a "reg + reg" style amode. This is done from
|
|
+ parts of the insn, but for sanity checking sake it takes the whole
|
|
+ insn. This appears to depend on insn[15:12], with opt=insn[15:13]
|
|
+ and S=insn[12]:
|
|
+
|
|
+ The possible forms, along with their opt:S values, are:
|
|
+ 011:0 Xn|SP + Xm
|
|
+ 111:0 Xn|SP + Xm
|
|
+ 011:1 Xn|SP + Xm * transfer_szB
|
|
+ 111:1 Xn|SP + Xm * transfer_szB
|
|
+ 010:0 Xn|SP + 32Uto64(Wm)
|
|
+ 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
|
|
+ 110:0 Xn|SP + 32Sto64(Wm)
|
|
+ 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
|
|
+
|
|
+ Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
|
|
+ the transfer size is insn[23,31,30]. For integer loads/stores,
|
|
+ insn[23] is zero, hence szLg2 can be at most 3 in such cases.
|
|
+
|
|
+ If the decoding fails, it returns IRTemp_INVALID.
|
|
+
|
|
+ isInt is True iff this decoding is for transfers to/from integer
|
|
+ registers. If False it is for transfers to/from vector registers.
|
|
+*/
|
|
+static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt ) /* Returns a new Ity_I64 temp holding base + extended/shifted index, or IRTemp_INVALID on failure; on success buf receives the printable form of the amode. */
|
|
+{
|
|
+ UInt optS = SLICE_UInt(insn, 15, 12); /* opt (insn[15:13]) concatenated with S (insn[12]) */
|
|
+ UInt mm = SLICE_UInt(insn, 20, 16); /* index register number, Rm */
|
|
+ UInt nn = SLICE_UInt(insn, 9, 5); /* base register number, Rn */
|
|
+ UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2)) /* insn[23] only contributes for non-integer transfers */
|
|
+ | SLICE_UInt(insn, 31, 30); // Log2 of the size
|
|
+
|
|
+ buf[0] = 0; /* buf is an empty string unless one of the optS cases below fills it in */
|
|
+
|
|
+ /* Sanity checks, that this really is a load/store insn. */
|
|
+ if (SLICE_UInt(insn, 11, 10) != BITS2(1,0)) /* insn[11:10] must be 10 for this amode */
|
|
+ goto fail;
|
|
+
|
|
+ if (isInt /* integer case: insn[29:21] must be one of the four patterns below */
|
|
+ && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
|
|
+ && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
|
|
+ && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
|
|
+ && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
|
|
+ goto fail;
|
|
+
|
|
+ if (!isInt /* non-integer case: only insn[29:24] is checked */
|
|
+ && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
|
|
+ goto fail;
|
|
+
|
|
+ /* Throw out non-verified but possibly valid cases. */
|
|
+ switch (szLg2) {
|
|
+ case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
|
|
+ case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
|
|
+ case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
|
|
+ case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
|
|
+ case BITS3(1,0,0): // can only ever be valid for the vector case
|
|
+ if (isInt) goto fail; else goto fail; /* both arms fail, so as written this size is rejected for vector transfers too */
|
|
+ case BITS3(1,0,1): // these sizes are never valid
|
|
+ case BITS3(1,1,0):
|
|
+ case BITS3(1,1,1): goto fail;
|
|
+
|
|
+ default: vassert(0); /* szLg2 is built from three bits, so all eight values are covered above */
|
|
+ }
|
|
+
|
|
+ IRExpr* rhs = NULL; /* the (possibly extended and shifted) index operand; set by every non-failing case */
|
|
+ switch (optS) {
|
|
+ case BITS4(1,1,1,0): goto fail; //ATC -- rejected here, although the header comment lists 111:0 as Xn|SP + Xm
|
|
+ case BITS4(0,1,1,0): /* 011:0 -- Xm, unshifted */
|
|
+ rhs = getIReg64orZR(mm);
|
|
+ vex_sprintf(buf, "[%s, %s]",
|
|
+ nameIReg64orZR(nn), nameIReg64orZR(mm)); /* NOTE(review): base is read with getIReg64orSP below but named with the ZR namer here and in every case below -- confirm intent */
|
|
+ break;
|
|
+ case BITS4(1,1,1,1): goto fail; //ATC -- rejected here, although the header comment lists 111:1 as Xn|SP + Xm * transfer_szB
|
|
+ case BITS4(0,1,1,1): /* 011:1 -- Xm shifted left by szLg2 */
|
|
+ rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
|
|
+ vex_sprintf(buf, "[%s, %s lsl %u]",
|
|
+ nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
|
|
+ break;
|
|
+ case BITS4(0,1,0,0): /* 010:0 -- Wm zero-extended to 64 bits */
|
|
+ rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
|
|
+ vex_sprintf(buf, "[%s, %s uxtx]", /* NOTE(review): operand is a zero-extended W register but the text says "uxtx"; possibly "uxtw" was meant -- confirm */
|
|
+ nameIReg64orZR(nn), nameIReg32orZR(mm));
|
|
+ break;
|
|
+ case BITS4(0,1,0,1): /* 010:1 -- Wm zero-extended, then shifted left by szLg2 */
|
|
+ rhs = binop(Iop_Shl64,
|
|
+ unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
|
|
+ vex_sprintf(buf, "[%s, %s uxtx, lsl %u]", /* NOTE(review): same "uxtx" naming question as the 010:0 case */
|
|
+ nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
|
|
+ break;
|
|
+ case BITS4(1,1,0,0): /* 110:0 -- Wm sign-extended to 64 bits */
|
|
+ rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
|
|
+ vex_sprintf(buf, "[%s, %s sxtx]", /* NOTE(review): operand is a sign-extended W register but the text says "sxtx"; possibly "sxtw" was meant -- confirm */
|
|
+ nameIReg64orZR(nn), nameIReg32orZR(mm));
|
|
+ break;
|
|
+ case BITS4(1,1,0,1): /* 110:1 -- Wm sign-extended, then shifted left by szLg2 */
|
|
+ rhs = binop(Iop_Shl64,
|
|
+ unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
|
|
+ vex_sprintf(buf, "[%s, %s sxtx, lsl %u]", /* NOTE(review): same "sxtx" naming question as the 110:0 case */
|
|
+ nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
|
|
+ break;
|
|
+ default:
|
|
+ /* The rest appear to be genuinely invalid */
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ vassert(rhs); /* every path that reaches here went through a case that assigned rhs */
|
|
+ IRTemp res = newTemp(Ity_I64);
|
|
+ assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs)); /* EA = (Xn or SP) + index operand */
|
|
+ return res;
|
|
+
|
|
+ fail:
|
|
+ vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS); /* printed for every failure, including those caused by the opcode and size checks rather than by optS */
|
|
+ return IRTemp_INVALID;
|
|
+}
|
|
+
|
|
+
|
|
+/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
|
|
+ bits of DATAE :: Ity_I64. SZB is the store size in bytes and must be 8, 4, 2 or 1; any other value hits vassert(0). The store is emitted with storeLE. */
|
|
+static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
|
|
+{
|
|
+ IRExpr* addrE = mkexpr(addr); /* the address temp as an expression, used by whichever case runs */
|
|
+ switch (szB) {
|
|
+ case 8:
|
|
+ storeLE(addrE, dataE); /* all 64 bits: no narrowing needed */
|
|
+ break;
|
|
+ case 4:
|
|
+ storeLE(addrE, unop(Iop_64to32, dataE)); /* narrow with Iop_64to32 before storing */
|
|
+ break;
|
|
+ case 2:
|
|
+ storeLE(addrE, unop(Iop_64to16, dataE)); /* narrow with Iop_64to16 before storing */
|
|
+ break;
|
|
+ case 1:
|
|
+ storeLE(addrE, unop(Iop_64to8, dataE)); /* narrow with Iop_64to8 before storing */
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0); /* unsupported store size */
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
|
|
+ placing the result in an Ity_I64 temporary. SZB is the load size in bytes and must be 8, 4, 2 or 1; any other value hits vassert(0). The new temporary is returned. */
|
|
+static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
|
|
+{
|
|
+ IRTemp res = newTemp(Ity_I64); /* result temp, assigned exactly once by the case that runs */
|
|
+ IRExpr* addrE = mkexpr(addr);
|
|
+ switch (szB) {
|
|
+ case 8:
|
|
+ assign(res, loadLE(Ity_I64,addrE)); /* already 64 bits: no widening needed */
|
|
+ break;
|
|
+ case 4:
|
|
+ assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE))); /* 32-bit load, widened with Iop_32Uto64 */
|
|
+ break;
|
|
+ case 2:
|
|
+ assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE))); /* 16-bit load, widened with Iop_16Uto64 */
|
|
+ break;
|
|
+ case 1:
|
|
+ assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE))); /* 8-bit load, widened with Iop_8Uto64 */
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0); /* unsupported load size */
|
|
+ }
|
|
+ return res;
|
|
+}
|
|
+
|
|
+
|
|
+static
|
|
+Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
+{
|
|
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
|
|
+
|
|
+ /* ------------ LDR,STR (immediate, uimm12) ----------- */
|
|
+ /* uimm12 is scaled by the transfer size
|
|
+
|
|
+ 31 29 26 21 9 4
|
|
+ | | | | | |
|
|
+ 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
|
|
+ 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
|
|
+
|
|
+ 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
|
|
+ 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
|
|
+
|
|
+ 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
|
|
+ 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
|
|
+
|
|
+ 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
|
|
+ 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
|
|
+ */
|
|
+ if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
|
|
+ UInt szLg2 = INSN(31,30);
|
|
+ UInt szB = 1 << szLg2;
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ UInt offs = INSN(21,10) * szB;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ IRTemp ta = newTemp(Ity_I64);
|
|
+ assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
|
|
+ if (nn == 31) { /* FIXME generate stack alignment check */ }
|
|
+ vassert(szLg2 < 4);
|
|
+ if (isLD) {
|
|
+ putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
|
|
+ } else {
|
|
+ gen_narrowing_store(szB, ta, getIReg64orZR(tt));
|
|
+ }
|
|
+ const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
|
|
+ const HChar* st_name[4] = { "strb", "strh", "str", "str" };
|
|
+ DIP("%s %s, [%s, #%u]\n",
|
|
+ (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
|
|
+ nameIReg64orSP(nn), offs);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ------------ LDUR,STUR (immediate, simm9) ----------- */
|
|
+ /*
|
|
+ 31 29 26 20 11 9 4
|
|
+ | | | | | | |
|
|
+ (at-Rn-then-Rn=EA) | | |
|
|
+ sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
|
|
+ sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
|
|
+
|
|
+ (at-EA-then-Rn=EA)
|
|
+ sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
|
|
+ sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
|
|
+
|
|
+ (at-EA)
|
|
+ sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
|
|
+ sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
|
|
+
|
|
+ simm9 is unscaled.
|
|
+
|
|
+ The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
|
|
+ load case this is because it would create two competing values for
|
|
+ Rt. In the store case the reason is unclear, but the spec
|
|
+ disallows it anyway.
|
|
+
|
|
+ Stores are narrowing, loads are unsigned widening. sz encodes
|
|
+ the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
|
|
+ */
|
|
+ if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
|
|
+ == BITS9(1,1,1, 0,0,0,0,0, 0)) {
|
|
+ UInt szLg2 = INSN(31,30);
|
|
+ UInt szB = 1 << szLg2;
|
|
+ Bool isLoad = INSN(22,22) == 1;
|
|
+ UInt imm9 = INSN(20,12);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ Bool wBack = INSN(10,10) == 1;
|
|
+ UInt how = INSN(11,10);
|
|
+ if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
|
|
+ /* undecodable; fall through */
|
|
+ } else {
|
|
+ if (nn == 31) { /* FIXME generate stack alignment check */ }
|
|
+
|
|
+ // Compute the transfer address TA and the writeback address WA.
|
|
+ IRTemp tRN = newTemp(Ity_I64);
|
|
+ assign(tRN, getIReg64orSP(nn));
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ Long simm9 = (Long)sx_to_64(imm9, 9);
|
|
+ assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
|
|
+
|
|
+ IRTemp tTA = newTemp(Ity_I64);
|
|
+ IRTemp tWA = newTemp(Ity_I64);
|
|
+ switch (how) {
|
|
+ case BITS2(0,1):
|
|
+ assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
|
|
+ case BITS2(1,1):
|
|
+ assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
|
|
+ case BITS2(0,0):
|
|
+ assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
|
|
+ default:
|
|
+ vassert(0); /* NOTREACHED */
|
|
+ }
|
|
+
|
|
+ if (isLoad) {
|
|
+ putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
|
|
+ } else {
|
|
+ gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
|
|
+ }
|
|
+
|
|
+ if (wBack)
|
|
+ putIReg64orSP(nn, mkexpr(tEA));
|
|
+
|
|
+ const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
|
|
+ const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
|
|
+ const HChar* fmt_str = NULL;
|
|
+ switch (how) {
|
|
+ case BITS2(0,1):
|
|
+ fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
|
|
+ break;
|
|
+ case BITS2(1,1):
|
|
+ fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
|
|
+ break;
|
|
+ case BITS2(0,0):
|
|
+ fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
|
|
+ nameIRegOrZR(szB == 8, tt),
|
|
+ nameIReg64orSP(nn), simm9);
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
|
|
+ /* L==1 => mm==LD
|
|
+ L==0 => mm==ST
|
|
+ x==0 => 32 bit transfers, and zero extended loads
|
|
+ x==1 => 64 bit transfers
|
|
+ simm7 is scaled by the (single-register) transfer size
|
|
+
|
|
+ (at-Rn-then-Rn=EA)
|
|
+ x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
|
|
+
|
|
+ (at-EA-then-Rn=EA)
|
|
+ x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
|
|
+
|
|
+ (at-EA)
|
|
+ x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
|
|
+ */
|
|
+
|
|
+ UInt insn_30_23 = INSN(30,23);
|
|
+ if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
|
|
+ || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
|
|
+ || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
|
|
+ UInt bL = INSN(22,22);
|
|
+ UInt bX = INSN(31,31);
|
|
+ UInt bWBack = INSN(23,23);
|
|
+ UInt rT1 = INSN(4,0);
|
|
+ UInt rN = INSN(9,5);
|
|
+ UInt rT2 = INSN(14,10);
|
|
+ Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
|
|
+ if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
|
|
+ || (bL && rT1 == rT2)) {
|
|
+ /* undecodable; fall through */
|
|
+ } else {
|
|
+ if (rN == 31) { /* FIXME generate stack alignment check */ }
|
|
+
|
|
+ // Compute the transfer address TA and the writeback address WA.
|
|
+ IRTemp tRN = newTemp(Ity_I64);
|
|
+ assign(tRN, getIReg64orSP(rN));
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ simm7 = (bX ? 8 : 4) * simm7;
|
|
+ assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
|
|
+
|
|
+ IRTemp tTA = newTemp(Ity_I64);
|
|
+ IRTemp tWA = newTemp(Ity_I64);
|
|
+ switch (INSN(24,23)) {
|
|
+ case BITS2(0,1):
|
|
+ assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
|
|
+ case BITS2(1,1):
|
|
+ assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
|
|
+ case BITS2(1,0):
|
|
+ assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
|
|
+ default:
|
|
+ vassert(0); /* NOTREACHED */
|
|
+ }
|
|
+
|
|
+ /* Normally rN would be updated after the transfer. However, in
|
|
+ the special case typified by
|
|
+ stp x29, x30, [sp,#-112]!
|
|
+ it is necessary to update SP before the transfer, (1)
|
|
+ because Memcheck will otherwise complain about a write
|
|
+ below the stack pointer, and (2) because the segfault
|
|
+ stack extension mechanism will otherwise extend the stack
|
|
+ only down to SP before the instruction, which might not be
|
|
+ far enough, if the -112 bit takes the actual access
|
|
+ address to the next page.
|
|
+ */
|
|
+ Bool earlyWBack
|
|
+ = bWBack && simm7 < 0
|
|
+ && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
|
|
+
|
|
+ if (bWBack && earlyWBack)
|
|
+ putIReg64orSP(rN, mkexpr(tEA));
|
|
+
|
|
+ /**/ if (bL == 1 && bX == 1) {
|
|
+ // 64 bit load
|
|
+ putIReg64orZR(rT1, loadLE(Ity_I64,
|
|
+ binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
|
|
+ putIReg64orZR(rT2, loadLE(Ity_I64,
|
|
+ binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
|
|
+ } else if (bL == 1 && bX == 0) {
|
|
+ vassert(0); //ATC
|
|
+ // 32 bit load
|
|
+ putIReg32orZR(rT1, loadLE(Ity_I32,
|
|
+ binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
|
|
+ putIReg32orZR(rT2, loadLE(Ity_I32,
|
|
+ binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
|
|
+ } else if (bL == 0 && bX == 1) {
|
|
+ // 64 bit store
|
|
+ storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
|
|
+ getIReg64orZR(rT1));
|
|
+ storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
|
|
+ getIReg64orZR(rT2));
|
|
+ } else {
|
|
+ vassert(bL == 0 && bX == 0);
|
|
+ vassert(0); //ATC
|
|
+ // 32 bit store
|
|
+ storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
|
|
+ getIReg32orZR(rT1));
|
|
+ storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
|
|
+ getIReg32orZR(rT2));
|
|
+ }
|
|
+
|
|
+ if (bWBack && !earlyWBack)
|
|
+ putIReg64orSP(rN, mkexpr(tEA));
|
|
+
|
|
+ const HChar* fmt_str = NULL;
|
|
+ switch (INSN(24,23)) {
|
|
+ case BITS2(0,1):
|
|
+ fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
|
|
+ break;
|
|
+ case BITS2(1,1):
|
|
+ fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
|
|
+ break;
|
|
+ case BITS2(1,0):
|
|
+ fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ DIP(fmt_str, bL == 0 ? "st" : "ld",
|
|
+ nameIRegOrZR(bX == 1, rT1),
|
|
+ nameIRegOrZR(bX == 1, rT2),
|
|
+ nameIReg64orSP(rN), simm7);
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* ---------------- LDR (literal, int reg) ---------------- */
|
|
+ /* 31 29 23 4
|
|
+ 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
|
|
+ 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
|
|
+ 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
|
|
+ 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
|
|
+ Just handles the first two cases for now.
|
|
+ */
|
|
+ if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
|
|
+ UInt imm19 = INSN(23,5);
|
|
+ UInt rT = INSN(4,0);
|
|
+ UInt bX = INSN(30,30);
|
|
+ ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
|
|
+ if (bX) {
|
|
+ putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
|
|
+ } else {
|
|
+ putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
|
|
+ }
|
|
+ DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------- {LD,ST}R (integer register) --------------- */
|
|
+ /* 31 29 20 15 12 11 9 4
|
|
+ | | | | | | | |
|
|
+ 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+
|
|
+ 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+ */
|
|
+ if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
|
|
+ && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
|
|
+ HChar dis_buf[64];
|
|
+ UInt szLg2 = INSN(31,30);
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ UInt tt = INSN(4,0);
|
|
+ IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
|
|
+ if (ea != IRTemp_INVALID) {
|
|
+ switch (szLg2) {
|
|
+ case 3: /* 64 bit */
|
|
+ if (isLD) {
|
|
+ putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
|
|
+ DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
|
|
+ } else {
|
|
+ storeLE(mkexpr(ea), getIReg64orZR(tt));
|
|
+ DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ case 2: /* 32 bit */
|
|
+ if (isLD) {
|
|
+ putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
|
|
+ DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ } else {
|
|
+ storeLE(mkexpr(ea), getIReg32orZR(tt));
|
|
+ DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ case 1: /* 16 bit */
|
|
+ if (isLD) {
|
|
+ putIReg64orZR(tt, unop(Iop_16Uto64,
|
|
+ loadLE(Ity_I16, mkexpr(ea))));
|
|
+ DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ } else {
|
|
+ storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
|
|
+ DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ case 0: /* 8 bit */
|
|
+ if (isLD) {
|
|
+ putIReg64orZR(tt, unop(Iop_8Uto64,
|
|
+ loadLE(Ity_I8, mkexpr(ea))));
|
|
+ DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ } else {
|
|
+ storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
|
|
+ DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------------- LDRS{B,H,W} (uimm12) -------------- */
|
|
+ /* 31 29 26 23 21 9 4
|
|
+ 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
|
|
+ 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
|
|
+ 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
|
|
+ where
|
|
+ Rt is Wt when x==1, Xt when x==0
|
|
+ */
|
|
+ if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
|
|
+ /* Further checks on bits 31:30 and 22 */
|
|
+ Bool valid = False;
|
|
+ switch ((INSN(31,30) << 1) | INSN(22,22)) {
|
|
+ case BITS3(1,0,0):
|
|
+ case BITS3(0,1,0): case BITS3(0,1,1):
|
|
+ case BITS3(0,0,0): case BITS3(0,0,1):
|
|
+ valid = True;
|
|
+ break;
|
|
+ }
|
|
+ if (valid) {
|
|
+ UInt szLg2 = INSN(31,30);
|
|
+ UInt bitX = INSN(22,22);
|
|
+ UInt imm12 = INSN(21,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ UInt szB = 1 << szLg2;
|
|
+ IRExpr* ea = binop(Iop_Add64,
|
|
+ getIReg64orSP(nn), mkU64(imm12 * szB));
|
|
+ switch (szB) {
|
|
+ case 4:
|
|
+ vassert(bitX == 0);
|
|
+ putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
|
|
+ DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
|
|
+ nameIReg64orSP(nn), imm12 * szB);
|
|
+ break;
|
|
+ case 2:
|
|
+ if (bitX == 1) {
|
|
+ putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
|
|
+ } else {
|
|
+ putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
|
|
+ }
|
|
+ DIP("ldrsh %s, [%s, #%u]\n",
|
|
+ nameIRegOrZR(bitX == 0, tt),
|
|
+ nameIReg64orSP(nn), imm12 * szB);
|
|
+ break;
|
|
+ case 1:
|
|
+ if (bitX == 1) {
|
|
+ putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
|
|
+ } else {
|
|
+ putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
|
|
+ }
|
|
+ DIP("ldrsb %s, [%s, #%u]\n",
|
|
+ nameIRegOrZR(bitX == 0, tt),
|
|
+ nameIReg64orSP(nn), imm12 * szB);
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ /* else fall through */
|
|
+ }
|
|
+
|
|
+ /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
|
|
+ /* (at-Rn-then-Rn=EA)
|
|
+ 31 29 23 21 20 11 9 4
|
|
+ 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
|
|
+ 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
|
|
+ 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
|
|
+
|
|
+ (at-EA-then-Rn=EA)
|
|
+ 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
|
|
+ 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
|
|
+ 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
|
|
+ where
|
|
+ Rt is Wt when x==1, Xt when x==0
|
|
+ transfer-at-Rn when [11]==0, at EA when [11]==1
|
|
+ */
|
|
+ if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
|
|
+ && INSN(21,21) == 0 && INSN(10,10) == 1) {
|
|
+ /* Further checks on bits 31:30 and 22 */
|
|
+ Bool valid = False;
|
|
+ switch ((INSN(31,30) << 1) | INSN(22,22)) {
|
|
+ case BITS3(1,0,0): // LDRSW Xt
|
|
+ case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
|
|
+ case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
|
|
+ valid = True;
|
|
+ break;
|
|
+ }
|
|
+ if (valid) {
|
|
+ UInt szLg2 = INSN(31,30);
|
|
+ UInt imm9 = INSN(20,12);
|
|
+ Bool atRN = INSN(11,11) == 0;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ IRTemp tRN = newTemp(Ity_I64);
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ IRTemp tTA = IRTemp_INVALID;
|
|
+ ULong simm9 = sx_to_64(imm9, 9);
|
|
+ Bool is64 = INSN(22,22) == 0;
|
|
+ assign(tRN, getIReg64orSP(nn));
|
|
+ assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
|
|
+ tTA = atRN ? tRN : tEA;
|
|
+ HChar ch = '?';
|
|
+ /* There are 5 cases:
|
|
+ byte load, SX to 64
|
|
+ byte load, SX to 32, ZX to 64
|
|
+ halfword load, SX to 64
|
|
+ halfword load, SX to 32, ZX to 64
|
|
+ word load, SX to 64
|
|
+ The ifs below handle them in the listed order.
|
|
+ */
|
|
+ if (szLg2 == 0) {
|
|
+ ch = 'b';
|
|
+ if (is64) {
|
|
+ putIReg64orZR(tt, unop(Iop_8Sto64,
|
|
+ loadLE(Ity_I8, mkexpr(tTA))));
|
|
+ } else {
|
|
+ putIReg32orZR(tt, unop(Iop_8Sto32,
|
|
+ loadLE(Ity_I8, mkexpr(tTA))));
|
|
+ }
|
|
+ }
|
|
+ else if (szLg2 == 1) {
|
|
+ ch = 'h';
|
|
+ if (is64) {
|
|
+ putIReg64orZR(tt, unop(Iop_16Sto64,
|
|
+ loadLE(Ity_I16, mkexpr(tTA))));
|
|
+ } else {
|
|
+ putIReg32orZR(tt, unop(Iop_16Sto32,
|
|
+ loadLE(Ity_I16, mkexpr(tTA))));
|
|
+ }
|
|
+ }
|
|
+ else if (szLg2 == 2 && is64) {
|
|
+ ch = 'w';
|
|
+ putIReg64orZR(tt, unop(Iop_32Sto64,
|
|
+ loadLE(Ity_I32, mkexpr(tTA))));
|
|
+ }
|
|
+ else {
|
|
+ vassert(0);
|
|
+ }
|
|
+ putIReg64orSP(nn, mkexpr(tEA));
|
|
+ DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
|
|
+ ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
|
|
+ return True;
|
|
+ }
|
|
+ /* else fall through */
|
|
+ }
|
|
+
|
|
+ /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
|
|
+ /* 31 29 23 21 20 11 9 4
|
|
+ 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
|
|
+ 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
|
|
+ 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
|
|
+ where
|
|
+ Rt is Wt when x==1, Xt when x==0
|
|
+ */
|
|
+ if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
|
|
+ && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
|
|
+ /* Further checks on bits 31:30 and 22 */
|
|
+ Bool valid = False;
|
|
+ switch ((INSN(31,30) << 1) | INSN(22,22)) {
|
|
+ case BITS3(1,0,0): // LDURSW Xt
|
|
+ case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
|
|
+ case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
|
|
+ valid = True;
|
|
+ break;
|
|
+ }
|
|
+ if (valid) {
|
|
+ UInt szLg2 = INSN(31,30);
|
|
+ UInt imm9 = INSN(20,12);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ IRTemp tRN = newTemp(Ity_I64);
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ ULong simm9 = sx_to_64(imm9, 9);
|
|
+ Bool is64 = INSN(22,22) == 0;
|
|
+ assign(tRN, getIReg64orSP(nn));
|
|
+ assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
|
|
+ HChar ch = '?';
|
|
+ /* There are 5 cases:
|
|
+ byte load, SX to 64
|
|
+ byte load, SX to 32, ZX to 64
|
|
+ halfword load, SX to 64
|
|
+ halfword load, SX to 32, ZX to 64
|
|
+ word load, SX to 64
|
|
+ The ifs below handle them in the listed order.
|
|
+ */
|
|
+ if (szLg2 == 0) {
|
|
+ ch = 'b';
|
|
+ if (is64) {
|
|
+ putIReg64orZR(tt, unop(Iop_8Sto64,
|
|
+ loadLE(Ity_I8, mkexpr(tEA))));
|
|
+ } else {
|
|
+ putIReg32orZR(tt, unop(Iop_8Sto32,
|
|
+ loadLE(Ity_I8, mkexpr(tEA))));
|
|
+ }
|
|
+ }
|
|
+ else if (szLg2 == 1) {
|
|
+ ch = 'h';
|
|
+ if (is64) {
|
|
+ putIReg64orZR(tt, unop(Iop_16Sto64,
|
|
+ loadLE(Ity_I16, mkexpr(tEA))));
|
|
+ } else {
|
|
+ putIReg32orZR(tt, unop(Iop_16Sto32,
|
|
+ loadLE(Ity_I16, mkexpr(tEA))));
|
|
+ }
|
|
+ }
|
|
+ else if (szLg2 == 2 && is64) {
|
|
+ ch = 'w';
|
|
+ putIReg64orZR(tt, unop(Iop_32Sto64,
|
|
+ loadLE(Ity_I32, mkexpr(tEA))));
|
|
+ }
|
|
+ else {
|
|
+ vassert(0);
|
|
+ }
|
|
+ DIP("ldurs%c %s, [%s, #%lld]",
|
|
+ ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
|
|
+ return True;
|
|
+ }
|
|
+ /* else fall through */
|
|
+ }
|
|
+
|
|
+ /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
|
|
+ /* L==1 => mm==LD
|
|
+ L==0 => mm==ST
|
|
+ sz==00 => 32 bit (S) transfers
|
|
+ sz==01 => 64 bit (D) transfers
|
|
+ sz==10 => 128 bit (Q) transfers
|
|
+ sz==11 isn't allowed
|
|
+ simm7 is scaled by the (single-register) transfer size
|
|
+
|
|
+ 31 29 22 21 14 9 4
|
|
+ sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
|
|
+ (at-Rn-then-Rn=EA)
|
|
+
|
|
+ sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
|
|
+ (at-EA-then-Rn=EA)
|
|
+
|
|
+ sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
|
|
+ (at-EA)
|
|
+ */
|
|
+
|
|
+ UInt insn_29_23 = INSN(29,23);
|
|
+ if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
|
|
+ || insn_29_23 == BITS7(1,0,1,1,0,1,1)
|
|
+ || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
|
|
+ UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ Bool wBack = INSN(23,23) == 1;
|
|
+ Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
|
|
+ UInt tt2 = INSN(14,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt1 = INSN(4,0);
|
|
+ if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
|
|
+ /* undecodable; fall through */
|
|
+ } else {
|
|
+ if (nn == 31) { /* FIXME generate stack alignment check */ }
|
|
+
|
|
+ // Compute the transfer address TA and the writeback address WA.
|
|
+ UInt szB = 4 << szSlg2; /* szB is the per-register size */
|
|
+ IRTemp tRN = newTemp(Ity_I64);
|
|
+ assign(tRN, getIReg64orSP(nn));
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ simm7 = szB * simm7;
|
|
+ assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
|
|
+
|
|
+ IRTemp tTA = newTemp(Ity_I64);
|
|
+ IRTemp tWA = newTemp(Ity_I64);
|
|
+ switch (INSN(24,23)) {
|
|
+ case BITS2(0,1):
|
|
+ assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
|
|
+ case BITS2(1,1):
|
|
+ assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
|
|
+ case BITS2(1,0):
|
|
+ assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
|
|
+ default:
|
|
+ vassert(0); /* NOTREACHED */
|
|
+ }
|
|
+
|
|
+ IRType ty = Ity_INVALID;
|
|
+ switch (szB) {
|
|
+ case 4: ty = Ity_F32; break;
|
|
+ case 8: ty = Ity_F64; break;
|
|
+ case 16: ty = Ity_V128; break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+
|
|
+ if (isLD) {
|
|
+ putQReg(tt1,
|
|
+ loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
|
|
+ putQReg(tt2,
|
|
+ loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
|
|
+ } else {
|
|
+ storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
|
|
+ getQReg(ty, tt1));
|
|
+ storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
|
|
+ getQReg(ty, tt2));
|
|
+ }
|
|
+
|
|
+ if (wBack)
|
|
+ putIReg64orSP(nn, mkexpr(tEA));
|
|
+
|
|
+ const HChar* fmt_str = NULL;
|
|
+ switch (INSN(24,23)) {
|
|
+ case BITS2(0,1):
|
|
+ fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
|
|
+ break;
|
|
+ case BITS2(1,1):
|
|
+ fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
|
|
+ break;
|
|
+ case BITS2(1,0):
|
|
+ fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ DIP(fmt_str, isLD ? "ld" : "st",
|
|
+ nameQReg(szB, tt1), nameQReg(szB, tt2),
|
|
+ nameIReg64orSP(nn), simm7);
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------------- {LD,ST}R (vector register) --------------- */
|
|
+ /* 31 29 23 20 15 12 11 9 4
|
|
+ | | | | | | | | |
|
|
+ 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
|
|
+ 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
|
|
+ 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
|
|
+
|
|
+ 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
|
|
+ 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
|
|
+ 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
|
|
+ */
|
|
+ if (INSN(29,24) == BITS6(1,1,1,1,0,0)
|
|
+ && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
|
|
+ HChar dis_buf[64];
|
|
+ UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (szLg2 >= 4) goto after_LDR_STR_vector_register;
|
|
+ IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
|
|
+ if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
|
|
+ switch (szLg2) {
|
|
+ case 0: /* 8 bit */
|
|
+ if (isLD) {
|
|
+ putQReg128(tt, mkV128(0x0000));
|
|
+ putQReg(tt, loadLE(Ity_I8, mkexpr(ea)));
|
|
+ DIP("ldr %s, %s\n", nameQReg(1, tt), dis_buf);
|
|
+ } else {
|
|
+ vassert(0); //ATC
|
|
+ storeLE(mkexpr(ea), getQReg(Ity_I8, tt));
|
|
+ DIP("str %s, %s\n", nameQReg(1, tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ case 1:
|
|
+ if (isLD) {
|
|
+ putQReg128(tt, mkV128(0x0000));
|
|
+ putQReg(tt, loadLE(Ity_I16, mkexpr(ea)));
|
|
+ DIP("ldr %s, %s\n", nameQReg(2, tt), dis_buf);
|
|
+ } else {
|
|
+ vassert(0); //ATC
|
|
+ storeLE(mkexpr(ea), getQReg(Ity_I16, tt));
|
|
+ DIP("str %s, %s\n", nameQReg(2, tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ case 2: /* 32 bit */
|
|
+ if (isLD) {
|
|
+ putQReg128(tt, mkV128(0x0000));
|
|
+ putQReg(tt, loadLE(Ity_I32, mkexpr(ea)));
|
|
+ DIP("ldr %s, %s\n", nameQReg(4, tt), dis_buf);
|
|
+ } else {
|
|
+ storeLE(mkexpr(ea), getQReg(Ity_I32, tt));
|
|
+ DIP("str %s, %s\n", nameQReg(4, tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ case 3: /* 64 bit */
|
|
+ if (isLD) {
|
|
+ putQReg128(tt, mkV128(0x0000));
|
|
+ putQReg(tt, loadLE(Ity_I64, mkexpr(ea)));
|
|
+ DIP("ldr %s, %s\n", nameQReg(8, tt), dis_buf);
|
|
+ } else {
|
|
+ storeLE(mkexpr(ea), getQReg(Ity_I64, tt));
|
|
+ DIP("str %s, %s\n", nameQReg(8, tt), dis_buf);
|
|
+ }
|
|
+ break;
|
|
+ case 4: return False; //ATC
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ after_LDR_STR_vector_register:
|
|
+
|
|
+ /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
|
|
+ /* 31 29 22 20 15 12 11 9 4
|
|
+ | | | | | | | | |
|
|
+ 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
|
|
+
|
|
+ 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+
|
|
+ 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
|
|
+ 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
|
|
+ */
|
|
+ if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
|
|
+ && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
|
|
+ HChar dis_buf[64];
|
|
+ UInt szLg2 = INSN(31,30);
|
|
+ Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (szLg2 == 3) goto after_LDRS_integer_register;
|
|
+ IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
|
|
+ if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
|
|
+ /* Enumerate the 5 variants explicitly. */
|
|
+ if (szLg2 == 2/*32 bit*/ && sxTo64) {
|
|
+ putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
|
|
+ DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
|
|
+ return True;
|
|
+ }
|
|
+ else
|
|
+ if (szLg2 == 1/*16 bit*/) {
|
|
+ if (sxTo64) {
|
|
+ putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
|
|
+ DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
|
|
+ } else {
|
|
+ putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
|
|
+ DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ else
|
|
+ if (szLg2 == 0/*8 bit*/) {
|
|
+ if (sxTo64) {
|
|
+ putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
|
|
+ DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
|
|
+ } else {
|
|
+ putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
|
|
+ DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ /* else it's an invalid combination */
|
|
+ }
|
|
+ after_LDRS_integer_register:
|
|
+
|
|
+ /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
|
|
+ /* This is the Unsigned offset variant only. The Post-Index and
|
|
+ Pre-Index variants are below.
|
|
+
|
|
+ 31 29 23 21 9 4
|
|
+ 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
|
|
+ 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
|
|
+ 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
|
|
+ 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
|
|
+ 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
|
|
+
|
|
+ 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
|
|
+ 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
|
|
+ 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
|
|
+ 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
|
|
+ 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
|
|
+ */
|
|
+ if (INSN(29,24) == BITS6(1,1,1,1,0,1)
|
|
+ && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
|
|
+ UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ UInt pimm12 = INSN(21,10) << szLg2;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
|
|
+ assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
|
|
+ if (isLD) {
|
|
+ if (szLg2 < 4) {
|
|
+ putQReg128(tt, mkV128(0x0000));
|
|
+ }
|
|
+ putQReg(tt, loadLE(ty, mkexpr(tEA)));
|
|
+ } else {
|
|
+ storeLE(mkexpr(tEA), getQReg(ty, tt));
|
|
+ }
|
|
+ DIP("%s %s, [%s, #%u]\n",
|
|
+ isLD ? "ldr" : "str",
|
|
+ nameQReg(1 << szLg2, tt), nameIReg64orSP(nn), pimm12);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
|
|
+ /* These are the Post-Index and Pre-Index variants.
|
|
+
|
|
+ 31 29 23 20 11 9 4
|
|
+ (at-Rn-then-Rn=EA)
|
|
+ 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
|
|
+ 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
|
|
+ 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
|
|
+ 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
|
|
+ 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
|
|
+
|
|
+ (at-EA-then-Rn=EA)
|
|
+ 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
|
|
+ 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
|
|
+ 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
|
|
+ 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
|
|
+ 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
|
|
+
|
|
+ Stores are the same except with bit 22 set to 0.
|
|
+ */
|
|
+ if (INSN(29,24) == BITS6(1,1,1,1,0,0)
|
|
+ && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
|
|
+ && INSN(21,21) == 0 && INSN(10,10) == 1) {
|
|
+ UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ UInt imm9 = INSN(20,12);
|
|
+ Bool atRN = INSN(11,11) == 0;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ IRTemp tRN = newTemp(Ity_I64);
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ IRTemp tTA = IRTemp_INVALID;
|
|
+ IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
|
|
+ ULong simm9 = sx_to_64(imm9, 9);
|
|
+ assign(tRN, getIReg64orSP(nn));
|
|
+ assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
|
|
+ tTA = atRN ? tRN : tEA;
|
|
+ if (isLD) {
|
|
+ if (szLg2 < 4) {
|
|
+ putQReg128(tt, mkV128(0x0000));
|
|
+ }
|
|
+ putQReg(tt, loadLE(ty, mkexpr(tTA)));
|
|
+ } else {
|
|
+ storeLE(mkexpr(tTA), getQReg(ty, tt));
|
|
+ }
|
|
+ putIReg64orSP(nn, mkexpr(tEA));
|
|
+ DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
|
|
+ isLD ? "ldr" : "str",
|
|
+ nameQReg(1 << szLg2, tt), nameIReg64orSP(nn), simm9);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
|
|
+ /* 31 29 23 20 11 9 4
|
|
+ 00 111 100 01 0 imm9 00 n t LDUR Bt, [Xn|SP, #simm]
|
|
+ 01 111 100 01 0 imm9 00 n t LDUR Ht, [Xn|SP, #simm]
|
|
+ 10 111 100 01 0 imm9 00 n t LDUR St, [Xn|SP, #simm]
|
|
+ 11 111 100 01 0 imm9 00 n t LDUR Dt, [Xn|SP, #simm]
|
|
+ 00 111 100 11 0 imm9 00 n t LDUR Qt, [Xn|SP, #simm]
|
|
+
|
|
+ 00 111 100 00 0 imm9 00 n t STUR Bt, [Xn|SP, #simm]
|
|
+ 01 111 100 00 0 imm9 00 n t STUR Ht, [Xn|SP, #simm]
|
|
+ 10 111 100 00 0 imm9 00 n t STUR St, [Xn|SP, #simm]
|
|
+ 11 111 100 00 0 imm9 00 n t STUR Dt, [Xn|SP, #simm]
|
|
+ 00 111 100 10 0 imm9 00 n t STUR Qt, [Xn|SP, #simm]
|
|
+ */
|
|
+ if (INSN(29,24) == BITS6(1,1,1,1,0,0)
|
|
+ && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
|
|
+ && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
|
|
+ UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ UInt imm9 = INSN(20,12);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ ULong simm9 = sx_to_64(imm9, 9);
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
|
|
+ assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
|
|
+ if (isLD) {
|
|
+ if (szLg2 < 4) {
|
|
+ putQReg128(tt, mkV128(0x0000));
|
|
+ }
|
|
+ putQReg(tt, loadLE(ty, mkexpr(tEA)));
|
|
+ } else {
|
|
+ storeLE(mkexpr(tEA), getQReg(ty, tt));
|
|
+ }
|
|
+ DIP("%s %s, [%s, #%lld]\n",
|
|
+ isLD ? "ldur" : "stur",
|
|
+ nameQReg(1 << szLg2, tt), nameIReg64orSP(nn), (Long)simm9);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ---------------- LDR (literal, SIMD&FP) ---------------- */
|
|
+ /* 31 29 23 4
|
|
+ 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
|
|
+ 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
|
|
+ 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
|
|
+ */
|
|
+ if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
|
|
+ UInt szB = 4 << INSN(31,30);
|
|
+ UInt imm19 = INSN(23,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
|
|
+ IRType ty = preferredVectorSubTypeFromSize(szB);
|
|
+ putQReg(tt, loadLE(ty, mkU64(ea)));
|
|
+ DIP("ldr %s, 0x%llx (literal)\n", nameQReg(szB, tt), ea);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* FIXME Temporary hacks to get through ld.so FIXME */
|
|
+
|
|
+ /* ------------------ ST1 variants ------------------ */
|
|
+ /* st1 {vT.2d}, [<xN|SP>], #16.
|
|
+ Note that #16 is implied and cannot be set to any
|
|
+ other value.
|
|
+ 0100 1100 1001 1111 0111 11 N T
|
|
+ FIXME doesn't this assume that the host is little endian?
|
|
+ */
|
|
+ if ((insn & 0xFFFFFC00) == 0x4C9F7C00) {
|
|
+ UInt rN = INSN(9,5);
|
|
+ UInt vT = INSN(4,0);
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ assign(tEA, getIReg64orSP(rN));
|
|
+ if (rN == 31) { /* FIXME generate stack alignment check */ }
|
|
+ storeLE(mkexpr(tEA), getQReg128(vT));
|
|
+ putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
|
|
+ DIP("st1 {v%u.2d}, [%s], #16\n", vT, nameIReg64orSP(rN));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ------------------ LD1 variants ------------------ */
|
|
+ /* 31 23
|
|
+ 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
|
|
+ 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
|
|
+ 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
|
|
+ 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
|
|
+ FIXME doesn't this assume that the host is little endian?
|
|
+ */
|
|
+ if ( (insn & 0xFFFFFC00) == 0x4C407C00 // LD1 {vT.2d}, [Xn|SP]
|
|
+ || (insn & 0xFFFFFC00) == 0x4C007C00 // ST1 {vT.2d}, [Xn|SP]
|
|
+ || (insn & 0xFFFFFC00) == 0x4C407000 // LD1 {vT.16b}, [Xn|SP]
|
|
+ || (insn & 0xFFFFFC00) == 0x4C007000 // ST1 {vT.16b}, [Xn|SP]
|
|
+ ) {
|
|
+ Bool isLD = INSN(22,22) == 1;
|
|
+ UInt rN = INSN(9,5);
|
|
+ UInt vT = INSN(4,0);
|
|
+ IRTemp tEA = newTemp(Ity_I64);
|
|
+ assign(tEA, getIReg64orSP(rN));
|
|
+ if (rN == 31) { /* FIXME generate stack alignment check */ }
|
|
+ if (isLD) {
|
|
+ putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
|
|
+ } else {
|
|
+ storeLE(mkexpr(tEA), getQReg128(vT));
|
|
+ }
|
|
+ DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
|
|
+ vT, INSN(11,10) == BITS2(0,0) ? "16b" : "2d",
|
|
+ nameIReg64orSP(rN));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- LD{A}XR -------------------- */
|
|
+ /* FIXME: this is a hack; needs real atomicity stuff. */
|
|
+ /* 31 29 20 19 9 4
|
|
+ 1x(size) 001000010 1 1111 1 11111 n t LDAXR Rt, [Xn|SP]
|
|
+ 1x(size) 001000010 1 1111 0 11111 n t LDXR Rt, [Xn|SP]
|
|
+ */
|
|
+ if (INSN(29,20) == BITS10(0,0,1,0,0,0,0,1,0,1)
|
|
+ && (INSN(19,10) == BITS10(1,1,1,1,1,1,1,1,1,1)
|
|
+ || INSN(19,10) == BITS10(1,1,1,1,0,1,1,1,1,1))
|
|
+ && INSN(31,31) == 1) {
|
|
+ Bool is64 = INSN(30,30) == 1;
|
|
+ Bool isA = INSN(15,15) == 1;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (is64) {
|
|
+ putIReg64orZR(tt, loadLE(Ity_I64, getIReg64orSP(nn)));
|
|
+ } else {
|
|
+ putIReg32orZR(tt, loadLE(Ity_I32, getIReg64orSP(nn)));
|
|
+ }
|
|
+ DIP("ld%sxr %s, [%s]\n",
|
|
+ isA ? "s" : "", nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- ST{L}XR -------------------- */
|
|
+ /* FIXME: this is a hack; needs real atomicity stuff. */
|
|
+ /* 31 29 20 15 14 9 4
|
|
+ 1x(size) 001000000 s 0 11111 n t STXR Ws, Rt, [Xn|SP]
|
|
+ 1x(size) 001000000 s 1 11111 n t STLXR Ws, Rt, [Xn|SP]
|
|
+ with the result coding that Ws == 0 iff the store succeeded
|
|
+ */
|
|
+ if (INSN(29,21) == BITS9(0,0,1,0,0,0,0,0,0)
|
|
+ && INSN(14,10) == BITS5(1,1,1,1,1) && INSN(31,31) == 1) {
|
|
+ Bool is64 = INSN(30,30) == 1;
|
|
+ UInt ss = INSN(20,16);
|
|
+ Bool isL = INSN(15,15) == 1;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (is64) {
|
|
+ storeLE(getIReg64orSP(nn), getIReg64orZR(tt));
|
|
+ } else {
|
|
+ storeLE(getIReg64orSP(nn), getIReg32orZR(tt));
|
|
+ }
|
|
+ putIReg32orZR(ss, mkU32(0));
|
|
+ DIP("st%sxr %s, %s, [%s]\n",
|
|
+ isL ? "s" : "",
|
|
+ nameIReg32orZR(ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ vex_printf("ARM64 front end: load_store\n");
|
|
+ return False;
|
|
+# undef INSN
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Control flow and misc instructions ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+static
|
|
+Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
+{
|
|
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
|
|
+
|
|
+ /* ---------------------- B cond ----------------------- */
|
|
+ /* 31 24 4 3
|
|
+ 0101010 0 imm19 0 cond */
|
|
+ if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
|
|
+ UInt cond = INSN(3,0);
|
|
+ ULong uimm64 = INSN(23,5) << 2;
|
|
+ Long simm64 = (Long)sx_to_64(uimm64, 21);
|
|
+ vassert(dres->whatNext == Dis_Continue);
|
|
+ vassert(dres->len == 4);
|
|
+ vassert(dres->continueAt == 0);
|
|
+ vassert(dres->jk_StopHere == Ijk_INVALID);
|
|
+ stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
|
|
+ Ijk_Boring,
|
|
+ IRConst_U64(guest_PC_curr_instr + simm64),
|
|
+ OFFB_PC) );
|
|
+ putPC(mkU64(guest_PC_curr_instr + 4));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Boring;
|
|
+ DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- B{L} uncond -------------------- */
|
|
+ if (INSN(30,26) == BITS5(0,0,1,0,1)) {
|
|
+ /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
|
|
+ 100101 imm26 BL (PC + sxTo64(imm26 << 2))
|
|
+ */
|
|
+ UInt bLink = INSN(31,31);
|
|
+ ULong uimm64 = INSN(25,0) << 2;
|
|
+ Long simm64 = (Long)sx_to_64(uimm64, 28);
|
|
+ if (bLink) {
|
|
+ putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
|
|
+ }
|
|
+ putPC(mkU64(guest_PC_curr_instr + simm64));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Call;
|
|
+ DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
|
|
+ guest_PC_curr_instr + simm64);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* --------------------- B{L} reg --------------------- */
|
|
+ /* 31 24 22 20 15 9 4
|
|
+ 1101011 00 10 11111 000000 nn 00000 RET Rn
|
|
+ 1101011 00 01 11111 000000 nn 00000 CALL Rn
|
|
+ 1101011 00 00 11111 000000 nn 00000 JMP Rn
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
|
|
+ && INSN(20,16) == BITS5(1,1,1,1,1)
|
|
+ && INSN(15,10) == BITS6(0,0,0,0,0,0)
|
|
+ && INSN(4,0) == BITS5(0,0,0,0,0)) {
|
|
+ UInt branch_type = INSN(22,21);
|
|
+ UInt nn = INSN(9,5);
|
|
+ if (branch_type == BITS2(1,0) /* RET */) {
|
|
+ putPC(getIReg64orZR(nn));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Ret;
|
|
+ DIP("ret %s\n", nameIReg64orZR(nn));
|
|
+ return True;
|
|
+ }
|
|
+ if (branch_type == BITS2(0,1) /* CALL */) {
|
|
+ putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
|
|
+ putPC(getIReg64orZR(nn));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Call;
|
|
+ DIP("blr %s\n", nameIReg64orZR(nn));
|
|
+ return True;
|
|
+ }
|
|
+ if (branch_type == BITS2(0,0) /* JMP */) {
|
|
+ putPC(getIReg64orZR(nn));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Boring;
|
|
+ DIP("jmp %s\n", nameIReg64orZR(nn));
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------------------- CB{N}Z -------------------- */
|
|
+ /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
|
|
+ sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
|
|
+ */
|
|
+ if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
|
|
+ Bool is64 = INSN(31,31) == 1;
|
|
+ Bool bIfZ = INSN(24,24) == 0;
|
|
+ ULong uimm64 = INSN(23,5) << 2;
|
|
+ UInt rT = INSN(4,0);
|
|
+ Long simm64 = (Long)sx_to_64(uimm64, 21);
|
|
+ IRExpr* cond = NULL;
|
|
+ if (is64) {
|
|
+ cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
|
|
+ getIReg64orZR(rT), mkU64(0));
|
|
+ } else {
|
|
+ cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
|
|
+ getIReg32orZR(rT), mkU32(0));
|
|
+ }
|
|
+ stmt( IRStmt_Exit(cond,
|
|
+ Ijk_Boring,
|
|
+ IRConst_U64(guest_PC_curr_instr + simm64),
|
|
+ OFFB_PC) );
|
|
+ putPC(mkU64(guest_PC_curr_instr + 4));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Boring;
|
|
+ DIP("cb%sz %s, 0x%llx\n",
|
|
+ bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
|
|
+ guest_PC_curr_instr + simm64);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- TB{N}Z -------------------- */
|
|
+ /* 31 30 24 23 18 5 4
|
|
+ b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
|
|
+ b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
|
|
+ */
|
|
+ if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
|
|
+ UInt b5 = INSN(31,31);
|
|
+ Bool bIfZ = INSN(24,24) == 0;
|
|
+ UInt b40 = INSN(23,19);
|
|
+ UInt imm14 = INSN(18,5);
|
|
+ UInt tt = INSN(4,0);
|
|
+ UInt bitNo = (b5 << 5) | b40;
|
|
+ ULong uimm64 = imm14 << 2;
|
|
+ Long simm64 = sx_to_64(uimm64, 16);
|
|
+ IRExpr* cond
|
|
+ = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
|
|
+ binop(Iop_And64,
|
|
+ binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
|
|
+ mkU64(1)),
|
|
+ mkU64(0));
|
|
+ stmt( IRStmt_Exit(cond,
|
|
+ Ijk_Boring,
|
|
+ IRConst_U64(guest_PC_curr_instr + simm64),
|
|
+ OFFB_PC) );
|
|
+ putPC(mkU64(guest_PC_curr_instr + 4));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Boring;
|
|
+ DIP("tb%sz %s, #%u, 0x%llx\n",
|
|
+ bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
|
|
+ guest_PC_curr_instr + simm64);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------------- SVC -------------------- */
|
|
+ /* 11010100 000 imm16 000 01
|
|
+ Don't bother with anything except the imm16==0 case.
|
|
+ */
|
|
+ if (INSN(31,0) == 0xD4000001) {
|
|
+ putPC(mkU64(guest_PC_curr_instr + 4));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_Sys_syscall;
|
|
+ DIP("svc #0\n");
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ------------------ M{SR,RS} ------------------ */
|
|
+ /* Only handles the case where the system register is TPIDR_EL0.
|
|
+ 0xD51BD0 010 Rt MSR tpidr_el0, rT
|
|
+ 0xD53BD0 010 Rt MRS rT, tpidr_el0
|
|
+ */
|
|
+ if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
|
|
+ || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
|
|
+ Bool toSys = INSN(21,21) == 0;
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (toSys) {
|
|
+ stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
|
|
+ DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
|
|
+ } else {
|
|
+ putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
|
|
+ DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ /* Cases for FPCR
|
|
+ 0xD51B44 000 Rt MSR fpcr, rT
|
|
+ 0xD53B44 000 Rt MRS rT, fpcr
|
|
+ */
|
|
+ if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
|
|
+ || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
|
|
+ Bool toSys = INSN(21,21) == 0;
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (toSys) {
|
|
+ stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
|
|
+ DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
|
|
+ } else {
|
|
+ putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
|
|
+ DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ /* Cases for FPSR
|
|
+ 0xD51B44 001 Rt MSR fpsr, rT
|
|
+ 0xD53B44 001 Rt MRS rT, fpsr
|
|
+ */
|
|
+ if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
|
|
+ || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
|
|
+ Bool toSys = INSN(21,21) == 0;
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (toSys) {
|
|
+ stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
|
|
+ DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
|
|
+ } else {
|
|
+ putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
|
|
+ DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ /* Cases for NZCV
|
|
+ D51B42 000 Rt MSR nzcv, rT
|
|
+ D53B42 000 Rt MRS rT, nzcv
|
|
+ */
|
|
+ if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
|
|
+ || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
|
|
+ Bool toSys = INSN(21,21) == 0;
|
|
+ UInt tt = INSN(4,0);
|
|
+ if (toSys) {
|
|
+ IRTemp t = newTemp(Ity_I64);
|
|
+ assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
|
|
+ setFlags_COPY(t);
|
|
+ DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
|
|
+ } else {
|
|
+ IRTemp res = newTemp(Ity_I64);
|
|
+ assign(res, mk_arm64g_calculate_flags_nzcv());
|
|
+ putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
|
|
+ DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* FIXME Temporary hacks to get through ld.so FIXME */
|
|
+ /* ------------------ ISB ------------------ */
|
|
+ if (INSN(31,0) == 0xD5033FDF) {
|
|
+ /* FIXME: not really a nop */
|
|
+ DIP("isb\n");
|
|
+ return True;
|
|
+ }
|
|
+ if (INSN(31,0) == 0xD5033BBF) {
|
|
+ /* FIXME: not really a nop */
|
|
+ DIP("dmb ish\n");
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ //fail:
|
|
+ vex_printf("ARM64 front end: branch_etc\n");
|
|
+ return False;
|
|
+# undef INSN
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- SIMD and FP instructions ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Generate N copies of |bit| in the bottom of a ULong. */
|
|
+static ULong Replicate ( ULong bit, Int N )
|
|
+{
|
|
+ vassert(bit <= 1 && N >= 1 && N < 64);
|
|
+ if (bit == 0) {
|
|
+ return 0;
|
|
+ } else {
|
|
+ /* Careful. This won't work for N == 64. */
|
|
+ return (1ULL << N) - 1;
|
|
+ }
|
|
+}
|
|
+
|
|
+static ULong VFPExpandImm ( ULong imm8, Int N )
|
|
+{
|
|
+ vassert(imm8 <= 0xFF);
|
|
+ vassert(N == 32 || N == 64);
|
|
+ Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
|
|
+ Int F = N - E - 1;
|
|
+ ULong imm8_6 = (imm8 >> 6) & 1;
|
|
+ /* sign: 1 bit */
|
|
+ /* exp: E bits */
|
|
+ /* frac: F bits */
|
|
+ ULong sign = (imm8 >> 7) & 1;
|
|
+ ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
|
|
+ ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
|
|
+ vassert(sign < (1ULL << 1));
|
|
+ vassert(exp < (1ULL << E));
|
|
+ vassert(frac < (1ULL << F));
|
|
+ vassert(1 + E + F == N);
|
|
+ ULong res = (sign << (E+F)) | (exp << F) | frac;
|
|
+ return res;
|
|
+}
|
|
+
|
|
+static
|
|
+Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
+{
|
|
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
|
|
+
|
|
+ /* ---------------- FMOV (general) ---------------- */
|
|
+ /* case 30 23 20 18 15 9 4
|
|
+ (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn
|
|
+ (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn
|
|
+ (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
|
|
+
|
|
+ (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn
|
|
+ (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn
|
|
+ (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
|
|
+ */
|
|
+ if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
|
|
+ && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
|
|
+ UInt sf = INSN(31,31);
|
|
+ UInt ty = INSN(23,22); // type
|
|
+ UInt rm = INSN(20,19); // rmode
|
|
+ UInt op = INSN(18,16); // opcode
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ UInt ix = 0; // case
|
|
+ if (sf == 0) {
|
|
+ if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
|
|
+ ix = 1;
|
|
+ else
|
|
+ if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
|
|
+ ix = 4;
|
|
+ } else {
|
|
+ vassert(sf == 1);
|
|
+ if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
|
|
+ ix = 2;
|
|
+ else
|
|
+ if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
|
|
+ ix = 5;
|
|
+ else
|
|
+ if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
|
|
+ ix = 3;
|
|
+ else
|
|
+ if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
|
|
+ ix = 6;
|
|
+ }
|
|
+ if (ix > 0) {
|
|
+ switch (ix) {
|
|
+ case 1:
|
|
+ putQReg128(dd, mkV128(0));
|
|
+ putQReg(dd, getIReg32orZR(nn));
|
|
+ DIP("fmov s%u, w%u\n", dd, nn);
|
|
+ break;
|
|
+ case 2:
|
|
+ putQReg128(dd, mkV128(0));
|
|
+ putQReg(dd, getIReg64orZR(nn));
|
|
+ DIP("fmov d%u, x%u\n", dd, nn);
|
|
+ break;
|
|
+ case 3:
|
|
+ putQReg64HI(dd, getIReg64orZR(nn));
|
|
+ DIP("fmov v%u.d[1], x%u\n", dd, nn);
|
|
+ break;
|
|
+ case 4:
|
|
+ putIReg32orZR(dd, getQReg(Ity_I32, nn));
|
|
+ DIP("fmov w%u, s%u\n", dd, nn);
|
|
+ break;
|
|
+ case 5:
|
|
+ putIReg64orZR(dd, getQReg(Ity_I64, nn));
|
|
+ DIP("fmov x%u, d%u\n", dd, nn);
|
|
+ break;
|
|
+ case 6:
|
|
+ putIReg64orZR(dd, getQReg64HI(nn));
|
|
+ DIP("fmov x%u, v%u.d[1]\n", dd, nn);
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ return True;
|
|
+ }
|
|
+ /* undecodable; fall through */
|
|
+ }
|
|
+
|
|
+ /* -------------- FMOV (scalar, immediate) -------------- */
|
|
+ /* 31 28 23 20 12 9 4
|
|
+ 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm
|
|
+ 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
|
|
+ && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
|
|
+ Bool isD = INSN(22,22) == 1;
|
|
+ UInt imm8 = INSN(20,13);
|
|
+ UInt dd = INSN(4,0);
|
|
+ ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
|
|
+ if (!isD) {
|
|
+ vassert(0 == (imm & 0xFFFFFFFF00000000));
|
|
+ }
|
|
+ putQReg128(dd, mkV128(0));
|
|
+ putQReg(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
|
|
+ DIP("fmov %s, #0x%llx\n", nameQReg(isD ? 8 : 4, dd), imm);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------- {S,U}CVTF (scalar, integer) -------------- */
|
|
+ /* 31 28 23 21 20 18 15 9 4 ix
|
|
+ 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 0
|
|
+ 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 1
|
|
+ 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 2
|
|
+ 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 3
|
|
+
|
|
+ 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 4
|
|
+ 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 5
|
|
+ 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 6
|
|
+ 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 7
|
|
+
|
|
+ These are signed/unsigned conversion from integer registers to
|
|
+ FP registers, all 4 32/64-bit combinations, rounded per FPCR.
|
|
+ */
|
|
+ if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
|
|
+ && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
|
|
+ Bool isI64 = INSN(31,31) == 1;
|
|
+ Bool isF64 = INSN(22,22) == 1;
|
|
+ Bool isU = INSN(16,16) == 1;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
|
|
+ const IROp ops[8]
|
|
+ = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
|
|
+ Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
|
|
+ IRExpr* src = getIRegOrZR(isI64, nn);
|
|
+ IRExpr* res = (isF64 && !isI64)
|
|
+ ? unop(ops[ix], src)
|
|
+ : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
|
|
+ putQReg128(dd, mkV128(0));
|
|
+ putQReg(dd, res);
|
|
+ DIP("%ccvtf %s, %s\n",
|
|
+ isU ? 'u' : 's', nameQReg(isF64 ? 8 : 4, dd),
|
|
+ nameIRegOrZR(isI64, nn));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------------- F{ADD,SUB,MUL,DIV} (scalar) -------------- */
|
|
+ /* 31 23 20 15 11 9 4
|
|
+ ---------------- 0000 ------ FMUL --------
|
|
+ 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
|
|
+ 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
|
|
+ ---------------- 0010 ------ FADD --------
|
|
+ ---------------- 0011 ------ FSUB --------
|
|
+ ---------------- 1000 ------ FNMUL --------
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
|
|
+ && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
|
|
+ Bool isD = INSN(22,22) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt op = INSN(15,12);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IROp iop = Iop_INVALID;
|
|
+ IRType ty = isD ? Ity_F64 : Ity_F32;
|
|
+ UInt szB = isD ? 8 : 4;
|
|
+ Bool neg = False;
|
|
+ const HChar* nm = "???";
|
|
+ switch (op) {
|
|
+ case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break;
|
|
+ case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break;
|
|
+ case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break;
|
|
+ case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break;
|
|
+ case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
|
|
+ neg = True; break;
|
|
+ default: return False;
|
|
+ }
|
|
+ vassert(iop != Iop_INVALID);
|
|
+ IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
|
|
+ getQReg(ty, nn), getQReg(ty, mm));
|
|
+ IRTemp res = newTemp(ty);
|
|
+ assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
|
|
+ putQReg128(dd, mkV128(0));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ DIP("%s %s, %s, %s\n",
|
|
+ nm, nameQReg(szB, dd), nameQReg(szB, nn), nameQReg(szB, mm));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
|
|
+ /* 31 23 21 16 14 9 4
|
|
+ 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
|
|
+ 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
|
|
+ ------------------ 01 --------- FABS ------
|
|
+ ------------------ 10 --------- FNEG ------
|
|
+ ------------------ 11 --------- FSQRT -----
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
|
|
+ && INSN(21,17) == BITS5(1,0,0,0,0)
|
|
+ && INSN(14,10) == BITS5(1,0,0,0,0)) {
|
|
+ Bool isD = INSN(22,22) == 1;
|
|
+ UInt opc = INSN(16,15);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRType ty = isD ? Ity_F64 : Ity_F32;
|
|
+ UInt szB = isD ? 8 : 4;
|
|
+ IRTemp res = newTemp(ty);
|
|
+ if (opc == BITS2(0,0)) {
|
|
+ assign(res, getQReg(ty, nn));
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ DIP("fmov %s, %s\n", nameQReg(szB, dd), nameQReg(szB, nn));
|
|
+ return True;
|
|
+ }
|
|
+ if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
|
|
+ Bool isAbs = opc == BITS2(0,1);
|
|
+ IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty);
|
|
+ assign(res, unop(op, getQReg(ty, nn)));
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
|
|
+ nameQReg(szB, dd), nameQReg(szB, nn));
|
|
+ return True;
|
|
+ }
|
|
+ if (opc == BITS2(1,1)) {
|
|
+ assign(res,
|
|
+ binop(mkSQRTF(ty),
|
|
+ mkexpr(mk_get_IR_rounding_mode()), getQReg(ty, nn)));
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ DIP("fsqrt %s, %s\n", nameQReg(szB, dd), nameQReg(szB, nn));
|
|
+ return True;
|
|
+ }
|
|
+ /* else fall through; other cases are ATC */
|
|
+ }
|
|
+
|
|
+ /* -------------------- FCMP,FCMPE -------------------- */
|
|
+ /* 31 23 20 15 9 4
|
|
+ 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
|
|
+ 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
|
|
+ 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
|
|
+ 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
|
|
+
|
|
+ 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
|
|
+ 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
|
|
+ 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
|
|
+ 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
|
|
+
|
|
+ FCMPE generates Invalid Operation exn if either arg is any kind
|
|
+ of NaN. FCMP generates Invalid Operation exn if either arg is a
|
|
+ signalling NaN. We ignore this detail here and produce the same
|
|
+ IR for both.
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
|
|
+ && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
|
|
+ Bool isD = INSN(22,22) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt nn = INSN(9,5);
|
|
+ Bool isCMPE = INSN(4,4) == 1;
|
|
+ Bool cmpZero = INSN(3,3) == 1;
|
|
+ IRType ty = isD ? Ity_F64 : Ity_F32;
|
|
+ UInt szB = isD ? 8 : 4;
|
|
+ Bool valid = True;
|
|
+ if (cmpZero && mm != 0) valid = False;
|
|
+ if (valid) {
|
|
+ IRTemp argL = newTemp(ty);
|
|
+ IRTemp argR = newTemp(ty);
|
|
+ IRTemp irRes = newTemp(Ity_I32);
|
|
+ assign(argL, getQReg(ty, nn));
|
|
+ assign(argR,
|
|
+ cmpZero
|
|
+ ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
|
|
+ : getQReg(ty, mm));
|
|
+ assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
|
|
+ mkexpr(argL), mkexpr(argR)));
|
|
+ IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
|
|
+ IRTemp nzcv_28x0 = newTemp(Ity_I64);
|
|
+ assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
|
|
+ setFlags_COPY(nzcv_28x0);
|
|
+ DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "",
|
|
+ nameQReg(szB, nn), cmpZero ? "#0.0" : nameQReg(szB, mm));
|
|
+ return True;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* -------------------- F{N}M{ADD,SUB} -------------------- */
|
|
+ /* 31 22 20 15 14 9 4 ix
|
|
+ 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
|
|
+ 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
|
|
+ 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
|
|
+ 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
|
|
+ where Fx=Dx when sz=1, Fx=Sx when sz=0
|
|
+
|
|
+ -----SPEC------ ----IMPL----
|
|
+ fmadd a + n * m a + n * m
|
|
+ fmsub a + (-n) * m a - n * m
|
|
+ fnmadd (-a) + (-n) * m -(a + n * m)
|
|
+ fnmsub (-a) + n * m -(a - n * m)
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
|
|
+ Bool isD = INSN(22,22) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt aa = INSN(14,10);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ UInt ix = (INSN(21,21) << 1) | INSN(15,15);
|
|
+ IRType ty = isD ? Ity_F64 : Ity_F32;
|
|
+ UInt szB = isD ? 8 : 4;
|
|
+ IROp opADD = mkADDF(ty);
|
|
+ IROp opSUB = mkSUBF(ty);
|
|
+ IROp opMUL = mkMULF(ty);
|
|
+ IROp opNEG = mkNEGF(ty);
|
|
+ IRTemp res = newTemp(ty);
|
|
+ IRExpr* eA = getQReg(ty, aa);
|
|
+ IRExpr* eN = getQReg(ty, nn);
|
|
+ IRExpr* eM = getQReg(ty, mm);
|
|
+ IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
|
|
+ IRExpr* eNxM = triop(opMUL, rm, eN, eM);
|
|
+ switch (ix) {
|
|
+ case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
|
|
+ case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
|
|
+ case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
|
|
+ case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
|
|
+ DIP("%s %s, %s, %s, %s\n",
|
|
+ names[ix], nameQReg(szB, dd), nameQReg(szB, nn),
|
|
+ nameQReg(szB, mm), nameQReg(szB, aa));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
|
|
+ /* 30 23 20 18 15 9 4
|
|
+ sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
|
|
+ sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
|
|
+ ---------------- 01 -------------- FCVTP-------- (round to +inf)
|
|
+ ---------------- 10 -------------- FCVTM-------- (round to -inf)
|
|
+ ---------------- 11 -------------- FCVTZ-------- (round to zero)
|
|
+
|
|
+ Rd is Xd when sf==1, Wd when sf==0
|
|
+ Fn is Dn when x==1, Sn when x==0
|
|
+ 20:19 carry the rounding mode, using the same encoding as FPCR
|
|
+ */
|
|
+ if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
|
|
+ && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
|
|
+ Bool isI64 = INSN(31,31) == 1;
|
|
+ Bool isF64 = INSN(22,22) == 1;
|
|
+ UInt rm = INSN(20,19);
|
|
+ Bool isU = INSN(16,16) == 1;
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ /* Decide on the IR rounding mode to use. */
|
|
+ IRRoundingMode irrm = 8; /*impossible*/
|
|
+ HChar ch = '?';
|
|
+ switch (rm) {
|
|
+ case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
|
|
+ case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
|
|
+ case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
|
|
+ case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
|
|
+ default: vassert(0);
|
|
+ }
|
|
+ vassert(irrm != 8);
|
|
+ /* Decide on the conversion primop, based on the source size,
|
|
+ dest size and signedness (8 possibilities). Case coding:
|
|
+ F32 ->s I32 0
|
|
+ F32 ->u I32 1
|
|
+ F32 ->s I64 2
|
|
+ F32 ->u I64 3
|
|
+ F64 ->s I32 4
|
|
+ F64 ->u I32 5
|
|
+ F64 ->s I64 6
|
|
+ F64 ->u I64 7
|
|
+ */
|
|
+ UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
|
|
+ vassert(ix < 8);
|
|
+ const IROp ops[8]
|
|
+ = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
|
|
+ Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
|
|
+ IROp op = ops[ix];
|
|
+ // A bit of ATCery: bounce all cases we haven't seen an example of.
|
|
+ if (/* F32toI32S */
|
|
+ (op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
|
|
+ /* F32toI32U */
|
|
+ /* F32toI64S */
|
|
+ /* F32toI64U */
|
|
+ || (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
|
|
+ /* F64toI32S */
|
|
+ || (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
|
|
+ || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
|
|
+ || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
|
|
+ /* F64toI32U */
|
|
+ || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
|
|
+ || (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
|
|
+ /* F64toI64S */
|
|
+ || (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
|
|
+ /* F64toI64U */
|
|
+ || (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
|
|
+ ) {
|
|
+ /* validated */
|
|
+ } else {
|
|
+ return False;
|
|
+ }
|
|
+ UInt srcSzB = isF64 ? 8 : 4;
|
|
+ IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
|
|
+ IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
|
|
+ IRTemp src = newTemp(srcTy);
|
|
+ IRTemp dst = newTemp(dstTy);
|
|
+ assign(src, getQReg(srcTy, nn));
|
|
+ assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
|
|
+ putIRegOrZR(isI64, dd, mkexpr(dst));
|
|
+ DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
|
|
+ nameIRegOrZR(isI64, dd), nameQReg(srcSzB, nn));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
|
|
+ /* 31 23 21 17 14 9 4
|
|
+ 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
|
|
+ rm
|
|
+ x==0 => S-registers, x==1 => D-registers
|
|
+ rm (17:15) encodings:
|
|
+ 111 per FPCR (FRINTI)
|
|
+ 001 +inf (FRINTP)
|
|
+ 010 -inf (FRINTM)
|
|
+ 011 zero (FRINTZ)
|
|
+ 000 tieeven
|
|
+ 100 tieaway
|
|
+ 110 per FPCR + "exact = TRUE"
|
|
+ 101 unallocated
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
|
|
+ && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
|
|
+ Bool isD = INSN(22,22) == 1;
|
|
+ UInt rm = INSN(17,15);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRType ty = isD ? Ity_F64 : Ity_F32;
|
|
+ UInt szB = isD ? 8 : 4;
|
|
+ IRExpr* irrmE = NULL;
|
|
+ UChar ch = '?';
|
|
+ switch (rm) {
|
|
+ case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
|
|
+ case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
|
|
+ case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
|
|
+ default: break;
|
|
+ }
|
|
+ if (irrmE) {
|
|
+ IRTemp src = newTemp(ty);
|
|
+ IRTemp dst = newTemp(ty);
|
|
+ assign(src, getQReg(ty, nn));
|
|
+ assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
|
|
+ irrmE, mkexpr(src)));
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(dst));
|
|
+ DIP("frint%c %s, %s\n", ch, nameQReg(szB, dd), nameQReg(szB, nn));
|
|
+ return True;
|
|
+ }
|
|
+ /* else unhandled rounding mode case -- fall through */
|
|
+ }
|
|
+
|
|
+ /* ------------------ FCVT (scalar) ------------------ */
|
|
+ /* 31 23 21 16 14 9 4
|
|
+ 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
|
|
+ --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
|
|
+ --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
|
|
+ --------- 00 ----- 01 --------- FCVT Dd, Sn
|
|
+ --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
|
|
+ --------- 01 ----- 00 --------- FCVT Sd, Dn
|
|
+ Rounding, when dst is smaller than src, is per the FPCR.
|
|
+ */
|
|
+ if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
|
|
+ && INSN(21,17) == BITS5(1,0,0,0,1)
|
|
+ && INSN(14,10) == BITS5(1,0,0,0,0)) {
|
|
+ UInt b2322 = INSN(23,22);
|
|
+ UInt b1615 = INSN(16,15);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
|
|
+ /* Convert S to D */
|
|
+ IRTemp res = newTemp(Ity_F64);
|
|
+ assign(res, unop(Iop_F32toF64, getQReg(Ity_F32, nn)));
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ DIP("fcvt %s, %s\n", nameQReg(8, dd), nameQReg(4, nn));
|
|
+ return True;
|
|
+ }
|
|
+ if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
|
|
+ /* Convert D to S */
|
|
+ IRTemp res = newTemp(Ity_F32);
|
|
+ assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
|
|
+ getQReg(Ity_F64, nn)));
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ DIP("fcvt %s, %s\n", nameQReg(4, dd), nameQReg(8, nn));
|
|
+ return True;
|
|
+ }
|
|
+ /* else unhandled */
|
|
+ }
|
|
+
|
|
+ /* ------------------ FABD (scalar) ------------------ */
|
|
+ /* 31 23 20 15 9 4
|
|
+ 011 11110 111 m 110101 n d FABD Dd, Dn, Dm
|
|
+ 011 11110 101 m 110101 n d FABD Sd, Sn, Sm
|
|
+ */
|
|
+ if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
|
|
+ && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
|
|
+ Bool isD = INSN(22,22) == 1;
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRType ty = isD ? Ity_F64 : Ity_F32;
|
|
+ UInt szB = isD ? 8 : 4;
|
|
+ IRTemp res = newTemp(ty);
|
|
+ assign(res, unop(mkABSF(ty), triop(mkSUBF(ty),
|
|
+ mkexpr(mk_get_IR_rounding_mode()),
|
|
+ getQReg(ty,nn), getQReg(ty,mm))));
|
|
+ putQReg128(dd, mkV128(0x0000));
|
|
+ putQReg(dd, mkexpr(res));
|
|
+ DIP("fabd %s, %s, %s\n",
|
|
+ nameQReg(szB, dd), nameQReg(szB, nn), nameQReg(szB, mm));
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* FIXME Temporary hacks to get through ld.so FIXME */
|
|
+
|
|
+ /* ------------------ movi vD.4s, #0x0 ------------------ */
|
|
+ /* 0x4F 0x00 0x04 000 vD */
|
|
+ if ((insn & 0xFFFFFFE0) == 0x4F000400) {
|
|
+ UInt vD = INSN(4,0);
|
|
+ putQReg128(vD, mkV128(0x0000));
|
|
+ DIP("movi v%u.4s, #0x0\n", vD);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ------------------ dup vD.2d, xN ------------------ */
|
|
+ /* 0x4E 0x08 0000 11 xN(5) vD(5) */
|
|
+ if ((insn & 0xFFFFFC00) == 0x4E080C00) {
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ IRTemp src64 = newTemp(Ity_I64);
|
|
+ assign(src64, getIReg64orZR(nn));
|
|
+ IRTemp res = newTemp(Ity_V128);
|
|
+ assign(res, binop(Iop_64HLtoV128, mkexpr(src64), mkexpr(src64)));
|
|
+ putQReg128(dd, mkexpr(res));
|
|
+ DIP("dup v%u.2d, x%u\n", dd, nn);
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* ---------------- MOV vD.16b, vN.16b ---------------- */
|
|
+ /* 31 23 20 15 9 4
|
|
+ 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b
|
|
+ This only handles the N == M case.
|
|
+ */
|
|
+ if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
|
|
+ && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
|
|
+ UInt mm = INSN(20,16);
|
|
+ UInt nn = INSN(9,5);
|
|
+ UInt dd = INSN(4,0);
|
|
+ if (mm == nn) {
|
|
+ putQReg128(dd, getQReg128(nn));
|
|
+ DIP("mov v%u.16b, v%u.16b\n", dd, nn);
|
|
+ return True;
|
|
+ }
|
|
+ /* else it's really an ORR; fall through. */
|
|
+ }
|
|
+
|
|
+ vex_printf("ARM64 front end: simd_and_fp\n");
|
|
+ return False;
|
|
+# undef INSN
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Disassemble a single ARM64 instruction ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Disassemble a single ARM64 instruction into IR. The instruction
|
|
+ is located at |guest_instr| and has guest IP of
|
|
+ |guest_PC_curr_instr|, which will have been set before the call
|
|
+ here. Returns True iff the instruction was decoded, in which case
|
|
+ *dres will be set accordingly, or False, in which case *dres should
|
|
+ be ignored by the caller. */
|
|
+
|
|
+static
|
|
+Bool disInstr_ARM64_WRK (
|
|
+ /*MB_OUT*/DisResult* dres,
|
|
+ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
|
|
+ Bool resteerCisOk,
|
|
+ void* callback_opaque,
|
|
+ UChar* guest_instr,
|
|
+ VexArchInfo* archinfo,
|
|
+ VexAbiInfo* abiinfo
|
|
+ )
|
|
+{
|
|
+ // A macro to fish bits out of 'insn'.
|
|
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
|
|
+
|
|
+//ZZ DisResult dres;
|
|
+//ZZ UInt insn;
|
|
+//ZZ //Bool allow_VFP = False;
|
|
+//ZZ //UInt hwcaps = archinfo->hwcaps;
|
|
+//ZZ IRTemp condT; /* :: Ity_I32 */
|
|
+//ZZ UInt summary;
|
|
+//ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
|
|
+//ZZ
|
|
+//ZZ /* What insn variants are we supporting today? */
|
|
+//ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
|
|
+//ZZ // etc etc
|
|
+
|
|
+ /* Set result defaults. */
|
|
+ dres->whatNext = Dis_Continue;
|
|
+ dres->len = 4;
|
|
+ dres->continueAt = 0;
|
|
+ dres->jk_StopHere = Ijk_INVALID;
|
|
+
|
|
+ /* At least this is simple on ARM64: insns are all 4 bytes long, and
|
|
+ 4-aligned. So just fish the whole thing out of memory right now
|
|
+ and have done. */
|
|
+ UInt insn = getUIntLittleEndianly( guest_instr );
|
|
+
|
|
+ if (0) vex_printf("insn: 0x%x\n", insn);
|
|
+
|
|
+ DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
|
|
+
|
|
+ vassert(0 == (guest_PC_curr_instr & 3ULL));
|
|
+
|
|
+ /* ----------------------------------------------------------- */
|
|
+
|
|
+ /* Spot "Special" instructions (see comment at top of file). */
|
|
+ {
|
|
+ UChar* code = (UChar*)guest_instr;
|
|
+ /* Spot the 16-byte preamble:
|
|
+ 93CC0D8C ror x12, x12, #3
|
|
+ 93CC358C ror x12, x12, #13
|
|
+ 93CCCD8C ror x12, x12, #51
|
|
+ 93CCF58C ror x12, x12, #61
|
|
+ */
|
|
+ UInt word1 = 0x93CC0D8C;
|
|
+ UInt word2 = 0x93CC358C;
|
|
+ UInt word3 = 0x93CCCD8C;
|
|
+ UInt word4 = 0x93CCF58C;
|
|
+ if (getUIntLittleEndianly(code+ 0) == word1 &&
|
|
+ getUIntLittleEndianly(code+ 4) == word2 &&
|
|
+ getUIntLittleEndianly(code+ 8) == word3 &&
|
|
+ getUIntLittleEndianly(code+12) == word4) {
|
|
+ /* Got a "Special" instruction preamble. Which one is it? */
|
|
+ if (getUIntLittleEndianly(code+16) == 0xAA0A014A
|
|
+ /* orr x10,x10,x10 */) {
|
|
+ /* X3 = client_request ( X4 ) */
|
|
+ DIP("x3 = client_request ( x4 )\n");
|
|
+ putPC(mkU64( guest_PC_curr_instr + 20 ));
|
|
+ dres->jk_StopHere = Ijk_ClientReq;
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ return True;
|
|
+ }
|
|
+ else
|
|
+ if (getUIntLittleEndianly(code+16) == 0xAA0B016B
|
|
+ /* orr x11,x11,x11 */) {
|
|
+ /* X3 = guest_NRADDR */
|
|
+ DIP("x3 = guest_NRADDR\n");
|
|
+ dres->len = 20;
|
|
+ putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
|
|
+ return True;
|
|
+ }
|
|
+ else
|
|
+ if (getUIntLittleEndianly(code+16) == 0xAA0C018C
|
|
+ /* orr x12,x12,x12 */) {
|
|
+ /* branch-and-link-to-noredir X8 */
|
|
+ DIP("branch-and-link-to-noredir x8\n");
|
|
+ putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
|
|
+ putPC(getIReg64orZR(8));
|
|
+ dres->jk_StopHere = Ijk_NoRedir;
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ return True;
|
|
+ }
|
|
+ else
|
|
+ if (getUIntLittleEndianly(code+16) == 0xAA090129
|
|
+ /* orr x9,x9,x9 */) {
|
|
+ /* IR injection */
|
|
+ DIP("IR injection\n");
|
|
+ vex_inject_ir(irsb, Iend_LE);
|
|
+ // Invalidate the current insn. The reason is that the IRop we're
|
|
+ // injecting here can change. In which case the translation has to
|
|
+ // be redone. For ease of handling, we simply invalidate all the
|
|
+ // time.
|
|
+ stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_PC_curr_instr)));
|
|
+ stmt(IRStmt_Put(OFFB_TILEN, mkU64(20)));
|
|
+ putPC(mkU64( guest_PC_curr_instr + 20 ));
|
|
+ dres->whatNext = Dis_StopHere;
|
|
+ dres->jk_StopHere = Ijk_TInval;
|
|
+ return True;
|
|
+ }
|
|
+ /* We don't know what it is. */
|
|
+ return False;
|
|
+ /*NOTREACHED*/
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* ----------------------------------------------------------- */
|
|
+
|
|
+ /* Main ARM64 instruction decoder starts here. */
|
|
+
|
|
+ Bool ok = False;
|
|
+
|
|
+ /* insn[28:25] determines the top-level grouping, so let's start
|
|
+ off with that.
|
|
+
|
|
+ For all of these dis_ARM64_ functions, we pass *dres with the
|
|
+ normal default results "insn OK, 4 bytes long, keep decoding" so
|
|
+ they don't need to change it. However, decodes of control-flow
|
|
+ insns may cause *dres to change.
|
|
+ */
|
|
+ switch (INSN(28,25)) {
|
|
+ case BITS4(1,0,0,0): case BITS4(1,0,0,1):
|
|
+ // Data processing - immediate
|
|
+ ok = dis_ARM64_data_processing_immediate(dres, insn);
|
|
+ break;
|
|
+ case BITS4(1,0,1,0): case BITS4(1,0,1,1):
|
|
+ // Branch, exception generation and system instructions
|
|
+ ok = dis_ARM64_branch_etc(dres, insn);
|
|
+ break;
|
|
+ case BITS4(0,1,0,0): case BITS4(0,1,1,0):
|
|
+ case BITS4(1,1,0,0): case BITS4(1,1,1,0):
|
|
+ // Loads and stores
|
|
+ ok = dis_ARM64_load_store(dres, insn);
|
|
+ break;
|
|
+ case BITS4(0,1,0,1): case BITS4(1,1,0,1):
|
|
+ // Data processing - register
|
|
+ ok = dis_ARM64_data_processing_register(dres, insn);
|
|
+ break;
|
|
+ case BITS4(0,1,1,1): case BITS4(1,1,1,1):
|
|
+ // Data processing - SIMD and floating point
|
|
+ ok = dis_ARM64_simd_and_fp(dres, insn);
|
|
+ break;
|
|
+ case BITS4(0,0,0,0): case BITS4(0,0,0,1):
|
|
+ case BITS4(0,0,1,0): case BITS4(0,0,1,1):
|
|
+ // UNALLOCATED
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0); /* Can't happen */
|
|
+ }
|
|
+
|
|
+ /* If the next-level down decoders failed, make sure |dres| didn't
|
|
+ get changed. */
|
|
+ if (!ok) {
|
|
+ vassert(dres->whatNext == Dis_Continue);
|
|
+ vassert(dres->len == 4);
|
|
+ vassert(dres->continueAt == 0);
|
|
+ vassert(dres->jk_StopHere == Ijk_INVALID);
|
|
+ }
|
|
+
|
|
+ return ok;
|
|
+
|
|
+# undef INSN
|
|
+}
|
|
+
|
|
+
|
|
+/*------------------------------------------------------------*/
|
|
+/*--- Top-level fn ---*/
|
|
+/*------------------------------------------------------------*/
|
|
+
|
|
+/* Disassemble a single instruction into IR. The instruction
|
|
+ is located in host memory at &guest_code[delta]. */
|
|
+
|
|
+DisResult disInstr_ARM64 ( IRSB* irsb_IN,
|
|
+ Bool (*resteerOkFn) ( void*, Addr64 ),
|
|
+ Bool resteerCisOk,
|
|
+ void* callback_opaque,
|
|
+ UChar* guest_code_IN,
|
|
+ Long delta_IN,
|
|
+ Addr64 guest_IP,
|
|
+ VexArch guest_arch,
|
|
+ VexArchInfo* archinfo,
|
|
+ VexAbiInfo* abiinfo,
|
|
+ Bool host_bigendian_IN,
|
|
+ Bool sigill_diag_IN )
|
|
+{
|
|
+ DisResult dres;
|
|
+ vex_bzero(&dres, sizeof(dres));
|
|
+
|
|
+ /* Set globals (see top of this file) */
|
|
+ vassert(guest_arch == VexArchARM64);
|
|
+
|
|
+ irsb = irsb_IN;
|
|
+ host_is_bigendian = host_bigendian_IN;
|
|
+ guest_PC_curr_instr = (Addr64)guest_IP;
|
|
+
|
|
+ /* Try to decode */
|
|
+ Bool ok = disInstr_ARM64_WRK( &dres,
|
|
+ resteerOkFn, resteerCisOk, callback_opaque,
|
|
+ (UChar*)&guest_code_IN[delta_IN],
|
|
+ archinfo, abiinfo );
|
|
+ if (ok) {
|
|
+ /* All decode successes end up here. */
|
|
+ vassert(dres.len == 4 /*|| dres.len == 20*/);
|
|
+ switch (dres.whatNext) {
|
|
+ case Dis_Continue:
|
|
+ putPC( mkU64(dres.len + guest_PC_curr_instr) );
|
|
+ break;
|
|
+ case Dis_ResteerU:
|
|
+ case Dis_ResteerC:
|
|
+ putPC(mkU64(dres.continueAt));
|
|
+ break;
|
|
+ case Dis_StopHere:
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
+ }
|
|
+ DIP("\n");
|
|
+ } else {
|
|
+ /* All decode failures end up here. */
|
|
+ if (sigill_diag_IN) {
|
|
+ Int i, j;
|
|
+ UChar buf[64];
|
|
+ UInt insn
|
|
+ = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
|
|
+ vex_bzero(buf, sizeof(buf));
|
|
+ for (i = j = 0; i < 32; i++) {
|
|
+ if (i > 0) {
|
|
+ if ((i & 7) == 0) buf[j++] = ' ';
|
|
+ else if ((i & 3) == 0) buf[j++] = '\'';
|
|
+ }
|
|
+ buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
|
|
+ }
|
|
+ vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
|
|
+ vex_printf("disInstr(arm64): %s\n", buf);
|
|
+ }
|
|
+
|
|
+ /* Tell the dispatcher that this insn cannot be decoded, and so
|
|
+ has not been executed, and (is currently) the next to be
|
|
+ executed. PC should be up-to-date since it is made so at the
|
|
+ start of each insn, but nevertheless be paranoid and update
|
|
+ it again right now. */
|
|
+ putPC( mkU64(guest_PC_curr_instr) );
|
|
+ dres.whatNext = Dis_StopHere;
|
|
+ dres.len = 0;
|
|
+ dres.continueAt = 0;
|
|
+ dres.jk_StopHere = Ijk_NoDecode;
|
|
+ }
|
|
+ return dres;
|
|
+}
|
|
+
|
|
+/*--------------------------------------------------------------------*/
|
|
+/*--- end guest_arm64_toIR.c ---*/
|
|
+/*--------------------------------------------------------------------*/
|
|
Index: VEX/priv/host_generic_simd64.c
|
|
===================================================================
|
|
--- VEX/priv/host_generic_simd64.c.orig
|
|
+++ VEX/priv/host_generic_simd64.c
|
|
@@ -1553,7 +1553,11 @@ ULong h_calc_BCDtoDPB( ULong bcd )
|
|
|
|
/* ----------------------------------------------------- */
|
|
/* Signed and unsigned integer division, that behave like
|
|
- the ARMv7 UDIV ansd SDIV instructions. */
|
|
+ the ARMv7 UDIV and SDIV instructions.
|
|
+
|
|
+ sdiv32 also behaves like 64-bit v8 SDIV on w-regs.
|
|
+ udiv32 also behaves like 64-bit v8 UDIV on w-regs.
|
|
+*/
|
|
/* ----------------------------------------------------- */
|
|
|
|
UInt h_calc_udiv32_w_arm_semantics ( UInt x, UInt y )
|
|
@@ -1564,17 +1568,40 @@ UInt h_calc_udiv32_w_arm_semantics ( UIn
|
|
return x / y;
|
|
}
|
|
|
|
+ULong h_calc_udiv64_w_arm_semantics ( ULong x, ULong y )
|
|
+{
|
|
+ // Division by zero --> zero
|
|
+ if (UNLIKELY(y == 0)) return 0;
|
|
+ // C requires rounding towards zero, which is also what we need.
|
|
+ return x / y;
|
|
+}
|
|
+
|
|
Int h_calc_sdiv32_w_arm_semantics ( Int x, Int y )
|
|
{
|
|
// Division by zero --> zero
|
|
if (UNLIKELY(y == 0)) return 0;
|
|
- // The single case that produces an unpresentable result
|
|
+ // The single case that produces an unrepresentable result
|
|
if (UNLIKELY( ((UInt)x) == ((UInt)0x80000000)
|
|
&& ((UInt)y) == ((UInt)0xFFFFFFFF) ))
|
|
return (Int)(UInt)0x80000000;
|
|
// Else return the result rounded towards zero. C89 says
|
|
// this is implementation defined (in the signed case), but gcc
|
|
// promises to round towards zero. Nevertheless, at startup,
|
|
+ // in main_main.c, do a check for that.
|
|
+ return x / y;
|
|
+}
|
|
+
|
|
+Long h_calc_sdiv64_w_arm_semantics ( Long x, Long y )
|
|
+{
|
|
+ // Division by zero --> zero
|
|
+ if (UNLIKELY(y == 0)) return 0;
|
|
+ // The single case that produces an unrepresentable result
|
|
+ if (UNLIKELY( ((ULong)x) == ((ULong)0x8000000000000000ULL )
|
|
+ && ((ULong)y) == ((ULong)0xFFFFFFFFFFFFFFFFULL ) ))
|
|
+ return (Long)(ULong)0x8000000000000000ULL;
|
|
+ // Else return the result rounded towards zero. C89 says
|
|
+ // this is implementation defined (in the signed case), but gcc
|
|
+ // promises to round towards zero. Nevertheless, at startup,
|
|
// in main_main.c, do a check for that.
|
|
return x / y;
|
|
}
|
|
Index: VEX/priv/host_generic_simd64.h
|
|
===================================================================
|
|
--- VEX/priv/host_generic_simd64.h.orig
|
|
+++ VEX/priv/host_generic_simd64.h
|
|
@@ -166,8 +166,11 @@ extern ULong h_calc_BCDtoDPB ( ULong bcd
|
|
|
|
// Signed and unsigned integer division, that behave like
|
|
// the ARMv7 UDIV and SDIV instructions.
|
|
-extern UInt h_calc_udiv32_w_arm_semantics ( UInt, UInt );
|
|
-extern Int h_calc_sdiv32_w_arm_semantics ( Int, Int );
|
|
+extern UInt h_calc_udiv32_w_arm_semantics ( UInt, UInt );
|
|
+extern ULong h_calc_udiv64_w_arm_semantics ( ULong, ULong );
|
|
+extern Int h_calc_sdiv32_w_arm_semantics ( Int, Int );
|
|
+extern Long h_calc_sdiv64_w_arm_semantics ( Long, Long );
|
|
+
|
|
|
|
#endif /* ndef __VEX_HOST_GENERIC_SIMD64_H */
|
|
|
|
Index: VEX/priv/guest_arm64_helpers.c
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ VEX/priv/guest_arm64_helpers.c
|
|
@@ -0,0 +1,1292 @@
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- begin guest_arm64_helpers.c ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+/*
|
|
+ This file is part of Valgrind, a dynamic binary instrumentation
|
|
+ framework.
|
|
+
|
|
+ Copyright (C) 2013-2013 OpenWorks
|
|
+ info@open-works.net
|
|
+
|
|
+ This program is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU General Public License as
|
|
+ published by the Free Software Foundation; either version 2 of the
|
|
+ License, or (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful, but
|
|
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ 02110-1301, USA.
|
|
+
|
|
+ The GNU General Public License is contained in the file COPYING.
|
|
+*/
|
|
+
|
|
+#include "libvex_basictypes.h"
|
|
+#include "libvex_emnote.h"
|
|
+#include "libvex_guest_arm64.h"
|
|
+#include "libvex_ir.h"
|
|
+#include "libvex.h"
|
|
+
|
|
+#include "main_util.h"
|
|
+#include "main_globals.h"
|
|
+#include "guest_generic_bb_to_IR.h"
|
|
+#include "guest_arm64_defs.h"
|
|
+
|
|
+
|
|
+/* This file contains helper functions for arm64 guest code. Calls to
|
|
+ these functions are generated by the back end. These calls are of
|
|
+ course in the host machine code and this file will be compiled to
|
|
+ host machine code, so that all makes sense.
|
|
+
|
|
+ Only change the signatures of these helper functions very
|
|
+ carefully. If you change the signature here, you'll have to change
|
|
+ the parameters passed to it in the IR calls constructed by
|
|
+ guest_arm64_toIR.c.
|
|
+*/
|
|
+
|
|
+
|
|
+/* Set to 1 to get detailed profiling info about individual N, Z, C
|
|
+ and V flag evaluation. */
|
|
+#define PROFILE_NZCV_FLAGS 0
|
|
+
|
|
+#if PROFILE_NZCV_FLAGS
|
|
+
|
|
+static UInt tab_eval[ARM64G_CC_OP_NUMBER][16];
|
|
+static UInt initted = 0;
|
|
+static UInt tot_evals = 0;
|
|
+
|
|
+static void initCounts ( void )
|
|
+{
|
|
+ UInt i, j;
|
|
+ for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
|
|
+ for (j = 0; j < 16; j++) {
|
|
+ tab_eval[i][j] = 0;
|
|
+ }
|
|
+ }
|
|
+ initted = 1;
|
|
+}
|
|
+
|
|
+static void showCounts ( void )
|
|
+{
|
|
+ const HChar* nameCC[16]
|
|
+ = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
|
|
+ "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" };
|
|
+ UInt i, j;
|
|
+ ULong sum = 0;
|
|
+ vex_printf("\nCC_OP 0 1 2 3 "
|
|
+ " 4 5 6\n");
|
|
+ vex_printf( "--------------------------------------------------"
|
|
+ "--------------------------\n");
|
|
+ for (j = 0; j < 16; j++) {
|
|
+ vex_printf("%2d %s ", j, nameCC[j]);
|
|
+ for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
|
|
+ vex_printf("%9d ", tab_eval[i][j]);
|
|
+ sum += tab_eval[i][j];
|
|
+ }
|
|
+ vex_printf("\n");
|
|
+ }
|
|
+ vex_printf("(In total %llu calls)\n", sum);
|
|
+}
|
|
+
|
|
+#define NOTE_EVAL(_cc_op, _cond) /* count one (_cc_op,_cond) evaluation; dump table every 0x8000 evals */ \
|
|
+ do { \
|
|
+ if (!initted) initCounts(); \
|
|
+ vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \
|
|
+ vassert( ((UInt)(_cond)) < 16); \
|
|
+ tab_eval[(UInt)(_cc_op)][(UInt)(_cond)]++; \
|
|
+ tot_evals++; \
|
|
+ if (0 == (tot_evals & 0x7FFF)) \
|
|
+ showCounts(); \
|
|
+ } while (0)
|
|
+
|
|
+#endif /* PROFILE_NZCV_FLAGS */
|
|
+
|
|
+
|
|
+/* Calculate the N flag from the supplied thunk components, in the
|
|
+ least significant bit of the word. Returned bits 63:1 are zero. */
|
|
+static
|
|
+ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 )
|
|
+{
|
|
+ switch (cc_op) {
|
|
+ case ARM64G_CC_OP_COPY: {
|
|
+ /* (nzcv:28x0, unused, unused) */
|
|
+ ULong nf = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1;
|
|
+ return nf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ UInt res = argL + argR;
|
|
+ ULong nf = (ULong)(res >> 31);
|
|
+ return nf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong res = argL + argR;
|
|
+ ULong nf = (ULong)(res >> 63);
|
|
+ return nf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ UInt res = argL - argR;
|
|
+ ULong nf = (ULong)(res >> 31);
|
|
+ return nf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong res = argL - argR;
|
|
+ ULong nf = res >> 63;
|
|
+ return nf;
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_ADC: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt res = argL + argR + oldC;
|
|
+//ZZ UInt nf = res >> 31;
|
|
+//ZZ return nf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_SBB: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt res = argL - argR - (oldC ^ 1);
|
|
+//ZZ UInt nf = res >> 31;
|
|
+//ZZ return nf;
|
|
+//ZZ }
|
|
+ case ARM64G_CC_OP_LOGIC32: {
|
|
+ /* (res, unused, unused) */
|
|
+ UInt res = (UInt)cc_dep1;
|
|
+ ULong nf = res >> 31;
|
|
+ return nf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_LOGIC64: {
|
|
+ /* (res, unused, unused) */
|
|
+ ULong res = cc_dep1;
|
|
+ ULong nf = res >> 63;
|
|
+ return nf;
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_MUL: {
|
|
+//ZZ /* (res, unused, oldC:oldV) */
|
|
+//ZZ UInt res = cc_dep1;
|
|
+//ZZ UInt nf = res >> 31;
|
|
+//ZZ return nf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_MULL: {
|
|
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
|
|
+//ZZ UInt resHi32 = cc_dep2;
|
|
+//ZZ UInt nf = resHi32 >> 31;
|
|
+//ZZ return nf;
|
|
+//ZZ }
|
|
+ default:
|
|
+ /* shouldn't really make these calls from generated code */
|
|
+ vex_printf("arm64g_calculate_flag_n"
|
|
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
|
|
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
|
|
+ vpanic("arm64g_calculate_flag_n");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* Calculate the Z flag from the supplied thunk components, in the
|
|
+ least significant bit of the word. Returned bits 63:1 are zero. */
|
|
+static
|
|
+ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 )
|
|
+{
|
|
+ switch (cc_op) {
|
|
+ case ARM64G_CC_OP_COPY: {
|
|
+ /* (nzcv:28x0, unused, unused) */
|
|
+ ULong zf = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1;
|
|
+ return zf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ UInt res = argL + argR;
|
|
+ ULong zf = res == 0;
|
|
+ return zf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong res = argL + argR;
|
|
+ ULong zf = res == 0;
|
|
+ return zf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ UInt res = argL - argR;
|
|
+ ULong zf = res == 0;
|
|
+ return zf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong res = argL - argR;
|
|
+ ULong zf = res == 0;
|
|
+ return zf;
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_ADC: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt res = argL + argR + oldC;
|
|
+//ZZ UInt zf = res == 0;
|
|
+//ZZ return zf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_SBB: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt res = argL - argR - (oldC ^ 1);
|
|
+//ZZ UInt zf = res == 0;
|
|
+//ZZ return zf;
|
|
+//ZZ }
|
|
+ case ARM64G_CC_OP_LOGIC32: {
|
|
+ /* (res, unused, unused) */
|
|
+ UInt res = (UInt)cc_dep1;
|
|
+ ULong zf = res == 0;
|
|
+ return zf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_LOGIC64: {
|
|
+ /* (res, unused, unused) */
|
|
+ ULong res = cc_dep1;
|
|
+ ULong zf = res == 0;
|
|
+ return zf;
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_MUL: {
|
|
+//ZZ /* (res, unused, oldC:oldV) */
|
|
+//ZZ UInt res = cc_dep1;
|
|
+//ZZ UInt zf = res == 0;
|
|
+//ZZ return zf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_MULL: {
|
|
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
|
|
+//ZZ UInt resLo32 = cc_dep1;
|
|
+//ZZ UInt resHi32 = cc_dep2;
|
|
+//ZZ UInt zf = (resHi32|resLo32) == 0;
|
|
+//ZZ return zf;
|
|
+//ZZ }
|
|
+ default:
|
|
+ /* shouldn't really make these calls from generated code */
|
|
+ vex_printf("arm64g_calculate_flag_z"
|
|
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
|
|
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
|
|
+ vpanic("arm64g_calculate_flag_z");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
|
|
+/* Calculate the C flag from the supplied thunk components, in the
|
|
+ least significant bit of the word. Returned bits 63:1 are zero. */
|
|
+static
|
|
+ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 )
|
|
+{
|
|
+ switch (cc_op) {
|
|
+ case ARM64G_CC_OP_COPY: {
|
|
+ /* (nzcv:28x0, unused, unused) */
|
|
+ ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1;
|
|
+ return cf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ UInt res = argL + argR;
|
|
+ ULong cf = res < argL;
|
|
+ return cf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong res = argL + argR;
|
|
+ ULong cf = res < argL;
|
|
+ return cf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ ULong cf = argL >= argR;
|
|
+ return cf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong cf = argL >= argR;
|
|
+ return cf;
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_ADC: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt res = argL + argR + oldC;
|
|
+//ZZ UInt cf = oldC ? (res <= argL) : (res < argL);
|
|
+//ZZ return cf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_SBB: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt cf = oldC ? (argL >= argR) : (argL > argR);
|
|
+//ZZ return cf;
|
|
+//ZZ }
|
|
+ case ARM64G_CC_OP_LOGIC32:
|
|
+ case ARM64G_CC_OP_LOGIC64: {
|
|
+ /* (res, unused, unused) */
|
|
+ return 0; // C after logic is zero on arm64
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_MUL: {
|
|
+//ZZ /* (res, unused, oldC:oldV) */
|
|
+//ZZ UInt oldC = (cc_dep3 >> 1) & 1;
|
|
+//ZZ vassert((cc_dep3 & ~3) == 0);
|
|
+//ZZ UInt cf = oldC;
|
|
+//ZZ return cf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_MULL: {
|
|
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
|
|
+//ZZ UInt oldC = (cc_dep3 >> 1) & 1;
|
|
+//ZZ vassert((cc_dep3 & ~3) == 0);
|
|
+//ZZ UInt cf = oldC;
|
|
+//ZZ return cf;
|
|
+//ZZ }
|
|
+ default:
|
|
+ /* shouldn't really make these calls from generated code */
|
|
+ vex_printf("arm64g_calculate_flag_c"
|
|
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
|
|
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
|
|
+ vpanic("arm64g_calculate_flag_c");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
|
|
+/* Calculate the V flag from the supplied thunk components, in the
|
|
+ least significant bit of the word. Returned bits 63:1 are zero. */
|
|
+static
|
|
+ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 )
|
|
+{
|
|
+ switch (cc_op) {
|
|
+ case ARM64G_CC_OP_COPY: {
|
|
+ /* (nzcv:28x0, unused, unused) */
|
|
+ ULong vf = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1;
|
|
+ return vf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ UInt res = argL + argR;
|
|
+ ULong vf = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
|
|
+ return vf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_ADD64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong res = argL + argR;
|
|
+ ULong vf = ((res ^ argL) & (res ^ argR)) >> 63;
|
|
+ return vf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB32: {
|
|
+ /* (argL, argR, unused) */
|
|
+ UInt argL = (UInt)cc_dep1;
|
|
+ UInt argR = (UInt)cc_dep2;
|
|
+ UInt res = argL - argR;
|
|
+ ULong vf = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
|
|
+ return vf;
|
|
+ }
|
|
+ case ARM64G_CC_OP_SUB64: {
|
|
+ /* (argL, argR, unused) */
|
|
+ ULong argL = cc_dep1;
|
|
+ ULong argR = cc_dep2;
|
|
+ ULong res = argL - argR;
|
|
+ ULong vf = (((argL ^ argR) & (argL ^ res))) >> 63;
|
|
+ return vf;
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_ADC: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt res = argL + argR + oldC;
|
|
+//ZZ UInt vf = ((res ^ argL) & (res ^ argR)) >> 31;
|
|
+//ZZ return vf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_SBB: {
|
|
+//ZZ /* (argL, argR, oldC) */
|
|
+//ZZ UInt argL = cc_dep1;
|
|
+//ZZ UInt argR = cc_dep2;
|
|
+//ZZ UInt oldC = cc_dep3;
|
|
+//ZZ vassert((oldC & ~1) == 0);
|
|
+//ZZ UInt res = argL - argR - (oldC ^ 1);
|
|
+//ZZ UInt vf = ((argL ^ argR) & (argL ^ res)) >> 31;
|
|
+//ZZ return vf;
|
|
+//ZZ }
|
|
+ case ARM64G_CC_OP_LOGIC32:
|
|
+ case ARM64G_CC_OP_LOGIC64: {
|
|
+ /* (res, unused, unused) */
|
|
+ return 0; // V after logic is zero on arm64
|
|
+ }
|
|
+//ZZ case ARMG_CC_OP_MUL: {
|
|
+//ZZ /* (res, unused, oldC:oldV) */
|
|
+//ZZ UInt oldV = (cc_dep3 >> 0) & 1;
|
|
+//ZZ vassert((cc_dep3 & ~3) == 0);
|
|
+//ZZ UInt vf = oldV;
|
|
+//ZZ return vf;
|
|
+//ZZ }
|
|
+//ZZ case ARMG_CC_OP_MULL: {
|
|
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
|
|
+//ZZ UInt oldV = (cc_dep3 >> 0) & 1;
|
|
+//ZZ vassert((cc_dep3 & ~3) == 0);
|
|
+//ZZ UInt vf = oldV;
|
|
+//ZZ return vf;
|
|
+//ZZ }
|
|
+ default:
|
|
+ /* shouldn't really make these calls from generated code */
|
|
+ vex_printf("arm64g_calculate_flag_v"
|
|
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
|
|
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
|
|
+ vpanic("arm64g_calculate_flag_v");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
|
|
+/* Calculate NZCV from the supplied thunk components, in the positions
|
|
+ they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
|
|
+ Returned bits 27:0 are zero. */
|
|
+ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 )
|
|
+{
|
|
+ ULong f;
|
|
+ ULong res = 0;
|
|
+ f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ res |= (f << ARM64G_CC_SHIFT_N);
|
|
+ f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ res |= (f << ARM64G_CC_SHIFT_Z);
|
|
+ f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ res |= (f << ARM64G_CC_SHIFT_C);
|
|
+ f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ res |= (f << ARM64G_CC_SHIFT_V);
|
|
+ return res;
|
|
+}
|
|
+
|
|
+//ZZ
|
|
+//ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
|
|
+//ZZ /* Calculate the QC flag from the arguments, in the lowest bit
|
|
+//ZZ of the word (bit 0). Urr, having this out of line is bizarre.
|
|
+//ZZ Push back inline. */
|
|
+//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
|
|
+//ZZ UInt resR1, UInt resR2 )
|
|
+//ZZ {
|
|
+//ZZ if (resL1 != resR1 || resL2 != resR2)
|
|
+//ZZ return 1;
|
|
+//ZZ else
|
|
+//ZZ return 0;
|
|
+//ZZ }
|
|
+
|
|
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
|
|
+/* Calculate the specified condition from the thunk components, in the
|
|
+ lowest bit of the word (bit 0). Returned bits 63:1 are zero. */
|
|
+ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
|
|
+ ULong cond_n_op ,
|
|
+ ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 )
|
|
+{
|
|
+ ULong cond = cond_n_op >> 4;
|
|
+ ULong cc_op = cond_n_op & 0xF;
|
|
+ ULong inv = cond & 1; // low bit of cond selects the inverted sense within each case pair
|
|
+ ULong nf, zf, vf, cf;
|
|
+
|
|
+# if PROFILE_NZCV_FLAGS
|
|
+ NOTE_EVAL(cc_op, cond);
|
|
+# endif
|
|
+
|
|
+ // vex_printf("XXXXXXXX %llx %llx %llx %llx\n",
|
|
+ // cond_n_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+
|
|
+ switch (cond) {
|
|
+ case ARM64CondEQ: // Z=1 => z
|
|
+ case ARM64CondNE: // Z=0
|
|
+ zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ return inv ^ zf;
|
|
+
|
|
+ case ARM64CondCS: // C=1 => c
|
|
+ case ARM64CondCC: // C=0
|
|
+ cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ return inv ^ cf;
|
|
+
|
|
+ case ARM64CondMI: // N=1 => n
|
|
+ case ARM64CondPL: // N=0
|
|
+ nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ return inv ^ nf;
|
|
+
|
|
+ case ARM64CondVS: // V=1 => v
|
|
+ case ARM64CondVC: // V=0
|
|
+ vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ return inv ^ vf;
|
|
+
|
|
+ case ARM64CondHI: // C=1 && Z=0 => c & ~z
|
|
+ case ARM64CondLS: // C=0 || Z=1
|
|
+ cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ return inv ^ (1 & (cf & ~zf));
|
|
+
|
|
+ case ARM64CondGE: // N=V => ~(n^v)
|
|
+ case ARM64CondLT: // N!=V
|
|
+ nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ return inv ^ (1 & ~(nf ^ vf));
|
|
+
|
|
+ case ARM64CondGT: // Z=0 && N=V => ~z & ~(n^v) => ~(z | (n^v))
|
|
+ case ARM64CondLE: // Z=1 || N!=V
|
|
+ nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
|
|
+ return inv ^ (1 & ~(zf | (nf ^ vf)));
|
|
+
|
|
+ case ARM64CondAL: // 1
|
|
+ case ARM64CondNV: // 1
|
|
+ return 1;
|
|
+
|
|
+ default:
|
|
+ /* shouldn't really make these calls from generated code */
|
|
+ vex_printf("arm64g_calculate_condition(ARM64)"
|
|
+ "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
|
|
+ cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
|
|
+ vpanic("arm64g_calculate_condition(ARM64)");
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- Flag-helpers translation-time function specialisers. ---*/
|
|
+/*--- These help iropt specialise calls to the above run-time ---*/
|
|
+/*--- flags functions. ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+/* Used by the optimiser to try specialisations. Returns an
|
|
+ equivalent expression, or NULL if none. */
|
|
+
|
|
+static Bool isU64 ( IRExpr* e, ULong n )
|
|
+{
|
|
+ return
|
|
+ toBool( e->tag == Iex_Const
|
|
+ && e->Iex.Const.con->tag == Ico_U64
|
|
+ && e->Iex.Const.con->Ico.U64 == n );
|
|
+}
|
|
+
|
|
+IRExpr* guest_arm64_spechelper ( const HChar* function_name,
|
|
+ IRExpr** args,
|
|
+ IRStmt** precedingStmts,
|
|
+ Int n_precedingStmts )
|
|
+{
|
|
+# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
|
|
+# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
|
|
+# define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
|
|
+# define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
|
|
+
|
|
+ Int i, arity = 0;
|
|
+ for (i = 0; args[i]; i++)
|
|
+ arity++;
|
|
+//ZZ # if 0
|
|
+//ZZ vex_printf("spec request:\n");
|
|
+//ZZ vex_printf(" %s ", function_name);
|
|
+//ZZ for (i = 0; i < arity; i++) {
|
|
+//ZZ vex_printf(" ");
|
|
+//ZZ ppIRExpr(args[i]);
|
|
+//ZZ }
|
|
+//ZZ vex_printf("\n");
|
|
+//ZZ # endif
|
|
+
|
|
+ /* --------- specialising "arm64g_calculate_condition" --------- */
|
|
+
|
|
+ if (vex_streq(function_name, "arm64g_calculate_condition")) {
|
|
+
|
|
+ /* specialise calls to the "arm64g_calculate_condition" function.
|
|
+ Not sure whether this is strictly necessary, but: the
|
|
+ replacement IR must produce only the values 0 or 1. Bits
|
|
+ 63:1 are required to be zero. */
|
|
+ IRExpr *cond_n_op, *cc_dep1, *cc_dep2 ; //, *cc_ndep;
|
|
+ vassert(arity == 4);
|
|
+ cond_n_op = args[0]; /* (ARM64Condcode << 4) | ARM64G_CC_OP_* */
|
|
+ cc_dep1 = args[1];
|
|
+ cc_dep2 = args[2];
|
|
+ //cc_ndep = args[3];
|
|
+
|
|
+ /*---------------- SUB64 ----------------*/
|
|
+
|
|
+ /* 0, 1 */
|
|
+ if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* EQ after SUB --> test argL == argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* NE after SUB --> test argL != argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpNE64, cc_dep1, cc_dep2));
|
|
+ }
|
|
+
|
|
+ /* 2, 3 */
|
|
+ if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* CS after SUB --> test argL >=u argR
|
|
+ --> test argR <=u argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* CC after SUB --> test argL <u argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
|
|
+ }
|
|
+
|
|
+ /* 8, 9 */
|
|
+ if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* LS after SUB --> test argL <=u argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* HI after SUB --> test argL >u argR
|
|
+ --> test argR <u argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
|
|
+ }
|
|
+
|
|
+ /* 10, 11 */
|
|
+ if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* LT after SUB --> test argL <s argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* GE after SUB --> test argL >=s argR
|
|
+ --> test argR <=s argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
|
|
+ }
|
|
+
|
|
+ /* 12, 13 */
|
|
+ if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* GT after SUB --> test argL >s argR
|
|
+ --> test argR <s argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB64)) {
|
|
+ /* LE after SUB --> test argL <=s argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
|
|
+ }
|
|
+
|
|
+ /*---------------- SUB32 ----------------*/
|
|
+
|
|
+ /* 0, 1 */
|
|
+ if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* EQ after SUB --> test argL == argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
|
|
+ unop(Iop_64to32, cc_dep2)));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* NE after SUB --> test argL != argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
|
|
+ unop(Iop_64to32, cc_dep2)));
|
|
+ }
|
|
+
|
|
+ /* 2, 3 */
|
|
+ if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* CS after SUB --> test argL >=u argR
|
|
+ --> test argR <=u argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
|
|
+ unop(Iop_64to32, cc_dep1)));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* CC after SUB --> test argL <u argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep1),
|
|
+ unop(Iop_64to32, cc_dep2)));
|
|
+ }
|
|
+
|
|
+ /* 8, 9 */
|
|
+ if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* LS after SUB --> test argL <=u argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1),
|
|
+ unop(Iop_64to32, cc_dep2)));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* HI after SUB --> test argL >u argR
|
|
+ --> test argR <u argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
|
|
+ unop(Iop_64to32, cc_dep1)));
|
|
+ }
|
|
+
|
|
+ /* 10, 11 */
|
|
+ if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* LT after SUB --> test argL <s argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep1),
|
|
+ unop(Iop_64to32, cc_dep2)));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* GE after SUB --> test argL >=s argR
|
|
+ --> test argR <=s argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2),
|
|
+ unop(Iop_64to32, cc_dep1)));
|
|
+ }
|
|
+
|
|
+ /* 12, 13 */
|
|
+ if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* GT after SUB --> test argL >s argR
|
|
+ --> test argR <s argL */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep2),
|
|
+ unop(Iop_64to32, cc_dep1)));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB32)) {
|
|
+ /* LE after SUB --> test argL <=s argR */
|
|
+ return unop(Iop_1Uto64,
|
|
+ binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1),
|
|
+ unop(Iop_64to32, cc_dep2)));
|
|
+ }
|
|
+
|
|
+//ZZ /*---------------- SBB ----------------*/
|
|
+//ZZ
|
|
+//ZZ if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
|
|
+//ZZ /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
|
|
+//ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
|
|
+//ZZ /* HS after SBB (same as C after SBB below)
|
|
+//ZZ --> oldC ? (argL >=u argR) : (argL >u argR)
|
|
+//ZZ --> oldC ? (argR <=u argL) : (argR <u argL)
|
|
+//ZZ */
|
|
+//ZZ return
|
|
+//ZZ IRExpr_ITE(
|
|
+//ZZ binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
|
|
+//ZZ /* case oldC != 0 */
|
|
+//ZZ unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
|
|
+//ZZ /* case oldC == 0 */
|
|
+//ZZ unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
|
|
+//ZZ );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /*---------------- LOGIC ----------------*/
|
|
+//ZZ
|
|
+//ZZ if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
|
|
+//ZZ /* EQ after LOGIC --> test res == 0 */
|
|
+//ZZ return unop(Iop_1Uto32,
|
|
+//ZZ binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
|
|
+//ZZ }
|
|
+//ZZ if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
|
|
+//ZZ /* NE after LOGIC --> test res != 0 */
|
|
+//ZZ return unop(Iop_1Uto32,
|
|
+//ZZ binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
|
|
+//ZZ /* PL after LOGIC --> test (res >> 31) == 0 */
|
|
+//ZZ return unop(Iop_1Uto32,
|
|
+//ZZ binop(Iop_CmpEQ32,
|
|
+//ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)),
|
|
+//ZZ mkU32(0)));
|
|
+//ZZ }
|
|
+//ZZ if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
|
|
+//ZZ /* MI after LOGIC --> test (res >> 31) == 1 */
|
|
+//ZZ return unop(Iop_1Uto32,
|
|
+//ZZ binop(Iop_CmpEQ32,
|
|
+//ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)),
|
|
+//ZZ mkU32(1)));
|
|
+//ZZ }
|
|
+
|
|
+ /*---------------- COPY ----------------*/
|
|
+
|
|
+ if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) {
|
|
+ /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */
|
|
+ return binop(Iop_And64,
|
|
+ binop(Iop_Shr64, cc_dep1,
|
|
+ mkU8(ARM64G_CC_SHIFT_Z)),
|
|
+ mkU64(1));
|
|
+ }
|
|
+ if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) {
|
|
+ /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */
|
|
+ return binop(Iop_And64,
|
|
+ binop(Iop_Xor64,
|
|
+ binop(Iop_Shr64, cc_dep1,
|
|
+ mkU8(ARM64G_CC_SHIFT_Z)),
|
|
+ mkU64(1)),
|
|
+ mkU64(1));
|
|
+ }
|
|
+
|
|
+//ZZ /*----------------- AL -----------------*/
|
|
+//ZZ
|
|
+//ZZ /* A critically important case for Thumb code.
|
|
+//ZZ
|
|
+//ZZ What we're trying to spot is the case where cond_n_op is an
|
|
+//ZZ expression of the form Or32(..., 0xE0) since that means the
|
|
+//ZZ caller is asking for CondAL and we can simply return 1
|
|
+//ZZ without caring what the ... part is. This is a potentially
|
|
+//ZZ dodgy kludge in that it assumes that the ... part has zeroes
|
|
+//ZZ in bits 7:4, so that the result of the Or32 is guaranteed to
|
|
+//ZZ be 0xE in bits 7:4. Given that the places where this first
|
|
+//ZZ arg are constructed (in guest_arm_toIR.c) are very
|
|
+//ZZ constrained, we can get away with this. To make this
|
|
+//ZZ guaranteed safe would require to have a new primop, Slice44
|
|
+//ZZ or some such, thusly
|
|
+//ZZ
|
|
+//ZZ Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
|
|
+//ZZ
|
|
+//ZZ and we would then look for Slice44(0xE0, ...)
|
|
+//ZZ which would give the required safety property.
|
|
+//ZZ
|
|
+//ZZ It would be infeasibly expensive to scan backwards through
|
|
+//ZZ the entire block looking for an assignment to the temp, so
|
|
+//ZZ just look at the previous 16 statements. That should find it
|
|
+//ZZ if it is an interesting case, as a result of how the
|
|
+//ZZ boilerplate guff at the start of each Thumb insn translation
|
|
+//ZZ is made.
|
|
+//ZZ */
|
|
+//ZZ if (cond_n_op->tag == Iex_RdTmp) {
|
|
+//ZZ Int j;
|
|
+//ZZ IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
|
|
+//ZZ Int limit = n_precedingStmts - 16;
|
|
+//ZZ if (limit < 0) limit = 0;
|
|
+//ZZ if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
|
|
+//ZZ for (j = n_precedingStmts - 1; j >= limit; j--) {
|
|
+//ZZ IRStmt* st = precedingStmts[j];
|
|
+//ZZ if (st->tag == Ist_WrTmp
|
|
+//ZZ && st->Ist.WrTmp.tmp == look_for
|
|
+//ZZ && st->Ist.WrTmp.data->tag == Iex_Binop
|
|
+//ZZ && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
|
|
+//ZZ && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
|
|
+//ZZ return mkU32(1);
|
|
+//ZZ }
|
|
+//ZZ /* Didn't find any useful binding to the first arg
|
|
+//ZZ in the previous 16 stmts. */
|
|
+//ZZ }
|
|
+ }
|
|
+
|
|
+//ZZ /* --------- specialising "armg_calculate_flag_c" --------- */
|
|
+//ZZ
|
|
+//ZZ else
|
|
+//ZZ if (vex_streq(function_name, "armg_calculate_flag_c")) {
|
|
+//ZZ
|
|
+//ZZ /* specialise calls to the "armg_calculate_flag_c" function.
|
|
+//ZZ Note that the returned value must be either 0 or 1; nonzero
|
|
+//ZZ bits 31:1 are not allowed. In turn, incoming oldV and oldC
|
|
+//ZZ values (from the thunk) are assumed to have bits 31:1
|
|
+//ZZ clear. */
|
|
+//ZZ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
|
|
+//ZZ vassert(arity == 4);
|
|
+//ZZ cc_op = args[0]; /* ARMG_CC_OP_* */
|
|
+//ZZ cc_dep1 = args[1];
|
|
+//ZZ cc_dep2 = args[2];
|
|
+//ZZ cc_ndep = args[3];
|
|
+//ZZ
|
|
+//ZZ if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
|
|
+//ZZ /* Thunk args are (result, shco, oldV) */
|
|
+//ZZ /* C after LOGIC --> shco */
|
|
+//ZZ return cc_dep2;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (isU32(cc_op, ARMG_CC_OP_SUB)) {
|
|
+//ZZ /* Thunk args are (argL, argR, unused) */
|
|
+//ZZ /* C after SUB --> argL >=u argR
|
|
+//ZZ --> argR <=u argL */
|
|
+//ZZ return unop(Iop_1Uto32,
|
|
+//ZZ binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (isU32(cc_op, ARMG_CC_OP_SBB)) {
|
|
+//ZZ /* This happens occasionally in softfloat code, eg __divdf3+140 */
|
|
+//ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
|
|
+//ZZ /* C after SBB (same as HS after SBB above)
|
|
+//ZZ --> oldC ? (argL >=u argR) : (argL >u argR)
|
|
+//ZZ --> oldC ? (argR <=u argL) : (argR <u argL)
|
|
+//ZZ */
|
|
+//ZZ return
|
|
+//ZZ IRExpr_ITE(
|
|
+//ZZ binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
|
|
+//ZZ /* case oldC != 0 */
|
|
+//ZZ unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
|
|
+//ZZ /* case oldC == 0 */
|
|
+//ZZ unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
|
|
+//ZZ );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ /* --------- specialising "armg_calculate_flag_v" --------- */
|
|
+//ZZ
|
|
+//ZZ else
|
|
+//ZZ if (vex_streq(function_name, "armg_calculate_flag_v")) {
|
|
+//ZZ
|
|
+//ZZ /* specialise calls to the "armg_calculate_flag_v" function.
|
|
+//ZZ Note that the returned value must be either 0 or 1; nonzero
|
|
+//ZZ bits 31:1 are not allowed. In turn, incoming oldV and oldC
|
|
+//ZZ values (from the thunk) are assumed to have bits 31:1
|
|
+//ZZ clear. */
|
|
+//ZZ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
|
|
+//ZZ vassert(arity == 4);
|
|
+//ZZ cc_op = args[0]; /* ARMG_CC_OP_* */
|
|
+//ZZ cc_dep1 = args[1];
|
|
+//ZZ cc_dep2 = args[2];
|
|
+//ZZ cc_ndep = args[3];
|
|
+//ZZ
|
|
+//ZZ if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
|
|
+//ZZ /* Thunk args are (result, shco, oldV) */
|
|
+//ZZ /* V after LOGIC --> oldV */
|
|
+//ZZ return cc_ndep;
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (isU32(cc_op, ARMG_CC_OP_SUB)) {
|
|
+//ZZ /* Thunk args are (argL, argR, unused) */
|
|
+//ZZ /* V after SUB
|
|
+//ZZ --> let res = argL - argR
|
|
+//ZZ in ((argL ^ argR) & (argL ^ res)) >> 31
|
|
+//ZZ --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
|
|
+//ZZ */
|
|
+//ZZ IRExpr* argL = cc_dep1;
|
|
+//ZZ IRExpr* argR = cc_dep2;
|
|
+//ZZ return
|
|
+//ZZ binop(Iop_Shr32,
|
|
+//ZZ binop(Iop_And32,
|
|
+//ZZ binop(Iop_Xor32, argL, argR),
|
|
+//ZZ binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
|
|
+//ZZ ),
|
|
+//ZZ mkU8(31)
|
|
+//ZZ );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ if (isU32(cc_op, ARMG_CC_OP_SBB)) {
|
|
+//ZZ /* This happens occasionally in softfloat code, eg __divdf3+140 */
|
|
+//ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
|
|
+//ZZ /* V after SBB
|
|
+//ZZ --> let res = argL - argR - (oldC ^ 1)
|
|
+//ZZ in (argL ^ argR) & (argL ^ res) & 1
|
|
+//ZZ */
|
|
+//ZZ return
|
|
+//ZZ binop(
|
|
+//ZZ Iop_And32,
|
|
+//ZZ binop(
|
|
+//ZZ Iop_And32,
|
|
+//ZZ // argL ^ argR
|
|
+//ZZ binop(Iop_Xor32, cc_dep1, cc_dep2),
|
|
+//ZZ // argL ^ (argL - argR - (oldC ^ 1))
|
|
+//ZZ binop(Iop_Xor32,
|
|
+//ZZ cc_dep1,
|
|
+//ZZ binop(Iop_Sub32,
|
|
+//ZZ binop(Iop_Sub32, cc_dep1, cc_dep2),
|
|
+//ZZ binop(Iop_Xor32, cc_ndep, mkU32(1)))
|
|
+//ZZ )
|
|
+//ZZ ),
|
|
+//ZZ mkU32(1)
|
|
+//ZZ );
|
|
+//ZZ }
|
|
+//ZZ
|
|
+//ZZ }
|
|
+
|
|
+# undef unop
|
|
+# undef binop
|
|
+# undef mkU64
|
|
+# undef mkU8
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+
|
|
+/*----------------------------------------------*/
|
|
+/*--- The exported fns .. ---*/
|
|
+/*----------------------------------------------*/
|
|
+
|
|
+//ZZ /* VISIBLE TO LIBVEX CLIENT */
|
|
+//ZZ #if 0
|
|
+//ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native,
|
|
+//ZZ /*OUT*/VexGuestARMState* vex_state )
|
|
+//ZZ {
|
|
+//ZZ vassert(0); // FIXME
|
|
+//ZZ
|
|
+//ZZ /* Mask out everything except N Z V C. */
|
|
+//ZZ flags_native
|
|
+//ZZ &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
|
|
+//ZZ
|
|
+//ZZ vex_state->guest_CC_OP = ARMG_CC_OP_COPY;
|
|
+//ZZ vex_state->guest_CC_DEP1 = flags_native;
|
|
+//ZZ vex_state->guest_CC_DEP2 = 0;
|
|
+//ZZ vex_state->guest_CC_NDEP = 0;
|
|
+//ZZ }
|
|
+//ZZ #endif
|
|
+
|
|
+/* VISIBLE TO LIBVEX CLIENT */
|
|
+ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state )
|
|
+{
|
|
+ ULong nzcv = 0;
|
|
+ // NZCV
|
|
+ nzcv |= arm64g_calculate_flags_nzcv(
|
|
+ vex_state->guest_CC_OP,
|
|
+ vex_state->guest_CC_DEP1,
|
|
+ vex_state->guest_CC_DEP2,
|
|
+ vex_state->guest_CC_NDEP
|
|
+ );
|
|
+ vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL));
|
|
+//ZZ // Q
|
|
+//ZZ if (vex_state->guest_QFLAG32 > 0)
|
|
+//ZZ cpsr |= (1 << 27);
|
|
+//ZZ // GE
|
|
+//ZZ if (vex_state->guest_GEFLAG0 > 0)
|
|
+//ZZ cpsr |= (1 << 16);
|
|
+//ZZ if (vex_state->guest_GEFLAG1 > 0)
|
|
+//ZZ cpsr |= (1 << 17);
|
|
+//ZZ if (vex_state->guest_GEFLAG2 > 0)
|
|
+//ZZ cpsr |= (1 << 18);
|
|
+//ZZ if (vex_state->guest_GEFLAG3 > 0)
|
|
+//ZZ cpsr |= (1 << 19);
|
|
+//ZZ // M
|
|
+//ZZ cpsr |= (1 << 4); // 0b10000 means user-mode
|
|
+//ZZ // J,T J (bit 24) is zero by initialisation above
|
|
+//ZZ // T we copy from R15T[0]
|
|
+//ZZ if (vex_state->guest_R15T & 1)
|
|
+//ZZ cpsr |= (1 << 5);
|
|
+//ZZ // ITSTATE we punt on for the time being. Could compute it
|
|
+//ZZ // if needed though.
|
|
+//ZZ // E, endianness, 0 (littleendian) from initialisation above
|
|
+//ZZ // A,I,F disable some async exceptions. Not sure about these.
|
|
+//ZZ // Leave as zero for the time being.
|
|
+ return nzcv;
|
|
+}
|
|
+
|
|
+/* VISIBLE TO LIBVEX CLIENT */
|
|
+void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state )
|
|
+{
|
|
+ vex_bzero(vex_state, sizeof(*vex_state));
|
|
+//ZZ vex_state->host_EvC_FAILADDR = 0;
|
|
+//ZZ vex_state->host_EvC_COUNTER = 0;
|
|
+//ZZ
|
|
+//ZZ vex_state->guest_R0 = 0;
|
|
+//ZZ vex_state->guest_R1 = 0;
|
|
+//ZZ vex_state->guest_R2 = 0;
|
|
+//ZZ vex_state->guest_R3 = 0;
|
|
+//ZZ vex_state->guest_R4 = 0;
|
|
+//ZZ vex_state->guest_R5 = 0;
|
|
+//ZZ vex_state->guest_R6 = 0;
|
|
+//ZZ vex_state->guest_R7 = 0;
|
|
+//ZZ vex_state->guest_R8 = 0;
|
|
+//ZZ vex_state->guest_R9 = 0;
|
|
+//ZZ vex_state->guest_R10 = 0;
|
|
+//ZZ vex_state->guest_R11 = 0;
|
|
+//ZZ vex_state->guest_R12 = 0;
|
|
+//ZZ vex_state->guest_R13 = 0;
|
|
+//ZZ vex_state->guest_R14 = 0;
|
|
+//ZZ vex_state->guest_R15T = 0; /* NB: implies ARM mode */
|
|
+//ZZ
|
|
+ vex_state->guest_CC_OP = ARM64G_CC_OP_COPY;
|
|
+//ZZ vex_state->guest_CC_DEP1 = 0;
|
|
+//ZZ vex_state->guest_CC_DEP2 = 0;
|
|
+//ZZ vex_state->guest_CC_NDEP = 0;
|
|
+//ZZ vex_state->guest_QFLAG32 = 0;
|
|
+//ZZ vex_state->guest_GEFLAG0 = 0;
|
|
+//ZZ vex_state->guest_GEFLAG1 = 0;
|
|
+//ZZ vex_state->guest_GEFLAG2 = 0;
|
|
+//ZZ vex_state->guest_GEFLAG3 = 0;
|
|
+//ZZ
|
|
+//ZZ vex_state->guest_EMNOTE = EmNote_NONE;
|
|
+//ZZ vex_state->guest_TISTART = 0;
|
|
+//ZZ vex_state->guest_TILEN = 0;
|
|
+//ZZ vex_state->guest_NRADDR = 0;
|
|
+//ZZ vex_state->guest_IP_AT_SYSCALL = 0;
|
|
+//ZZ
|
|
+//ZZ vex_state->guest_D0 = 0;
|
|
+//ZZ vex_state->guest_D1 = 0;
|
|
+//ZZ vex_state->guest_D2 = 0;
|
|
+//ZZ vex_state->guest_D3 = 0;
|
|
+//ZZ vex_state->guest_D4 = 0;
|
|
+//ZZ vex_state->guest_D5 = 0;
|
|
+//ZZ vex_state->guest_D6 = 0;
|
|
+//ZZ vex_state->guest_D7 = 0;
|
|
+//ZZ vex_state->guest_D8 = 0;
|
|
+//ZZ vex_state->guest_D9 = 0;
|
|
+//ZZ vex_state->guest_D10 = 0;
|
|
+//ZZ vex_state->guest_D11 = 0;
|
|
+//ZZ vex_state->guest_D12 = 0;
|
|
+//ZZ vex_state->guest_D13 = 0;
|
|
+//ZZ vex_state->guest_D14 = 0;
|
|
+//ZZ vex_state->guest_D15 = 0;
|
|
+//ZZ vex_state->guest_D16 = 0;
|
|
+//ZZ vex_state->guest_D17 = 0;
|
|
+//ZZ vex_state->guest_D18 = 0;
|
|
+//ZZ vex_state->guest_D19 = 0;
|
|
+//ZZ vex_state->guest_D20 = 0;
|
|
+//ZZ vex_state->guest_D21 = 0;
|
|
+//ZZ vex_state->guest_D22 = 0;
|
|
+//ZZ vex_state->guest_D23 = 0;
|
|
+//ZZ vex_state->guest_D24 = 0;
|
|
+//ZZ vex_state->guest_D25 = 0;
|
|
+//ZZ vex_state->guest_D26 = 0;
|
|
+//ZZ vex_state->guest_D27 = 0;
|
|
+//ZZ vex_state->guest_D28 = 0;
|
|
+//ZZ vex_state->guest_D29 = 0;
|
|
+//ZZ vex_state->guest_D30 = 0;
|
|
+//ZZ vex_state->guest_D31 = 0;
|
|
+//ZZ
|
|
+//ZZ /* ARM encoded; zero is the default as it happens (result flags
|
|
+//ZZ (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
|
|
+//ZZ all exns masked, all exn sticky bits cleared). */
|
|
+//ZZ vex_state->guest_FPSCR = 0;
|
|
+//ZZ
|
|
+//ZZ vex_state->guest_TPIDRURO = 0;
|
|
+//ZZ
|
|
+//ZZ /* Not in a Thumb IT block. */
|
|
+//ZZ vex_state->guest_ITSTATE = 0;
|
|
+//ZZ
|
|
+//ZZ vex_state->padding1 = 0;
|
|
+//ZZ vex_state->padding2 = 0;
|
|
+//ZZ vex_state->padding3 = 0;
|
|
+//ZZ vex_state->padding4 = 0;
|
|
+//ZZ vex_state->padding5 = 0;
|
|
+}
|
|
+
|
|
+
|
|
+/*-----------------------------------------------------------*/
|
|
+/*--- Describing the arm64 guest state, for the benefit ---*/
|
|
+/*--- of iropt and instrumenters. ---*/
|
|
+/*-----------------------------------------------------------*/
|
|
+
|
|
+/* Figure out if any part of the guest state contained in minoff
|
|
+ .. maxoff requires precise memory exceptions. If in doubt return
|
|
+ True (but this generates significantly slower code).
|
|
+
|
|
+ We enforce precise exns for guest SP, PC, 29(FP), 30(LR).
|
|
+ That might be overkill (for 29 and 30); I don't know.
|
|
+*/
|
|
+Bool guest_arm64_state_requires_precise_mem_exns ( Int minoff,
|
|
+ Int maxoff)
|
|
+{
|
|
+ Int sp_min = offsetof(VexGuestARM64State, guest_SP);
|
|
+ Int sp_max = sp_min + 8 - 1;
|
|
+ Int pc_min = offsetof(VexGuestARM64State, guest_PC);
|
|
+ Int pc_max = pc_min + 8 - 1;
|
|
+
|
|
+ if (maxoff < sp_min || minoff > sp_max) {
|
|
+ /* no overlap with sp */
|
|
+ if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess)
|
|
+ return False; // We only need to check stack pointer.
|
|
+ } else {
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ if (maxoff < pc_min || minoff > pc_max) {
|
|
+ /* no overlap with pc */
|
|
+ } else {
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* Guessing that we need PX for FP, but I don't really know. */
|
|
+ Int x29_min = offsetof(VexGuestARM64State, guest_X29);
|
|
+ Int x29_max = x29_min + 8 - 1;
|
|
+
|
|
+ if (maxoff < x29_min || minoff > x29_max) {
|
|
+ /* no overlap with x29 */
|
|
+ } else {
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ /* Guessing that we need PX for LR, but I don't really know. */
|
|
+ Int x30_min = offsetof(VexGuestARM64State, guest_X30);
|
|
+ Int x30_max = x30_min + 8 - 1;
|
|
+
|
|
+ if (maxoff < x30_min || minoff > x30_max) {
|
|
+ /* no overlap with x30 */
|
|
+ } else {
|
|
+ return True;
|
|
+ }
|
|
+
|
|
+ return False;
|
|
+}
|
|
+
|
|
+
|
|
+#define ALWAYSDEFD(field) \
|
|
+ { offsetof(VexGuestARM64State, field), \
|
|
+ (sizeof ((VexGuestARM64State*)0)->field) }
|
|
+VexGuestLayout
|
|
+ arm64Guest_layout
|
|
+ = {
|
|
+ /* Total size of the guest state, in bytes. */
|
|
+ .total_sizeB = sizeof(VexGuestARM64State),
|
|
+
|
|
+ /* Describe the stack pointer. */
|
|
+ .offset_SP = offsetof(VexGuestARM64State,guest_SP),
|
|
+ .sizeof_SP = 8,
|
|
+
|
|
+ /* Describe the instruction pointer. */
|
|
+ .offset_IP = offsetof(VexGuestARM64State,guest_PC),
|
|
+ .sizeof_IP = 8,
|
|
+
|
|
+ /* Describe any sections to be regarded by Memcheck as
|
|
+ 'always-defined'. */
|
|
+ .n_alwaysDefd = 10,
|
|
+
|
|
+ /* flags thunk: OP is always defd, whereas DEP1 and DEP2
|
|
+ have to be tracked. See detailed comment in gdefs.h on
|
|
+ meaning of thunk fields. */
|
|
+ .alwaysDefd
|
|
+ = { /* 0 */ ALWAYSDEFD(guest_PC),
|
|
+ /* 1 */ ALWAYSDEFD(guest_CC_OP),
|
|
+ /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
|
|
+ /* 3 */ ALWAYSDEFD(guest_EMNOTE),
|
|
+ /* 4 */ ALWAYSDEFD(guest_TISTART),
|
|
+ /* 5 */ ALWAYSDEFD(guest_TILEN),
|
|
+ /* 6 */ ALWAYSDEFD(guest_NRADDR),
|
|
+ /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
|
|
+ /* 8 */ ALWAYSDEFD(guest_FPCR),
|
|
+ /* 9 */ ALWAYSDEFD(guest_FPSR)
|
|
+ }
|
|
+ };
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- end guest_arm64_helpers.c ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
Index: VEX/priv/guest_s390_helpers.c
|
|
===================================================================
|
|
--- VEX/priv/guest_s390_helpers.c.orig
|
|
+++ VEX/priv/guest_s390_helpers.c
|
|
@@ -154,11 +154,11 @@ LibVEX_GuestS390X_initialise(VexGuestS39
|
|
Bool
|
|
guest_s390x_state_requires_precise_mem_exns(Int minoff, Int maxoff)
|
|
{
|
|
- Int lr_min = S390X_GUEST_OFFSET(guest_LR);
|
|
+ Int lr_min = S390X_GUEST_OFFSET(guest_s390_LR);
|
|
Int lr_max = lr_min + 8 - 1;
|
|
- Int sp_min = S390X_GUEST_OFFSET(guest_SP);
|
|
+ Int sp_min = S390X_GUEST_OFFSET(guest_s390_SP);
|
|
Int sp_max = sp_min + 8 - 1;
|
|
- Int fp_min = S390X_GUEST_OFFSET(guest_FP);
|
|
+ Int fp_min = S390X_GUEST_OFFSET(guest_s390_FP);
|
|
Int fp_max = fp_min + 8 - 1;
|
|
Int ia_min = S390X_GUEST_OFFSET(guest_IA);
|
|
Int ia_max = ia_min + 8 - 1;
|
|
@@ -203,11 +203,11 @@ VexGuestLayout s390xGuest_layout = {
|
|
.total_sizeB = sizeof(VexGuestS390XState),
|
|
|
|
/* Describe the stack pointer. */
|
|
- .offset_SP = S390X_GUEST_OFFSET(guest_SP),
|
|
+ .offset_SP = S390X_GUEST_OFFSET(guest_s390_SP),
|
|
.sizeof_SP = 8,
|
|
|
|
/* Describe the frame pointer. */
|
|
- .offset_FP = S390X_GUEST_OFFSET(guest_FP),
|
|
+ .offset_FP = S390X_GUEST_OFFSET(guest_s390_FP),
|
|
.sizeof_FP = 8,
|
|
|
|
/* Describe the instruction pointer. */
|
|
Index: VEX/priv/main_main.c
|
|
===================================================================
|
|
--- VEX/priv/main_main.c.orig
|
|
+++ VEX/priv/main_main.c
|
|
@@ -38,6 +38,7 @@
|
|
#include "libvex_guest_x86.h"
|
|
#include "libvex_guest_amd64.h"
|
|
#include "libvex_guest_arm.h"
|
|
+#include "libvex_guest_arm64.h"
|
|
#include "libvex_guest_ppc32.h"
|
|
#include "libvex_guest_ppc64.h"
|
|
#include "libvex_guest_s390x.h"
|
|
@@ -53,6 +54,7 @@
|
|
#include "host_amd64_defs.h"
|
|
#include "host_ppc_defs.h"
|
|
#include "host_arm_defs.h"
|
|
+#include "host_arm64_defs.h"
|
|
#include "host_s390_defs.h"
|
|
#include "host_mips_defs.h"
|
|
|
|
@@ -60,6 +62,7 @@
|
|
#include "guest_x86_defs.h"
|
|
#include "guest_amd64_defs.h"
|
|
#include "guest_arm_defs.h"
|
|
+#include "guest_arm64_defs.h"
|
|
#include "guest_ppc_defs.h"
|
|
#include "guest_s390_defs.h"
|
|
#include "guest_mips_defs.h"
|
|
@@ -417,6 +420,30 @@ VexTranslateResult LibVEX_Translate ( Ve
|
|
vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps));
|
|
break;
|
|
|
|
+ case VexArchARM64:
|
|
+ mode64 = True;
|
|
+ getAllocableRegs_ARM64 ( &n_available_real_regs,
|
|
+ &available_real_regs );
|
|
+ isMove = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_ARM64Instr;
|
|
+ getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool))
|
|
+ getRegUsage_ARM64Instr;
|
|
+ mapRegs = (void(*)(HRegRemap*,HInstr*, Bool))
|
|
+ mapRegs_ARM64Instr;
|
|
+ genSpill = (void(*)(HInstr**,HInstr**,HReg,Int,Bool))
|
|
+ genSpill_ARM64;
|
|
+ genReload = (void(*)(HInstr**,HInstr**,HReg,Int,Bool))
|
|
+ genReload_ARM64;
|
|
+ ppInstr = (void(*)(HInstr*, Bool)) ppARM64Instr;
|
|
+ ppReg = (void(*)(HReg)) ppHRegARM64;
|
|
+ iselSB = iselSB_ARM64;
|
|
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
|
|
+ void*,void*,void*,void*))
|
|
+ emit_ARM64Instr;
|
|
+ host_is_bigendian = False;
|
|
+ host_word_type = Ity_I64;
|
|
+ vassert(are_valid_hwcaps(VexArchARM64, vta->archinfo_host.hwcaps));
|
|
+ break;
|
|
+
|
|
case VexArchMIPS32:
|
|
mode64 = False;
|
|
getAllocableRegs_MIPS ( &n_available_real_regs,
|
|
@@ -593,6 +620,26 @@ VexTranslateResult LibVEX_Translate ( Ve
|
|
vassert(sizeof( ((VexGuestARMState*)0)->guest_NRADDR ) == 4);
|
|
break;
|
|
|
|
+ case VexArchARM64:
|
|
+ preciseMemExnsFn = guest_arm64_state_requires_precise_mem_exns;
|
|
+ disInstrFn = disInstr_ARM64;
|
|
+ specHelper = guest_arm64_spechelper;
|
|
+ guest_sizeB = sizeof(VexGuestARM64State);
|
|
+ guest_word_type = Ity_I64;
|
|
+ guest_layout = &arm64Guest_layout;
|
|
+ offB_TISTART = offsetof(VexGuestARM64State,guest_TISTART);
|
|
+ offB_TILEN = offsetof(VexGuestARM64State,guest_TILEN);
|
|
+ offB_GUEST_IP = offsetof(VexGuestARM64State,guest_PC);
|
|
+ szB_GUEST_IP = sizeof( ((VexGuestARM64State*)0)->guest_PC );
|
|
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER);
|
|
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR);
|
|
+ vassert(are_valid_hwcaps(VexArchARM64, vta->archinfo_guest.hwcaps));
|
|
+ vassert(0 == sizeof(VexGuestARM64State) % 16);
|
|
+ vassert(sizeof( ((VexGuestARM64State*)0)->guest_TISTART) == 8);
|
|
+ vassert(sizeof( ((VexGuestARM64State*)0)->guest_TILEN ) == 8);
|
|
+ vassert(sizeof( ((VexGuestARM64State*)0)->guest_NRADDR ) == 8);
|
|
+ break;
|
|
+
|
|
case VexArchMIPS32:
|
|
preciseMemExnsFn = guest_mips32_state_requires_precise_mem_exns;
|
|
disInstrFn = disInstr_MIPS;
|
|
@@ -958,6 +1005,8 @@ VexInvalRange LibVEX_Chain ( VexArch arc
|
|
chainXDirect = chainXDirect_AMD64; break;
|
|
case VexArchARM:
|
|
chainXDirect = chainXDirect_ARM; break;
|
|
+ case VexArchARM64:
|
|
+ chainXDirect = chainXDirect_ARM64; break;
|
|
case VexArchS390X:
|
|
chainXDirect = chainXDirect_S390; break;
|
|
case VexArchPPC32:
|
|
@@ -1038,6 +1087,8 @@ Int LibVEX_evCheckSzB ( VexArch arch_hos
|
|
cached = evCheckSzB_AMD64(); break;
|
|
case VexArchARM:
|
|
cached = evCheckSzB_ARM(); break;
|
|
+ case VexArchARM64:
|
|
+ cached = evCheckSzB_ARM64(); break;
|
|
case VexArchS390X:
|
|
cached = evCheckSzB_S390(); break;
|
|
case VexArchPPC32:
|
|
@@ -1152,6 +1203,7 @@ const HChar* LibVEX_ppVexArch ( VexArch
|
|
case VexArchX86: return "X86";
|
|
case VexArchAMD64: return "AMD64";
|
|
case VexArchARM: return "ARM";
|
|
+ case VexArchARM64: return "ARM64";
|
|
case VexArchPPC32: return "PPC32";
|
|
case VexArchPPC64: return "PPC64";
|
|
case VexArchS390X: return "S390X";
|
|
@@ -1381,6 +1433,15 @@ static const HChar* show_hwcaps_arm ( UI
|
|
return NULL;
|
|
}
|
|
|
|
+static const HChar* show_hwcaps_arm64 ( UInt hwcaps )
|
|
+{
|
|
+ /* Since there are no variants, just insist that hwcaps is zero,
|
|
+ and declare it invalid otherwise. */
|
|
+ if (hwcaps == 0)
|
|
+ return "baseline";
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
static const HChar* show_hwcaps_s390x ( UInt hwcaps )
|
|
{
|
|
static const HChar prefix[] = "s390x";
|
|
@@ -1472,6 +1533,7 @@ static const HChar* show_hwcaps ( VexArc
|
|
case VexArchPPC32: return show_hwcaps_ppc32(hwcaps);
|
|
case VexArchPPC64: return show_hwcaps_ppc64(hwcaps);
|
|
case VexArchARM: return show_hwcaps_arm(hwcaps);
|
|
+ case VexArchARM64: return show_hwcaps_arm64(hwcaps);
|
|
case VexArchS390X: return show_hwcaps_s390x(hwcaps);
|
|
case VexArchMIPS32: return show_hwcaps_mips32(hwcaps);
|
|
case VexArchMIPS64: return show_hwcaps_mips64(hwcaps);
|
|
Index: VEX/priv/guest_arm64_defs.h
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ VEX/priv/guest_arm64_defs.h
|
|
@@ -0,0 +1,244 @@
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- begin guest_arm64_defs.h ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+/*
|
|
+ This file is part of Valgrind, a dynamic binary instrumentation
|
|
+ framework.
|
|
+
|
|
+ Copyright (C) 2013-2013 OpenWorks
|
|
+ info@open-works.net
|
|
+
|
|
+ This program is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU General Public License as
|
|
+ published by the Free Software Foundation; either version 2 of the
|
|
+ License, or (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful, but
|
|
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ 02110-1301, USA.
|
|
+
|
|
+ The GNU General Public License is contained in the file COPYING.
|
|
+*/
|
|
+
|
|
+#ifndef __VEX_GUEST_ARM64_DEFS_H
|
|
+#define __VEX_GUEST_ARM64_DEFS_H
|
|
+
|
|
+#include "libvex_basictypes.h"
|
|
+#include "guest_generic_bb_to_IR.h" // DisResult
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- arm64 to IR conversion ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Convert one ARM64 insn to IR. See the type DisOneInstrFn in
|
|
+ bb_to_IR.h. */
|
|
+extern
|
|
+DisResult disInstr_ARM64 ( IRSB* irbb,
|
|
+ Bool (*resteerOkFn) ( void*, Addr64 ),
|
|
+ Bool resteerCisOk,
|
|
+ void* callback_opaque,
|
|
+ UChar* guest_code,
|
|
+ Long delta,
|
|
+ Addr64 guest_IP,
|
|
+ VexArch guest_arch,
|
|
+ VexArchInfo* archinfo,
|
|
+ VexAbiInfo* abiinfo,
|
|
+ Bool host_bigendian,
|
|
+ Bool sigill_diag );
|
|
+
|
|
+/* Used by the optimiser to specialise calls to helpers. */
|
|
+extern
|
|
+IRExpr* guest_arm64_spechelper ( const HChar* function_name,
|
|
+ IRExpr** args,
|
|
+ IRStmt** precedingStmts,
|
|
+ Int n_precedingStmts );
|
|
+
|
|
+/* Describes to the optimiser which parts of the guest state require
|
|
+ precise memory exceptions. This is logically part of the guest
|
|
+ state description. */
|
|
+extern
|
|
+Bool guest_arm64_state_requires_precise_mem_exns ( Int, Int );
|
|
+
|
|
+extern
|
|
+VexGuestLayout arm64Guest_layout;
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- arm64 guest helpers ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* --- CLEAN HELPERS --- */
|
|
+
|
|
+/* Calculate NZCV from the supplied thunk components, in the positions
|
|
+ they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
|
|
+ Returned bits 63:32 and 27:0 are zero. */
|
|
+extern
|
|
+ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 );
|
|
+
|
|
+//ZZ /* Calculate the C flag from the thunk components, in the lowest bit
|
|
+//ZZ of the word (bit 0). */
|
|
+//ZZ extern
|
|
+//ZZ UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
|
|
+//ZZ UInt cc_dep2, UInt cc_dep3 );
|
|
+//ZZ
|
|
+//ZZ /* Calculate the V flag from the thunk components, in the lowest bit
|
|
+//ZZ of the word (bit 0). */
|
|
+//ZZ extern
|
|
+//ZZ UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
|
|
+//ZZ UInt cc_dep2, UInt cc_dep3 );
|
|
+//ZZ
|
|
+/* Calculate the specified condition from the thunk components, in the
|
|
+ lowest bit of the word (bit 0). */
|
|
+extern
|
|
+ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
|
|
+ ULong cond_n_op ,
|
|
+ ULong cc_dep1,
|
|
+ ULong cc_dep2, ULong cc_dep3 );
|
|
+
|
|
+//ZZ /* Calculate the QC flag from the thunk components, in the lowest bit
|
|
+//ZZ of the word (bit 0). */
|
|
+//ZZ extern
|
|
+//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
|
|
+//ZZ UInt resR1, UInt resR2 );
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------*/
|
|
+/*--- Condition code stuff ---*/
|
|
+/*---------------------------------------------------------*/
|
|
+
|
|
+/* Flag masks. Defines positions of flag bits in the NZCV
|
|
+ register. */
|
|
+#define ARM64G_CC_SHIFT_N 31
|
|
+#define ARM64G_CC_SHIFT_Z 30
|
|
+#define ARM64G_CC_SHIFT_C 29
|
|
+#define ARM64G_CC_SHIFT_V 28
|
|
+//ZZ #define ARMG_CC_SHIFT_Q 27
|
|
+//ZZ
|
|
+//ZZ #define ARMG_CC_MASK_N (1 << ARMG_CC_SHIFT_N)
|
|
+//ZZ #define ARMG_CC_MASK_Z (1 << ARMG_CC_SHIFT_Z)
|
|
+//ZZ #define ARMG_CC_MASK_C (1 << ARMG_CC_SHIFT_C)
|
|
+//ZZ #define ARMG_CC_MASK_V (1 << ARMG_CC_SHIFT_V)
|
|
+//ZZ #define ARMG_CC_MASK_Q (1 << ARMG_CC_SHIFT_Q)
|
|
+
|
|
+/* Flag thunk descriptors. A four-word thunk is used to record
|
|
+ details of the most recent flag-setting operation, so NZCV can
|
|
+ be computed later if needed.
|
|
+
|
|
+ The four words are:
|
|
+
|
|
+ CC_OP, which describes the operation.
|
|
+
|
|
+ CC_DEP1, CC_DEP2, CC_NDEP. These are arguments to the
|
|
+ operation. We want to set up the mcx_masks in flag helper calls
|
|
+ involving these fields so that Memcheck "believes" that the
|
|
+ resulting flags are data-dependent on both CC_DEP1 and
|
|
+ CC_DEP2. Hence the name DEP.
|
|
+
|
|
+ When building the thunk, it is always necessary to write words into
|
|
+ CC_DEP1/2 and NDEP, even if those args are not used given the CC_OP
|
|
+ field. This is important because otherwise Memcheck could give
|
|
+ false positives as it does not understand the relationship between
|
|
+ the CC_OP field and CC_DEP1/2/NDEP, and so believes that the
|
|
+ definedness of the stored flags always depends on all 3 DEP values.
|
|
+
|
|
+ A summary of the field usages is:
|
|
+
|
|
+ OP DEP1 DEP2 DEP3
|
|
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
+
|
|
+ OP_COPY curr_NZCV:28x0 unused unused
|
|
+ OP_ADD32 argL argR unused
|
|
+ OP_ADD64 argL argR unused
|
|
+ OP_SUB32 argL argR unused
|
|
+ OP_SUB64 argL argR unused
|
|
+//ZZ OP_ADC argL argR 31x0:old_C
|
|
+//ZZ OP_SBB argL argR 31x0:old_C
|
|
+ OP_LOGIC32 result unused unused
|
|
+ OP_LOGIC64 result unused unused
|
|
+//ZZ OP_MUL result unused 30x0:old_C:old_V
|
|
+//ZZ OP_MULL resLO32 resHI32 30x0:old_C:old_V
|
|
+//ZZ */
|
|
+
|
|
+enum {
|
|
+ ARM64G_CC_OP_COPY=0, /* DEP1 = NZCV in 31:28, DEP2 = 0, DEP3 = 0
|
|
+ just copy DEP1 to output */
|
|
+
|
|
+ ARM64G_CC_OP_ADD32, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
|
|
+ DEP3 = 0 */
|
|
+
|
|
+ ARM64G_CC_OP_ADD64, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
|
|
+ DEP3 = 0 */
|
|
+
|
|
+ ARM64G_CC_OP_SUB32, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
|
|
+ DEP3 = 0 */
|
|
+
|
|
+ ARM64G_CC_OP_SUB64, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
|
|
+ DEP3 = 0 */
|
|
+
|
|
+//ZZ ARMG_CC_OP_ADC, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op),
|
|
+//ZZ DEP3 = oldC (in LSB) */
|
|
+//ZZ
|
|
+//ZZ ARMG_CC_OP_SBB, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op),
|
|
+//ZZ DEP3 = oldC (in LSB) */
|
|
+
|
|
+ ARM64G_CC_OP_LOGIC32, /* DEP1 = result, DEP2 = 0, DEP3 = 0 */
|
|
+ ARM64G_CC_OP_LOGIC64, /* DEP1 = result, DEP2 = 0, DEP3 = 0 */
|
|
+
|
|
+//ZZ ARMG_CC_OP_MUL, /* DEP1 = result, DEP2 = 0, DEP3 = oldC:old_V
|
|
+//ZZ (in bits 1:0) */
|
|
+//ZZ
|
|
+//ZZ ARMG_CC_OP_MULL, /* DEP1 = resLO32, DEP2 = resHI32, DEP3 = oldC:old_V
|
|
+//ZZ (in bits 1:0) */
|
|
+
|
|
+ ARM64G_CC_OP_NUMBER
|
|
+};
|
|
+
|
|
+/* XXXX because of the calling conventions for
|
|
+ arm64g_calculate_condition, all these OP values MUST be in the range
|
|
+ 0 .. 15 only (viz, 4-bits). */
|
|
+
|
|
+
|
|
+
|
|
+/* Defines conditions which we can ask for */
|
|
+
|
|
+typedef
|
|
+ enum {
|
|
+ ARM64CondEQ = 0, /* equal : Z=1 */
|
|
+ ARM64CondNE = 1, /* not equal : Z=0 */
|
|
+
|
|
+ ARM64CondCS = 2, /* >=u (higher or same) (aka HS) : C=1 */
|
|
+ ARM64CondCC = 3, /* <u (lower) (aka LO) : C=0 */
|
|
+
|
|
+ ARM64CondMI = 4, /* minus (negative) : N=1 */
|
|
+ ARM64CondPL = 5, /* plus (zero or +ve) : N=0 */
|
|
+
|
|
+ ARM64CondVS = 6, /* overflow : V=1 */
|
|
+ ARM64CondVC = 7, /* no overflow : V=0 */
|
|
+
|
|
+ ARM64CondHI = 8, /* >u (higher) : C=1 && Z=0 */
|
|
+ ARM64CondLS = 9, /* <=u (lower or same) : C=0 || Z=1 */
|
|
+
|
|
+ ARM64CondGE = 10, /* >=s (signed greater or equal) : N=V */
|
|
+ ARM64CondLT = 11, /* <s (signed less than) : N!=V */
|
|
+
|
|
+ ARM64CondGT = 12, /* >s (signed greater) : Z=0 && N=V */
|
|
+ ARM64CondLE = 13, /* <=s (signed less or equal) : Z=1 || N!=V */
|
|
+
|
|
+ ARM64CondAL = 14, /* always (unconditional) : 1 */
|
|
+ ARM64CondNV = 15 /* always (unconditional) : 1 */
|
|
+ }
|
|
+ ARM64Condcode;
|
|
+
|
|
+#endif /* ndef __VEX_GUEST_ARM64_DEFS_H */
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- end guest_arm64_defs.h ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
Index: VEX/pub/libvex.h
|
|
===================================================================
|
|
--- VEX/pub/libvex.h.orig
|
|
+++ VEX/pub/libvex.h
|
|
@@ -55,6 +55,7 @@ typedef
|
|
VexArchX86,
|
|
VexArchAMD64,
|
|
VexArchARM,
|
|
+ VexArchARM64,
|
|
VexArchPPC32,
|
|
VexArchPPC64,
|
|
VexArchS390X,
|
|
@@ -172,6 +173,9 @@ typedef
|
|
/* Get an ARM architecure level from HWCAPS */
|
|
#define VEX_ARM_ARCHLEVEL(x) ((x) & 0x3f)
|
|
|
|
+/* ARM64: baseline capability is AArch64 v8. */
|
|
+/* (no definitions since no variants so far) */
|
|
+
|
|
/* MIPS baseline capability */
|
|
/* Assigned Company values for bits 23:16 of the PRId Register
|
|
(CP0 register 15, select 0). As of the MIPS32 and MIPS64 specs from
|
|
@@ -213,6 +217,7 @@ typedef
|
|
extern const HChar* LibVEX_ppVexArch ( VexArch );
|
|
extern const HChar* LibVEX_ppVexHwCaps ( VexArch, UInt );
|
|
|
|
+
|
|
/* The various kinds of caches */
|
|
typedef enum {
|
|
DATA_CACHE,
|
|
@@ -907,6 +912,14 @@ extern void LibVEX_InitIRI ( const IRICB
|
|
~~~~~
|
|
Same as ppc32.
|
|
|
|
+ arm32
|
|
+ ~~~~~
|
|
+ r8 is GSP.
|
|
+
|
|
+ arm64
|
|
+ ~~~~~
|
|
+ r21 is GSP.
|
|
+
|
|
ALL GUEST ARCHITECTURES
|
|
~~~~~~~~~~~~~~~~~~~~~~~
|
|
The guest state must contain two pseudo-registers, guest_TISTART
|
|
Index: VEX/pub/libvex_guest_arm64.h
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ VEX/pub/libvex_guest_arm64.h
|
|
@@ -0,0 +1,190 @@
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- begin libvex_guest_arm64.h ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+/*
|
|
+ This file is part of Valgrind, a dynamic binary instrumentation
|
|
+ framework.
|
|
+
|
|
+ Copyright (C) 2013-2013 OpenWorks
|
|
+ info@open-works.net
|
|
+
|
|
+ This program is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU General Public License as
|
|
+ published by the Free Software Foundation; either version 2 of the
|
|
+ License, or (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful, but
|
|
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ 02110-1301, USA.
|
|
+
|
|
+ The GNU General Public License is contained in the file COPYING.
|
|
+*/
|
|
+
|
|
+#ifndef __LIBVEX_PUB_GUEST_ARM64_H
|
|
+#define __LIBVEX_PUB_GUEST_ARM64_H
|
|
+
|
|
+#include "libvex_basictypes.h"
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- Vex's representation of the ARM64 CPU state. ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+typedef
|
|
+ struct {
|
|
+ /* Event check fail addr and counter. */
|
|
+ /* 0 */ ULong host_EvC_FAILADDR;
|
|
+ /* 8 */ UInt host_EvC_COUNTER;
|
|
+ /* 12 */ UInt pad0;
|
|
+ /* 16 */
|
|
+ ULong guest_X0;
|
|
+ ULong guest_X1;
|
|
+ ULong guest_X2;
|
|
+ ULong guest_X3;
|
|
+ ULong guest_X4;
|
|
+ ULong guest_X5;
|
|
+ ULong guest_X6;
|
|
+ ULong guest_X7;
|
|
+ ULong guest_X8;
|
|
+ ULong guest_X9;
|
|
+ ULong guest_X10;
|
|
+ ULong guest_X11;
|
|
+ ULong guest_X12;
|
|
+ ULong guest_X13;
|
|
+ ULong guest_X14;
|
|
+ ULong guest_X15;
|
|
+ ULong guest_X16;
|
|
+ ULong guest_X17;
|
|
+ ULong guest_X18;
|
|
+ ULong guest_X19;
|
|
+ ULong guest_X20;
|
|
+ ULong guest_X21;
|
|
+ ULong guest_X22;
|
|
+ ULong guest_X23;
|
|
+ ULong guest_X24;
|
|
+ ULong guest_X25;
|
|
+ ULong guest_X26;
|
|
+ ULong guest_X27;
|
|
+ ULong guest_X28;
|
|
+ ULong guest_X29;
|
|
+ ULong guest_X30; /* link register */
|
|
+ ULong guest_SP;
|
|
+ ULong guest_PC;
|
|
+
|
|
+ /* 4-word thunk used to calculate N(sign) Z(zero) C(carry,
|
|
+ unsigned overflow) and V(signed overflow) flags. */
|
|
+ ULong guest_CC_OP;
|
|
+ ULong guest_CC_DEP1;
|
|
+ ULong guest_CC_DEP2;
|
|
+ ULong guest_CC_NDEP;
|
|
+
|
|
+ /* User-space thread register? */
|
|
+ ULong guest_TPIDR_EL0;
|
|
+
|
|
+ /* FP/SIMD state */
|
|
+ U128 guest_Q0;
|
|
+ U128 guest_Q1;
|
|
+ U128 guest_Q2;
|
|
+ U128 guest_Q3;
|
|
+ U128 guest_Q4;
|
|
+ U128 guest_Q5;
|
|
+ U128 guest_Q6;
|
|
+ U128 guest_Q7;
|
|
+ U128 guest_Q8;
|
|
+ U128 guest_Q9;
|
|
+ U128 guest_Q10;
|
|
+ U128 guest_Q11;
|
|
+ U128 guest_Q12;
|
|
+ U128 guest_Q13;
|
|
+ U128 guest_Q14;
|
|
+ U128 guest_Q15;
|
|
+ U128 guest_Q16;
|
|
+ U128 guest_Q17;
|
|
+ U128 guest_Q18;
|
|
+ U128 guest_Q19;
|
|
+ U128 guest_Q20;
|
|
+ U128 guest_Q21;
|
|
+ U128 guest_Q22;
|
|
+ U128 guest_Q23;
|
|
+ U128 guest_Q24;
|
|
+ U128 guest_Q25;
|
|
+ U128 guest_Q26;
|
|
+ U128 guest_Q27;
|
|
+ U128 guest_Q28;
|
|
+ U128 guest_Q29;
|
|
+ U128 guest_Q30;
|
|
+ U128 guest_Q31;
|
|
+
|
|
+ /* Various pseudo-regs mandated by Vex or Valgrind. */
|
|
+ /* Emulation notes */
|
|
+ UInt guest_EMNOTE;
|
|
+
|
|
+ /* For clflush: record start and length of area to invalidate */
|
|
+ ULong guest_TISTART;
|
|
+ ULong guest_TILEN;
|
|
+
|
|
+ /* Used to record the unredirected guest address at the start of
|
|
+ a translation whose start has been redirected. By reading
|
|
+ this pseudo-register shortly afterwards, the translation can
|
|
+ find out what the corresponding no-redirection address was.
|
|
+ Note, this is only set for wrap-style redirects, not for
|
|
+ replace-style ones. */
|
|
+ ULong guest_NRADDR;
|
|
+
|
|
+ /* Needed for Darwin (but mandated for all guest architectures):
|
|
+ program counter at the last syscall insn (int 0x80/81/82,
|
|
+ sysenter, syscall, svc). Used when backing up to restart a
|
|
+ syscall that has been interrupted by a signal. */
|
|
+ ULong guest_IP_AT_SYSCALL;
|
|
+
|
|
+ /* The complete FPCR. Default value seems to be zero. We
|
|
+ ignore all bits except 23 and 22, which are the rounding
|
|
+ mode. The guest is unconstrained in what values it can write
|
|
+ to and read from this register, but the emulation only takes
|
|
+ note of bits 23 and 22. */
|
|
+ UInt guest_FPCR;
|
|
+
|
|
+ /* The complete FPSR. As with FPCR, the guest may write and
|
|
+ read any values here, and the emulation ignores it, with the
|
|
+ exception of bit 27 (QC, the sticky saturation bit) which
|
|
+ does get set when required. */
|
|
+ UInt guest_FPSR;
|
|
+
|
|
+ /* Padding to make it have a 32-aligned size */
|
|
+ UInt pad_end_0;
|
|
+ ULong pad_end_1;
|
|
+ }
|
|
+ VexGuestARM64State;
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- Utility functions for ARM64 guest stuff. ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
+
|
|
+/* ALL THE FOLLOWING ARE VISIBLE TO LIBRARY CLIENT */
|
|
+
|
|
+/* Initialise all guest ARM64 state. */
|
|
+
|
|
+extern
|
|
+void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state );
|
|
+
|
|
+/* Calculate the ARM64 flag state from the saved data, in the format
|
|
+ 32x0:n:z:c:v:28x0. */
|
|
+extern
|
|
+ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/
|
|
+ const VexGuestARM64State* vex_state );
|
|
+
|
|
+#endif /* ndef __LIBVEX_PUB_GUEST_ARM64_H */
|
|
+
|
|
+
|
|
+/*---------------------------------------------------------------*/
|
|
+/*--- end libvex_guest_arm64.h ---*/
|
|
+/*---------------------------------------------------------------*/
|
|
Index: VEX/pub/libvex_basictypes.h
|
|
===================================================================
|
|
--- VEX/pub/libvex_basictypes.h.orig
|
|
+++ VEX/pub/libvex_basictypes.h
|
|
@@ -192,15 +192,11 @@ typedef unsigned long HWord;
|
|
# define VEX_HOST_WORDSIZE 4
|
|
# define VEX_REGPARM(_n) /* */
|
|
|
|
-#elif defined(__arm__)
|
|
+#elif defined(__arm__) && !defined(__aarch64__)
|
|
# define VEX_HOST_WORDSIZE 4
|
|
# define VEX_REGPARM(_n) /* */
|
|
|
|
-#elif defined(_AIX) && !defined(__64BIT__)
|
|
-# define VEX_HOST_WORDSIZE 4
|
|
-# define VEX_REGPARM(_n) /* */
|
|
-
|
|
-#elif defined(_AIX) && defined(__64BIT__)
|
|
+#elif defined(__aarch64__) && !defined(__arm__)
|
|
# define VEX_HOST_WORDSIZE 8
|
|
# define VEX_REGPARM(_n) /* */
|
|
|
|
@@ -208,12 +204,12 @@ typedef unsigned long HWord;
|
|
# define VEX_HOST_WORDSIZE 8
|
|
# define VEX_REGPARM(_n) /* */
|
|
|
|
-#elif defined(__mips__)
|
|
-#if (__mips==64)
|
|
+#elif defined(__mips__) && (__mips == 64)
|
|
# define VEX_HOST_WORDSIZE 8
|
|
-#else
|
|
+# define VEX_REGPARM(_n) /* */
|
|
+
|
|
+#elif defined(__mips__) && (__mips != 64)
|
|
# define VEX_HOST_WORDSIZE 4
|
|
-#endif
|
|
# define VEX_REGPARM(_n) /* */
|
|
|
|
#else
|
|
Index: VEX/pub/libvex_ir.h
|
|
===================================================================
|
|
--- VEX/pub/libvex_ir.h.orig
|
|
+++ VEX/pub/libvex_ir.h
|
|
@@ -1691,7 +1691,7 @@ typedef
|
|
Irrm_PosINF = 2, // Round to positive infinity
|
|
Irrm_ZERO = 3, // Round toward zero
|
|
Irrm_NEAREST_TIE_AWAY_0 = 4, // Round to nearest, ties away from 0
|
|
- Irrm_PREPARE_SHORTER = 5, // Round to prepare for storter
|
|
+ Irrm_PREPARE_SHORTER = 5, // Round to prepare for shorter
|
|
// precision
|
|
Irrm_AWAY_FROM_ZERO = 6, // Round to away from 0
|
|
Irrm_NEAREST_TIE_TOWARD_0 = 7 // Round to nearest, ties towards 0
|
|
Index: VEX/pub/libvex_guest_s390x.h
|
|
===================================================================
|
|
--- VEX/pub/libvex_guest_s390x.h.orig
|
|
+++ VEX/pub/libvex_guest_s390x.h
|
|
@@ -166,9 +166,9 @@ void LibVEX_GuestS390X_initialise(VexGue
|
|
/*--- Dedicated registers ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
-#define guest_LR guest_r14 /* Link register */
|
|
-#define guest_SP guest_r15 /* Stack pointer */
|
|
-#define guest_FP guest_r11 /* Frame pointer */
|
|
+#define guest_s390_LR guest_r14 /* Link register */
|
|
+#define guest_s390_SP guest_r15 /* Stack pointer */
|
|
+#define guest_s390_FP guest_r11 /* Frame pointer */
|
|
|
|
/*---------------------------------------------------------------*/
|
|
/*--- end libvex_guest_s390x.h ---*/
|