Index: auxprogs/genoffsets.c =================================================================== --- auxprogs/genoffsets.c (.../tags/VEX_3_9_0) (revision 2863) +++ auxprogs/genoffsets.c (.../trunk) (revision 2863) @@ -51,6 +51,7 @@ #include "../pub/libvex_guest_ppc32.h" #include "../pub/libvex_guest_ppc64.h" #include "../pub/libvex_guest_arm.h" +#include "../pub/libvex_guest_arm64.h" #include "../pub/libvex_guest_s390x.h" #include "../pub/libvex_guest_mips32.h" #include "../pub/libvex_guest_mips64.h" @@ -159,6 +160,19 @@ GENOFFSET(ARM,arm,R14); GENOFFSET(ARM,arm,R15T); + // arm64 + GENOFFSET(ARM64,arm64,X0); + GENOFFSET(ARM64,arm64,X1); + GENOFFSET(ARM64,arm64,X2); + GENOFFSET(ARM64,arm64,X3); + GENOFFSET(ARM64,arm64,X4); + GENOFFSET(ARM64,arm64,X5); + GENOFFSET(ARM64,arm64,X6); + GENOFFSET(ARM64,arm64,X7); + GENOFFSET(ARM64,arm64,X8); + GENOFFSET(ARM64,arm64,XSP); + GENOFFSET(ARM64,arm64,PC); + // s390x GENOFFSET(S390X,s390x,r2); GENOFFSET(S390X,s390x,r3); Index: priv/guest_amd64_helpers.c =================================================================== --- priv/guest_amd64_helpers.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_amd64_helpers.c (.../trunk) (revision 2863) @@ -3777,6 +3777,7 @@ vex_state->guest_DFLAG = 1; /* forwards */ vex_state->guest_IDFLAG = 0; + vex_state->guest_ACFLAG = 0; /* HACK: represent the offset associated with %fs==0. This assumes that %fs is only ever zero. */ @@ -3817,8 +3818,8 @@ /* These should not ever be either read or written, but we initialise them anyway. */ - vex_state->guest_TISTART = 0; - vex_state->guest_TILEN = 0; + vex_state->guest_CMSTART = 0; + vex_state->guest_CMLEN = 0; vex_state->guest_NRADDR = 0; vex_state->guest_SC_CLASS = 0; @@ -3923,8 +3924,8 @@ // /* */ ALWAYSDEFD(guest_GDT), /* 10 */ ALWAYSDEFD(guest_EMNOTE), /* 11 */ ALWAYSDEFD(guest_SSEROUND), - /* 12 */ ALWAYSDEFD(guest_TISTART), - /* 13 */ ALWAYSDEFD(guest_TILEN), + /* 12 */ ALWAYSDEFD(guest_CMSTART), + /* 13 */ ALWAYSDEFD(guest_CMLEN), /* 14 */ ALWAYSDEFD(guest_SC_CLASS), /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) } Index: priv/guest_amd64_toIR.c =================================================================== --- priv/guest_amd64_toIR.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_amd64_toIR.c (.../trunk) (revision 2863) @@ -51,10 +51,6 @@ float-to-float rounding. For all other operations, round-to-nearest is used, regardless. - * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the - simulation claims the argument is in-range (-2^63 <= arg <= 2^63) - even when it isn't. - * some of the FCOM cases could do with testing -- not convinced that the args are the right way round. @@ -436,8 +432,8 @@ #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16) #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE) -#define OFFB_TISTART offsetof(VexGuestAMD64State,guest_TISTART) -#define OFFB_TILEN offsetof(VexGuestAMD64State,guest_TILEN) +#define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART) +#define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN) #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR) @@ -771,10 +767,10 @@ return toBool( ! haveNo66noF2noF3(pfx) ); } -/* Return True iff pfx has 66 or F2 set */ -static Bool have66orF2 ( Prefix pfx ) +/* Return True iff pfx has 66 or F3 set */ +static Bool have66orF3 ( Prefix pfx ) { - return toBool((pfx & (PFX_66|PFX_F2)) > 0); + return toBool((pfx & (PFX_66|PFX_F3)) > 0); } /* Clear all the segment-override bits in a prefix. 
*/ @@ -4270,8 +4266,12 @@ modrm = getUChar(delta); if (epartIsReg(modrm)) { - /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */ - if (haveF2orF3(pfx)) goto unhandledR; + /* F2/XACQ and F3/XREL are always invalid in the non-mem case. + F2/CALL and F2/JMP may have bnd prefix. */ + if (haveF2orF3(pfx) + && ! (haveF2(pfx) + && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4))) + goto unhandledR; assign(t1, getIRegE(sz,pfx,modrm)); switch (gregLO3ofRM(modrm)) { case 0: /* INC */ @@ -4291,6 +4291,7 @@ case 2: /* call Ev */ /* Ignore any sz value and operate as if sz==8. */ if (!(sz == 4 || sz == 8)) goto unhandledR; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ sz = 8; t3 = newTemp(Ity_I64); assign(t3, getIRegE(sz,pfx,modrm)); @@ -4306,6 +4307,7 @@ case 4: /* jmp Ev */ /* Ignore any sz value and operate as if sz==8. */ if (!(sz == 4 || sz == 8)) goto unhandledR; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ sz = 8; t3 = newTemp(Ity_I64); assign(t3, getIRegE(sz,pfx,modrm)); @@ -4338,11 +4340,14 @@ showSz ? nameISize(sz) : ' ', nameIRegE(sz, pfx, modrm)); } else { - /* Decide if F2/XACQ or F3/XREL might be valid. */ + /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */ Bool validF2orF3 = haveF2orF3(pfx) ? False : True; if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/) && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { validF2orF3 = True; + } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4) + && (haveF2(pfx) && !haveF3(pfx))) { + validF2orF3 = True; } if (!validF2orF3) goto unhandledM; /* */ @@ -4379,6 +4384,7 @@ case 2: /* call Ev */ /* Ignore any sz value and operate as if sz==8. */ if (!(sz == 4 || sz == 8)) goto unhandledM; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ sz = 8; t3 = newTemp(Ity_I64); assign(t3, loadLE(Ity_I64,mkexpr(addr))); @@ -4394,6 +4400,7 @@ case 4: /* JMP Ev */ /* Ignore any sz value and operate as if sz==8. */ if (!(sz == 4 || sz == 8)) goto unhandledM; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ sz = 8; t3 = newTemp(Ity_I64); assign(t3, loadLE(Ity_I64,mkexpr(addr))); @@ -5066,6 +5073,42 @@ } +/* Given i, and some expression e, and a condition cond, generate IR + which has the same effect as put_ST(i,e) when cond is true and has + no effect when cond is false. Given the lack of proper + if-then-else in the IR, this is pretty tricky. +*/ + +static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) +{ + // new_tag = if cond then FULL else old_tag + // new_val = if cond then (if old_tag==FULL then NaN else val) + // else old_val + + IRTemp old_tag = newTemp(Ity_I8); + assign(old_tag, get_ST_TAG(i)); + IRTemp new_tag = newTemp(Ity_I8); + assign(new_tag, + IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); + + IRTemp old_val = newTemp(Ity_F64); + assign(old_val, get_ST_UNCHECKED(i)); + IRTemp new_val = newTemp(Ity_F64); + assign(new_val, + IRExpr_ITE(mkexpr(cond), + IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), + /* non-0 means full */ + mkQNaN64(), + /* 0 means empty */ + value), + mkexpr(old_val))); + + put_ST_UNCHECKED(i, mkexpr(new_val)); + // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So + // now set it to new_tag instead. + put_ST_TAG(i, mkexpr(new_tag)); +} + /* Adjust FTOP downwards by one register. */ static void fp_push ( void ) @@ -5073,6 +5116,14 @@ put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); } +/* Adjust FTOP downwards by one register when COND is 1:I1. Else + don't change it. 
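(The implementation simply subtracts the zero-extended condition bit from FTOP, so no conditional branch is needed in the IR.)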
*/ + +static void maybe_fp_push ( IRTemp cond ) +{ + put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); +} + /* Adjust FTOP upwards by one register, and mark the vacated register as empty. */ @@ -5082,12 +5133,49 @@ put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); } -/* Clear the C2 bit of the FPU status register, for - sin/cos/tan/sincos. */ +/* Set the C2 bit of the FPU status register to e[0]. Assumes that + e[31:1] == 0. +*/ +static void set_C2 ( IRExpr* e ) +{ + IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)); + put_C3210( binop(Iop_Or64, + cleared, + binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) ); +} -static void clear_C2 ( void ) +/* Generate code to check that abs(d64) < 2^63 and is finite. This is + used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The + test is simple, but the derivation of it is not so simple. + + The exponent field for an IEEE754 double is 11 bits. That means it + can take values 0 through 0x7FF. If the exponent has value 0x7FF, + the number is either a NaN or an Infinity and so is not finite. + Furthermore, a finite value of exactly 2^63 is the smallest value + that has exponent value 0x43E. Hence, what we need to do is + extract the exponent, ignoring the sign bit and mantissa, and check + it is < 0x43E, or <= 0x43D. + + To make this easily applicable to 32- and 64-bit targets, a + roundabout approach is used. First the number is converted to I64, + then the top 32 bits are taken. Shifting them right by 20 bits + places the sign bit and exponent in the bottom 12 bits. Anding + with 0x7FF gets rid of the sign bit, leaving just the exponent + available for comparison. +*/ +static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) { - put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) ); + IRTemp i64 = newTemp(Ity_I64); + assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); + IRTemp exponent = newTemp(Ity_I32); + assign(exponent, + binop(Iop_And32, + binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), + mkU32(0x7FF))); + IRTemp in_range_and_finite = newTemp(Ity_I1); + assign(in_range_and_finite, + binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); + return in_range_and_finite; } /* Invent a plausible-looking FPU status word value: @@ -5717,16 +5805,31 @@ fp_pop(); break; - case 0xF2: /* FPTAN */ - DIP("ftan\n"); - put_ST_UNCHECKED(0, - binop(Iop_TanF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - fp_push(); - put_ST(0, IRExpr_Const(IRConst_F64(1.0))); - clear_C2(); /* HACK */ + case 0xF2: { /* FPTAN */ + DIP("fptan\n"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_TanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push 1.0 on the stack, if the arg is + in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, + IRExpr_Const(IRConst_F64(1.0))); + set_C2( binop(Iop_Xor64, + unop(Iop_1Uto64, mkexpr(argOK)), + mkU64(1)) ); break; + } case 0xF3: /* FPATAN */ DIP("fpatan\n"); @@ -5842,19 +5945,30 @@ break; case 0xFB: { /* FSINCOS */ - IRTemp a1 = newTemp(Ity_F64); - assign( a1, get_ST(0) ); DIP("fsincos\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + 
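/* As with FPTAN above, argOK is 1:I1 iff the argument is finite with |arg| < 2^63 (IEEE754 exponent field <= 0x43D); C2 is set from its negation below. */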
IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push the cos value on the stack, if + the arg is in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, + binop(Iop_CosF64, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - fp_push(); - put_ST(0, - binop(Iop_CosF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - clear_C2(); /* HACK */ + mkexpr(argD))); + set_C2( binop(Iop_Xor64, + unop(Iop_1Uto64, mkexpr(argOK)), + mkU64(1)) ); break; } @@ -5873,24 +5987,29 @@ get_ST(1))); break; - case 0xFE: /* FSIN */ - DIP("fsin\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - clear_C2(); /* HACK */ + case 0xFE: /* FSIN */ + case 0xFF: { /* FCOS */ + Bool isSIN = modrm == 0xFE; + DIP("%s\n", isSIN ? "fsin" : "fcos"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(isSIN ? Iop_SinF64 : Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + set_C2( binop(Iop_Xor64, + unop(Iop_1Uto64, mkexpr(argOK)), + mkU64(1)) ); break; + } - case 0xFF: /* FCOS */ - DIP("fcos\n"); - put_ST_UNCHECKED(0, - binop(Iop_CosF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - clear_C2(); /* HACK */ - break; - default: goto decode_fail; } @@ -8548,6 +8667,32 @@ /*--- SSE/SSE2/SSE3 helpers ---*/ /*------------------------------------------------------------*/ +/* Indicates whether the op requires a rounding-mode argument. Note + that this covers only vector floating point arithmetic ops, and + omits the scalar ones that need rounding modes. Note also that + inconsistencies here will get picked up later by the IR sanity + checker, so this isn't correctness-critical. */ +static Bool requiresRMode ( IROp op ) +{ + switch (op) { + /* 128 bit ops */ + case Iop_Add32Fx4: case Iop_Sub32Fx4: + case Iop_Mul32Fx4: case Iop_Div32Fx4: + case Iop_Add64Fx2: case Iop_Sub64Fx2: + case Iop_Mul64Fx2: case Iop_Div64Fx2: + /* 256 bit ops */ + case Iop_Add32Fx8: case Iop_Sub32Fx8: + case Iop_Mul32Fx8: case Iop_Div32Fx8: + case Iop_Add64Fx4: case Iop_Sub64Fx4: + case Iop_Mul64Fx4: case Iop_Div64Fx4: + return True; + default: + break; + } + return False; +} + + /* Worker function; do not call directly. Handles full width G = G `op` E and G = (not G) `op` E. */ @@ -8563,13 +8708,20 @@ Int alen; IRTemp addr; UChar rm = getUChar(delta); + Bool needsRMode = requiresRMode(op); IRExpr* gpart = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) : getXMMReg(gregOfRexRM(pfx,rm)); if (epartIsReg(rm)) { - putXMMReg( gregOfRexRM(pfx,rm), - binop(op, gpart, - getXMMReg(eregOfRexRM(pfx,rm))) ); + putXMMReg( + gregOfRexRM(pfx,rm), + needsRMode + ? 
triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + gpart, + getXMMReg(eregOfRexRM(pfx,rm))) + : binop(op, gpart, + getXMMReg(eregOfRexRM(pfx,rm))) + ); DIP("%s %s,%s\n", opname, nameXMMReg(eregOfRexRM(pfx,rm)), nameXMMReg(gregOfRexRM(pfx,rm)) ); @@ -8576,9 +8728,15 @@ return delta+1; } else { addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); - putXMMReg( gregOfRexRM(pfx,rm), - binop(op, gpart, - loadLE(Ity_V128, mkexpr(addr))) ); + putXMMReg( + gregOfRexRM(pfx,rm), + needsRMode + ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + gpart, + loadLE(Ity_V128, mkexpr(addr))) + : binop(op, gpart, + loadLE(Ity_V128, mkexpr(addr))) + ); DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(gregOfRexRM(pfx,rm)) ); @@ -10982,9 +11140,11 @@ IRTemp subV = newTemp(Ity_V128); IRTemp a1 = newTemp(Ity_I64); IRTemp s0 = newTemp(Ity_I64); + IRTemp rm = newTemp(Ity_I32); - assign( addV, binop(Iop_Add64Fx2, mkexpr(dV), mkexpr(sV)) ); - assign( subV, binop(Iop_Sub64Fx2, mkexpr(dV), mkexpr(sV)) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); + assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); assign( s0, unop(Iop_V128to64, mkexpr(subV) )); @@ -11000,10 +11160,12 @@ IRTemp a3, a2, a1, a0, s3, s2, s1, s0; IRTemp addV = newTemp(Ity_V256); IRTemp subV = newTemp(Ity_V256); + IRTemp rm = newTemp(Ity_I32); a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; - assign( addV, binop(Iop_Add64Fx4, mkexpr(dV), mkexpr(sV)) ); - assign( subV, binop(Iop_Sub64Fx4, mkexpr(dV), mkexpr(sV)) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); + assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); breakupV256to64s( addV, &a3, &a2, &a1, &a0 ); breakupV256to64s( subV, &s3, &s2, &s1, &s0 ); @@ -11019,10 +11181,12 @@ IRTemp a3, a2, a1, a0, s3, s2, s1, s0; IRTemp addV = newTemp(Ity_V128); IRTemp subV = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; - assign( addV, binop(Iop_Add32Fx4, mkexpr(dV), mkexpr(sV)) ); - assign( subV, binop(Iop_Sub32Fx4, mkexpr(dV), mkexpr(sV)) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); + assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); breakupV128to32s( addV, &a3, &a2, &a1, &a0 ); breakupV128to32s( subV, &s3, &s2, &s1, &s0 ); @@ -11039,11 +11203,13 @@ IRTemp s7, s6, s5, s4, s3, s2, s1, s0; IRTemp addV = newTemp(Ity_V256); IRTemp subV = newTemp(Ity_V256); + IRTemp rm = newTemp(Ity_I32); a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID; s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID; - assign( addV, binop(Iop_Add32Fx8, mkexpr(dV), mkexpr(sV)) ); - assign( subV, binop(Iop_Sub32Fx8, mkexpr(dV), mkexpr(sV)) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); + assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 ); breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 ); @@ -13361,14 +13527,14 @@ /* Round addr down to the start of the containing block. 
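For example, with a 64-byte line an address of 0x1234 is rounded down to 0x1200 by ANDing with ~(lineszB-1).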
*/ stmt( IRStmt_Put( - OFFB_TISTART, + OFFB_CMSTART, binop( Iop_And64, mkexpr(addr), mkU64( ~(lineszB-1) ))) ); - stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) ); + stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) ); - jmp_lit(dres, Ijk_TInval, (Addr64)(guest_RIP_bbstart+delta)); + jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta)); DIP("clflush %s\n", dis_buf); goto decode_success; @@ -14594,6 +14760,7 @@ IRTemp s3, s2, s1, s0, d3, d2, d1, d0; IRTemp leftV = newTemp(Ity_V128); IRTemp rightV = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); @@ -14603,8 +14770,9 @@ assign( rightV, mkV128from32s( s3, s1, d3, d1 ) ); IRTemp res = newTemp(Ity_V128); - assign( res, binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, - mkexpr(leftV), mkexpr(rightV) ) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, + mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); return res; } @@ -14614,6 +14782,7 @@ IRTemp s1, s0, d1, d0; IRTemp leftV = newTemp(Ity_V128); IRTemp rightV = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); s1 = s0 = d1 = d0 = IRTemp_INVALID; breakupV128to64s( sV, &s1, &s0 ); @@ -14623,8 +14792,9 @@ assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); IRTemp res = newTemp(Ity_V128); - assign( res, binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, - mkexpr(leftV), mkexpr(rightV) ) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, + mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); return res; } @@ -18271,8 +18441,11 @@ UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF }; IRTemp and_vec = newTemp(Ity_V128); IRTemp sum_vec = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ assign( and_vec, binop( Iop_AndV128, - binop( Iop_Mul64Fx2, + triop( Iop_Mul64Fx2, + mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ), mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) ); @@ -18296,6 +18469,7 @@ IRTemp tmp_prod_vec = newTemp(Ity_V128); IRTemp prod_vec = newTemp(Ity_V128); IRTemp sum_vec = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); IRTemp v3, v2, v1, v0; v3 = v2 = v1 = v0 = IRTemp_INVALID; UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, @@ -18303,15 +18477,17 @@ 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF }; + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ assign( tmp_prod_vec, binop( Iop_AndV128, - binop( Iop_Mul32Fx4, mkexpr(dst_vec), - mkexpr(src_vec) ), + triop( Iop_Mul32Fx4, + mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ), mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) ); breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 ); assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) ); - assign( sum_vec, binop( Iop_Add32Fx4, + assign( sum_vec, triop( Iop_Add32Fx4, + mkexpr(rm), binop( Iop_InterleaveHI32x4, mkexpr(prod_vec), mkexpr(prod_vec) ), binop( Iop_InterleaveLO32x4, @@ -18319,7 +18495,8 @@ IRTemp res = newTemp(Ity_V128); assign( res, binop( Iop_AndV128, - binop( Iop_Add32Fx4, + triop( Iop_Add32Fx4, + mkexpr(rm), binop( Iop_InterleaveHI32x4, mkexpr(sum_vec), mkexpr(sum_vec) ), binop( Iop_InterleaveLO32x4, @@ -19550,7 +19727,8 @@ case 0x7F: { /* JGb/JNLEb (jump greater) */ Long jmpDelta; const HChar* comment = ""; - if (haveF2orF3(pfx)) goto decode_failure; + if (haveF3(pfx)) goto decode_failure; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
*/ jmpDelta = getSDisp8(delta); vassert(-128 <= jmpDelta && jmpDelta < 128); d64 = (guest_RIP_bbstart+delta+1) + jmpDelta; @@ -20203,7 +20381,8 @@ } case 0xC2: /* RET imm16 */ - if (have66orF2orF3(pfx)) goto decode_failure; + if (have66orF3(pfx)) goto decode_failure; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ d64 = getUDisp16(delta); delta += 2; dis_ret(dres, vbi, d64); @@ -20211,8 +20390,9 @@ return delta; case 0xC3: /* RET */ - if (have66orF2(pfx)) goto decode_failure; + if (have66(pfx)) goto decode_failure; /* F3 is acceptable on AMD. */ + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ dis_ret(dres, vbi, 0); DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); return delta; @@ -20251,7 +20431,7 @@ return delta; } /* BEGIN HACKY SUPPORT FOR xbegin */ - if (modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4 + if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { delta++; /* mod/rm byte */ d64 = getSDisp(4,delta); @@ -20270,6 +20450,16 @@ return delta; } /* END HACKY SUPPORT FOR xbegin */ + /* BEGIN HACKY SUPPORT FOR xabort */ + if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1 + && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { + delta++; /* mod/rm byte */ + abyte = getUChar(delta); delta++; + /* There is never a real transaction in progress, so do nothing. */ + DIP("xabort $%d", (Int)abyte); + return delta; + } + /* END HACKY SUPPORT FOR xabort */ goto decode_failure; case 0xC8: /* ENTER */ @@ -20606,7 +20796,8 @@ } case 0xE8: /* CALL J4 */ - if (haveF2orF3(pfx)) goto decode_failure; + if (haveF3(pfx)) goto decode_failure; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ d64 = getSDisp32(delta); delta += 4; d64 += (guest_RIP_bbstart+delta); /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ @@ -20629,9 +20820,10 @@ return delta; case 0xE9: /* Jv (jump, 16/32 offset) */ - if (haveF2orF3(pfx)) goto decode_failure; + if (haveF3(pfx)) goto decode_failure; if (sz != 4) goto decode_failure; /* JRS added 2004 July 11 */ + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); delta += sz; if (resteerOkFn(callback_opaque,d64)) { @@ -20645,9 +20837,10 @@ return delta; case 0xEB: /* Jb (jump, byte offset) */ - if (haveF2orF3(pfx)) goto decode_failure; + if (haveF3(pfx)) goto decode_failure; if (sz != 4) goto decode_failure; /* JRS added 2004 July 11 */ + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); delta++; if (resteerOkFn(callback_opaque,d64)) { @@ -21065,7 +21258,8 @@ case 0x8F: { /* JGb/JNLEb (jump greater) */ Long jmpDelta; const HChar* comment = ""; - if (haveF2orF3(pfx)) goto decode_failure; + if (haveF3(pfx)) goto decode_failure; + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ jmpDelta = getSDisp32(delta); d64 = (guest_RIP_bbstart+delta+4) + jmpDelta; delta += 4; @@ -21156,6 +21350,66 @@ } return delta; + case 0x1A: + case 0x1B: { /* Future MPX instructions, currently NOPs. + BNDMK b, m F3 0F 1B + BNDCL b, r/m F3 0F 1A + BNDCU b, r/m F2 0F 1A + BNDCN b, r/m F2 0F 1B + BNDMOV b, b/m 66 0F 1A + BNDMOV b/m, b 66 0F 1B + BNDLDX b, mib 0F 1A + BNDSTX mib, b 0F 1B */ + + /* All instructions have two operands. One operand is always the + bnd register number (bnd0-bnd3, other register numbers are + ignored when MPX isn't enabled, but should generate an + exception if MPX is enabled) given by gregOfRexRM. 
The other + operand is either a ModRM:reg, ModRM:r/m or a SIB encoded + address, all of which can be decoded by using either + eregOfRexRM or disAMode. */ + + modrm = getUChar(delta); + int bnd = gregOfRexRM(pfx,modrm); + const HChar *oper; + if (epartIsReg(modrm)) { + oper = nameIReg64 (eregOfRexRM(pfx,modrm)); + delta += 1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + oper = dis_buf; + } + + if (haveF3no66noF2 (pfx)) { + if (opc == 0x1B) { + DIP ("bndmk %s, %%bnd%d\n", oper, bnd); + } else /* opc == 0x1A */ { + DIP ("bndcl %s, %%bnd%d\n", oper, bnd); + } + } else if (haveF2no66noF3 (pfx)) { + if (opc == 0x1A) { + DIP ("bndcu %s, %%bnd%d\n", oper, bnd); + } else /* opc == 0x1B */ { + DIP ("bndcn %s, %%bnd%d\n", oper, bnd); + } + } else if (have66noF2noF3 (pfx)) { + if (opc == 0x1A) { + DIP ("bndmov %s, %%bnd%d\n", oper, bnd); + } else /* opc == 0x1B */ { + DIP ("bndmov %%bnd%d, %s\n", bnd, oper); + } + } else if (haveNo66noF2noF3 (pfx)) { + if (opc == 0x1A) { + DIP ("bndldx %s, %%bnd%d\n", oper, bnd); + } else /* opc == 0x1B */ { + DIP ("bndstx %%bnd%d, %s\n", bnd, oper); + } + } else goto decode_failure; + + return delta; + } + case 0xA2: { /* CPUID */ /* Uses dirty helper: void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* ) @@ -21888,8 +22142,17 @@ if (op != Iop_INVALID) { vassert(opFn == NULL); res = newTemp(Ity_V128); - assign(res, swapArgs ? binop(op, mkexpr(tSR), mkexpr(tSL)) - : binop(op, mkexpr(tSL), mkexpr(tSR))); + if (requiresRMode(op)) { + IRTemp rm = newTemp(Ity_I32); + assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */ + assign(res, swapArgs + ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL)) + : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR))); + } else { + assign(res, swapArgs + ? binop(op, mkexpr(tSR), mkexpr(tSL)) + : binop(op, mkexpr(tSL), mkexpr(tSR))); + } } else { vassert(opFn != NULL); res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR); @@ -22792,8 +23055,17 @@ if (op != Iop_INVALID) { vassert(opFn == NULL); res = newTemp(Ity_V256); - assign(res, swapArgs ? binop(op, mkexpr(tSR), mkexpr(tSL)) - : binop(op, mkexpr(tSL), mkexpr(tSR))); + if (requiresRMode(op)) { + IRTemp rm = newTemp(Ity_I32); + assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */ + assign(res, swapArgs + ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL)) + : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR))); + } else { + assign(res, swapArgs + ? binop(op, mkexpr(tSR), mkexpr(tSL)) + : binop(op, mkexpr(tSL), mkexpr(tSR))); + } } else { vassert(opFn != NULL); res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR); @@ -30936,14 +31208,14 @@ // injecting here can change. In which case the translation has to // be redone. For ease of handling, we simply invalidate all the // time. - stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_RIP_curr_instr))); - stmt(IRStmt_Put(OFFB_TILEN, mkU64(19))); + stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr))); + stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19))); delta += 19; stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) ); dres.whatNext = Dis_StopHere; - dres.jk_StopHere = Ijk_TInval; + dres.jk_StopHere = Ijk_InvalICache; goto decode_success; } /* We don't know what it is. 
*/ Index: priv/guest_arm64_defs.h =================================================================== --- priv/guest_arm64_defs.h (.../tags/VEX_3_9_0) (revision 0) +++ priv/guest_arm64_defs.h (.../trunk) (revision 2863) @@ -0,0 +1,244 @@ + +/*---------------------------------------------------------------*/ +/*--- begin guest_arm64_defs.h ---*/ +/*---------------------------------------------------------------*/ +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2013-2013 OpenWorks + info@open-works.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __VEX_GUEST_ARM64_DEFS_H +#define __VEX_GUEST_ARM64_DEFS_H + +#include "libvex_basictypes.h" +#include "guest_generic_bb_to_IR.h" // DisResult + +/*---------------------------------------------------------*/ +/*--- arm64 to IR conversion ---*/ +/*---------------------------------------------------------*/ + +/* Convert one ARM64 insn to IR. See the type DisOneInstrFn in + bb_to_IR.h. */ +extern +DisResult disInstr_ARM64 ( IRSB* irbb, + Bool (*resteerOkFn) ( void*, Addr64 ), + Bool resteerCisOk, + void* callback_opaque, + UChar* guest_code, + Long delta, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian, + Bool sigill_diag ); + +/* Used by the optimiser to specialise calls to helpers. */ +extern +IRExpr* guest_arm64_spechelper ( const HChar* function_name, + IRExpr** args, + IRStmt** precedingStmts, + Int n_precedingStmts ); + +/* Describes to the optimser which part of the guest state require + precise memory exceptions. This is logically part of the guest + state description. */ +extern +Bool guest_arm64_state_requires_precise_mem_exns ( Int, Int ); + +extern +VexGuestLayout arm64Guest_layout; + + +/*---------------------------------------------------------*/ +/*--- arm64 guest helpers ---*/ +/*---------------------------------------------------------*/ + +/* --- CLEAN HELPERS --- */ + +/* Calculate NZCV from the supplied thunk components, in the positions + they appear in the CPSR, viz bits 31:28 for N Z C V respectively. + Returned bits 63:32 and 27:0 are zero. */ +extern +ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ); + +//ZZ /* Calculate the C flag from the thunk components, in the lowest bit +//ZZ of the word (bit 0). */ +//ZZ extern +//ZZ UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1, +//ZZ UInt cc_dep2, UInt cc_dep3 ); +//ZZ +//ZZ /* Calculate the V flag from the thunk components, in the lowest bit +//ZZ of the word (bit 0). 
*/ +//ZZ extern +//ZZ UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1, +//ZZ UInt cc_dep2, UInt cc_dep3 ); +//ZZ +/* Calculate the specified condition from the thunk components, in the + lowest bit of the word (bit 0). */ +extern +ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */ + ULong cond_n_op , + ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ); + +//ZZ /* Calculate the QC flag from the thunk components, in the lowest bit +//ZZ of the word (bit 0). */ +//ZZ extern +//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2, +//ZZ UInt resR1, UInt resR2 ); + + +/*---------------------------------------------------------*/ +/*--- Condition code stuff ---*/ +/*---------------------------------------------------------*/ + +/* Flag masks. Defines positions of flag bits in the NZCV + register. */ +#define ARM64G_CC_SHIFT_N 31 +#define ARM64G_CC_SHIFT_Z 30 +#define ARM64G_CC_SHIFT_C 29 +#define ARM64G_CC_SHIFT_V 28 +//ZZ #define ARMG_CC_SHIFT_Q 27 +//ZZ +//ZZ #define ARMG_CC_MASK_N (1 << ARMG_CC_SHIFT_N) +//ZZ #define ARMG_CC_MASK_Z (1 << ARMG_CC_SHIFT_Z) +//ZZ #define ARMG_CC_MASK_C (1 << ARMG_CC_SHIFT_C) +//ZZ #define ARMG_CC_MASK_V (1 << ARMG_CC_SHIFT_V) +//ZZ #define ARMG_CC_MASK_Q (1 << ARMG_CC_SHIFT_Q) + +/* Flag thunk descriptors. A four-word thunk is used to record + details of the most recent flag-setting operation, so NZCV can + be computed later if needed. + + The four words are: + + CC_OP, which describes the operation. + + CC_DEP1, CC_DEP2, CC_NDEP. These are arguments to the + operation. We want set up the mcx_masks in flag helper calls + involving these fields so that Memcheck "believes" that the + resulting flags are data-dependent on both CC_DEP1 and + CC_DEP2. Hence the name DEP. + + When building the thunk, it is always necessary to write words into + CC_DEP1/2 and NDEP, even if those args are not used given the CC_OP + field. This is important because otherwise Memcheck could give + false positives as it does not understand the relationship between + the CC_OP field and CC_DEP1/2/NDEP, and so believes that the + definedness of the stored flags always depends on all 3 DEP values. 
+ + A summary of the field usages is: + + OP DEP1 DEP2 DEP3 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + OP_COPY curr_NZCV:28x0 unused unused + OP_ADD32 argL argR unused + OP_ADD64 argL argR unused + OP_SUB32 argL argR unused + OP_SUB64 argL argR unused +//ZZ OP_ADC argL argR 31x0:old_C +//ZZ OP_SBB argL argR 31x0:old_C + OP_LOGIC32 result unused unused + OP_LOGIC64 result unused unused +//ZZ OP_MUL result unused 30x0:old_C:old_V +//ZZ OP_MULL resLO32 resHI32 30x0:old_C:old_V +//ZZ */ + +enum { + ARM64G_CC_OP_COPY=0, /* DEP1 = NZCV in 31:28, DEP2 = 0, DEP3 = 0 + just copy DEP1 to output */ + + ARM64G_CC_OP_ADD32, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op), + DEP3 = 0 */ + + ARM64G_CC_OP_ADD64, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op), + DEP3 = 0 */ + + ARM64G_CC_OP_SUB32, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op), + DEP3 = 0 */ + + ARM64G_CC_OP_SUB64, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op), + DEP3 = 0 */ + +//ZZ ARMG_CC_OP_ADC, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op), +//ZZ DEP3 = oldC (in LSB) */ +//ZZ +//ZZ ARMG_CC_OP_SBB, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op), +//ZZ DEP3 = oldC (in LSB) */ + + ARM64G_CC_OP_LOGIC32, /* DEP1 = result, DEP2 = 0, DEP3 = 0 */ + ARM64G_CC_OP_LOGIC64, /* DEP1 = result, DEP2 = 0, DEP3 = 0 */ + +//ZZ ARMG_CC_OP_MUL, /* DEP1 = result, DEP2 = 0, DEP3 = oldC:old_V +//ZZ (in bits 1:0) */ +//ZZ +//ZZ ARMG_CC_OP_MULL, /* DEP1 = resLO32, DEP2 = resHI32, DEP3 = oldC:old_V +//ZZ (in bits 1:0) */ + + ARM64G_CC_OP_NUMBER +}; + +/* XXXX because of the calling conventions for + arm64g_calculate_condition, all these OP values MUST be in the range + 0 .. 15 only (viz, 4-bits). */ + + + +/* Defines conditions which we can ask for */ + +typedef + enum { + ARM64CondEQ = 0, /* equal : Z=1 */ + ARM64CondNE = 1, /* not equal : Z=0 */ + + ARM64CondCS = 2, /* >=u (higher or same) (aka HS) : C=1 */ + ARM64CondCC = 3, /* u (higher) : C=1 && Z=0 */ + ARM64CondLS = 9, /* <=u (lower or same) : C=0 || Z=1 */ + + ARM64CondGE = 10, /* >=s (signed greater or equal) : N=V */ + ARM64CondLT = 11, /* s (signed greater) : Z=0 && N=V */ + ARM64CondLE = 13, /* <=s (signed less or equal) : Z=1 || N!=V */ + + ARM64CondAL = 14, /* always (unconditional) : 1 */ + ARM64CondNV = 15 /* always (unconditional) : 1 */ + } + ARM64Condcode; + +#endif /* ndef __VEX_GUEST_ARM64_DEFS_H */ + +/*---------------------------------------------------------------*/ +/*--- end guest_arm64_defs.h ---*/ +/*---------------------------------------------------------------*/ Index: priv/guest_arm64_helpers.c =================================================================== --- priv/guest_arm64_helpers.c (.../tags/VEX_3_9_0) (revision 0) +++ priv/guest_arm64_helpers.c (.../trunk) (revision 2863) @@ -0,0 +1,1292 @@ + +/*---------------------------------------------------------------*/ +/*--- begin guest_arm64_helpers.c ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2013-2013 OpenWorks + info@open-works.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex_emnote.h" +#include "libvex_guest_arm64.h" +#include "libvex_ir.h" +#include "libvex.h" + +#include "main_util.h" +#include "main_globals.h" +#include "guest_generic_bb_to_IR.h" +#include "guest_arm64_defs.h" + + +/* This file contains helper functions for arm guest code. Calls to + these functions are generated by the back end. These calls are of + course in the host machine code and this file will be compiled to + host machine code, so that all makes sense. + + Only change the signatures of these helper functions very + carefully. If you change the signature here, you'll have to change + the parameters passed to it in the IR calls constructed by + guest_arm64_toIR.c. +*/ + + +/* Set to 1 to get detailed profiling info about individual N, Z, C + and V flag evaluation. */ +#define PROFILE_NZCV_FLAGS 0 + +#if PROFILE_NZCV_FLAGS + +static UInt tab_eval[ARM64G_CC_OP_NUMBER][16]; +static UInt initted = 0; +static UInt tot_evals = 0; + +static void initCounts ( void ) +{ + UInt i, j; + for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) { + for (j = 0; j < 16; j++) { + tab_eval[i][j] = 0; + } + } + initted = 1; +} + +static void showCounts ( void ) +{ + const HChar* nameCC[16] + = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC", + "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" }; + UInt i, j; + ULong sum = 0; + vex_printf("\nCC_OP 0 1 2 3 " + " 4 5 6\n"); + vex_printf( "--------------------------------------------------" + "--------------------------\n"); + for (j = 0; j < 16; j++) { + vex_printf("%2d %s ", j, nameCC[j]); + for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) { + vex_printf("%9d ", tab_eval[i][j]); + sum += tab_eval[i][j]; + } + vex_printf("\n"); + } + vex_printf("(In total %llu calls)\n", sum); +} + +#define NOTE_EVAL(_cc_op, _cond) \ + do { \ + if (!initted) initCounts(); \ + vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \ + vassert( ((UInt)(_cond)) < 16); \ + tab_eval[(UInt)(_cc_op)][(UInt)(cond)]++; \ + tot_evals++; \ + if (0 == (tot_evals & 0x7FFF)) \ + showCounts(); \ + } while (0) + +#endif /* PROFILE_NZCV_FLAGS */ + + +/* Calculate the N flag from the supplied thunk components, in the + least significant bit of the word. Returned bits 63:1 are zero. 
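For the ADD/SUB cases N is recomputed as the sign bit of argL +/- argR; for the LOGIC cases it is the sign bit of the stored result.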
*/ +static +ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ) +{ + switch (cc_op) { + case ARM64G_CC_OP_COPY: { + /* (nzcv:28x0, unused, unused) */ + ULong nf = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1; + return nf; + } + case ARM64G_CC_OP_ADD32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + UInt res = argL + argR; + ULong nf = (ULong)(res >> 31); + return nf; + } + case ARM64G_CC_OP_ADD64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong res = argL + argR; + ULong nf = (ULong)(res >> 63); + return nf; + } + case ARM64G_CC_OP_SUB32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + UInt res = argL - argR; + ULong nf = (ULong)(res >> 31); + return nf; + } + case ARM64G_CC_OP_SUB64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong res = argL - argR; + ULong nf = res >> 63; + return nf; + } +//ZZ case ARMG_CC_OP_ADC: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt res = argL + argR + oldC; +//ZZ UInt nf = res >> 31; +//ZZ return nf; +//ZZ } +//ZZ case ARMG_CC_OP_SBB: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt res = argL - argR - (oldC ^ 1); +//ZZ UInt nf = res >> 31; +//ZZ return nf; +//ZZ } + case ARM64G_CC_OP_LOGIC32: { + /* (res, unused, unused) */ + UInt res = (UInt)cc_dep1; + ULong nf = res >> 31; + return nf; + } + case ARM64G_CC_OP_LOGIC64: { + /* (res, unused, unused) */ + ULong res = cc_dep1; + ULong nf = res >> 63; + return nf; + } +//ZZ case ARMG_CC_OP_MUL: { +//ZZ /* (res, unused, oldC:oldV) */ +//ZZ UInt res = cc_dep1; +//ZZ UInt nf = res >> 31; +//ZZ return nf; +//ZZ } +//ZZ case ARMG_CC_OP_MULL: { +//ZZ /* (resLo32, resHi32, oldC:oldV) */ +//ZZ UInt resHi32 = cc_dep2; +//ZZ UInt nf = resHi32 >> 31; +//ZZ return nf; +//ZZ } + default: + /* shouldn't really make these calls from generated code */ + vex_printf("arm64g_calculate_flag_n" + "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n", + cc_op, cc_dep1, cc_dep2, cc_dep3 ); + vpanic("arm64g_calculate_flag_n"); + } +} + + +/* Calculate the Z flag from the supplied thunk components, in the + least significant bit of the word. Returned bits 63:1 are zero. 
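For the ADD/SUB cases Z is recomputed as (argL +/- argR) == 0; for the LOGIC cases it is (result == 0).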
*/ +static +ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ) +{ + switch (cc_op) { + case ARM64G_CC_OP_COPY: { + /* (nzcv:28x0, unused, unused) */ + ULong zf = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1; + return zf; + } + case ARM64G_CC_OP_ADD32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + UInt res = argL + argR; + ULong zf = res == 0; + return zf; + } + case ARM64G_CC_OP_ADD64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong res = argL + argR; + ULong zf = res == 0; + return zf; + } + case ARM64G_CC_OP_SUB32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + UInt res = argL - argR; + ULong zf = res == 0; + return zf; + } + case ARM64G_CC_OP_SUB64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong res = argL - argR; + ULong zf = res == 0; + return zf; + } +//ZZ case ARMG_CC_OP_ADC: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt res = argL + argR + oldC; +//ZZ UInt zf = res == 0; +//ZZ return zf; +//ZZ } +//ZZ case ARMG_CC_OP_SBB: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt res = argL - argR - (oldC ^ 1); +//ZZ UInt zf = res == 0; +//ZZ return zf; +//ZZ } + case ARM64G_CC_OP_LOGIC32: { + /* (res, unused, unused) */ + UInt res = (UInt)cc_dep1; + ULong zf = res == 0; + return zf; + } + case ARM64G_CC_OP_LOGIC64: { + /* (res, unused, unused) */ + ULong res = cc_dep1; + ULong zf = res == 0; + return zf; + } +//ZZ case ARMG_CC_OP_MUL: { +//ZZ /* (res, unused, oldC:oldV) */ +//ZZ UInt res = cc_dep1; +//ZZ UInt zf = res == 0; +//ZZ return zf; +//ZZ } +//ZZ case ARMG_CC_OP_MULL: { +//ZZ /* (resLo32, resHi32, oldC:oldV) */ +//ZZ UInt resLo32 = cc_dep1; +//ZZ UInt resHi32 = cc_dep2; +//ZZ UInt zf = (resHi32|resLo32) == 0; +//ZZ return zf; +//ZZ } + default: + /* shouldn't really make these calls from generated code */ + vex_printf("arm64g_calculate_flag_z" + "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n", + cc_op, cc_dep1, cc_dep2, cc_dep3 ); + vpanic("arm64g_calculate_flag_z"); + } +} + + +/* CALLED FROM GENERATED CODE: CLEAN HELPER */ +/* Calculate the C flag from the supplied thunk components, in the + least significant bit of the word. Returned bits 63:1 are zero. 
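For ADD the carry is (res < argL); for SUB it is the no-borrow condition (argL >= argR); the LOGIC ops always give C = 0.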
*/ +static +ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ) +{ + switch (cc_op) { + case ARM64G_CC_OP_COPY: { + /* (nzcv:28x0, unused, unused) */ + ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1; + return cf; + } + case ARM64G_CC_OP_ADD32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + UInt res = argL + argR; + ULong cf = res < argL; + return cf; + } + case ARM64G_CC_OP_ADD64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong res = argL + argR; + ULong cf = res < argL; + return cf; + } + case ARM64G_CC_OP_SUB32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + ULong cf = argL >= argR; + return cf; + } + case ARM64G_CC_OP_SUB64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong cf = argL >= argR; + return cf; + } +//ZZ case ARMG_CC_OP_ADC: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt res = argL + argR + oldC; +//ZZ UInt cf = oldC ? (res <= argL) : (res < argL); +//ZZ return cf; +//ZZ } +//ZZ case ARMG_CC_OP_SBB: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt cf = oldC ? (argL >= argR) : (argL > argR); +//ZZ return cf; +//ZZ } + case ARM64G_CC_OP_LOGIC32: + case ARM64G_CC_OP_LOGIC64: { + /* (res, unused, unused) */ + return 0; // C after logic is zero on arm64 + } +//ZZ case ARMG_CC_OP_MUL: { +//ZZ /* (res, unused, oldC:oldV) */ +//ZZ UInt oldC = (cc_dep3 >> 1) & 1; +//ZZ vassert((cc_dep3 & ~3) == 0); +//ZZ UInt cf = oldC; +//ZZ return cf; +//ZZ } +//ZZ case ARMG_CC_OP_MULL: { +//ZZ /* (resLo32, resHi32, oldC:oldV) */ +//ZZ UInt oldC = (cc_dep3 >> 1) & 1; +//ZZ vassert((cc_dep3 & ~3) == 0); +//ZZ UInt cf = oldC; +//ZZ return cf; +//ZZ } + default: + /* shouldn't really make these calls from generated code */ + vex_printf("arm64g_calculate_flag_c" + "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n", + cc_op, cc_dep1, cc_dep2, cc_dep3 ); + vpanic("arm64g_calculate_flag_c"); + } +} + + +/* CALLED FROM GENERATED CODE: CLEAN HELPER */ +/* Calculate the V flag from the supplied thunk components, in the + least significant bit of the word. Returned bits 63:1 are zero. 
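For ADD the overflow is the sign bit of ((res ^ argL) & (res ^ argR)); for SUB it is the sign bit of ((argL ^ argR) & (argL ^ res)); the LOGIC ops always give V = 0.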
*/ +static +ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ) +{ + switch (cc_op) { + case ARM64G_CC_OP_COPY: { + /* (nzcv:28x0, unused, unused) */ + ULong vf = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1; + return vf; + } + case ARM64G_CC_OP_ADD32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + UInt res = argL + argR; + ULong vf = (ULong)(((res ^ argL) & (res ^ argR)) >> 31); + return vf; + } + case ARM64G_CC_OP_ADD64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong res = argL + argR; + ULong vf = ((res ^ argL) & (res ^ argR)) >> 63; + return vf; + } + case ARM64G_CC_OP_SUB32: { + /* (argL, argR, unused) */ + UInt argL = (UInt)cc_dep1; + UInt argR = (UInt)cc_dep2; + UInt res = argL - argR; + ULong vf = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31); + return vf; + } + case ARM64G_CC_OP_SUB64: { + /* (argL, argR, unused) */ + ULong argL = cc_dep1; + ULong argR = cc_dep2; + ULong res = argL - argR; + ULong vf = (((argL ^ argR) & (argL ^ res))) >> 63; + return vf; + } +//ZZ case ARMG_CC_OP_ADC: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt res = argL + argR + oldC; +//ZZ UInt vf = ((res ^ argL) & (res ^ argR)) >> 31; +//ZZ return vf; +//ZZ } +//ZZ case ARMG_CC_OP_SBB: { +//ZZ /* (argL, argR, oldC) */ +//ZZ UInt argL = cc_dep1; +//ZZ UInt argR = cc_dep2; +//ZZ UInt oldC = cc_dep3; +//ZZ vassert((oldC & ~1) == 0); +//ZZ UInt res = argL - argR - (oldC ^ 1); +//ZZ UInt vf = ((argL ^ argR) & (argL ^ res)) >> 31; +//ZZ return vf; +//ZZ } + case ARM64G_CC_OP_LOGIC32: + case ARM64G_CC_OP_LOGIC64: { + /* (res, unused, unused) */ + return 0; // V after logic is zero on arm64 + } +//ZZ case ARMG_CC_OP_MUL: { +//ZZ /* (res, unused, oldC:oldV) */ +//ZZ UInt oldV = (cc_dep3 >> 0) & 1; +//ZZ vassert((cc_dep3 & ~3) == 0); +//ZZ UInt vf = oldV; +//ZZ return vf; +//ZZ } +//ZZ case ARMG_CC_OP_MULL: { +//ZZ /* (resLo32, resHi32, oldC:oldV) */ +//ZZ UInt oldV = (cc_dep3 >> 0) & 1; +//ZZ vassert((cc_dep3 & ~3) == 0); +//ZZ UInt vf = oldV; +//ZZ return vf; +//ZZ } + default: + /* shouldn't really make these calls from generated code */ + vex_printf("arm64g_calculate_flag_v" + "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n", + cc_op, cc_dep1, cc_dep2, cc_dep3 ); + vpanic("arm64g_calculate_flag_v"); + } +} + + +/* CALLED FROM GENERATED CODE: CLEAN HELPER */ +/* Calculate NZCV from the supplied thunk components, in the positions + they appear in the CPSR, viz bits 31:28 for N Z C V respectively. + Returned bits 27:0 are zero. */ +ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ) +{ + ULong f; + ULong res = 0; + f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3); + res |= (f << ARM64G_CC_SHIFT_N); + f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3); + res |= (f << ARM64G_CC_SHIFT_Z); + f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3); + res |= (f << ARM64G_CC_SHIFT_C); + f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3); + res |= (f << ARM64G_CC_SHIFT_V); + return res; +} + +//ZZ +//ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */ +//ZZ /* Calculate the QC flag from the arguments, in the lowest bit +//ZZ of the word (bit 0). Urr, having this out of line is bizarre. +//ZZ Push back inline. 
*/ +//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2, +//ZZ UInt resR1, UInt resR2 ) +//ZZ { +//ZZ if (resL1 != resR1 || resL2 != resR2) +//ZZ return 1; +//ZZ else +//ZZ return 0; +//ZZ } + +/* CALLED FROM GENERATED CODE: CLEAN HELPER */ +/* Calculate the specified condition from the thunk components, in the + lowest bit of the word (bit 0). Returned bits 63:1 are zero. */ +ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */ + ULong cond_n_op , + ULong cc_dep1, + ULong cc_dep2, ULong cc_dep3 ) +{ + ULong cond = cond_n_op >> 4; + ULong cc_op = cond_n_op & 0xF; + ULong inv = cond & 1; + ULong nf, zf, vf, cf; + +# if PROFILE_NZCV_FLAGS + NOTE_EVAL(cc_op, cond); +# endif + + // vex_printf("XXXXXXXX %llx %llx %llx %llx\n", + // cond_n_op, cc_dep1, cc_dep2, cc_dep3); + + switch (cond) { + case ARM64CondEQ: // Z=1 => z + case ARM64CondNE: // Z=0 + zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3); + return inv ^ zf; + + case ARM64CondCS: // C=1 => c + case ARM64CondCC: // C=0 + cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3); + return inv ^ cf; + + case ARM64CondMI: // N=1 => n + case ARM64CondPL: // N=0 + nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3); + return inv ^ nf; + + case ARM64CondVS: // V=1 => v + case ARM64CondVC: // V=0 + vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3); + return inv ^ vf; + + case ARM64CondHI: // C=1 && Z=0 => c & ~z + case ARM64CondLS: // C=0 || Z=1 + cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3); + zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3); + return inv ^ (1 & (cf & ~zf)); + + case ARM64CondGE: // N=V => ~(n^v) + case ARM64CondLT: // N!=V + nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3); + vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3); + return inv ^ (1 & ~(nf ^ vf)); + + case ARM64CondGT: // Z=0 && N=V => ~z & ~(n^v) => ~(z | (n^v)) + case ARM64CondLE: // Z=1 || N!=V + nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3); + vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3); + zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3); + return inv ^ (1 & ~(zf | (nf ^ vf))); + + case ARM64CondAL: // 1 + case ARM64CondNV: // 1 + return 1; + + default: + /* shouldn't really make these calls from generated code */ + vex_printf("arm64g_calculate_condition(ARM64)" + "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n", + cond, cc_op, cc_dep1, cc_dep2, cc_dep3 ); + vpanic("armg_calculate_condition(ARM64)"); + } +} + + +/*---------------------------------------------------------------*/ +/*--- Flag-helpers translation-time function specialisers. ---*/ +/*--- These help iropt specialise calls the above run-time ---*/ +/*--- flags functions. ---*/ +/*---------------------------------------------------------------*/ + +/* Used by the optimiser to try specialisations. Returns an + equivalent expression, or NULL if none. 
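For example, a call encoding "EQ after SUB64" is folded below to unop(Iop_1Uto64, binop(Iop_CmpEQ64, argL, argR)).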
*/ + +static Bool isU64 ( IRExpr* e, ULong n ) +{ + return + toBool( e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U64 + && e->Iex.Const.con->Ico.U64 == n ); +} + +IRExpr* guest_arm64_spechelper ( const HChar* function_name, + IRExpr** args, + IRStmt** precedingStmts, + Int n_precedingStmts ) +{ +# define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) +# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) +# define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) +# define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) + + Int i, arity = 0; + for (i = 0; args[i]; i++) + arity++; +//ZZ # if 0 +//ZZ vex_printf("spec request:\n"); +//ZZ vex_printf(" %s ", function_name); +//ZZ for (i = 0; i < arity; i++) { +//ZZ vex_printf(" "); +//ZZ ppIRExpr(args[i]); +//ZZ } +//ZZ vex_printf("\n"); +//ZZ # endif + + /* --------- specialising "arm64g_calculate_condition" --------- */ + + if (vex_streq(function_name, "arm64g_calculate_condition")) { + + /* specialise calls to the "arm64g_calculate_condition" function. + Not sure whether this is strictly necessary, but: the + replacement IR must produce only the values 0 or 1. Bits + 63:1 are required to be zero. */ + IRExpr *cond_n_op, *cc_dep1, *cc_dep2 ; //, *cc_ndep; + vassert(arity == 4); + cond_n_op = args[0]; /* (ARM64Condcode << 4) | ARM64G_CC_OP_* */ + cc_dep1 = args[1]; + cc_dep2 = args[2]; + //cc_ndep = args[3]; + + /*---------------- SUB64 ----------------*/ + + /* 0, 1 */ + if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) { + /* EQ after SUB --> test argL == argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpEQ64, cc_dep1, cc_dep2)); + } + if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) { + /* NE after SUB --> test argL != argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpNE64, cc_dep1, cc_dep2)); + } + + /* 2, 3 */ + if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) { + /* CS after SUB --> test argL >=u argR + --> test argR <=u argL */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE64U, cc_dep2, cc_dep1)); + } + if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) { + /* CC after SUB --> test argL test argL <=u argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE64U, cc_dep1, cc_dep2)); + } + if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) { + /* HI after SUB --> test argL >u argR + --> test argR test argL test argL >=s argR + --> test argR <=s argL */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE64S, cc_dep2, cc_dep1)); + } + + /* 12, 13 */ + if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) { + /* GT after SUB --> test argL >s argR + --> test argR test argL <=s argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE64S, cc_dep1, cc_dep2)); + } + + /*---------------- SUB32 ----------------*/ + + /* 0, 1 */ + if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) { + /* EQ after SUB --> test argL == argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1), + unop(Iop_64to32, cc_dep2))); + } + if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) { + /* NE after SUB --> test argL != argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1), + unop(Iop_64to32, cc_dep2))); + } + + /* 2, 3 */ + if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) { + /* CS after SUB --> test argL >=u argR + --> test argR <=u argL */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2), + unop(Iop_64to32, cc_dep1))); + } + if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) { + /* CC after SUB --> test argL 
test argL <=u argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1), + unop(Iop_64to32, cc_dep2))); + } + if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) { + /* HI after SUB --> test argL >u argR + --> test argR test argL test argL >=s argR + --> test argR <=s argL */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2), + unop(Iop_64to32, cc_dep1))); + } + + /* 12, 13 */ + if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) { + /* GT after SUB --> test argL >s argR + --> test argR test argL <=s argR */ + return unop(Iop_1Uto64, + binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1), + unop(Iop_64to32, cc_dep2))); + } + +//ZZ /*---------------- SBB ----------------*/ +//ZZ +//ZZ if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) { +//ZZ /* This seems to happen a lot in softfloat code, eg __divdf3+140 */ +//ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */ +//ZZ /* HS after SBB (same as C after SBB below) +//ZZ --> oldC ? (argL >=u argR) : (argL >u argR) +//ZZ --> oldC ? (argR <=u argL) : (argR test res == 0 */ +//ZZ return unop(Iop_1Uto32, +//ZZ binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); +//ZZ } +//ZZ if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) { +//ZZ /* NE after LOGIC --> test res != 0 */ +//ZZ return unop(Iop_1Uto32, +//ZZ binop(Iop_CmpNE32, cc_dep1, mkU32(0))); +//ZZ } +//ZZ +//ZZ if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) { +//ZZ /* PL after LOGIC --> test (res >> 31) == 0 */ +//ZZ return unop(Iop_1Uto32, +//ZZ binop(Iop_CmpEQ32, +//ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)), +//ZZ mkU32(0))); +//ZZ } +//ZZ if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) { +//ZZ /* MI after LOGIC --> test (res >> 31) == 1 */ +//ZZ return unop(Iop_1Uto32, +//ZZ binop(Iop_CmpEQ32, +//ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)), +//ZZ mkU32(1))); +//ZZ } + + /*---------------- COPY ----------------*/ + + if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) { + /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */ + return binop(Iop_And64, + binop(Iop_Shr64, cc_dep1, + mkU8(ARM64G_CC_SHIFT_Z)), + mkU64(1)); + } + if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) { + /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */ + return binop(Iop_And64, + binop(Iop_Xor64, + binop(Iop_Shr64, cc_dep1, + mkU8(ARM64G_CC_SHIFT_Z)), + mkU64(1)), + mkU64(1)); + } + +//ZZ /*----------------- AL -----------------*/ +//ZZ +//ZZ /* A critically important case for Thumb code. +//ZZ +//ZZ What we're trying to spot is the case where cond_n_op is an +//ZZ expression of the form Or32(..., 0xE0) since that means the +//ZZ caller is asking for CondAL and we can simply return 1 +//ZZ without caring what the ... part is. This is a potentially +//ZZ dodgy kludge in that it assumes that the ... part has zeroes +//ZZ in bits 7:4, so that the result of the Or32 is guaranteed to +//ZZ be 0xE in bits 7:4. Given that the places where this first +//ZZ arg are constructed (in guest_arm_toIR.c) are very +//ZZ constrained, we can get away with this. To make this +//ZZ guaranteed safe would require to have a new primop, Slice44 +//ZZ or some such, thusly +//ZZ +//ZZ Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0] +//ZZ +//ZZ and we would then look for Slice44(0xE0, ...) +//ZZ which would give the required safety property. 
+//ZZ +//ZZ It would be infeasibly expensive to scan backwards through +//ZZ the entire block looking for an assignment to the temp, so +//ZZ just look at the previous 16 statements. That should find it +//ZZ if it is an interesting case, as a result of how the +//ZZ boilerplate guff at the start of each Thumb insn translation +//ZZ is made. +//ZZ */ +//ZZ if (cond_n_op->tag == Iex_RdTmp) { +//ZZ Int j; +//ZZ IRTemp look_for = cond_n_op->Iex.RdTmp.tmp; +//ZZ Int limit = n_precedingStmts - 16; +//ZZ if (limit < 0) limit = 0; +//ZZ if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit); +//ZZ for (j = n_precedingStmts - 1; j >= limit; j--) { +//ZZ IRStmt* st = precedingStmts[j]; +//ZZ if (st->tag == Ist_WrTmp +//ZZ && st->Ist.WrTmp.tmp == look_for +//ZZ && st->Ist.WrTmp.data->tag == Iex_Binop +//ZZ && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32 +//ZZ && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4))) +//ZZ return mkU32(1); +//ZZ } +//ZZ /* Didn't find any useful binding to the first arg +//ZZ in the previous 16 stmts. */ +//ZZ } + } + +//ZZ /* --------- specialising "armg_calculate_flag_c" --------- */ +//ZZ +//ZZ else +//ZZ if (vex_streq(function_name, "armg_calculate_flag_c")) { +//ZZ +//ZZ /* specialise calls to the "armg_calculate_flag_c" function. +//ZZ Note that the returned value must be either 0 or 1; nonzero +//ZZ bits 31:1 are not allowed. In turn, incoming oldV and oldC +//ZZ values (from the thunk) are assumed to have bits 31:1 +//ZZ clear. */ +//ZZ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; +//ZZ vassert(arity == 4); +//ZZ cc_op = args[0]; /* ARMG_CC_OP_* */ +//ZZ cc_dep1 = args[1]; +//ZZ cc_dep2 = args[2]; +//ZZ cc_ndep = args[3]; +//ZZ +//ZZ if (isU32(cc_op, ARMG_CC_OP_LOGIC)) { +//ZZ /* Thunk args are (result, shco, oldV) */ +//ZZ /* C after LOGIC --> shco */ +//ZZ return cc_dep2; +//ZZ } +//ZZ +//ZZ if (isU32(cc_op, ARMG_CC_OP_SUB)) { +//ZZ /* Thunk args are (argL, argR, unused) */ +//ZZ /* C after SUB --> argL >=u argR +//ZZ --> argR <=u argL */ +//ZZ return unop(Iop_1Uto32, +//ZZ binop(Iop_CmpLE32U, cc_dep2, cc_dep1)); +//ZZ } +//ZZ +//ZZ if (isU32(cc_op, ARMG_CC_OP_SBB)) { +//ZZ /* This happens occasionally in softfloat code, eg __divdf3+140 */ +//ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */ +//ZZ /* C after SBB (same as HS after SBB above) +//ZZ --> oldC ? (argL >=u argR) : (argL >u argR) +//ZZ --> oldC ? 
(argR <=u argL) : (argR oldV */ +//ZZ return cc_ndep; +//ZZ } +//ZZ +//ZZ if (isU32(cc_op, ARMG_CC_OP_SUB)) { +//ZZ /* Thunk args are (argL, argR, unused) */ +//ZZ /* V after SUB +//ZZ --> let res = argL - argR +//ZZ in ((argL ^ argR) & (argL ^ res)) >> 31 +//ZZ --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31 +//ZZ */ +//ZZ IRExpr* argL = cc_dep1; +//ZZ IRExpr* argR = cc_dep2; +//ZZ return +//ZZ binop(Iop_Shr32, +//ZZ binop(Iop_And32, +//ZZ binop(Iop_Xor32, argL, argR), +//ZZ binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR)) +//ZZ ), +//ZZ mkU8(31) +//ZZ ); +//ZZ } +//ZZ +//ZZ if (isU32(cc_op, ARMG_CC_OP_SBB)) { +//ZZ /* This happens occasionally in softfloat code, eg __divdf3+140 */ +//ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */ +//ZZ /* V after SBB +//ZZ --> let res = argL - argR - (oldC ^ 1) +//ZZ in (argL ^ argR) & (argL ^ res) & 1 +//ZZ */ +//ZZ return +//ZZ binop( +//ZZ Iop_And32, +//ZZ binop( +//ZZ Iop_And32, +//ZZ // argL ^ argR +//ZZ binop(Iop_Xor32, cc_dep1, cc_dep2), +//ZZ // argL ^ (argL - argR - (oldC ^ 1)) +//ZZ binop(Iop_Xor32, +//ZZ cc_dep1, +//ZZ binop(Iop_Sub32, +//ZZ binop(Iop_Sub32, cc_dep1, cc_dep2), +//ZZ binop(Iop_Xor32, cc_ndep, mkU32(1))) +//ZZ ) +//ZZ ), +//ZZ mkU32(1) +//ZZ ); +//ZZ } +//ZZ +//ZZ } + +# undef unop +# undef binop +# undef mkU64 +# undef mkU8 + + return NULL; +} + + +/*----------------------------------------------*/ +/*--- The exported fns .. ---*/ +/*----------------------------------------------*/ + +//ZZ /* VISIBLE TO LIBVEX CLIENT */ +//ZZ #if 0 +//ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native, +//ZZ /*OUT*/VexGuestARMState* vex_state ) +//ZZ { +//ZZ vassert(0); // FIXME +//ZZ +//ZZ /* Mask out everything except N Z V C. */ +//ZZ flags_native +//ZZ &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C); +//ZZ +//ZZ vex_state->guest_CC_OP = ARMG_CC_OP_COPY; +//ZZ vex_state->guest_CC_DEP1 = flags_native; +//ZZ vex_state->guest_CC_DEP2 = 0; +//ZZ vex_state->guest_CC_NDEP = 0; +//ZZ } +//ZZ #endif + +/* VISIBLE TO LIBVEX CLIENT */ +ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state ) +{ + ULong nzcv = 0; + // NZCV + nzcv |= arm64g_calculate_flags_nzcv( + vex_state->guest_CC_OP, + vex_state->guest_CC_DEP1, + vex_state->guest_CC_DEP2, + vex_state->guest_CC_NDEP + ); + vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL)); +//ZZ // Q +//ZZ if (vex_state->guest_QFLAG32 > 0) +//ZZ cpsr |= (1 << 27); +//ZZ // GE +//ZZ if (vex_state->guest_GEFLAG0 > 0) +//ZZ cpsr |= (1 << 16); +//ZZ if (vex_state->guest_GEFLAG1 > 0) +//ZZ cpsr |= (1 << 17); +//ZZ if (vex_state->guest_GEFLAG2 > 0) +//ZZ cpsr |= (1 << 18); +//ZZ if (vex_state->guest_GEFLAG3 > 0) +//ZZ cpsr |= (1 << 19); +//ZZ // M +//ZZ cpsr |= (1 << 4); // 0b10000 means user-mode +//ZZ // J,T J (bit 24) is zero by initialisation above +//ZZ // T we copy from R15T[0] +//ZZ if (vex_state->guest_R15T & 1) +//ZZ cpsr |= (1 << 5); +//ZZ // ITSTATE we punt on for the time being. Could compute it +//ZZ // if needed though. +//ZZ // E, endianness, 0 (littleendian) from initialisation above +//ZZ // A,I,F disable some async exceptions. Not sure about these. +//ZZ // Leave as zero for the time being. 
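+   /* Note on the vassert above: the mask 0xFFFFFFFF0FFFFFFF has zero
+      bits only at positions 31:28, so the check insists that the
+      helper delivers N,Z,C,V in bits 31:28 and nothing anywhere
+      else. */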
+ return nzcv; +} + +/* VISIBLE TO LIBVEX CLIENT */ +void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state ) +{ + vex_bzero(vex_state, sizeof(*vex_state)); +//ZZ vex_state->host_EvC_FAILADDR = 0; +//ZZ vex_state->host_EvC_COUNTER = 0; +//ZZ +//ZZ vex_state->guest_R0 = 0; +//ZZ vex_state->guest_R1 = 0; +//ZZ vex_state->guest_R2 = 0; +//ZZ vex_state->guest_R3 = 0; +//ZZ vex_state->guest_R4 = 0; +//ZZ vex_state->guest_R5 = 0; +//ZZ vex_state->guest_R6 = 0; +//ZZ vex_state->guest_R7 = 0; +//ZZ vex_state->guest_R8 = 0; +//ZZ vex_state->guest_R9 = 0; +//ZZ vex_state->guest_R10 = 0; +//ZZ vex_state->guest_R11 = 0; +//ZZ vex_state->guest_R12 = 0; +//ZZ vex_state->guest_R13 = 0; +//ZZ vex_state->guest_R14 = 0; +//ZZ vex_state->guest_R15T = 0; /* NB: implies ARM mode */ +//ZZ + vex_state->guest_CC_OP = ARM64G_CC_OP_COPY; +//ZZ vex_state->guest_CC_DEP1 = 0; +//ZZ vex_state->guest_CC_DEP2 = 0; +//ZZ vex_state->guest_CC_NDEP = 0; +//ZZ vex_state->guest_QFLAG32 = 0; +//ZZ vex_state->guest_GEFLAG0 = 0; +//ZZ vex_state->guest_GEFLAG1 = 0; +//ZZ vex_state->guest_GEFLAG2 = 0; +//ZZ vex_state->guest_GEFLAG3 = 0; +//ZZ +//ZZ vex_state->guest_EMNOTE = EmNote_NONE; +//ZZ vex_state->guest_CMSTART = 0; +//ZZ vex_state->guest_CMLEN = 0; +//ZZ vex_state->guest_NRADDR = 0; +//ZZ vex_state->guest_IP_AT_SYSCALL = 0; +//ZZ +//ZZ vex_state->guest_D0 = 0; +//ZZ vex_state->guest_D1 = 0; +//ZZ vex_state->guest_D2 = 0; +//ZZ vex_state->guest_D3 = 0; +//ZZ vex_state->guest_D4 = 0; +//ZZ vex_state->guest_D5 = 0; +//ZZ vex_state->guest_D6 = 0; +//ZZ vex_state->guest_D7 = 0; +//ZZ vex_state->guest_D8 = 0; +//ZZ vex_state->guest_D9 = 0; +//ZZ vex_state->guest_D10 = 0; +//ZZ vex_state->guest_D11 = 0; +//ZZ vex_state->guest_D12 = 0; +//ZZ vex_state->guest_D13 = 0; +//ZZ vex_state->guest_D14 = 0; +//ZZ vex_state->guest_D15 = 0; +//ZZ vex_state->guest_D16 = 0; +//ZZ vex_state->guest_D17 = 0; +//ZZ vex_state->guest_D18 = 0; +//ZZ vex_state->guest_D19 = 0; +//ZZ vex_state->guest_D20 = 0; +//ZZ vex_state->guest_D21 = 0; +//ZZ vex_state->guest_D22 = 0; +//ZZ vex_state->guest_D23 = 0; +//ZZ vex_state->guest_D24 = 0; +//ZZ vex_state->guest_D25 = 0; +//ZZ vex_state->guest_D26 = 0; +//ZZ vex_state->guest_D27 = 0; +//ZZ vex_state->guest_D28 = 0; +//ZZ vex_state->guest_D29 = 0; +//ZZ vex_state->guest_D30 = 0; +//ZZ vex_state->guest_D31 = 0; +//ZZ +//ZZ /* ARM encoded; zero is the default as it happens (result flags +//ZZ (NZCV) cleared, FZ disabled, round to nearest, non-vector mode, +//ZZ all exns masked, all exn sticky bits cleared). */ +//ZZ vex_state->guest_FPSCR = 0; +//ZZ +//ZZ vex_state->guest_TPIDRURO = 0; +//ZZ +//ZZ /* Not in a Thumb IT block. */ +//ZZ vex_state->guest_ITSTATE = 0; +//ZZ +//ZZ vex_state->padding1 = 0; +//ZZ vex_state->padding2 = 0; +//ZZ vex_state->padding3 = 0; +//ZZ vex_state->padding4 = 0; +//ZZ vex_state->padding5 = 0; +} + + +/*-----------------------------------------------------------*/ +/*--- Describing the arm guest state, for the benefit ---*/ +/*--- of iropt and instrumenters. ---*/ +/*-----------------------------------------------------------*/ + +/* Figure out if any part of the guest state contained in minoff + .. maxoff requires precise memory exceptions. If in doubt return + True (but this generates significantly slower code). + + We enforce precise exns for guest SP, PC, 29(FP), 30(LR). + That might be overkill (for 29 and 30); I don't know. 
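+
+   As a worked illustration (the offsets here are made up purely for
+   the example): if guest_XSP were at offset 256, the watched byte
+   range would be [256 .. 263]; a Put covering [264 .. 271] satisfies
+   "minoff > xsp_max", so by itself it does not force precise
+   exceptions, whereas a Put touching any of those eight bytes does.
+   PC, X29 and X30 are tested with the same interval check.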
+*/ +Bool guest_arm64_state_requires_precise_mem_exns ( Int minoff, + Int maxoff) +{ + Int xsp_min = offsetof(VexGuestARM64State, guest_XSP); + Int xsp_max = xsp_min + 8 - 1; + Int pc_min = offsetof(VexGuestARM64State, guest_PC); + Int pc_max = pc_min + 8 - 1; + + if (maxoff < xsp_min || minoff > xsp_max) { + /* no overlap with xsp */ + if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess) + return False; // We only need to check stack pointer. + } else { + return True; + } + + if (maxoff < pc_min || minoff > pc_max) { + /* no overlap with pc */ + } else { + return True; + } + + /* Guessing that we need PX for FP, but I don't really know. */ + Int x29_min = offsetof(VexGuestARM64State, guest_X29); + Int x29_max = x29_min + 8 - 1; + + if (maxoff < x29_min || minoff > x29_max) { + /* no overlap with x29 */ + } else { + return True; + } + + /* Guessing that we need PX for LR, but I don't really know. */ + Int x30_min = offsetof(VexGuestARM64State, guest_X30); + Int x30_max = x30_min + 8 - 1; + + if (maxoff < x30_min || minoff > x30_max) { + /* no overlap with r30 */ + } else { + return True; + } + + return False; +} + + +#define ALWAYSDEFD(field) \ + { offsetof(VexGuestARM64State, field), \ + (sizeof ((VexGuestARM64State*)0)->field) } +VexGuestLayout + arm64Guest_layout + = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestARM64State), + + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestARM64State,guest_XSP), + .sizeof_SP = 8, + + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestARM64State,guest_PC), + .sizeof_IP = 8, + + /* Describe any sections to be regarded by Memcheck as + 'always-defined'. */ + .n_alwaysDefd = 10, + + /* flags thunk: OP is always defd, whereas DEP1 and DEP2 + have to be tracked. See detailed comment in gdefs.h on + meaning of thunk fields. */ + .alwaysDefd + = { /* 0 */ ALWAYSDEFD(guest_PC), + /* 1 */ ALWAYSDEFD(guest_CC_OP), + /* 2 */ ALWAYSDEFD(guest_CC_NDEP), + /* 3 */ ALWAYSDEFD(guest_EMNOTE), + /* 4 */ ALWAYSDEFD(guest_CMSTART), + /* 5 */ ALWAYSDEFD(guest_CMLEN), + /* 6 */ ALWAYSDEFD(guest_NRADDR), + /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL), + /* 8 */ ALWAYSDEFD(guest_FPCR), + /* 9 */ ALWAYSDEFD(guest_FPSR) + } + }; + + +/*---------------------------------------------------------------*/ +/*--- end guest_arm64_helpers.c ---*/ +/*---------------------------------------------------------------*/ Index: priv/guest_arm64_toIR.c =================================================================== --- priv/guest_arm64_toIR.c (.../tags/VEX_3_9_0) (revision 0) +++ priv/guest_arm64_toIR.c (.../trunk) (revision 2863) @@ -0,0 +1,7856 @@ +/* -*- mode: C; c-basic-offset: 3; -*- */ + +/*--------------------------------------------------------------------*/ +/*--- begin guest_arm64_toIR.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2013-2013 OpenWorks + info@open-works.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +//ZZ /* XXXX thumb to check: +//ZZ that all cases where putIRegT writes r15, we generate a jump. +//ZZ +//ZZ All uses of newTemp assign to an IRTemp and not a UInt +//ZZ +//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is +//ZZ backed out before the memory op, and restored afterwards. This +//ZZ needs to happen even after we go uncond. (and for sure it doesn't +//ZZ happen for VFP loads/stores right now). +//ZZ +//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we +//ZZ should. +//ZZ +//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by +//ZZ taking into account the number of insns guarded by an IT. +//ZZ +//ZZ remove the nasty hack, in the spechelper, of looking for Or32(..., +//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead +//ZZ use Slice44 as specified in comments in the spechelper. +//ZZ +//ZZ add specialisations for armg_calculate_flag_c and _v, as they +//ZZ are moderately often needed in Thumb code. +//ZZ +//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong. +//ZZ +//ZZ Correctness (obscure): in m_transtab, when invalidating code +//ZZ address ranges, invalidate up to 18 bytes after the end of the +//ZZ range. This is because the ITSTATE optimisation at the top of +//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any +//ZZ given instruction, and so might depend on the invalidated area. +//ZZ */ +//ZZ +//ZZ /* Limitations, etc +//ZZ +//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD. +//ZZ These instructions are non-restartable in the case where the +//ZZ transfer(s) fault. +//ZZ +//ZZ - SWP: the restart jump back is Ijk_Boring; it should be +//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in +//ZZ guest_x86_toIR.c. +//ZZ */ + +/* "Special" instructions. + + This instruction decoder can decode four special instructions + which mean nothing natively (are no-ops as far as regs/mem are + concerned) but have meaning for supporting Valgrind. A special + instruction is flagged by a 16-byte preamble: + + 93CC0D8C 93CC358C 93CCCD8C 93CCF58C + (ror x12, x12, #3; ror x12, x12, #13 + ror x12, x12, #51; ror x12, x12, #61) + + Following that, one of the following 3 are allowed + (standard interpretation in parentheses): + + AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 ) + AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR + AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8 + AA090129 (orr x9,x9,x9) IR injection + + Any other bytes following the 16-byte preamble are illegal and + constitute a failure in instruction decoding. This all assumes + that the preamble will never occur except in specific code + fragments designed for Valgrind to catch. +*/ + +/* Translates ARM64 code to IR. 
*/ + +#include "libvex_basictypes.h" +#include "libvex_ir.h" +#include "libvex.h" +#include "libvex_guest_arm64.h" + +#include "main_util.h" +#include "main_globals.h" +#include "guest_generic_bb_to_IR.h" +#include "guest_arm64_defs.h" + + +/*------------------------------------------------------------*/ +/*--- Globals ---*/ +/*------------------------------------------------------------*/ + +/* These are set at the start of the translation of a instruction, so + that we don't have to pass them around endlessly. CONST means does + not change during translation of the instruction. +*/ + +/* CONST: is the host bigendian? We need to know this in order to do + sub-register accesses to the SIMD/FP registers correctly. */ +static Bool host_is_bigendian; + +/* CONST: The guest address for the instruction currently being + translated. */ +static Addr64 guest_PC_curr_instr; + +/* MOD: The IRSB* into which we're generating code. */ +static IRSB* irsb; + + +/*------------------------------------------------------------*/ +/*--- Debugging output ---*/ +/*------------------------------------------------------------*/ + +#define DIP(format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_printf(format, ## args) + +#define DIS(buf, format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_sprintf(buf, format, ## args) + + +/*------------------------------------------------------------*/ +/*--- Helper bits and pieces for deconstructing the ---*/ +/*--- arm insn stream. ---*/ +/*------------------------------------------------------------*/ + +/* Do a little-endian load of a 32-bit word, regardless of the + endianness of the underlying host. */ +static inline UInt getUIntLittleEndianly ( UChar* p ) +{ + UInt w = 0; + w = (w << 8) | p[3]; + w = (w << 8) | p[2]; + w = (w << 8) | p[1]; + w = (w << 8) | p[0]; + return w; +} + +/* Sign extend a N-bit value up to 64 bits, by copying + bit N-1 into all higher positions. */ +static ULong sx_to_64 ( ULong x, UInt n ) +{ + vassert(n > 1 && n < 64); + Long r = (Long)x; + r = (r << (64-n)) >> (64-n); + return (ULong)r; +} + +//ZZ /* Do a little-endian load of a 16-bit word, regardless of the +//ZZ endianness of the underlying host. 
*/ +//ZZ static inline UShort getUShortLittleEndianly ( UChar* p ) +//ZZ { +//ZZ UShort w = 0; +//ZZ w = (w << 8) | p[1]; +//ZZ w = (w << 8) | p[0]; +//ZZ return w; +//ZZ } +//ZZ +//ZZ static UInt ROR32 ( UInt x, UInt sh ) { +//ZZ vassert(sh >= 0 && sh < 32); +//ZZ if (sh == 0) +//ZZ return x; +//ZZ else +//ZZ return (x << (32-sh)) | (x >> sh); +//ZZ } +//ZZ +//ZZ static Int popcount32 ( UInt x ) +//ZZ { +//ZZ Int res = 0, i; +//ZZ for (i = 0; i < 32; i++) { +//ZZ res += (x & 1); +//ZZ x >>= 1; +//ZZ } +//ZZ return res; +//ZZ } +//ZZ +//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b ) +//ZZ { +//ZZ UInt mask = 1 << ix; +//ZZ x &= ~mask; +//ZZ x |= ((b << ix) & mask); +//ZZ return x; +//ZZ } + +#define BITS2(_b1,_b0) \ + (((_b1) << 1) | (_b0)) + +#define BITS3(_b2,_b1,_b0) \ + (((_b2) << 2) | ((_b1) << 1) | (_b0)) + +#define BITS4(_b3,_b2,_b1,_b0) \ + (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0)) + +#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ + ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \ + | BITS4((_b3),(_b2),(_b1),(_b0))) + +#define BITS5(_b4,_b3,_b2,_b1,_b0) \ + (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0))) +#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \ + (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) +#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ + (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) + +#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ + (((_b8) << 8) \ + | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) + +#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ + (((_b9) << 9) | ((_b8) << 8) \ + | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) + +#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ + (((_b10) << 10) \ + | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) + +#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ + (((_b11) << 11) \ + | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) + +// produces _uint[_bMax:_bMin] +#define SLICE_UInt(_uint,_bMax,_bMin) \ + (( ((UInt)(_uint)) >> (_bMin)) \ + & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)) + + +/*------------------------------------------------------------*/ +/*--- Helper bits and pieces for creating IR fragments. ---*/ +/*------------------------------------------------------------*/ + +static IRExpr* mkV128 ( UShort w ) +{ + return IRExpr_Const(IRConst_V128(w)); +} + +static IRExpr* mkU64 ( ULong i ) +{ + return IRExpr_Const(IRConst_U64(i)); +} + +static IRExpr* mkU32 ( UInt i ) +{ + return IRExpr_Const(IRConst_U32(i)); +} + +static IRExpr* mkU8 ( UInt i ) +{ + vassert(i < 256); + return IRExpr_Const(IRConst_U8( (UChar)i )); +} + +static IRExpr* mkexpr ( IRTemp tmp ) +{ + return IRExpr_RdTmp(tmp); +} + +static IRExpr* unop ( IROp op, IRExpr* a ) +{ + return IRExpr_Unop(op, a); +} + +static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) +{ + return IRExpr_Binop(op, a1, a2); +} + +static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) +{ + return IRExpr_Triop(op, a1, a2, a3); +} + +static IRExpr* loadLE ( IRType ty, IRExpr* addr ) +{ + return IRExpr_Load(Iend_LE, ty, addr); +} + +/* Add a statement to the list held by "irbb". 
*/ +static void stmt ( IRStmt* st ) +{ + addStmtToIRSB( irsb, st ); +} + +static void assign ( IRTemp dst, IRExpr* e ) +{ + stmt( IRStmt_WrTmp(dst, e) ); +} + +static void storeLE ( IRExpr* addr, IRExpr* data ) +{ + stmt( IRStmt_Store(Iend_LE, addr, data) ); +} + +//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT ) +//ZZ { +//ZZ if (guardT == IRTemp_INVALID) { +//ZZ /* unconditional */ +//ZZ storeLE(addr, data); +//ZZ } else { +//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data, +//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) ); +//ZZ } +//ZZ } +//ZZ +//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt, +//ZZ IRExpr* addr, IRExpr* alt, +//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) +//ZZ { +//ZZ if (guardT == IRTemp_INVALID) { +//ZZ /* unconditional */ +//ZZ IRExpr* loaded = NULL; +//ZZ switch (cvt) { +//ZZ case ILGop_Ident32: +//ZZ loaded = loadLE(Ity_I32, addr); break; +//ZZ case ILGop_8Uto32: +//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break; +//ZZ case ILGop_8Sto32: +//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break; +//ZZ case ILGop_16Uto32: +//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break; +//ZZ case ILGop_16Sto32: +//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break; +//ZZ default: +//ZZ vassert(0); +//ZZ } +//ZZ vassert(loaded != NULL); +//ZZ assign(dst, loaded); +//ZZ } else { +//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the +//ZZ loaded data before putting the data in 'dst'. If the load +//ZZ does not take place, 'alt' is placed directly in 'dst'. */ +//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt, +//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) ); +//ZZ } +//ZZ } + +/* Generate a new temporary of the given type. */ +static IRTemp newTemp ( IRType ty ) +{ + vassert(isPlausibleIRType(ty)); + return newIRTemp( irsb->tyenv, ty ); +} + +//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type +//ZZ IRRoundingMode. */ +//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) +//ZZ { +//ZZ return mkU32(Irrm_NEAREST); +//ZZ } +//ZZ +//ZZ /* Generate an expression for SRC rotated right by ROT. */ +//ZZ static IRExpr* genROR32( IRTemp src, Int rot ) +//ZZ { +//ZZ vassert(rot >= 0 && rot < 32); +//ZZ if (rot == 0) +//ZZ return mkexpr(src); +//ZZ return +//ZZ binop(Iop_Or32, +//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)), +//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot))); +//ZZ } +//ZZ +//ZZ static IRExpr* mkU128 ( ULong i ) +//ZZ { +//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i)); +//ZZ } +//ZZ +//ZZ /* Generate a 4-aligned version of the given expression if +//ZZ the given condition is true. Else return it unchanged. */ +//ZZ static IRExpr* align4if ( IRExpr* e, Bool b ) +//ZZ { +//ZZ if (b) +//ZZ return binop(Iop_And32, e, mkU32(~3)); +//ZZ else +//ZZ return e; +//ZZ } + +/* Other IR construction helpers. 
*/
+static IROp mkAND ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_And32;
+      case Ity_I64: return Iop_And64;
+      default: vpanic("mkAND");
+   }
+}
+
+static IROp mkOR ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Or32;
+      case Ity_I64: return Iop_Or64;
+      default: vpanic("mkOR");
+   }
+}
+
+static IROp mkXOR ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Xor32;
+      case Ity_I64: return Iop_Xor64;
+      default: vpanic("mkXOR");
+   }
+}
+
+static IROp mkSHL ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Shl32;
+      case Ity_I64: return Iop_Shl64;
+      default: vpanic("mkSHL");
+   }
+}
+
+static IROp mkSHR ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Shr32;
+      case Ity_I64: return Iop_Shr64;
+      default: vpanic("mkSHR");
+   }
+}
+
+static IROp mkSAR ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Sar32;
+      case Ity_I64: return Iop_Sar64;
+      default: vpanic("mkSAR");
+   }
+}
+
+static IROp mkNOT ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Not32;
+      case Ity_I64: return Iop_Not64;
+      default: vpanic("mkNOT");
+   }
+}
+
+static IROp mkADD ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Add32;
+      case Ity_I64: return Iop_Add64;
+      default: vpanic("mkADD");
+   }
+}
+
+static IROp mkSUB ( IRType ty ) {
+   switch (ty) {
+      case Ity_I32: return Iop_Sub32;
+      case Ity_I64: return Iop_Sub64;
+      default: vpanic("mkSUB");
+   }
+}
+
+static IROp mkADDF ( IRType ty ) {
+   switch (ty) {
+      case Ity_F32: return Iop_AddF32;
+      case Ity_F64: return Iop_AddF64;
+      default: vpanic("mkADDF");
+   }
+}
+
+static IROp mkSUBF ( IRType ty ) {
+   switch (ty) {
+      case Ity_F32: return Iop_SubF32;
+      case Ity_F64: return Iop_SubF64;
+      default: vpanic("mkSUBF");
+   }
+}
+
+static IROp mkMULF ( IRType ty ) {
+   switch (ty) {
+      case Ity_F32: return Iop_MulF32;
+      case Ity_F64: return Iop_MulF64;
+      default: vpanic("mkMULF");
+   }
+}
+
+static IROp mkDIVF ( IRType ty ) {
+   switch (ty) {
+      case Ity_F32: return Iop_DivF32;
+      case Ity_F64: return Iop_DivF64;
+      default: vpanic("mkDIVF");
+   }
+}
+
+static IROp mkNEGF ( IRType ty ) {
+   switch (ty) {
+      case Ity_F32: return Iop_NegF32;
+      case Ity_F64: return Iop_NegF64;
+      default: vpanic("mkNEGF");
+   }
+}
+
+static IROp mkABSF ( IRType ty ) {
+   switch (ty) {
+      case Ity_F32: return Iop_AbsF32;
+      case Ity_F64: return Iop_AbsF64;
+      default: vpanic("mkABSF");
+   }
+}
+
+static IROp mkSQRTF ( IRType ty ) {
+   switch (ty) {
+      case Ity_F32: return Iop_SqrtF32;
+      case Ity_F64: return Iop_SqrtF64;
+      default: vpanic("mkSQRTF");
+   }
+}
+
+static IRExpr* mkU ( IRType ty, ULong imm ) {
+   switch (ty) {
+      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
+      case Ity_I64: return mkU64(imm);
+      default: vpanic("mkU");
+   }
+}
+
+/* Generate IR to create 'arg rotated right by imm', for sane values
+   of 'ty' and 'imm'. */
+static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
+{
+   UInt w = 0;
+   if (ty == Ity_I64) {
+      w = 64;
+   } else {
+      vassert(ty == Ity_I32);
+      w = 32;
+   }
+   vassert(w != 0);
+   vassert(imm < w);
+   if (imm == 0) {
+      return arg;
+   }
+   IRTemp res = newTemp(ty);
+   assign(res, binop(mkOR(ty),
+                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
+                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
+   return res;
+}
+
+/* Generate IR to set the returned temp to either all-zeroes or
+   all ones, as a copy of arg<imm>.
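+   For example, with w == 32 and imm == 15: shifting left by
+   w-1-imm == 16 moves bit 15 of 'arg' into the sign position, and the
+   arithmetic shift right by w-1 == 31 then smears that single bit
+   across all 32 positions, giving 0x00000000 or 0xFFFFFFFF.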
*/ +static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm ) +{ + UInt w = 0; + if (ty == Ity_I64) { + w = 64; + } else { + vassert(ty == Ity_I32); + w = 32; + } + vassert(w != 0); + vassert(imm < w); + IRTemp res = newTemp(ty); + assign(res, binop(mkSAR(ty), + binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)), + mkU8(w - 1))); + return res; +} + +/* U-widen 8/16/32/64 bit int expr to 64. */ +static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e ) +{ + switch (srcTy) { + case Ity_I64: return e; + case Ity_I32: return unop(Iop_32Uto64, e); + case Ity_I16: return unop(Iop_16Uto64, e); + case Ity_I8: return unop(Iop_8Uto64, e); + default: vpanic("widenUto64(arm64)"); + } +} + +/* Narrow 64 bit int expr to 8/16/32/64. Clearly only some + of these combinations make sense. */ +static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e ) +{ + switch (dstTy) { + case Ity_I64: return e; + case Ity_I32: return unop(Iop_64to32, e); + case Ity_I16: return unop(Iop_64to16, e); + case Ity_I8: return unop(Iop_64to8, e); + default: vpanic("narrowFrom64(arm64)"); + } +} + + +/*------------------------------------------------------------*/ +/*--- Helpers for accessing guest registers. ---*/ +/*------------------------------------------------------------*/ + +#define OFFB_X0 offsetof(VexGuestARM64State,guest_X0) +#define OFFB_X1 offsetof(VexGuestARM64State,guest_X1) +#define OFFB_X2 offsetof(VexGuestARM64State,guest_X2) +#define OFFB_X3 offsetof(VexGuestARM64State,guest_X3) +#define OFFB_X4 offsetof(VexGuestARM64State,guest_X4) +#define OFFB_X5 offsetof(VexGuestARM64State,guest_X5) +#define OFFB_X6 offsetof(VexGuestARM64State,guest_X6) +#define OFFB_X7 offsetof(VexGuestARM64State,guest_X7) +#define OFFB_X8 offsetof(VexGuestARM64State,guest_X8) +#define OFFB_X9 offsetof(VexGuestARM64State,guest_X9) +#define OFFB_X10 offsetof(VexGuestARM64State,guest_X10) +#define OFFB_X11 offsetof(VexGuestARM64State,guest_X11) +#define OFFB_X12 offsetof(VexGuestARM64State,guest_X12) +#define OFFB_X13 offsetof(VexGuestARM64State,guest_X13) +#define OFFB_X14 offsetof(VexGuestARM64State,guest_X14) +#define OFFB_X15 offsetof(VexGuestARM64State,guest_X15) +#define OFFB_X16 offsetof(VexGuestARM64State,guest_X16) +#define OFFB_X17 offsetof(VexGuestARM64State,guest_X17) +#define OFFB_X18 offsetof(VexGuestARM64State,guest_X18) +#define OFFB_X19 offsetof(VexGuestARM64State,guest_X19) +#define OFFB_X20 offsetof(VexGuestARM64State,guest_X20) +#define OFFB_X21 offsetof(VexGuestARM64State,guest_X21) +#define OFFB_X22 offsetof(VexGuestARM64State,guest_X22) +#define OFFB_X23 offsetof(VexGuestARM64State,guest_X23) +#define OFFB_X24 offsetof(VexGuestARM64State,guest_X24) +#define OFFB_X25 offsetof(VexGuestARM64State,guest_X25) +#define OFFB_X26 offsetof(VexGuestARM64State,guest_X26) +#define OFFB_X27 offsetof(VexGuestARM64State,guest_X27) +#define OFFB_X28 offsetof(VexGuestARM64State,guest_X28) +#define OFFB_X29 offsetof(VexGuestARM64State,guest_X29) +#define OFFB_X30 offsetof(VexGuestARM64State,guest_X30) + +#define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP) +#define OFFB_PC offsetof(VexGuestARM64State,guest_PC) + +#define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP) +#define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1) +#define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2) +#define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP) + +#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0) +#define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR) + +#define OFFB_Q0 
offsetof(VexGuestARM64State,guest_Q0) +#define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1) +#define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2) +#define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3) +#define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4) +#define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5) +#define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6) +#define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7) +#define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8) +#define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9) +#define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10) +#define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11) +#define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12) +#define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13) +#define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14) +#define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15) +#define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16) +#define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17) +#define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18) +#define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19) +#define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20) +#define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21) +#define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22) +#define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23) +#define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24) +#define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25) +#define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26) +#define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27) +#define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28) +#define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29) +#define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30) +#define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31) + +#define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR) +#define OFFB_FPSR offsetof(VexGuestARM64State,guest_FPSR) +//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO) +//ZZ #define OFFB_ITSTATE offsetof(VexGuestARMState,guest_ITSTATE) +//ZZ #define OFFB_QFLAG32 offsetof(VexGuestARMState,guest_QFLAG32) +//ZZ #define OFFB_GEFLAG0 offsetof(VexGuestARMState,guest_GEFLAG0) +//ZZ #define OFFB_GEFLAG1 offsetof(VexGuestARMState,guest_GEFLAG1) +//ZZ #define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2) +//ZZ #define OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3) + +#define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART) +#define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN) + + +/* ---------------- Integer registers ---------------- */ + +static Int offsetIReg64 ( UInt iregNo ) +{ + /* Do we care about endianness here? We do if sub-parts of integer + registers are accessed. 
*/ + switch (iregNo) { + case 0: return OFFB_X0; + case 1: return OFFB_X1; + case 2: return OFFB_X2; + case 3: return OFFB_X3; + case 4: return OFFB_X4; + case 5: return OFFB_X5; + case 6: return OFFB_X6; + case 7: return OFFB_X7; + case 8: return OFFB_X8; + case 9: return OFFB_X9; + case 10: return OFFB_X10; + case 11: return OFFB_X11; + case 12: return OFFB_X12; + case 13: return OFFB_X13; + case 14: return OFFB_X14; + case 15: return OFFB_X15; + case 16: return OFFB_X16; + case 17: return OFFB_X17; + case 18: return OFFB_X18; + case 19: return OFFB_X19; + case 20: return OFFB_X20; + case 21: return OFFB_X21; + case 22: return OFFB_X22; + case 23: return OFFB_X23; + case 24: return OFFB_X24; + case 25: return OFFB_X25; + case 26: return OFFB_X26; + case 27: return OFFB_X27; + case 28: return OFFB_X28; + case 29: return OFFB_X29; + case 30: return OFFB_X30; + /* but not 31 */ + default: vassert(0); + } +} + +static Int offsetIReg64orSP ( UInt iregNo ) +{ + return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo); +} + +static const HChar* nameIReg64orZR ( UInt iregNo ) +{ + vassert(iregNo < 32); + static const HChar* names[32] + = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" }; + return names[iregNo]; +} + +static const HChar* nameIReg64orSP ( UInt iregNo ) +{ + if (iregNo == 31) { + return "sp"; + } + vassert(iregNo < 31); + return nameIReg64orZR(iregNo); +} + +static IRExpr* getIReg64orSP ( UInt iregNo ) +{ + vassert(iregNo < 32); + return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); +} + +static IRExpr* getIReg64orZR ( UInt iregNo ) +{ + if (iregNo == 31) { + return mkU64(0); + } + vassert(iregNo < 31); + return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); +} + +static void putIReg64orSP ( UInt iregNo, IRExpr* e ) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); + stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); +} + +static void putIReg64orZR ( UInt iregNo, IRExpr* e ) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); + if (iregNo == 31) { + return; + } + vassert(iregNo < 31); + stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); +} + +static const HChar* nameIReg32orZR ( UInt iregNo ) +{ + vassert(iregNo < 32); + static const HChar* names[32] + = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", + "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", + "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", + "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" }; + return names[iregNo]; +} + +static const HChar* nameIReg32orSP ( UInt iregNo ) +{ + if (iregNo == 31) { + return "wsp"; + } + vassert(iregNo < 31); + return nameIReg32orZR(iregNo); +} + +static IRExpr* getIReg32orSP ( UInt iregNo ) +{ + vassert(iregNo < 32); + return unop(Iop_64to32, + IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); +} + +static IRExpr* getIReg32orZR ( UInt iregNo ) +{ + if (iregNo == 31) { + return mkU32(0); + } + vassert(iregNo < 31); + return unop(Iop_64to32, + IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); +} + +static void putIReg32orSP ( UInt iregNo, IRExpr* e ) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); + stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) ); +} + +static void putIReg32orZR ( UInt iregNo, IRExpr* e ) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); + if (iregNo == 31) { + return; + } + vassert(iregNo < 31); + stmt( IRStmt_Put(offsetIReg64orSP(iregNo), 
unop(Iop_32Uto64, e)) ); +} + +static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo ) +{ + vassert(is64 == True || is64 == False); + return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo); +} + +static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo ) +{ + vassert(is64 == True || is64 == False); + return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo); +} + +static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo ) +{ + vassert(is64 == True || is64 == False); + return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo); +} + +static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e ) +{ + vassert(is64 == True || is64 == False); + if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e); +} + +static void putPC ( IRExpr* e ) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); + stmt( IRStmt_Put(OFFB_PC, e) ); +} + + +/* ---------------- Vector (Q) registers ---------------- */ + +static Int offsetQReg128 ( UInt qregNo ) +{ + /* We don't care about endianness at this point. It only becomes + relevant when dealing with sections of these registers.*/ + switch (qregNo) { + case 0: return OFFB_Q0; + case 1: return OFFB_Q1; + case 2: return OFFB_Q2; + case 3: return OFFB_Q3; + case 4: return OFFB_Q4; + case 5: return OFFB_Q5; + case 6: return OFFB_Q6; + case 7: return OFFB_Q7; + case 8: return OFFB_Q8; + case 9: return OFFB_Q9; + case 10: return OFFB_Q10; + case 11: return OFFB_Q11; + case 12: return OFFB_Q12; + case 13: return OFFB_Q13; + case 14: return OFFB_Q14; + case 15: return OFFB_Q15; + case 16: return OFFB_Q16; + case 17: return OFFB_Q17; + case 18: return OFFB_Q18; + case 19: return OFFB_Q19; + case 20: return OFFB_Q20; + case 21: return OFFB_Q21; + case 22: return OFFB_Q22; + case 23: return OFFB_Q23; + case 24: return OFFB_Q24; + case 25: return OFFB_Q25; + case 26: return OFFB_Q26; + case 27: return OFFB_Q27; + case 28: return OFFB_Q28; + case 29: return OFFB_Q29; + case 30: return OFFB_Q30; + case 31: return OFFB_Q31; + default: vassert(0); + } +} + +/* Write to a complete Qreg. */ +static void putQReg128 ( UInt qregNo, IRExpr* e ) +{ + vassert(qregNo < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); + stmt( IRStmt_Put(offsetQReg128(qregNo), e) ); +} + +/* Read a complete Qreg. */ +static IRExpr* getQReg128 ( UInt qregNo ) +{ + vassert(qregNo < 32); + return IRExpr_Get(offsetQReg128(qregNo), Ity_V128); +} + +/* Produce the IR type for some sub-part of a vector. For 32- and 64- + bit sub-parts we can choose either integer or float types, and + choose float on the basis that that is the common use case and so + will give least interference with Put-to-Get forwarding later + on. */ +static IRType preferredVectorSubTypeFromSize ( UInt szB ) +{ + switch (szB) { + case 1: return Ity_I8; + case 2: return Ity_I16; + case 4: return Ity_I32; //Ity_F32; + case 8: return Ity_F64; + case 16: return Ity_V128; + default: vassert(0); + } +} + +/* Find the offset of the laneNo'th lane of type laneTy in the given + Qreg. Since the host is little-endian, the least significant lane + has the lowest offset. */ +static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo ) +{ + vassert(!host_is_bigendian); + Int base = offsetQReg128(qregNo); + /* Since the host is little-endian, the least significant lane + will be at the lowest address. */ + /* Restrict this to known types, so as to avoid silently accepting + stupid types. 
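+      As an example of the arithmetic below: lane 2 of an Ity_I16 value
+      in q7 lives at offsetQReg128(7) + 2*2, and its last byte (offset
+      5 within the register) still passes the maxOff < 16 sanity check.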
*/ + UInt laneSzB = 0; + switch (laneTy) { + case Ity_I8: laneSzB = 1; break; + case Ity_I16: laneSzB = 2; break; + case Ity_F32: case Ity_I32: laneSzB = 4; break; + case Ity_F64: case Ity_I64: laneSzB = 8; break; + case Ity_V128: laneSzB = 16; break; + default: break; + } + vassert(laneSzB > 0); + UInt minOff = laneNo * laneSzB; + UInt maxOff = minOff + laneSzB - 1; + vassert(maxOff < 16); + return base + minOff; +} + +/* Put to the least significant lane of a Qreg. */ +static void putQRegLO ( UInt qregNo, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(irsb->tyenv, e); + Int off = offsetQRegLane(qregNo, ty, 0); + switch (ty) { + case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: + case Ity_F32: case Ity_F64: case Ity_V128: + break; + default: + vassert(0); // Other cases are probably invalid + } + stmt(IRStmt_Put(off, e)); +} + +/* Get from the least significant lane of a Qreg. */ +static IRExpr* getQRegLO ( UInt qregNo, IRType ty ) +{ + Int off = offsetQRegLane(qregNo, ty, 0); + switch (ty) { + case Ity_I8: + case Ity_I16: + case Ity_I32: case Ity_I64: + case Ity_F32: case Ity_F64: case Ity_V128: + break; + default: + vassert(0); // Other cases are ATC + } + return IRExpr_Get(off, ty); +} + +static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy ) +{ + static const HChar* namesQ[32] + = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", + "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", + "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" }; + static const HChar* namesD[32] + = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" }; + static const HChar* namesS[32] + = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" }; + static const HChar* namesH[32] + = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", + "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", + "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23", + "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" }; + static const HChar* namesB[32] + = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", + "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15", + "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23", + "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" }; + vassert(qregNo < 32); + switch (sizeofIRType(laneTy)) { + case 1: return namesB[qregNo]; + case 2: return namesH[qregNo]; + case 4: return namesS[qregNo]; + case 8: return namesD[qregNo]; + case 16: return namesQ[qregNo]; + default: vassert(0); + } + /*NOTREACHED*/ +} + +static const HChar* nameQReg128 ( UInt qregNo ) +{ + return nameQRegLO(qregNo, Ity_V128); +} + +/* Find the offset of the most significant half (8 bytes) of the given + Qreg. This requires knowing the endianness of the host. 
*/ +static Int offsetQRegHI64 ( UInt qregNo ) +{ + return offsetQRegLane(qregNo, Ity_I64, 1); +} + +static IRExpr* getQRegHI64 ( UInt qregNo ) +{ + return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64); +} + +static void putQRegHI64 ( UInt qregNo, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(irsb->tyenv, e); + Int off = offsetQRegHI64(qregNo); + switch (ty) { + case Ity_I64: case Ity_F64: + break; + default: + vassert(0); // Other cases are plain wrong + } + stmt(IRStmt_Put(off, e)); +} + +/* Put to a specified lane of a Qreg. */ +static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e ) +{ + IRType laneTy = typeOfIRExpr(irsb->tyenv, e); + Int off = offsetQRegLane(qregNo, laneTy, laneNo); + switch (laneTy) { + case Ity_F64: case Ity_I64: + case Ity_I32: case Ity_F32: + case Ity_I16: + case Ity_I8: + break; + default: + vassert(0); // Other cases are ATC + } + stmt(IRStmt_Put(off, e)); +} + +/* Get from a specified lane of a Qreg. */ +static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy ) +{ + Int off = offsetQRegLane(qregNo, laneTy, laneNo); + switch (laneTy) { + case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: + case Ity_F64: + break; + default: + vassert(0); // Other cases are ATC + } + return IRExpr_Get(off, laneTy); +} + + +//ZZ /* ---------------- Misc registers ---------------- */ +//ZZ +//ZZ static void putMiscReg32 ( UInt gsoffset, +//ZZ IRExpr* e, /* :: Ity_I32 */ +//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */) +//ZZ { +//ZZ switch (gsoffset) { +//ZZ case OFFB_FPSCR: break; +//ZZ case OFFB_QFLAG32: break; +//ZZ case OFFB_GEFLAG0: break; +//ZZ case OFFB_GEFLAG1: break; +//ZZ case OFFB_GEFLAG2: break; +//ZZ case OFFB_GEFLAG3: break; +//ZZ default: vassert(0); /* awaiting more cases */ +//ZZ } +//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); +//ZZ +//ZZ if (guardT == IRTemp_INVALID) { +//ZZ /* unconditional write */ +//ZZ stmt(IRStmt_Put(gsoffset, e)); +//ZZ } else { +//ZZ stmt(IRStmt_Put( +//ZZ gsoffset, +//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)), +//ZZ e, IRExpr_Get(gsoffset, Ity_I32) ) +//ZZ )); +//ZZ } +//ZZ } +//ZZ +//ZZ static IRTemp get_ITSTATE ( void ) +//ZZ { +//ZZ ASSERT_IS_THUMB; +//ZZ IRTemp t = newTemp(Ity_I32); +//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32)); +//ZZ return t; +//ZZ } +//ZZ +//ZZ static void put_ITSTATE ( IRTemp t ) +//ZZ { +//ZZ ASSERT_IS_THUMB; +//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) ); +//ZZ } +//ZZ +//ZZ static IRTemp get_QFLAG32 ( void ) +//ZZ { +//ZZ IRTemp t = newTemp(Ity_I32); +//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32)); +//ZZ return t; +//ZZ } +//ZZ +//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT ) +//ZZ { +//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT ); +//ZZ } +//ZZ +//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program +//ZZ Status Register) to indicate that overflow or saturation occurred. +//ZZ Nb: t must be zero to denote no saturation, and any nonzero +//ZZ value to indicate saturation. */ +//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT ) +//ZZ { +//ZZ IRTemp old = get_QFLAG32(); +//ZZ IRTemp nyu = newTemp(Ity_I32); +//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) ); +//ZZ put_QFLAG32(nyu, condT); +//ZZ } + + +/* ---------------- FPCR stuff ---------------- */ + +/* Generate IR to get hold of the rounding mode bits in FPCR, and + convert them to IR format. Bind the final result to the + returned temp. 
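+
+   The net effect is equivalent to this plain-C sketch (illustration
+   only; the helper name is invented here and the FPCR/IR encodings
+   are the ones described in the function body):
+
+      UInt fpcr_rm_to_ir_rm ( UInt fpcr )
+      {
+         UInt rm = (fpcr >> 22) & 3;               /* ARM RMode field */
+         return ((rm & 1) << 1) | ((rm >> 1) & 1); /* swap bits 0, 1  */
+      }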
*/ +static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void ) +{ + /* The ARMvfp encoding for rounding mode bits is: + 00 to nearest + 01 to +infinity + 10 to -infinity + 11 to zero + We need to convert that to the IR encoding: + 00 to nearest (the default) + 10 to +infinity + 01 to -infinity + 11 to zero + Which can be done by swapping bits 0 and 1. + The rmode bits are at 23:22 in FPSCR. + */ + IRTemp armEncd = newTemp(Ity_I32); + IRTemp swapped = newTemp(Ity_I32); + /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that + we don't zero out bits 24 and above, since the assignment to + 'swapped' will mask them out anyway. */ + assign(armEncd, + binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22))); + /* Now swap them. */ + assign(swapped, + binop(Iop_Or32, + binop(Iop_And32, + binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)), + mkU32(2)), + binop(Iop_And32, + binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)), + mkU32(1)) + )); + return swapped; +} + + +/*------------------------------------------------------------*/ +/*--- Helpers for flag handling and conditional insns ---*/ +/*------------------------------------------------------------*/ + +static const HChar* nameARM64Condcode ( ARM64Condcode cond ) +{ + switch (cond) { + case ARM64CondEQ: return "eq"; + case ARM64CondNE: return "ne"; + case ARM64CondCS: return "cs"; // or 'hs' + case ARM64CondCC: return "cc"; // or 'lo' + case ARM64CondMI: return "mi"; + case ARM64CondPL: return "pl"; + case ARM64CondVS: return "vs"; + case ARM64CondVC: return "vc"; + case ARM64CondHI: return "hi"; + case ARM64CondLS: return "ls"; + case ARM64CondGE: return "ge"; + case ARM64CondLT: return "lt"; + case ARM64CondGT: return "gt"; + case ARM64CondLE: return "le"; + case ARM64CondAL: return "al"; + case ARM64CondNV: return "nv"; + default: vpanic("name_ARM64Condcode"); + } +} + +/* and a handy shorthand for it */ +static const HChar* nameCC ( ARM64Condcode cond ) { + return nameARM64Condcode(cond); +} + + +/* Build IR to calculate some particular condition from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type + Ity_I64, suitable for narrowing. Although the return type is + Ity_I64, the returned value is either 0 or 1. 'cond' must be + :: Ity_I64 and must denote the condition to compute in + bits 7:4, and be zero everywhere else. +*/ +static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond ) +{ + vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64); + /* And 'cond' had better produce a value in which only bits 7:4 are + nonzero. However, obviously we can't assert for that. */ + + /* So what we're constructing for the first argument is + "(cond << 4) | stored-operation". + However, as per comments above, 'cond' must be supplied + pre-shifted to this function. + + This pairing scheme requires that the ARM64_CC_OP_ values all fit + in 4 bits. Hence we are passing a (COND, OP) pair in the lowest + 8 bits of the first argument. */ + IRExpr** args + = mkIRExprVec_4( + binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond), + IRExpr_Get(OFFB_CC_DEP1, Ity_I64), + IRExpr_Get(OFFB_CC_DEP2, Ity_I64), + IRExpr_Get(OFFB_CC_NDEP, Ity_I64) + ); + IRExpr* call + = mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + "arm64g_calculate_condition", &arm64g_calculate_condition, + args + ); + + /* Exclude the requested condition, OP and NDEP from definedness + checking. We're only interested in DEP1 and DEP2. 
*/ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); + return call; +} + + +/* Build IR to calculate some particular condition from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type + Ity_I64, suitable for narrowing. Although the return type is + Ity_I64, the returned value is either 0 or 1. +*/ +static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond ) +{ + /* First arg is "(cond << 4) | condition". This requires that the + ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a + (COND, OP) pair in the lowest 8 bits of the first argument. */ + vassert(cond >= 0 && cond <= 15); + return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) ); +} + + +//ZZ /* Build IR to calculate just the carry flag from stored +//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: +//ZZ Ity_I32. */ +//ZZ static IRExpr* mk_armg_calculate_flag_c ( void ) +//ZZ { +//ZZ IRExpr** args +//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), +//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32), +//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32), +//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); +//ZZ IRExpr* call +//ZZ = mkIRExprCCall( +//ZZ Ity_I32, +//ZZ 0/*regparm*/, +//ZZ "armg_calculate_flag_c", &armg_calculate_flag_c, +//ZZ args +//ZZ ); +//ZZ /* Exclude OP and NDEP from definedness checking. We're only +//ZZ interested in DEP1 and DEP2. */ +//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); +//ZZ return call; +//ZZ } +//ZZ +//ZZ +//ZZ /* Build IR to calculate just the overflow flag from stored +//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: +//ZZ Ity_I32. */ +//ZZ static IRExpr* mk_armg_calculate_flag_v ( void ) +//ZZ { +//ZZ IRExpr** args +//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), +//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32), +//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32), +//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); +//ZZ IRExpr* call +//ZZ = mkIRExprCCall( +//ZZ Ity_I32, +//ZZ 0/*regparm*/, +//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v, +//ZZ args +//ZZ ); +//ZZ /* Exclude OP and NDEP from definedness checking. We're only +//ZZ interested in DEP1 and DEP2. */ +//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); +//ZZ return call; +//ZZ } + + +/* Build IR to calculate N Z C V in bits 31:28 of the + returned word. */ +static IRExpr* mk_arm64g_calculate_flags_nzcv ( void ) +{ + IRExpr** args + = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), + IRExpr_Get(OFFB_CC_DEP1, Ity_I64), + IRExpr_Get(OFFB_CC_DEP2, Ity_I64), + IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); + IRExpr* call + = mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv, + args + ); + /* Exclude OP and NDEP from definedness checking. We're only + interested in DEP1 and DEP2. */ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); + return call; +} + + +/* Build IR to set the flags thunk, in the most general case. */ +static +void setFlags_D1_D2_ND ( UInt cc_op, + IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep ) +{ + vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64)); + vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64)); + vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64)); + vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER); + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) )); + stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) )); +} + +/* Build IR to set the flags thunk after ADD or SUB. 
*/ +static +void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR ) +{ + IRTemp argL64 = IRTemp_INVALID; + IRTemp argR64 = IRTemp_INVALID; + IRTemp z64 = newTemp(Ity_I64); + if (is64) { + argL64 = argL; + argR64 = argR; + } else { + argL64 = newTemp(Ity_I64); + argR64 = newTemp(Ity_I64); + assign(argL64, unop(Iop_32Uto64, mkexpr(argL))); + assign(argR64, unop(Iop_32Uto64, mkexpr(argR))); + } + assign(z64, mkU64(0)); + UInt cc_op = ARM64G_CC_OP_NUMBER; + /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; } + else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; } + else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; } + else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; } + else { vassert(0); } + setFlags_D1_D2_ND(cc_op, argL64, argR64, z64); +} + +/* Build IR to set the flags thunk after ADD or SUB, if the given + condition evaluates to True at run time. If not, the flags are set + to the specified NZCV value. */ +static +void setFlags_ADD_SUB_conditionally ( + Bool is64, Bool isSUB, + IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv + ) +{ + /* Generate IR as follows: + CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY) + CC_DEP1 = ITE(cond, argL64, nzcv << 28) + CC_DEP2 = ITE(cond, argR64, 0) + CC_NDEP = 0 + */ + + IRTemp z64 = newTemp(Ity_I64); + assign(z64, mkU64(0)); + + /* Establish the operation and operands for the True case. */ + IRTemp t_dep1 = IRTemp_INVALID; + IRTemp t_dep2 = IRTemp_INVALID; + UInt t_op = ARM64G_CC_OP_NUMBER; + /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; } + else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; } + else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; } + else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; } + else { vassert(0); } + /* */ + if (is64) { + t_dep1 = argL; + t_dep2 = argR; + } else { + t_dep1 = newTemp(Ity_I64); + t_dep2 = newTemp(Ity_I64); + assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL))); + assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR))); + } + + /* Establish the operation and operands for the False case. */ + IRTemp f_dep1 = newTemp(Ity_I64); + IRTemp f_dep2 = z64; + UInt f_op = ARM64G_CC_OP_COPY; + assign(f_dep1, mkU64(nzcv << 28)); + + /* Final thunk values */ + IRTemp dep1 = newTemp(Ity_I64); + IRTemp dep2 = newTemp(Ity_I64); + IRTemp op = newTemp(Ity_I64); + + assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op))); + assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1))); + assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2))); + + /* finally .. */ + stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) )); + stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) )); +} + +/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */ +static +void setFlags_LOGIC ( Bool is64, IRTemp res ) +{ + IRTemp res64 = IRTemp_INVALID; + IRTemp z64 = newTemp(Ity_I64); + UInt cc_op = ARM64G_CC_OP_NUMBER; + if (is64) { + res64 = res; + cc_op = ARM64G_CC_OP_LOGIC64; + } else { + res64 = newTemp(Ity_I64); + assign(res64, unop(Iop_32Uto64, mkexpr(res))); + cc_op = ARM64G_CC_OP_LOGIC32; + } + assign(z64, mkU64(0)); + setFlags_D1_D2_ND(cc_op, res64, z64, z64); +} + +/* Build IR to set the flags thunk to a given NZCV value. NZCV is + located in bits 31:28 of the supplied value. 
*/ +static +void setFlags_COPY ( IRTemp nzcv_28x0 ) +{ + IRTemp z64 = newTemp(Ity_I64); + assign(z64, mkU64(0)); + setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64); +} + + +//ZZ /* Minor variant of the above that sets NDEP to zero (if it +//ZZ sets it at all) */ +//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1, +//ZZ IRTemp t_dep2, +//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) +//ZZ { +//ZZ IRTemp z32 = newTemp(Ity_I32); +//ZZ assign( z32, mkU32(0) ); +//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT ); +//ZZ } +//ZZ +//ZZ +//ZZ /* Minor variant of the above that sets DEP2 to zero (if it +//ZZ sets it at all) */ +//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1, +//ZZ IRTemp t_ndep, +//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) +//ZZ { +//ZZ IRTemp z32 = newTemp(Ity_I32); +//ZZ assign( z32, mkU32(0) ); +//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT ); +//ZZ } +//ZZ +//ZZ +//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it +//ZZ sets them at all) */ +//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1, +//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) +//ZZ { +//ZZ IRTemp z32 = newTemp(Ity_I32); +//ZZ assign( z32, mkU32(0) ); +//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT ); +//ZZ } + + +/*------------------------------------------------------------*/ +/*--- Misc math helpers ---*/ +/*------------------------------------------------------------*/ + +/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */ +static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh ) +{ + IRTemp maskT = newTemp(Ity_I64); + IRTemp res = newTemp(Ity_I64); + vassert(sh >= 1 && sh <= 63); + assign(maskT, mkU64(mask)); + assign( res, + binop(Iop_Or64, + binop(Iop_Shr64, + binop(Iop_And64,mkexpr(x),mkexpr(maskT)), + mkU8(sh)), + binop(Iop_And64, + binop(Iop_Shl64,mkexpr(x),mkU8(sh)), + mkexpr(maskT)) + ) + ); + return res; +} + +/* Generates byte swaps within 32-bit lanes. */ +static IRTemp math_UINTSWAP64 ( IRTemp src ) +{ + IRTemp res; + res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); + res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); + return res; +} + +/* Generates byte swaps within 16-bit lanes. */ +static IRTemp math_USHORTSWAP64 ( IRTemp src ) +{ + IRTemp res; + res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); + return res; +} + +/* Generates a 64-bit byte swap. */ +static IRTemp math_BYTESWAP64 ( IRTemp src ) +{ + IRTemp res; + res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); + res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); + res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32); + return res; +} + +/* Generates a 64-bit bit swap. */ +static IRTemp math_BITSWAP64 ( IRTemp src ) +{ + IRTemp res; + res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1); + res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2); + res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4); + return math_BYTESWAP64(res); +} + +/* Duplicates the bits at the bottom of the given word to fill the + whole word. src :: Ity_I64 is assumed to have zeroes everywhere + except for the bottom bits. 
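+
+   For example (illustrative): an Ity_I8 source of 0xAB is widened in
+   three doubling steps,
+
+      0xAB -> 0xABAB -> 0xABABABAB -> 0xABABABABABABABAB,
+
+   each step OR-ing the value with a shifted copy of itself.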
*/ +static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy ) +{ + if (srcTy == Ity_I8) { + IRTemp t16 = newTemp(Ity_I64); + assign(t16, binop(Iop_Or64, mkexpr(src), + binop(Iop_Shl64, mkexpr(src), mkU8(8)))); + IRTemp t32 = newTemp(Ity_I64); + assign(t32, binop(Iop_Or64, mkexpr(t16), + binop(Iop_Shl64, mkexpr(t16), mkU8(16)))); + IRTemp t64 = newTemp(Ity_I64); + assign(t64, binop(Iop_Or64, mkexpr(t32), + binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); + return t64; + } + if (srcTy == Ity_I16) { + IRTemp t32 = newTemp(Ity_I64); + assign(t32, binop(Iop_Or64, mkexpr(src), + binop(Iop_Shl64, mkexpr(src), mkU8(16)))); + IRTemp t64 = newTemp(Ity_I64); + assign(t64, binop(Iop_Or64, mkexpr(t32), + binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); + return t64; + } + if (srcTy == Ity_I32) { + IRTemp t64 = newTemp(Ity_I64); + assign(t64, binop(Iop_Or64, mkexpr(src), + binop(Iop_Shl64, mkexpr(src), mkU8(32)))); + return t64; + } + if (srcTy == Ity_I64) { + return src; + } + vassert(0); +} + + +/*------------------------------------------------------------*/ +/*--- FP comparison helpers ---*/ +/*------------------------------------------------------------*/ + +/* irRes :: Ity_I32 holds a floating point comparison result encoded + as an IRCmpF64Result. Generate code to convert it to an + ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value. + Assign a new temp to hold that value, and return the temp. */ +static +IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 ) +{ + IRTemp ix = newTemp(Ity_I64); + IRTemp termL = newTemp(Ity_I64); + IRTemp termR = newTemp(Ity_I64); + IRTemp nzcv = newTemp(Ity_I64); + IRTemp irRes = newTemp(Ity_I64); + + /* This is where the fun starts. We have to convert 'irRes' from + an IR-convention return result (IRCmpF64Result) to an + ARM-encoded (N,Z,C,V) group. The final result is in the bottom + 4 bits of 'nzcv'. */ + /* Map compare result from IR to ARM(nzcv) */ + /* + FP cmp result | IR | ARM(nzcv) + -------------------------------- + UN 0x45 0011 + LT 0x01 1000 + GT 0x00 0010 + EQ 0x40 0110 + */ + /* Now since you're probably wondering WTF .. + + ix fishes the useful bits out of the IR value, bits 6 and 0, and + places them side by side, giving a number which is 0, 1, 2 or 3. + + termL is a sequence cooked up by GNU superopt. It converts ix + into an almost correct value NZCV value (incredibly), except + for the case of UN, where it produces 0100 instead of the + required 0011. + + termR is therefore a correction term, also computed from ix. It + is 1 in the UN case and 0 for LT, GT and UN. Hence, to get + the final correct value, we subtract termR from termL. + + Don't take my word for it. There's a test program at the bottom + of guest_arm_toIR.c, to try this out with. 
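+
+      A by-hand check of the four cases, using 64-bit unsigned
+      arithmetic (illustrative, worked out here rather than taken from
+      the generated IR):
+
+         FP cmp  irRes  ix  termL  termR  termL-termR = nzcv
+         GT      0x00    0    2      0        0010
+         LT      0x01    1    8      0        1000
+         EQ      0x40    2    6      0        0110
+         UN      0x45    3    4      1        0011
+
+      where ix    = ((irRes >> 5) & 3) | (irRes & 1),
+            termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1   (logical shifts),
+            termR = ix & (ix >> 1) & 1.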
+ */ + assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32))); + + assign( + ix, + binop(Iop_Or64, + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(irRes), mkU8(5)), + mkU64(3)), + binop(Iop_And64, mkexpr(irRes), mkU64(1)))); + + assign( + termL, + binop(Iop_Add64, + binop(Iop_Shr64, + binop(Iop_Sub64, + binop(Iop_Shl64, + binop(Iop_Xor64, mkexpr(ix), mkU64(1)), + mkU8(62)), + mkU64(1)), + mkU8(61)), + mkU64(1))); + + assign( + termR, + binop(Iop_And64, + binop(Iop_And64, + mkexpr(ix), + binop(Iop_Shr64, mkexpr(ix), mkU8(1))), + mkU64(1))); + + assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR))); + return nzcv; +} + + +/*------------------------------------------------------------*/ +/*--- Data processing (immediate) ---*/ +/*------------------------------------------------------------*/ + +/* Helper functions for supporting "DecodeBitMasks" */ + +static ULong dbm_ROR ( Int width, ULong x, Int rot ) +{ + vassert(width > 0 && width <= 64); + vassert(rot >= 0 && rot < width); + if (rot == 0) return x; + ULong res = x >> rot; + res |= (x << (width - rot)); + if (width < 64) + res &= ((1ULL << width) - 1); + return res; +} + +static ULong dbm_RepTo64( Int esize, ULong x ) +{ + switch (esize) { + case 64: + return x; + case 32: + x &= 0xFFFFFFFF; x |= (x << 32); + return x; + case 16: + x &= 0xFFFF; x |= (x << 16); x |= (x << 32); + return x; + case 8: + x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32); + return x; + case 4: + x &= 0xF; x |= (x << 4); x |= (x << 8); + x |= (x << 16); x |= (x << 32); + return x; + case 2: + x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8); + x |= (x << 16); x |= (x << 32); + return x; + default: + break; + } + vpanic("dbm_RepTo64"); + /*NOTREACHED*/ + return 0; +} + +static Int dbm_highestSetBit ( ULong x ) +{ + Int i; + for (i = 63; i >= 0; i--) { + if (x & (1ULL << i)) + return i; + } + vassert(x == 0); + return -1; +} + +static +Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask, + ULong immN, ULong imms, ULong immr, Bool immediate, + UInt M /*32 or 64*/) +{ + vassert(immN < (1ULL << 1)); + vassert(imms < (1ULL << 6)); + vassert(immr < (1ULL << 6)); + vassert(immediate == False || immediate == True); + vassert(M == 32 || M == 64); + + Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) ); + if (len < 1) { /* printf("fail1\n"); */ return False; } + vassert(len <= 6); + vassert(M >= (1 << len)); + + vassert(len >= 1 && len <= 6); + ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len); + (1 << len) - 1; + vassert(levels >= 1 && levels <= 63); + + if (immediate && ((imms & levels) == levels)) { + /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */ + return False; + } + + ULong S = imms & levels; + ULong R = immr & levels; + Int diff = S - R; + diff &= 63; + Int esize = 1 << len; + vassert(2 <= esize && esize <= 64); + + /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the + same below with d. S can be 63 in which case we have an out of + range and hence undefined shift. 
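+
+      A hand-worked decode (illustrative): immN=0, immr=0b000000,
+      imms=0b100111 gives len=4, hence esize=16, levels=15, S=7, R=0,
+      d=7; so elem_s = elem_d = 0xFF and
+
+         wmask = tmask = 0x00FF00FF00FF00FF,
+
+      i.e. this is the encoding of the 64-bit logical immediate
+      0x00FF00FF00FF00FF.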
*/ + vassert(S >= 0 && S <= 63); + vassert(esize >= (S+1)); + ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1) + //(1ULL << (S+1)) - 1; + ((1ULL << S) - 1) + (1ULL << S); + + Int d = // diff + diff & ((1 << len)-1); + vassert(esize >= (d+1)); + vassert(d >= 0 && d <= 63); + + ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1) + //(1ULL << (d+1)) - 1; + ((1ULL << d) - 1) + (1ULL << d); + + if (esize != 64) vassert(elem_s < (1ULL << esize)); + if (esize != 64) vassert(elem_d < (1ULL << esize)); + + if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R)); + if (tmask) *tmask = dbm_RepTo64(esize, elem_d); + + return True; +} + + +static +Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres, + UInt insn) +{ +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + + /* insn[28:23] + 10000x PC-rel addressing + 10001x Add/subtract (immediate) + 100100 Logical (immediate) + 100101 Move Wide (immediate) + 100110 Bitfield + 100111 Extract + */ + + /* ------------------ ADD/SUB{,S} imm12 ------------------ */ + if (INSN(28,24) == BITS5(1,0,0,0,1)) { + Bool is64 = INSN(31,31) == 1; + Bool isSub = INSN(30,30) == 1; + Bool setCC = INSN(29,29) == 1; + UInt sh = INSN(23,22); + UInt uimm12 = INSN(21,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + const HChar* nm = isSub ? "sub" : "add"; + if (sh >= 2) { + /* Invalid; fall through */ + } else { + vassert(sh <= 1); + uimm12 <<= (12 * sh); + if (is64) { + IRTemp argL = newTemp(Ity_I64); + IRTemp argR = newTemp(Ity_I64); + IRTemp res = newTemp(Ity_I64); + assign(argL, getIReg64orSP(nn)); + assign(argR, mkU64(uimm12)); + assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64, + mkexpr(argL), mkexpr(argR))); + if (setCC) { + putIReg64orZR(dd, mkexpr(res)); + setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); + DIP("%ss %s, %s, 0x%x\n", + nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12); + } else { + putIReg64orSP(dd, mkexpr(res)); + DIP("%s %s, %s, 0x%x\n", + nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12); + } + } else { + IRTemp argL = newTemp(Ity_I32); + IRTemp argR = newTemp(Ity_I32); + IRTemp res = newTemp(Ity_I32); + assign(argL, getIReg32orSP(nn)); + assign(argR, mkU32(uimm12)); + assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32, + mkexpr(argL), mkexpr(argR))); + if (setCC) { + putIReg32orZR(dd, mkexpr(res)); + setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR); + DIP("%ss %s, %s, 0x%x\n", + nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12); + } else { + putIReg32orSP(dd, mkexpr(res)); + DIP("%s %s, %s, 0x%x\n", + nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12); + } + } + return True; + } + } + + /* -------------------- ADR/ADRP -------------------- */ + if (INSN(28,24) == BITS5(1,0,0,0,0)) { + UInt bP = INSN(31,31); + UInt immLo = INSN(30,29); + UInt immHi = INSN(23,5); + UInt rD = INSN(4,0); + ULong uimm = (immHi << 2) | immLo; + ULong simm = sx_to_64(uimm, 21); + ULong val; + if (bP) { + val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12); + } else { + val = guest_PC_curr_instr + simm; + } + putIReg64orZR(rD, mkU64(val)); + DIP("adr%s %s, 0x%llx\n", bP ? 
"p" : "", nameIReg64orZR(rD), val); + return True; + } + + /* -------------------- LOGIC(imm) -------------------- */ + if (INSN(28,23) == BITS6(1,0,0,1,0,0)) { + /* 31 30 28 22 21 15 9 4 + sf op 100100 N immr imms Rn Rd + op=00: AND Rd|SP, Rn, #imm + op=01: ORR Rd|SP, Rn, #imm + op=10: EOR Rd|SP, Rn, #imm + op=11: ANDS Rd|ZR, Rn, #imm + */ + Bool is64 = INSN(31,31) == 1; + UInt op = INSN(30,29); + UInt N = INSN(22,22); + UInt immR = INSN(21,16); + UInt immS = INSN(15,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + ULong imm = 0; + Bool ok; + if (N == 1 && !is64) + goto after_logic_imm; /* not allowed; fall through */ + ok = dbm_DecodeBitMasks(&imm, NULL, + N, immS, immR, True, is64 ? 64 : 32); + if (!ok) + goto after_logic_imm; + + const HChar* names[4] = { "and", "orr", "eor", "ands" }; + const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 }; + const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 }; + + vassert(op < 4); + if (is64) { + IRExpr* argL = getIReg64orZR(nn); + IRExpr* argR = mkU64(imm); + IRTemp res = newTemp(Ity_I64); + assign(res, binop(ops64[op], argL, argR)); + if (op < 3) { + putIReg64orSP(dd, mkexpr(res)); + DIP("%s %s, %s, 0x%llx\n", names[op], + nameIReg64orSP(dd), nameIReg64orZR(nn), imm); + } else { + putIReg64orZR(dd, mkexpr(res)); + setFlags_LOGIC(True/*is64*/, res); + DIP("%s %s, %s, 0x%llx\n", names[op], + nameIReg64orZR(dd), nameIReg64orZR(nn), imm); + } + } else { + IRExpr* argL = getIReg32orZR(nn); + IRExpr* argR = mkU32((UInt)imm); + IRTemp res = newTemp(Ity_I32); + assign(res, binop(ops32[op], argL, argR)); + if (op < 3) { + putIReg32orSP(dd, mkexpr(res)); + DIP("%s %s, %s, 0x%x\n", names[op], + nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm); + } else { + putIReg32orZR(dd, mkexpr(res)); + setFlags_LOGIC(False/*!is64*/, res); + DIP("%s %s, %s, 0x%x\n", names[op], + nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm); + } + } + return True; + } + after_logic_imm: + + /* -------------------- MOV{Z,N,K} -------------------- */ + if (INSN(28,23) == BITS6(1,0,0,1,0,1)) { + /* 31 30 28 22 20 4 + | | | | | | + sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw)) + sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw)) + sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw)) + */ + Bool is64 = INSN(31,31) == 1; + UInt subopc = INSN(30,29); + UInt hw = INSN(22,21); + UInt imm16 = INSN(20,5); + UInt dd = INSN(4,0); + if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) { + /* invalid; fall through */ + } else { + ULong imm64 = ((ULong)imm16) << (16 * hw); + if (!is64) + vassert(imm64 < 0x100000000ULL); + switch (subopc) { + case BITS2(1,0): // MOVZ + putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); + DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); + break; + case BITS2(0,0): // MOVN + imm64 = ~imm64; + if (!is64) + imm64 &= 0xFFFFFFFFULL; + putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); + DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); + break; + case BITS2(1,1): // MOVK + /* This is more complex. We are inserting a slice into + the destination register, so we need to have the old + value of it. 
*/ + if (is64) { + IRTemp old = newTemp(Ity_I64); + assign(old, getIReg64orZR(dd)); + ULong mask = 0xFFFFULL << (16 * hw); + IRExpr* res + = binop(Iop_Or64, + binop(Iop_And64, mkexpr(old), mkU64(~mask)), + mkU64(imm64)); + putIReg64orZR(dd, res); + DIP("movk %s, 0x%x, lsl %u\n", + nameIReg64orZR(dd), imm16, 16*hw); + } else { + IRTemp old = newTemp(Ity_I32); + assign(old, getIReg32orZR(dd)); + vassert(hw <= 1); + UInt mask = 0xFFFF << (16 * hw); + IRExpr* res + = binop(Iop_Or32, + binop(Iop_And32, mkexpr(old), mkU32(~mask)), + mkU32((UInt)imm64)); + putIReg32orZR(dd, res); + DIP("movk %s, 0x%x, lsl %u\n", + nameIReg32orZR(dd), imm16, 16*hw); + } + break; + default: + vassert(0); + } + return True; + } + } + + /* -------------------- {U,S,}BFM -------------------- */ + /* 30 28 22 21 15 9 4 + + sf 10 100110 N immr imms nn dd + UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 + UBFM Xd, Xn, #immr, #imms when sf=1, N=1 + + sf 00 100110 N immr imms nn dd + SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 + SBFM Xd, Xn, #immr, #imms when sf=1, N=1 + + sf 01 100110 N immr imms nn dd + BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 + BFM Xd, Xn, #immr, #imms when sf=1, N=1 + */ + if (INSN(28,23) == BITS6(1,0,0,1,1,0)) { + UInt sf = INSN(31,31); + UInt opc = INSN(30,29); + UInt N = INSN(22,22); + UInt immR = INSN(21,16); + UInt immS = INSN(15,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool inZero = False; + Bool extend = False; + const HChar* nm = "???"; + /* skip invalid combinations */ + switch (opc) { + case BITS2(0,0): + inZero = True; extend = True; nm = "sbfm"; break; + case BITS2(0,1): + inZero = False; extend = False; nm = "bfm"; break; + case BITS2(1,0): + inZero = True; extend = False; nm = "ubfm"; break; + case BITS2(1,1): + goto after_bfm; /* invalid */ + default: + vassert(0); + } + if (sf == 1 && N != 1) goto after_bfm; + if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0 + || ((immS >> 5) & 1) != 0)) goto after_bfm; + ULong wmask = 0, tmask = 0; + Bool ok = dbm_DecodeBitMasks(&wmask, &tmask, + N, immS, immR, False, sf == 1 ? 64 : 32); + if (!ok) goto after_bfm; /* hmmm */ + + Bool is64 = sf == 1; + IRType ty = is64 ? Ity_I64 : Ity_I32; + + IRTemp dst = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp bot = newTemp(ty); + IRTemp top = newTemp(ty); + IRTemp res = newTemp(ty); + assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd)); + assign(src, getIRegOrZR(is64, nn)); + /* perform bitfield move on low bits */ + assign(bot, binop(mkOR(ty), + binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)), + binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)), + mkU(ty, wmask)))); + /* determine extension bits (sign, zero or dest register) */ + assign(top, mkexpr(extend ? 
mathREPLICATE(ty, src, immS) : dst)); + /* combine extension bits and result bits */ + assign(res, binop(mkOR(ty), + binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)), + binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask)))); + putIRegOrZR(is64, dd, mkexpr(res)); + DIP("%s %s, %s, immR=%u, immS=%u\n", + nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS); + return True; + } + after_bfm: + + /* ---------------------- EXTR ---------------------- */ + /* 30 28 22 20 15 9 4 + 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6 + 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32 + */ + if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) { + Bool is64 = INSN(31,31) == 1; + UInt mm = INSN(20,16); + UInt imm6 = INSN(15,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool valid = True; + if (INSN(31,31) != INSN(22,22)) + valid = False; + if (!is64 && imm6 >= 32) + valid = False; + if (!valid) goto after_extr; + IRType ty = is64 ? Ity_I64 : Ity_I32; + IRTemp srcHi = newTemp(ty); + IRTemp srcLo = newTemp(ty); + IRTemp res = newTemp(ty); + assign(srcHi, getIRegOrZR(is64, nn)); + assign(srcLo, getIRegOrZR(is64, mm)); + if (imm6 == 0) { + assign(res, mkexpr(srcLo)); + } else { + UInt szBits = 8 * sizeofIRType(ty); + vassert(imm6 > 0 && imm6 < szBits); + assign(res, binop(mkOR(ty), + binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)), + binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6)))); + } + putIRegOrZR(is64, dd, mkexpr(res)); + DIP("extr %s, %s, %s, #%u\n", + nameIRegOrZR(is64,dd), + nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6); + return True; + } + after_extr: + + vex_printf("ARM64 front end: data_processing_immediate\n"); + return False; +# undef INSN +} + + +/*------------------------------------------------------------*/ +/*--- Data processing (register) instructions ---*/ +/*------------------------------------------------------------*/ + +static const HChar* nameSH ( UInt sh ) { + switch (sh) { + case 0: return "lsl"; + case 1: return "lsr"; + case 2: return "asr"; + case 3: return "ror"; + default: vassert(0); + } +} + +/* Generate IR to get a register value, possibly shifted by an + immediate. Returns either a 32- or 64-bit temporary holding the + result. After the shift, the value can optionally be NOT-ed + too. + + sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be + in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR + isn't allowed, but it's the job of the caller to check that. +*/ +static IRTemp getShiftedIRegOrZR ( Bool is64, + UInt sh_how, UInt sh_amt, UInt regNo, + Bool invert ) +{ + vassert(sh_how < 4); + vassert(sh_amt < (is64 ? 64 : 32)); + IRType ty = is64 ? 
Ity_I64 : Ity_I32; + IRTemp t0 = newTemp(ty); + assign(t0, getIRegOrZR(is64, regNo)); + IRTemp t1 = newTemp(ty); + switch (sh_how) { + case BITS2(0,0): + assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt))); + break; + case BITS2(0,1): + assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt))); + break; + case BITS2(1,0): + assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt))); + break; + case BITS2(1,1): + assign(t1, mkexpr(mathROR(ty, t0, sh_amt))); + break; + default: + vassert(0); + } + if (invert) { + IRTemp t2 = newTemp(ty); + assign(t2, unop(mkNOT(ty), mkexpr(t1))); + return t2; + } else { + return t1; + } +} + + +static +Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres, + UInt insn) +{ +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + + /* ------------------- ADD/SUB(reg) ------------------- */ + /* x==0 => 32 bit op x==1 => 64 bit op + sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED) + + 31 30 29 28 23 21 20 15 9 4 + | | | | | | | | | | + x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6) + x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6) + x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6) + x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6) + */ + if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) { + UInt bX = INSN(31,31); + UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */ + UInt bS = INSN(29, 29); /* set flags? */ + UInt sh = INSN(23,22); + UInt rM = INSN(20,16); + UInt imm6 = INSN(15,10); + UInt rN = INSN(9,5); + UInt rD = INSN(4,0); + Bool isSUB = bOP == 1; + Bool is64 = bX == 1; + IRType ty = is64 ? Ity_I64 : Ity_I32; + if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) { + /* invalid; fall through */ + } else { + IRTemp argL = newTemp(ty); + assign(argL, getIRegOrZR(is64, rN)); + IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False); + IROp op = isSUB ? mkSUB(ty) : mkADD(ty); + IRTemp res = newTemp(ty); + assign(res, binop(op, mkexpr(argL), mkexpr(argR))); + if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); + if (bS) { + setFlags_ADD_SUB(is64, isSUB, argL, argR); + } + DIP("%s%s %s, %s, %s, %s #%u\n", + bOP ? "sub" : "add", bS ? "s" : "", + nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), + nameIRegOrZR(is64, rM), nameSH(sh), imm6); + return True; + } + } + + /* -------------------- LOGIC(reg) -------------------- */ + /* x==0 => 32 bit op x==1 => 64 bit op + N==0 => inv? is no-op (no inversion) + N==1 => inv? is NOT + sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR + + 31 30 28 23 21 20 15 9 4 + | | | | | | | | | + x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6)) + x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6)) + x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6)) + x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6)) + With N=1, the names are: BIC ORN EON BICS + */ + if (INSN(28,24) == BITS5(0,1,0,1,0)) { + UInt bX = INSN(31,31); + UInt sh = INSN(23,22); + UInt bN = INSN(21,21); + UInt rM = INSN(20,16); + UInt imm6 = INSN(15,10); + UInt rN = INSN(9,5); + UInt rD = INSN(4,0); + Bool is64 = bX == 1; + IRType ty = is64 ? 
Ity_I64 : Ity_I32; + if (!is64 && imm6 > 31) { + /* invalid; fall though */ + } else { + IRTemp argL = newTemp(ty); + assign(argL, getIRegOrZR(is64, rN)); + IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1); + IROp op = Iop_INVALID; + switch (INSN(30,29)) { + case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break; + case BITS2(0,1): op = mkOR(ty); break; + case BITS2(1,0): op = mkXOR(ty); break; + default: vassert(0); + } + IRTemp res = newTemp(ty); + assign(res, binop(op, mkexpr(argL), mkexpr(argR))); + if (INSN(30,29) == BITS2(1,1)) { + setFlags_LOGIC(is64, res); + } + putIRegOrZR(is64, rD, mkexpr(res)); + + static const HChar* names_op[8] + = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" }; + vassert(((bN << 2) | INSN(30,29)) < 8); + const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)]; + /* Special-case the printing of "MOV" */ + if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) { + DIP("mov %s, %s\n", nameIRegOrZR(is64, rD), + nameIRegOrZR(is64, rM)); + } else { + DIP("%s %s, %s, %s, %s #%u\n", nm_op, + nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), + nameIRegOrZR(is64, rM), nameSH(sh), imm6); + } + return True; + } + } + + /* -------------------- {U,S}MULH -------------------- */ + /* 31 23 22 20 15 9 4 + 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm + 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm + */ + if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) + && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) { + Bool isU = INSN(23,23) == 1; + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + putIReg64orZR(dd, unop(Iop_128HIto64, + binop(isU ? Iop_MullU64 : Iop_MullS64, + getIReg64orZR(nn), getIReg64orZR(mm)))); + DIP("%cmulh %s, %s, %s\n", + isU ? 'u' : 's', + nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm)); + return True; + } + + /* -------------------- M{ADD,SUB} -------------------- */ + /* 31 30 20 15 14 9 4 + sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n + sf 00 11011 000 m 1 a n r MADD Rd,Rn,Rm,Ra d = a-m*n + */ + if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) { + Bool is64 = INSN(31,31) == 1; + UInt mm = INSN(20,16); + Bool isAdd = INSN(15,15) == 0; + UInt aa = INSN(14,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + if (is64) { + putIReg64orZR( + dd, + binop(isAdd ? Iop_Add64 : Iop_Sub64, + getIReg64orZR(aa), + binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn)))); + } else { + putIReg32orZR( + dd, + binop(isAdd ? Iop_Add32 : Iop_Sub32, + getIReg32orZR(aa), + binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn)))); + } + DIP("%s %s, %s, %s, %s\n", + isAdd ? "madd" : "msub", + nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), + nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa)); + return True; + } + + /* ---------------- CS{EL,INC,INV,NEG} ---------------- */ + /* 31 30 28 20 15 11 9 4 + sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm + sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm + sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm + sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm + In all cases, the operation is: Rd = if cond then Rn else OP(Rm) + */ + if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) { + Bool is64 = INSN(31,31) == 1; + UInt b30 = INSN(30,30); + UInt mm = INSN(20,16); + UInt cond = INSN(15,12); + UInt b10 = INSN(10,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */ + IRType ty = is64 ? 
Ity_I64 : Ity_I32; + IRExpr* argL = getIRegOrZR(is64, nn); + IRExpr* argR = getIRegOrZR(is64, mm); + switch (op) { + case BITS2(0,0): + break; + case BITS2(0,1): + argR = binop(mkADD(ty), argR, mkU(ty,1)); + break; + case BITS2(1,0): + argR = unop(mkNOT(ty), argR); + break; + case BITS2(1,1): + argR = binop(mkSUB(ty), mkU(ty,0), argR); + break; + default: + vassert(0); + } + putIRegOrZR( + is64, dd, + IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), + argL, argR) + ); + const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" }; + DIP("%s %s, %s, %s, %s\n", op_nm[op], + nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), + nameIRegOrZR(is64, mm), nameCC(cond)); + return True; + } + + /* -------------- ADD/SUB(extended reg) -------------- */ + /* 28 20 15 12 9 4 + 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld + 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld + + 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld + 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld + + 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld + 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld + + 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld + 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld + + The 'm' operand is extended per opt, thusly: + + 000 Xm & 0xFF UXTB + 001 Xm & 0xFFFF UXTH + 010 Xm & (2^32)-1 UXTW + 011 Xm UXTX + + 100 Xm sx from bit 7 SXTB + 101 Xm sx from bit 15 SXTH + 110 Xm sx from bit 31 SXTW + 111 Xm SXTX + + In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity + operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX + are the identity operation on Wm. + + After extension, the value is shifted left by imm3 bits, which + may only be in the range 0 .. 4 inclusive. + */ + if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) { + Bool is64 = INSN(31,31) == 1; + Bool isSub = INSN(30,30) == 1; + Bool setCC = INSN(29,29) == 1; + UInt mm = INSN(20,16); + UInt opt = INSN(15,13); + UInt imm3 = INSN(12,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx", + "sxtb", "sxth", "sxtw", "sxtx" }; + /* Do almost the same thing in the 32- and 64-bit cases. */ + IRTemp xN = newTemp(Ity_I64); + IRTemp xM = newTemp(Ity_I64); + assign(xN, getIReg64orSP(nn)); + assign(xM, getIReg64orZR(mm)); + IRExpr* xMw = mkexpr(xM); /* "xM widened" */ + Int shSX = 0; + /* widen Xm .. */ + switch (opt) { + case BITS3(0,0,0): // UXTB + xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break; + case BITS3(0,0,1): // UXTH + xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break; + case BITS3(0,1,0): // UXTW -- noop for the 32bit case + if (is64) { + xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw)); + } + break; + case BITS3(0,1,1): // UXTX -- always a noop + break; + case BITS3(1,0,0): // SXTB + shSX = 56; goto sxTo64; + case BITS3(1,0,1): // SXTH + shSX = 48; goto sxTo64; + case BITS3(1,1,0): // SXTW -- noop for the 32bit case + if (is64) { + shSX = 32; goto sxTo64; + } + break; + case BITS3(1,1,1): // SXTX -- always a noop + break; + sxTo64: + vassert(shSX >= 32); + xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)), + mkU8(shSX)); + break; + default: + vassert(0); + } + /* and now shift */ + IRTemp argL = xN; + IRTemp argR = newTemp(Ity_I64); + assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3))); + IRTemp res = newTemp(Ity_I64); + assign(res, binop(isSub ? 
Iop_Sub64 : Iop_Add64, + mkexpr(argL), mkexpr(argR))); + if (is64) { + if (setCC) { + putIReg64orZR(dd, mkexpr(res)); + setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); + } else { + putIReg64orSP(dd, mkexpr(res)); + } + } else { + if (setCC) { + IRTemp argL32 = newTemp(Ity_I32); + IRTemp argR32 = newTemp(Ity_I32); + putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res))); + assign(argL32, unop(Iop_64to32, mkexpr(argL))); + assign(argR32, unop(Iop_64to32, mkexpr(argR))); + setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32); + } else { + putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res))); + } + } + DIP("%s%s %s, %s, %s %s lsl %u\n", + isSub ? "sub" : "add", setCC ? "s" : "", + setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd), + nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm), + nameExt[opt], imm3); + return True; + } + + /* ---------------- CCMP/CCMN(imm) ---------------- */ + /* Bizarrely, these appear in the "data processing register" + category, even though they are operations against an + immediate. */ + /* 31 29 20 15 11 9 3 + sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond + sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond + + Operation is: + (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv + (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv + */ + if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) + && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) { + Bool is64 = INSN(31,31) == 1; + Bool isSUB = INSN(30,30) == 1; + UInt imm5 = INSN(20,16); + UInt cond = INSN(15,12); + UInt nn = INSN(9,5); + UInt nzcv = INSN(3,0); + + IRTemp condT = newTemp(Ity_I1); + assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); + + IRType ty = is64 ? Ity_I64 : Ity_I32; + IRTemp argL = newTemp(ty); + IRTemp argR = newTemp(ty); + + if (is64) { + assign(argL, getIReg64orZR(nn)); + assign(argR, mkU64(imm5)); + } else { + assign(argL, getIReg32orZR(nn)); + assign(argR, mkU32(imm5)); + } + setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); + + DIP("ccm%c %s, #%u, #%u, %s\n", + isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), + imm5, nzcv, nameCC(cond)); + return True; + } + + /* ---------------- CCMP/CCMN(reg) ---------------- */ + /* 31 29 20 15 11 9 3 + sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond + sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond + Operation is: + (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv + (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv + */ + if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) + && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) { + Bool is64 = INSN(31,31) == 1; + Bool isSUB = INSN(30,30) == 1; + UInt mm = INSN(20,16); + UInt cond = INSN(15,12); + UInt nn = INSN(9,5); + UInt nzcv = INSN(3,0); + + IRTemp condT = newTemp(Ity_I1); + assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); + + IRType ty = is64 ? Ity_I64 : Ity_I32; + IRTemp argL = newTemp(ty); + IRTemp argR = newTemp(ty); + + if (is64) { + assign(argL, getIReg64orZR(nn)); + assign(argR, getIReg64orZR(mm)); + } else { + assign(argL, getIReg32orZR(nn)); + assign(argR, getIReg32orZR(mm)); + } + setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); + + DIP("ccm%c %s, %s, #%u, %s\n", + isSUB ? 
'p' : 'n', nameIRegOrZR(is64, nn), + nameIRegOrZR(is64, mm), nzcv, nameCC(cond)); + return True; + } + + + /* -------------- REV/REV16/REV32/RBIT -------------- */ + /* 31 30 28 20 15 11 9 4 + + 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn + 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn + + 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn + 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn + + 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn + 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn + + 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn + */ + if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) + && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) { + UInt b31 = INSN(31,31); + UInt opc = INSN(11,10); + + UInt ix = 0; + /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1; + else if (b31 == 0 && opc == BITS2(1,0)) ix = 2; + else if (b31 == 1 && opc == BITS2(0,0)) ix = 3; + else if (b31 == 0 && opc == BITS2(0,0)) ix = 4; + else if (b31 == 1 && opc == BITS2(0,1)) ix = 5; + else if (b31 == 0 && opc == BITS2(0,1)) ix = 6; + else if (b31 == 1 && opc == BITS2(1,0)) ix = 7; + if (ix >= 1 && ix <= 7) { + Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp src = newTemp(Ity_I64); + IRTemp dst = IRTemp_INVALID; + IRTemp (*math)(IRTemp) = NULL; + switch (ix) { + case 1: case 2: math = math_BYTESWAP64; break; + case 3: case 4: math = math_BITSWAP64; break; + case 5: case 6: math = math_USHORTSWAP64; break; + case 7: math = math_UINTSWAP64; break; + default: vassert(0); + } + const HChar* names[7] + = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" }; + const HChar* nm = names[ix-1]; + vassert(math); + if (ix == 6) { + /* This has to be special cased, since the logic below doesn't + handle it correctly. */ + assign(src, getIReg64orZR(nn)); + dst = math(src); + putIReg64orZR(dd, + unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst)))); + } else if (is64) { + assign(src, getIReg64orZR(nn)); + dst = math(src); + putIReg64orZR(dd, mkexpr(dst)); + } else { + assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32))); + dst = math(src); + putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); + } + DIP("%s %s, %s\n", nm, + nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn)); + return True; + } + /* else fall through */ + } + + /* -------------------- CLZ/CLS -------------------- */ + /* 30 28 24 20 15 9 4 + sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn + sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn + */ + if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) + && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) { + Bool is64 = INSN(31,31) == 1; + Bool isCLS = INSN(10,10) == 1; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp src = newTemp(Ity_I64); + IRTemp dst = newTemp(Ity_I64); + if (!isCLS) { // CLS not yet supported + if (is64) { + assign(src, getIReg64orZR(nn)); + assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)), + mkU64(64), + unop(Iop_Clz64, mkexpr(src)))); + putIReg64orZR(dd, mkexpr(dst)); + } else { + assign(src, binop(Iop_Shl64, + unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32))); + assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)), + mkU64(32), + unop(Iop_Clz64, mkexpr(src)))); + putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); + } + DIP("cl%c %s, %s\n", + isCLS ? 
's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn)); + return True; + } + } + + /* -------------------- LSLV/LSRV/ASRV -------------------- */ + /* 30 28 20 15 11 9 4 + sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm + sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm + sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm + */ + if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) + && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) { + Bool is64 = INSN(31,31) == 1; + UInt mm = INSN(20,16); + UInt op = INSN(11,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRType ty = is64 ? Ity_I64 : Ity_I32; + IRTemp srcL = newTemp(ty); + IRTemp srcR = newTemp(Ity_I8); + IRTemp res = newTemp(ty); + IROp iop = Iop_INVALID; + assign(srcL, getIRegOrZR(is64, nn)); + assign(srcR, + unop(Iop_64to8, + binop(Iop_And64, + getIReg64orZR(mm), mkU64(is64 ? 63 : 31)))); + switch (op) { + case BITS2(0,0): iop = mkSHL(ty); break; + case BITS2(0,1): iop = mkSHR(ty); break; + case BITS2(1,0): iop = mkSAR(ty); break; + default: vassert(0); + } + assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR))); + putIRegOrZR(is64, dd, mkexpr(res)); + vassert(op < 3); + const HChar* names[3] = { "lslv", "lsrv", "asrv" }; + DIP("%s %s, %s, %s\n", + names[op], nameIRegOrZR(is64,dd), + nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm)); + return True; + } + + /* -------------------- SDIV/UDIV -------------------- */ + /* 30 28 20 15 10 9 4 + sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm + sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm + */ + if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) + && INSN(15,11) == BITS5(0,0,0,0,1)) { + Bool is64 = INSN(31,31) == 1; + UInt mm = INSN(20,16); + Bool isS = INSN(10,10) == 1; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + if (isS) { + putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32, + getIRegOrZR(is64, nn), + getIRegOrZR(is64, mm))); + } else { + putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32, + getIRegOrZR(is64, nn), + getIRegOrZR(is64, mm))); + } + DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u', + nameIRegOrZR(is64, dd), + nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm)); + return True; + } + + /* ------------------ {S,U}M{ADD,SUB}L ------------------ */ + /* 31 23 20 15 14 9 4 + 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa + 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa + 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa + 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa + with operation + Xd = Xa +/- (Wn *u/s Wm) + */ + if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) { + Bool isU = INSN(23,23) == 1; + UInt mm = INSN(20,16); + Bool isAdd = INSN(15,15) == 0; + UInt aa = INSN(14,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp wN = newTemp(Ity_I32); + IRTemp wM = newTemp(Ity_I32); + IRTemp xA = newTemp(Ity_I64); + IRTemp muld = newTemp(Ity_I64); + IRTemp res = newTemp(Ity_I64); + assign(wN, getIReg32orZR(nn)); + assign(wM, getIReg32orZR(mm)); + assign(xA, getIReg64orZR(aa)); + assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32, + mkexpr(wN), mkexpr(wM))); + assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64, + mkexpr(xA), mkexpr(muld))); + putIReg64orZR(dd, mkexpr(res)); + DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? 
"add" : "sub", + nameIReg64orZR(dd), nameIReg32orZR(nn), + nameIReg32orZR(mm), nameIReg64orZR(aa)); + return True; + } + vex_printf("ARM64 front end: data_processing_register\n"); + return False; +# undef INSN +} + + +/*------------------------------------------------------------*/ +/*--- Load and Store instructions ---*/ +/*------------------------------------------------------------*/ + +/* Generate the EA for a "reg + reg" style amode. This is done from + parts of the insn, but for sanity checking sake it takes the whole + insn. This appears to depend on insn[15:12], with opt=insn[15:13] + and S=insn[12]: + + The possible forms, along with their opt:S values, are: + 011:0 Xn|SP + Xm + 111:0 Xn|SP + Xm + 011:1 Xn|SP + Xm * transfer_szB + 111:1 Xn|SP + Xm * transfer_szB + 010:0 Xn|SP + 32Uto64(Wm) + 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB + 110:0 Xn|SP + 32Sto64(Wm) + 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB + + Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of + the transfer size is insn[23,31,30]. For integer loads/stores, + insn[23] is zero, hence szLg2 can be at most 3 in such cases. + + If the decoding fails, it returns IRTemp_INVALID. + + isInt is True iff this is decoding is for transfers to/from integer + registers. If False it is for transfers to/from vector registers. +*/ +static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt ) +{ + UInt optS = SLICE_UInt(insn, 15, 12); + UInt mm = SLICE_UInt(insn, 20, 16); + UInt nn = SLICE_UInt(insn, 9, 5); + UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2)) + | SLICE_UInt(insn, 31, 30); // Log2 of the size + + buf[0] = 0; + + /* Sanity checks, that this really is a load/store insn. */ + if (SLICE_UInt(insn, 11, 10) != BITS2(1,0)) + goto fail; + + if (isInt + && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/ + && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/ + && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/ + && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/ + goto fail; + + if (!isInt + && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/ + goto fail; + + /* Throw out non-verified but possibly valid cases. 
*/ + switch (szLg2) { + case BITS3(0,0,0): break; // 8 bit, valid for both int and vec + case BITS3(0,0,1): break; // 16 bit, valid for both int and vec + case BITS3(0,1,0): break; // 32 bit, valid for both int and vec + case BITS3(0,1,1): break; // 64 bit, valid for both int and vec + case BITS3(1,0,0): // can only ever be valid for the vector case + if (isInt) goto fail; else goto fail; + case BITS3(1,0,1): // these sizes are never valid + case BITS3(1,1,0): + case BITS3(1,1,1): goto fail; + + default: vassert(0); + } + + IRExpr* rhs = NULL; + switch (optS) { + case BITS4(1,1,1,0): goto fail; //ATC + case BITS4(0,1,1,0): + rhs = getIReg64orZR(mm); + vex_sprintf(buf, "[%s, %s]", + nameIReg64orZR(nn), nameIReg64orZR(mm)); + break; + case BITS4(1,1,1,1): goto fail; //ATC + case BITS4(0,1,1,1): + rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2)); + vex_sprintf(buf, "[%s, %s lsl %u]", + nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2); + break; + case BITS4(0,1,0,0): + rhs = unop(Iop_32Uto64, getIReg32orZR(mm)); + vex_sprintf(buf, "[%s, %s uxtx]", + nameIReg64orZR(nn), nameIReg32orZR(mm)); + break; + case BITS4(0,1,0,1): + rhs = binop(Iop_Shl64, + unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2)); + vex_sprintf(buf, "[%s, %s uxtx, lsl %u]", + nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); + break; + case BITS4(1,1,0,0): + rhs = unop(Iop_32Sto64, getIReg32orZR(mm)); + vex_sprintf(buf, "[%s, %s sxtx]", + nameIReg64orZR(nn), nameIReg32orZR(mm)); + break; + case BITS4(1,1,0,1): + rhs = binop(Iop_Shl64, + unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2)); + vex_sprintf(buf, "[%s, %s sxtx, lsl %u]", + nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); + break; + default: + /* The rest appear to be genuinely invalid */ + goto fail; + } + + vassert(rhs); + IRTemp res = newTemp(Ity_I64); + assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs)); + return res; + + fail: + vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS); + return IRTemp_INVALID; +} + + +/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest + bits of DATAE :: Ity_I64. */ +static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE ) +{ + IRExpr* addrE = mkexpr(addr); + switch (szB) { + case 8: + storeLE(addrE, dataE); + break; + case 4: + storeLE(addrE, unop(Iop_64to32, dataE)); + break; + case 2: + storeLE(addrE, unop(Iop_64to16, dataE)); + break; + case 1: + storeLE(addrE, unop(Iop_64to8, dataE)); + break; + default: + vassert(0); + } +} + + +/* Generate an 8/16/32/64 bit unsigned widening load from ADDR, + placing the result in an Ity_I64 temporary. 
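+
+   For example (illustrative): with szB == 2 and the addressed
+   halfword holding 0xBEEF, the result temporary ends up holding
+   0x000000000000BEEF.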
*/ +static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr ) +{ + IRTemp res = newTemp(Ity_I64); + IRExpr* addrE = mkexpr(addr); + switch (szB) { + case 8: + assign(res, loadLE(Ity_I64,addrE)); + break; + case 4: + assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE))); + break; + case 2: + assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE))); + break; + case 1: + assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE))); + break; + default: + vassert(0); + } + return res; +} + + +static +Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn) +{ +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + + /* ------------ LDR,STR (immediate, uimm12) ----------- */ + /* uimm12 is scaled by the transfer size + + 31 29 26 21 9 4 + | | | | | | + 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8] + 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8] + + 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4] + 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4] + + 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2] + 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2] + + 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1] + 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1] + */ + if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) { + UInt szLg2 = INSN(31,30); + UInt szB = 1 << szLg2; + Bool isLD = INSN(22,22) == 1; + UInt offs = INSN(21,10) * szB; + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + IRTemp ta = newTemp(Ity_I64); + assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs))); + if (nn == 31) { /* FIXME generate stack alignment check */ } + vassert(szLg2 < 4); + if (isLD) { + putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta))); + } else { + gen_narrowing_store(szB, ta, getIReg64orZR(tt)); + } + const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" }; + const HChar* st_name[4] = { "strb", "strh", "str", "str" }; + DIP("%s %s, [%s, #%u]\n", + (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt), + nameIReg64orSP(nn), offs); + return True; + } + + /* ------------ LDUR,STUR (immediate, simm9) ----------- */ + /* + 31 29 26 20 11 9 4 + | | | | | | | + (at-Rn-then-Rn=EA) | | | + sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9 + sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9 + + (at-EA-then-Rn=EA) + sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]! + sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]! + + (at-EA) + sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9] + sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9] + + simm9 is unscaled. + + The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the + load case this is because would create two competing values for + Rt. In the store case the reason is unclear, but the spec + disallows it anyway. + + Stores are narrowing, loads are unsigned widening. sz encodes + the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8. + */ + if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1)) + == BITS9(1,1,1, 0,0,0,0,0, 0)) { + UInt szLg2 = INSN(31,30); + UInt szB = 1 << szLg2; + Bool isLoad = INSN(22,22) == 1; + UInt imm9 = INSN(20,12); + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + Bool wBack = INSN(10,10) == 1; + UInt how = INSN(11,10); + if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) { + /* undecodable; fall through */ + } else { + if (nn == 31) { /* FIXME generate stack alignment check */ } + + // Compute the transfer address TA and the writeback address WA. 
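+         // For example (illustrative), with X1 = 0x1000:
+         //    LDR  X0, [X1], #8    (how=01): TA = 0x1000, WA = 0x1008
+         //    LDR  X0, [X1, #8]!   (how=11): TA = WA = 0x1008
+         //    LDUR X0, [X1, #8]    (how=00): TA = 0x1008, no writeback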
+ IRTemp tRN = newTemp(Ity_I64); + assign(tRN, getIReg64orSP(nn)); + IRTemp tEA = newTemp(Ity_I64); + Long simm9 = (Long)sx_to_64(imm9, 9); + assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); + + IRTemp tTA = newTemp(Ity_I64); + IRTemp tWA = newTemp(Ity_I64); + switch (how) { + case BITS2(0,1): + assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; + case BITS2(1,1): + assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; + case BITS2(0,0): + assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; + default: + vassert(0); /* NOTREACHED */ + } + + /* Normally rN would be updated after the transfer. However, in + the special case typifed by + str x30, [sp,#-16]! + it is necessary to update SP before the transfer, (1) + because Memcheck will otherwise complain about a write + below the stack pointer, and (2) because the segfault + stack extension mechanism will otherwise extend the stack + only down to SP before the instruction, which might not be + far enough, if the -16 bit takes the actual access + address to the next page. + */ + Bool earlyWBack + = wBack && simm9 < 0 && szB == 8 + && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn; + + if (wBack && earlyWBack) + putIReg64orSP(nn, mkexpr(tEA)); + + if (isLoad) { + putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA))); + } else { + gen_narrowing_store(szB, tTA, getIReg64orZR(tt)); + } + + if (wBack && !earlyWBack) + putIReg64orSP(nn, mkexpr(tEA)); + + const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" }; + const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" }; + const HChar* fmt_str = NULL; + switch (how) { + case BITS2(0,1): + fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; + break; + case BITS2(1,1): + fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; + break; + case BITS2(0,0): + fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n"; + break; + default: + vassert(0); + } + DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2], + nameIRegOrZR(szB == 8, tt), + nameIReg64orSP(nn), simm9); + return True; + } + } + + /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */ + /* L==1 => mm==LD + L==0 => mm==ST + x==0 => 32 bit transfers, and zero extended loads + x==1 => 64 bit transfers + simm7 is scaled by the (single-register) transfer size + + (at-Rn-then-Rn=EA) + x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm + + (at-EA-then-Rn=EA) + x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]! + + (at-EA) + x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm] + */ + + UInt insn_30_23 = INSN(30,23); + if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1) + || insn_30_23 == BITS8(0,1,0,1,0,0,1,1) + || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) { + UInt bL = INSN(22,22); + UInt bX = INSN(31,31); + UInt bWBack = INSN(23,23); + UInt rT1 = INSN(4,0); + UInt rN = INSN(9,5); + UInt rT2 = INSN(14,10); + Long simm7 = (Long)sx_to_64(INSN(21,15), 7); + if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31) + || (bL && rT1 == rT2)) { + /* undecodable; fall through */ + } else { + if (rN == 31) { /* FIXME generate stack alignment check */ } + + // Compute the transfer address TA and the writeback address WA. + IRTemp tRN = newTemp(Ity_I64); + assign(tRN, getIReg64orSP(rN)); + IRTemp tEA = newTemp(Ity_I64); + simm7 = (bX ? 
8 : 4) * simm7; + assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); + + IRTemp tTA = newTemp(Ity_I64); + IRTemp tWA = newTemp(Ity_I64); + switch (INSN(24,23)) { + case BITS2(0,1): + assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; + case BITS2(1,1): + assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; + case BITS2(1,0): + assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; + default: + vassert(0); /* NOTREACHED */ + } + + /* Normally rN would be updated after the transfer. However, in + the special case typifed by + stp x29, x30, [sp,#-112]! + it is necessary to update SP before the transfer, (1) + because Memcheck will otherwise complain about a write + below the stack pointer, and (2) because the segfault + stack extension mechanism will otherwise extend the stack + only down to SP before the instruction, which might not be + far enough, if the -112 bit takes the actual access + address to the next page. + */ + Bool earlyWBack + = bWBack && simm7 < 0 + && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0; + + if (bWBack && earlyWBack) + putIReg64orSP(rN, mkexpr(tEA)); + + /**/ if (bL == 1 && bX == 1) { + // 64 bit load + putIReg64orZR(rT1, loadLE(Ity_I64, + binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); + putIReg64orZR(rT2, loadLE(Ity_I64, + binop(Iop_Add64,mkexpr(tTA),mkU64(8)))); + } else if (bL == 1 && bX == 0) { + // 32 bit load + putIReg32orZR(rT1, loadLE(Ity_I32, + binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); + putIReg32orZR(rT2, loadLE(Ity_I32, + binop(Iop_Add64,mkexpr(tTA),mkU64(4)))); + } else if (bL == 0 && bX == 1) { + // 64 bit store + storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), + getIReg64orZR(rT1)); + storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)), + getIReg64orZR(rT2)); + } else { + vassert(bL == 0 && bX == 0); + // 32 bit store + storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), + getIReg32orZR(rT1)); + storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)), + getIReg32orZR(rT2)); + } + + if (bWBack && !earlyWBack) + putIReg64orSP(rN, mkexpr(tEA)); + + const HChar* fmt_str = NULL; + switch (INSN(24,23)) { + case BITS2(0,1): + fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; + break; + case BITS2(1,1): + fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; + break; + case BITS2(1,0): + fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; + break; + default: + vassert(0); + } + DIP(fmt_str, bL == 0 ? "st" : "ld", + nameIRegOrZR(bX == 1, rT1), + nameIRegOrZR(bX == 1, rT2), + nameIReg64orSP(rN), simm7); + return True; + } + } + + /* ---------------- LDR (literal, int reg) ---------------- */ + /* 31 29 23 4 + 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)] + 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)] + 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)] + 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)] + Just handles the first two cases for now. 
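+
+      For example (illustrative): imm19 = 0x7FFFF sign-extends (after
+      the << 2) to -4, so the load is from PC - 4; imm19 = 1 loads
+      from PC + 4.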
+ */ + if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) { + UInt imm19 = INSN(23,5); + UInt rT = INSN(4,0); + UInt bX = INSN(30,30); + ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); + if (bX) { + putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea))); + } else { + putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea))); + } + DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea); + return True; + } + + /* -------------- {LD,ST}R (integer register) --------------- */ + /* 31 29 20 15 12 11 9 4 + | | | | | | | | + 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R{ext/sh}] + 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R{ext/sh}] + 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R{ext/sh}] + 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R{ext/sh}] + + 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R{ext/sh}] + 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R{ext/sh}] + 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R{ext/sh}] + 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R{ext/sh}] + */ + if (INSN(29,23) == BITS7(1,1,1,0,0,0,0) + && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { + HChar dis_buf[64]; + UInt szLg2 = INSN(31,30); + Bool isLD = INSN(22,22) == 1; + UInt tt = INSN(4,0); + IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); + if (ea != IRTemp_INVALID) { + switch (szLg2) { + case 3: /* 64 bit */ + if (isLD) { + putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea))); + DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf); + } else { + storeLE(mkexpr(ea), getIReg64orZR(tt)); + DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf); + } + break; + case 2: /* 32 bit */ + if (isLD) { + putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea))); + DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf); + } else { + storeLE(mkexpr(ea), getIReg32orZR(tt)); + DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf); + } + break; + case 1: /* 16 bit */ + if (isLD) { + putIReg64orZR(tt, unop(Iop_16Uto64, + loadLE(Ity_I16, mkexpr(ea)))); + DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf); + } else { + storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt))); + DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf); + } + break; + case 0: /* 8 bit */ + if (isLD) { + putIReg64orZR(tt, unop(Iop_8Uto64, + loadLE(Ity_I8, mkexpr(ea)))); + DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf); + } else { + storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt))); + DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf); + } + break; + default: + vassert(0); + } + return True; + } + } + + /* -------------- LDRS{B,H,W} (uimm12) -------------- */ + /* 31 29 26 23 21 9 4 + 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4] + 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2] + 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1] + where + Rt is Wt when x==1, Xt when x==0 + */ + if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) { + /* Further checks on bits 31:30 and 22 */ + Bool valid = False; + switch ((INSN(31,30) << 1) | INSN(22,22)) { + case BITS3(1,0,0): + case BITS3(0,1,0): case BITS3(0,1,1): + case BITS3(0,0,0): case BITS3(0,0,1): + valid = True; + break; + } + if (valid) { + UInt szLg2 = INSN(31,30); + UInt bitX = INSN(22,22); + UInt imm12 = INSN(21,10); + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + UInt szB = 1 << szLg2; + IRExpr* ea = binop(Iop_Add64, + getIReg64orSP(nn), mkU64(imm12 * szB)); + switch (szB) { + case 4: + vassert(bitX == 0); + putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea))); + DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt), + nameIReg64orSP(nn), imm12 * 
szB); + break; + case 2: + if (bitX == 1) { + putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea))); + } else { + putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea))); + } + DIP("ldrsh %s, [%s, #%u]\n", + nameIRegOrZR(bitX == 0, tt), + nameIReg64orSP(nn), imm12 * szB); + break; + case 1: + if (bitX == 1) { + putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea))); + } else { + putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea))); + } + DIP("ldrsb %s, [%s, #%u]\n", + nameIRegOrZR(bitX == 0, tt), + nameIReg64orSP(nn), imm12 * szB); + break; + default: + vassert(0); + } + return True; + } + /* else fall through */ + } + + /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */ + /* (at-Rn-then-Rn=EA) + 31 29 23 21 20 11 9 4 + 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9 + 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9 + 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9 + + (at-EA-then-Rn=EA) + 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]! + 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]! + 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]! + where + Rt is Wt when x==1, Xt when x==0 + transfer-at-Rn when [11]==0, at EA when [11]==1 + */ + if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) + && INSN(21,21) == 0 && INSN(10,10) == 1) { + /* Further checks on bits 31:30 and 22 */ + Bool valid = False; + switch ((INSN(31,30) << 1) | INSN(22,22)) { + case BITS3(1,0,0): // LDRSW Xt + case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt + case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt + valid = True; + break; + } + if (valid) { + UInt szLg2 = INSN(31,30); + UInt imm9 = INSN(20,12); + Bool atRN = INSN(11,11) == 0; + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + IRTemp tRN = newTemp(Ity_I64); + IRTemp tEA = newTemp(Ity_I64); + IRTemp tTA = IRTemp_INVALID; + ULong simm9 = sx_to_64(imm9, 9); + Bool is64 = INSN(22,22) == 0; + assign(tRN, getIReg64orSP(nn)); + assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); + tTA = atRN ? tRN : tEA; + HChar ch = '?'; + /* There are 5 cases: + byte load, SX to 64 + byte load, SX to 32, ZX to 64 + halfword load, SX to 64 + halfword load, SX to 32, ZX to 64 + word load, SX to 64 + The ifs below handle them in the listed order. + */ + if (szLg2 == 0) { + ch = 'b'; + if (is64) { + putIReg64orZR(tt, unop(Iop_8Sto64, + loadLE(Ity_I8, mkexpr(tTA)))); + } else { + putIReg32orZR(tt, unop(Iop_8Sto32, + loadLE(Ity_I8, mkexpr(tTA)))); + } + } + else if (szLg2 == 1) { + ch = 'h'; + if (is64) { + putIReg64orZR(tt, unop(Iop_16Sto64, + loadLE(Ity_I16, mkexpr(tTA)))); + } else { + putIReg32orZR(tt, unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(tTA)))); + } + } + else if (szLg2 == 2 && is64) { + ch = 'w'; + putIReg64orZR(tt, unop(Iop_32Sto64, + loadLE(Ity_I32, mkexpr(tTA)))); + } + else { + vassert(0); + } + putIReg64orSP(nn, mkexpr(tEA)); + DIP(atRN ? 
"ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!", + ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); + return True; + } + /* else fall through */ + } + + /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */ + /* 31 29 23 21 20 11 9 4 + 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9] + 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9] + 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9] + where + Rt is Wt when x==1, Xt when x==0 + */ + if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) + && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { + /* Further checks on bits 31:30 and 22 */ + Bool valid = False; + switch ((INSN(31,30) << 1) | INSN(22,22)) { + case BITS3(1,0,0): // LDURSW Xt + case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt + case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt + valid = True; + break; + } + if (valid) { + UInt szLg2 = INSN(31,30); + UInt imm9 = INSN(20,12); + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + IRTemp tRN = newTemp(Ity_I64); + IRTemp tEA = newTemp(Ity_I64); + ULong simm9 = sx_to_64(imm9, 9); + Bool is64 = INSN(22,22) == 0; + assign(tRN, getIReg64orSP(nn)); + assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); + HChar ch = '?'; + /* There are 5 cases: + byte load, SX to 64 + byte load, SX to 32, ZX to 64 + halfword load, SX to 64 + halfword load, SX to 32, ZX to 64 + word load, SX to 64 + The ifs below handle them in the listed order. + */ + if (szLg2 == 0) { + ch = 'b'; + if (is64) { + putIReg64orZR(tt, unop(Iop_8Sto64, + loadLE(Ity_I8, mkexpr(tEA)))); + } else { + putIReg32orZR(tt, unop(Iop_8Sto32, + loadLE(Ity_I8, mkexpr(tEA)))); + } + } + else if (szLg2 == 1) { + ch = 'h'; + if (is64) { + putIReg64orZR(tt, unop(Iop_16Sto64, + loadLE(Ity_I16, mkexpr(tEA)))); + } else { + putIReg32orZR(tt, unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(tEA)))); + } + } + else if (szLg2 == 2 && is64) { + ch = 'w'; + putIReg64orZR(tt, unop(Iop_32Sto64, + loadLE(Ity_I32, mkexpr(tEA)))); + } + else { + vassert(0); + } + DIP("ldurs%c %s, [%s, #%lld]", + ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); + return True; + } + /* else fall through */ + } + + /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */ + /* L==1 => mm==LD + L==0 => mm==ST + sz==00 => 32 bit (S) transfers + sz==01 => 64 bit (D) transfers + sz==10 => 128 bit (Q) transfers + sz==11 isn't allowed + simm7 is scaled by the (single-register) transfer size + + 31 29 22 21 14 9 4 + sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm + (at-Rn-then-Rn=EA) + + sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]! + (at-EA-then-Rn=EA) + + sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm] + (at-EA) + */ + + UInt insn_29_23 = INSN(29,23); + if (insn_29_23 == BITS7(1,0,1,1,0,0,1) + || insn_29_23 == BITS7(1,0,1,1,0,1,1) + || insn_29_23 == BITS7(1,0,1,1,0,1,0)) { + UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units + Bool isLD = INSN(22,22) == 1; + Bool wBack = INSN(23,23) == 1; + Long simm7 = (Long)sx_to_64(INSN(21,15), 7); + UInt tt2 = INSN(14,10); + UInt nn = INSN(9,5); + UInt tt1 = INSN(4,0); + if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) { + /* undecodable; fall through */ + } else { + if (nn == 31) { /* FIXME generate stack alignment check */ } + + // Compute the transfer address TA and the writeback address WA. 
+ UInt szB = 4 << szSlg2; /* szB is the per-register size */ + IRTemp tRN = newTemp(Ity_I64); + assign(tRN, getIReg64orSP(nn)); + IRTemp tEA = newTemp(Ity_I64); + simm7 = szB * simm7; + assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); + + IRTemp tTA = newTemp(Ity_I64); + IRTemp tWA = newTemp(Ity_I64); + switch (INSN(24,23)) { + case BITS2(0,1): + assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; + case BITS2(1,1): + assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; + case BITS2(1,0): + assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; + default: + vassert(0); /* NOTREACHED */ + } + + IRType ty = Ity_INVALID; + switch (szB) { + case 4: ty = Ity_F32; break; + case 8: ty = Ity_F64; break; + case 16: ty = Ity_V128; break; + default: vassert(0); + } + + /* Normally rN would be updated after the transfer. However, in + the special cases typifed by + stp q0, q1, [sp,#-512]! + stp d0, d1, [sp,#-512]! + stp s0, s1, [sp,#-512]! + it is necessary to update SP before the transfer, (1) + because Memcheck will otherwise complain about a write + below the stack pointer, and (2) because the segfault + stack extension mechanism will otherwise extend the stack + only down to SP before the instruction, which might not be + far enough, if the -512 bit takes the actual access + address to the next page. + */ + Bool earlyWBack + = wBack && simm7 < 0 + && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD; + + if (wBack && earlyWBack) + putIReg64orSP(nn, mkexpr(tEA)); + + if (isLD) { + if (szB < 16) { + putQReg128(tt1, mkV128(0x0000)); + } + putQRegLO(tt1, + loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0)))); + if (szB < 16) { + putQReg128(tt2, mkV128(0x0000)); + } + putQRegLO(tt2, + loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB)))); + } else { + storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)), + getQRegLO(tt1, ty)); + storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)), + getQRegLO(tt2, ty)); + } + + if (wBack && !earlyWBack) + putIReg64orSP(nn, mkexpr(tEA)); + + const HChar* fmt_str = NULL; + switch (INSN(24,23)) { + case BITS2(0,1): + fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; + break; + case BITS2(1,1): + fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; + break; + case BITS2(1,0): + fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; + break; + default: + vassert(0); + } + DIP(fmt_str, isLD ? 
"ld" : "st", + nameQRegLO(tt1, ty), nameQRegLO(tt2, ty), + nameIReg64orSP(nn), simm7); + return True; + } + } + + /* -------------- {LD,ST}R (vector register) --------------- */ + /* 31 29 23 20 15 12 11 9 4 + | | | | | | | | | + 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R{ext/sh}] + 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R{ext/sh}] + 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R{ext/sh}] + 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R{ext/sh}] + 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R{ext/sh}] + + 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R{ext/sh}] + 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R{ext/sh}] + 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R{ext/sh}] + 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R{ext/sh}] + 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R{ext/sh}] + */ + if (INSN(29,24) == BITS6(1,1,1,1,0,0) + && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { + HChar dis_buf[64]; + UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); + Bool isLD = INSN(22,22) == 1; + UInt tt = INSN(4,0); + if (szLg2 >= 4) goto after_LDR_STR_vector_register; + IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/); + if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register; + switch (szLg2) { + case 0: /* 8 bit */ + if (isLD) { + putQReg128(tt, mkV128(0x0000)); + putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea))); + DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); + } else { + vassert(0); //ATC + storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8)); + DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); + } + break; + case 1: + if (isLD) { + putQReg128(tt, mkV128(0x0000)); + putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea))); + DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); + } else { + vassert(0); //ATC + storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16)); + DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); + } + break; + case 2: /* 32 bit */ + if (isLD) { + putQReg128(tt, mkV128(0x0000)); + putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea))); + DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); + } else { + storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32)); + DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); + } + break; + case 3: /* 64 bit */ + if (isLD) { + putQReg128(tt, mkV128(0x0000)); + putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea))); + DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); + } else { + storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64)); + DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); + } + break; + case 4: return False; //ATC + default: vassert(0); + } + return True; + } + after_LDR_STR_vector_register: + + /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */ + /* 31 29 22 20 15 12 11 9 4 + | | | | | | | | | + 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R{ext/sh}] + + 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R{ext/sh}] + 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R{ext/sh}] + + 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R{ext/sh}] + 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R{ext/sh}] + */ + if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) + && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { + HChar dis_buf[64]; + UInt szLg2 = INSN(31,30); + Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64 + UInt tt = INSN(4,0); + if (szLg2 == 3) goto after_LDRS_integer_register; + IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); + if (ea == IRTemp_INVALID) goto after_LDRS_integer_register; + /* Enumerate the 5 variants explicitly. 
*/ + if (szLg2 == 2/*32 bit*/ && sxTo64) { + putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea)))); + DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf); + return True; + } + else + if (szLg2 == 1/*16 bit*/) { + if (sxTo64) { + putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea)))); + DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf); + } else { + putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea)))); + DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf); + } + return True; + } + else + if (szLg2 == 0/*8 bit*/) { + if (sxTo64) { + putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea)))); + DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf); + } else { + putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea)))); + DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf); + } + return True; + } + /* else it's an invalid combination */ + } + after_LDRS_integer_register: + + /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */ + /* This is the Unsigned offset variant only. The Post-Index and + Pre-Index variants are below. + + 31 29 23 21 9 4 + 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1] + 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2] + 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4] + 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8] + 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16] + + 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1] + 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2] + 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4] + 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8] + 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16] + */ + if (INSN(29,24) == BITS6(1,1,1,1,0,1) + && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) { + UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); + Bool isLD = INSN(22,22) == 1; + UInt pimm12 = INSN(21,10) << szLg2; + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + IRTemp tEA = newTemp(Ity_I64); + IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); + assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12))); + if (isLD) { + if (szLg2 < 4) { + putQReg128(tt, mkV128(0x0000)); + } + putQRegLO(tt, loadLE(ty, mkexpr(tEA))); + } else { + storeLE(mkexpr(tEA), getQRegLO(tt, ty)); + } + DIP("%s %s, [%s, #%u]\n", + isLD ? "ldr" : "str", + nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12); + return True; + } + + /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */ + /* These are the Post-Index and Pre-Index variants. + + 31 29 23 20 11 9 4 + (at-Rn-then-Rn=EA) + 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm + 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm + 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm + 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm + 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm + + (at-EA-then-Rn=EA) + 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]! + 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]! + 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]! + 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]! + 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]! + + Stores are the same except with bit 22 set to 0. 
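+      As with the integer versions above, the transfer is done at Rn for
+      the post-index form and at EA for the pre-index form, and in both
+      cases Rn is set to EA afterwards.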
+ */ + if (INSN(29,24) == BITS6(1,1,1,1,0,0) + && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 + && INSN(21,21) == 0 && INSN(10,10) == 1) { + UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); + Bool isLD = INSN(22,22) == 1; + UInt imm9 = INSN(20,12); + Bool atRN = INSN(11,11) == 0; + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + IRTemp tRN = newTemp(Ity_I64); + IRTemp tEA = newTemp(Ity_I64); + IRTemp tTA = IRTemp_INVALID; + IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); + ULong simm9 = sx_to_64(imm9, 9); + assign(tRN, getIReg64orSP(nn)); + assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); + tTA = atRN ? tRN : tEA; + if (isLD) { + if (szLg2 < 4) { + putQReg128(tt, mkV128(0x0000)); + } + putQRegLO(tt, loadLE(ty, mkexpr(tTA))); + } else { + storeLE(mkexpr(tTA), getQRegLO(tt, ty)); + } + putIReg64orSP(nn, mkexpr(tEA)); + DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n", + isLD ? "ldr" : "str", + nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9); + return True; + } + + /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */ + /* 31 29 23 20 11 9 4 + 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm] + 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm] + 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm] + 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm] + 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm] + + 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm] + 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm] + 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm] + 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm] + 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm] + */ + if (INSN(29,24) == BITS6(1,1,1,1,0,0) + && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 + && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { + UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); + Bool isLD = INSN(22,22) == 1; + UInt imm9 = INSN(20,12); + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + ULong simm9 = sx_to_64(imm9, 9); + IRTemp tEA = newTemp(Ity_I64); + IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); + assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9))); + if (isLD) { + if (szLg2 < 4) { + putQReg128(tt, mkV128(0x0000)); + } + putQRegLO(tt, loadLE(ty, mkexpr(tEA))); + } else { + storeLE(mkexpr(tEA), getQRegLO(tt, ty)); + } + DIP("%s %s, [%s, #%lld]\n", + isLD ? 
"ldur" : "stur", + nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9); + return True; + } + + /* ---------------- LDR (literal, SIMD&FP) ---------------- */ + /* 31 29 23 4 + 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)] + 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)] + 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)] + */ + if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) { + UInt szB = 4 << INSN(31,30); + UInt imm19 = INSN(23,5); + UInt tt = INSN(4,0); + ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); + IRType ty = preferredVectorSubTypeFromSize(szB); + putQReg128(tt, mkV128(0x0000)); + putQRegLO(tt, loadLE(ty, mkU64(ea))); + DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea); + return True; + } + + /* ---------- LD1/ST1 (single structure, no offset) ---------- */ + /* 31 23 + 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP] + 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP] + 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP] + 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP] + 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP] + 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP] + 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP] + 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP] + FIXME does this assume that the host is little endian? + */ + if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases + || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases + ) { + Bool isLD = INSN(22,22) == 1; + UInt rN = INSN(9,5); + UInt vT = INSN(4,0); + IRTemp tEA = newTemp(Ity_I64); + const HChar* names[4] = { "2d", "4s", "8h", "16b" }; + const HChar* name = names[INSN(11,10)]; + assign(tEA, getIReg64orSP(rN)); + if (rN == 31) { /* FIXME generate stack alignment check */ } + if (isLD) { + putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA))); + } else { + storeLE(mkexpr(tEA), getQReg128(vT)); + } + DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1", + vT, name, nameIReg64orSP(rN)); + return True; + } + + /* 31 23 + 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP] + 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP] + 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP] + 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP] + 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP] + 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP] + 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP] + 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP] + FIXME does this assume that the host is little endian? + */ + if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases + || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases + ) { + Bool isLD = INSN(22,22) == 1; + UInt rN = INSN(9,5); + UInt vT = INSN(4,0); + IRTemp tEA = newTemp(Ity_I64); + const HChar* names[4] = { "1d", "2s", "4h", "8b" }; + const HChar* name = names[INSN(11,10)]; + assign(tEA, getIReg64orSP(rN)); + if (rN == 31) { /* FIXME generate stack alignment check */ } + if (isLD) { + putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA))); + putQRegLane(vT, 1, mkU64(0)); + } else { + storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64)); + } + DIP("%s {v%u.%s}, [%s]\n", isLD ? 
"ld1" : "st1", + vT, name, nameIReg64orSP(rN)); + return True; + } + + /* ---------- LD1/ST1 (single structure, post index) ---------- */ + /* 31 23 + 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16 + 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16 + 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16 + 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16 + 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16 + 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16 + 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16 + 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16 + Note that #16 is implied and cannot be any other value. + FIXME does this assume that the host is little endian? + */ + if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases + || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases + ) { + Bool isLD = INSN(22,22) == 1; + UInt rN = INSN(9,5); + UInt vT = INSN(4,0); + IRTemp tEA = newTemp(Ity_I64); + const HChar* names[4] = { "2d", "4s", "8h", "16b" }; + const HChar* name = names[INSN(11,10)]; + assign(tEA, getIReg64orSP(rN)); + if (rN == 31) { /* FIXME generate stack alignment check */ } + if (isLD) { + putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA))); + } else { + storeLE(mkexpr(tEA), getQReg128(vT)); + } + putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16))); + DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1", + vT, name, nameIReg64orSP(rN)); + return True; + } + + /* 31 23 + 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8 + 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8 + 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8 + 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8 + 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8 + 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8 + 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8 + 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8 + Note that #8 is implied and cannot be any other value. + FIXME does this assume that the host is little endian? + */ + if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases + || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases + ) { + Bool isLD = INSN(22,22) == 1; + UInt rN = INSN(9,5); + UInt vT = INSN(4,0); + IRTemp tEA = newTemp(Ity_I64); + const HChar* names[4] = { "1d", "2s", "4h", "8b" }; + const HChar* name = names[INSN(11,10)]; + assign(tEA, getIReg64orSP(rN)); + if (rN == 31) { /* FIXME generate stack alignment check */ } + if (isLD) { + putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA))); + putQRegLane(vT, 1, mkU64(0)); + } else { + storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64)); + } + putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8))); + DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1", + vT, name, nameIReg64orSP(rN)); + return True; + } + + /* ---------- LD2/ST2 (multiple structures, post index) ---------- */ + /* Only a very few cases. 
*/ + /* 31 23 11 9 4 + 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32 + 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32 + 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32 + 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32 + */ + if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d + || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d + || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s + || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s + ) { + Bool isLD = INSN(22,22) == 1; + UInt rN = INSN(9,5); + UInt vT = INSN(4,0); + IRTemp tEA = newTemp(Ity_I64); + UInt sz = INSN(11,10); + const HChar* name = "??"; + assign(tEA, getIReg64orSP(rN)); + if (rN == 31) { /* FIXME generate stack alignment check */ } + IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0)); + IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8)); + IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16)); + IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24)); + if (sz == BITS2(1,1)) { + name = "2d"; + if (isLD) { + putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0)); + putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16)); + putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8)); + putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24)); + } else { + storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64)); + storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64)); + storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64)); + storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64)); + } + } + else if (sz == BITS2(1,0)) { + /* Uh, this is ugly. TODO: better. */ + name = "4s"; + IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4)); + IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12)); + IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20)); + IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28)); + if (isLD) { + putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0)); + putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8)); + putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16)); + putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24)); + putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4)); + putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12)); + putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20)); + putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28)); + } else { + storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32)); + storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32)); + storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32)); + storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32)); + storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32)); + storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32)); + storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32)); + storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32)); + } + } + else { + vassert(0); // Can't happen. + } + putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32))); + DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2", + (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN)); + return True; + } + + /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */ + /* Only a very few cases. 
*/ + /* 31 23 + 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP] + 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP] + */ + if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1 + || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1 + ) { + Bool isLD = INSN(22,22) == 1; + UInt rN = INSN(9,5); + UInt vT = INSN(4,0); + IRTemp tEA = newTemp(Ity_I64); + const HChar* name = "16b"; + assign(tEA, getIReg64orSP(rN)); + if (rN == 31) { /* FIXME generate stack alignment check */ } + IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0)); + IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16)); + if (isLD) { + putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0)); + putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16)); + } else { + storeLE(tEA_0, getQReg128((vT+0) % 32)); + storeLE(tEA_16, getQReg128((vT+1) % 32)); + } + DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1", + (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN)); + return True; + } + + /* ------------------ LD{,A}X{R,RH,RB} ------------------ */ + /* ------------------ ST{,L}X{R,RH,RB} ------------------ */ + /* 31 29 23 20 14 9 4 + sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP] + sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP] + sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP] + sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP] + */ + if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) + && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) + && INSN(14,10) == BITS5(1,1,1,1,1)) { + UInt szBlg2 = INSN(31,30); + Bool isLD = INSN(22,22) == 1; + Bool isAcqOrRel = INSN(15,15) == 1; + UInt ss = INSN(20,16); + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + + vassert(szBlg2 < 4); + UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ + IRType ty = integerIRTypeOfSize(szB); + const HChar* suffix[4] = { "rb", "rh", "r", "r" }; + + IRTemp ea = newTemp(Ity_I64); + assign(ea, getIReg64orSP(nn)); + /* FIXME generate check that ea is szB-aligned */ + + if (isLD && ss == BITS5(1,1,1,1,1)) { + IRTemp res = newTemp(ty); + stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); + putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); + if (isAcqOrRel) { + stmt(IRStmt_MBE(Imbe_Fence)); + } + DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], + nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); + return True; + } + if (!isLD) { + if (isAcqOrRel) { + stmt(IRStmt_MBE(Imbe_Fence)); + } + IRTemp res = newTemp(Ity_I1); + IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); + stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); + /* IR semantics: res is 1 if store succeeds, 0 if it fails. + Need to set rS to 1 on failure, 0 on success. */ + putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), + mkU64(1))); + DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? 
"a" : "", suffix[szBlg2], + nameIRegOrZR(False, ss), + nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); + return True; + } + /* else fall through */ + } + + /* ------------------ LDA{R,RH,RB} ------------------ */ + /* ------------------ STL{R,RH,RB} ------------------ */ + /* 31 29 23 20 14 9 4 + sz 001000 110 11111 1 11111 n t LDAR Rt, [Xn|SP] + sz 001000 100 11111 1 11111 n t STLR Rt, [Xn|SP] + */ + if (INSN(29,23) == BITS7(0,0,1,0,0,0,1) + && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) { + UInt szBlg2 = INSN(31,30); + Bool isLD = INSN(22,22) == 1; + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + + vassert(szBlg2 < 4); + UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ + IRType ty = integerIRTypeOfSize(szB); + const HChar* suffix[4] = { "rb", "rh", "r", "r" }; + + IRTemp ea = newTemp(Ity_I64); + assign(ea, getIReg64orSP(nn)); + /* FIXME generate check that ea is szB-aligned */ + + if (isLD) { + IRTemp res = newTemp(ty); + assign(res, loadLE(ty, mkexpr(ea))); + putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); + stmt(IRStmt_MBE(Imbe_Fence)); + DIP("lda%s %s, [%s]\n", suffix[szBlg2], + nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); + } else { + stmt(IRStmt_MBE(Imbe_Fence)); + IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); + storeLE(mkexpr(ea), data); + DIP("stl%s %s, [%s]\n", suffix[szBlg2], + nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); + } + return True; + } + + vex_printf("ARM64 front end: load_store\n"); + return False; +# undef INSN +} + + +/*------------------------------------------------------------*/ +/*--- Control flow and misc instructions ---*/ +/*------------------------------------------------------------*/ + +static +Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn, + VexArchInfo* archinfo) +{ +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + + /* ---------------------- B cond ----------------------- */ + /* 31 24 4 3 + 0101010 0 imm19 0 cond */ + if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) { + UInt cond = INSN(3,0); + ULong uimm64 = INSN(23,5) << 2; + Long simm64 = (Long)sx_to_64(uimm64, 21); + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 4); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), + Ijk_Boring, + IRConst_U64(guest_PC_curr_instr + simm64), + OFFB_PC) ); + putPC(mkU64(guest_PC_curr_instr + 4)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64); + return True; + } + + /* -------------------- B{L} uncond -------------------- */ + if (INSN(30,26) == BITS5(0,0,1,0,1)) { + /* 000101 imm26 B (PC + sxTo64(imm26 << 2)) + 100101 imm26 B (PC + sxTo64(imm26 << 2)) + */ + UInt bLink = INSN(31,31); + ULong uimm64 = INSN(25,0) << 2; + Long simm64 = (Long)sx_to_64(uimm64, 28); + if (bLink) { + putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); + } + putPC(mkU64(guest_PC_curr_instr + simm64)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Call; + DIP("b%s 0x%llx\n", bLink == 1 ? 
"l" : "", + guest_PC_curr_instr + simm64); + return True; + } + + /* --------------------- B{L} reg --------------------- */ + /* 31 24 22 20 15 9 4 + 1101011 00 10 11111 000000 nn 00000 RET Rn + 1101011 00 01 11111 000000 nn 00000 CALL Rn + 1101011 00 00 11111 000000 nn 00000 JMP Rn + */ + if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0) + && INSN(20,16) == BITS5(1,1,1,1,1) + && INSN(15,10) == BITS6(0,0,0,0,0,0) + && INSN(4,0) == BITS5(0,0,0,0,0)) { + UInt branch_type = INSN(22,21); + UInt nn = INSN(9,5); + if (branch_type == BITS2(1,0) /* RET */) { + putPC(getIReg64orZR(nn)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Ret; + DIP("ret %s\n", nameIReg64orZR(nn)); + return True; + } + if (branch_type == BITS2(0,1) /* CALL */) { + IRTemp dst = newTemp(Ity_I64); + assign(dst, getIReg64orZR(nn)); + putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); + putPC(mkexpr(dst)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Call; + DIP("blr %s\n", nameIReg64orZR(nn)); + return True; + } + if (branch_type == BITS2(0,0) /* JMP */) { + putPC(getIReg64orZR(nn)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + DIP("jmp %s\n", nameIReg64orZR(nn)); + return True; + } + } + + /* -------------------- CB{N}Z -------------------- */ + /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2)) + sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2)) + */ + if (INSN(30,25) == BITS6(0,1,1,0,1,0)) { + Bool is64 = INSN(31,31) == 1; + Bool bIfZ = INSN(24,24) == 0; + ULong uimm64 = INSN(23,5) << 2; + UInt rT = INSN(4,0); + Long simm64 = (Long)sx_to_64(uimm64, 21); + IRExpr* cond = NULL; + if (is64) { + cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, + getIReg64orZR(rT), mkU64(0)); + } else { + cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32, + getIReg32orZR(rT), mkU32(0)); + } + stmt( IRStmt_Exit(cond, + Ijk_Boring, + IRConst_U64(guest_PC_curr_instr + simm64), + OFFB_PC) ); + putPC(mkU64(guest_PC_curr_instr + 4)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + DIP("cb%sz %s, 0x%llx\n", + bIfZ ? "" : "n", nameIRegOrZR(is64, rT), + guest_PC_curr_instr + simm64); + return True; + } + + /* -------------------- TB{N}Z -------------------- */ + /* 31 30 24 23 18 5 4 + b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) + b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) + */ + if (INSN(30,25) == BITS6(0,1,1,0,1,1)) { + UInt b5 = INSN(31,31); + Bool bIfZ = INSN(24,24) == 0; + UInt b40 = INSN(23,19); + UInt imm14 = INSN(18,5); + UInt tt = INSN(4,0); + UInt bitNo = (b5 << 5) | b40; + ULong uimm64 = imm14 << 2; + Long simm64 = sx_to_64(uimm64, 16); + IRExpr* cond + = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, + binop(Iop_And64, + binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)), + mkU64(1)), + mkU64(0)); + stmt( IRStmt_Exit(cond, + Ijk_Boring, + IRConst_U64(guest_PC_curr_instr + simm64), + OFFB_PC) ); + putPC(mkU64(guest_PC_curr_instr + 4)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + DIP("tb%sz %s, #%u, 0x%llx\n", + bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo, + guest_PC_curr_instr + simm64); + return True; + } + + /* -------------------- SVC -------------------- */ + /* 11010100 000 imm16 000 01 + Don't bother with anything except the imm16==0 case. 
+ */ + if (INSN(31,0) == 0xD4000001) { + putPC(mkU64(guest_PC_curr_instr + 4)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Sys_syscall; + DIP("svc #0\n"); + return True; + } + + /* ------------------ M{SR,RS} ------------------ */ + /* Only handles the case where the system register is TPIDR_EL0. + 0xD51BD0 010 Rt MSR tpidr_el0, rT + 0xD53BD0 010 Rt MRS rT, tpidr_el0 + */ + if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/ + || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) { + Bool toSys = INSN(21,21) == 0; + UInt tt = INSN(4,0); + if (toSys) { + stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) ); + DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt)); + } else { + putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 )); + DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt)); + } + return True; + } + /* Cases for FPCR + 0xD51B44 000 Rt MSR fpcr, rT + 0xD53B44 000 Rt MSR rT, fpcr + */ + if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/ + || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) { + Bool toSys = INSN(21,21) == 0; + UInt tt = INSN(4,0); + if (toSys) { + stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) ); + DIP("msr fpcr, %s\n", nameIReg64orZR(tt)); + } else { + putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32)); + DIP("mrs %s, fpcr\n", nameIReg64orZR(tt)); + } + return True; + } + /* Cases for FPSR + 0xD51B44 001 Rt MSR fpsr, rT + 0xD53B44 001 Rt MSR rT, fpsr + */ + if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/ + || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) { + Bool toSys = INSN(21,21) == 0; + UInt tt = INSN(4,0); + if (toSys) { + stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) ); + DIP("msr fpsr, %s\n", nameIReg64orZR(tt)); + } else { + putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32)); + DIP("mrs %s, fpsr\n", nameIReg64orZR(tt)); + } + return True; + } + /* Cases for NZCV + D51B42 000 Rt MSR nzcv, rT + D53B42 000 Rt MRS rT, nzcv + */ + if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/ + || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) { + Bool toSys = INSN(21,21) == 0; + UInt tt = INSN(4,0); + if (toSys) { + IRTemp t = newTemp(Ity_I64); + assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL))); + setFlags_COPY(t); + DIP("msr %s, nzcv\n", nameIReg32orZR(tt)); + } else { + IRTemp res = newTemp(Ity_I64); + assign(res, mk_arm64g_calculate_flags_nzcv()); + putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res))); + DIP("mrs %s, nzcv\n", nameIReg64orZR(tt)); + } + return True; + } + /* Cases for DCZID_EL0 + Don't support arbitrary reads and writes to this register. Just + return the value 16, which indicates that the DC ZVA instruction + is not permitted, so we don't have to emulate it. + D5 3B 00 111 Rt MRS rT, dczid_el0 + */ + if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) { + UInt tt = INSN(4,0); + putIReg64orZR(tt, mkU64(1<<4)); + DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt)); + return True; + } + /* Cases for CTR_EL0 + We just handle reads, and make up a value from the D and I line + sizes in the VexArchInfo we are given, and patch in the following + fields that the Foundation model gives ("natively"): + CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11 + D5 3B 00 001 Rt MRS rT, dczid_el0 + */ + if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) { + UInt tt = INSN(4,0); + /* Need to generate a value from dMinLine_lg2_szB and + dMinLine_lg2_szB. The value in the register is in 32-bit + units, so need to subtract 2 from the values in the + VexArchInfo. 
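+      (Worked example, assuming a 64-byte D-cache line: dMinLine_lg2_szB
+      is 6, so the DminLine field written below is 6 - 2 = 4, meaning
+      16 words.)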
We can assume that the values here are valid -- + disInstr_ARM64 checks them -- so there's no need to deal with + out-of-range cases. */ + vassert(archinfo->arm64_dMinLine_lg2_szB >= 2 + && archinfo->arm64_dMinLine_lg2_szB <= 17 + && archinfo->arm64_iMinLine_lg2_szB >= 2 + && archinfo->arm64_iMinLine_lg2_szB <= 17); + UInt val + = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16) + | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0); + putIReg64orZR(tt, mkU64(val)); + DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt)); + return True; + } + + /* ------------------ IC_IVAU ------------------ */ + /* D5 0B 75 001 Rt ic ivau, rT + */ + if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) { + /* We will always be provided with a valid iMinLine value. */ + vassert(archinfo->arm64_iMinLine_lg2_szB >= 2 + && archinfo->arm64_iMinLine_lg2_szB <= 17); + /* Round the requested address, in rT, down to the start of the + containing block. */ + UInt tt = INSN(4,0); + ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB; + IRTemp addr = newTemp(Ity_I64); + assign( addr, binop( Iop_And64, + getIReg64orZR(tt), + mkU64(~(lineszB - 1))) ); + /* Set the invalidation range, request exit-and-invalidate, with + continuation at the next instruction. */ + stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr))); + stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB))); + /* be paranoid ... */ + stmt( IRStmt_MBE(Imbe_Fence) ); + putPC(mkU64( guest_PC_curr_instr + 4 )); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_InvalICache; + DIP("ic ivau, %s\n", nameIReg64orZR(tt)); + return True; + } + + /* ------------------ DC_CVAU ------------------ */ + /* D5 0B 7B 001 Rt dc cvau, rT + */ + if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) { + /* Exactly the same scheme as for IC IVAU, except we observe the + dMinLine size, and request an Ijk_FlushDCache instead of + Ijk_InvalICache. */ + /* We will always be provided with a valid dMinLine value. */ + vassert(archinfo->arm64_dMinLine_lg2_szB >= 2 + && archinfo->arm64_dMinLine_lg2_szB <= 17); + /* Round the requested address, in rT, down to the start of the + containing block. */ + UInt tt = INSN(4,0); + ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB; + IRTemp addr = newTemp(Ity_I64); + assign( addr, binop( Iop_And64, + getIReg64orZR(tt), + mkU64(~(lineszB - 1))) ); + /* Set the flush range, request exit-and-flush, with + continuation at the next instruction. */ + stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr))); + stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB))); + /* be paranoid ... 
*/ + stmt( IRStmt_MBE(Imbe_Fence) ); + putPC(mkU64( guest_PC_curr_instr + 4 )); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_FlushDCache; + DIP("dc cvau, %s\n", nameIReg64orZR(tt)); + return True; + } + + /* ------------------ ISB, DMB, DSB ------------------ */ + if (INSN(31,0) == 0xD5033FDF) { + stmt(IRStmt_MBE(Imbe_Fence)); + DIP("isb\n"); + return True; + } + if (INSN(31,0) == 0xD5033BBF) { + stmt(IRStmt_MBE(Imbe_Fence)); + DIP("dmb ish\n"); + return True; + } + if (INSN(31,0) == 0xD5033B9F) { + stmt(IRStmt_MBE(Imbe_Fence)); + DIP("dsb ish\n"); + return True; + } + + /* -------------------- NOP -------------------- */ + if (INSN(31,0) == 0xD503201F) { + DIP("nop\n"); + return True; + } + + //fail: + vex_printf("ARM64 front end: branch_etc\n"); + return False; +# undef INSN +} + + +/*------------------------------------------------------------*/ +/*--- SIMD and FP instructions ---*/ +/*------------------------------------------------------------*/ + +/* begin FIXME -- rm temp scaffolding */ +static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp ); +static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp ); + +static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp ); +static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp ); + +static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp ); +static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp ); + +static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp ); +static IRExpr* mk_CatOddLanes8x16 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp ); +/* end FIXME -- rm temp scaffolding */ + +/* Generate N copies of |bit| in the bottom of a ULong. */ +static ULong Replicate ( ULong bit, Int N ) +{ + vassert(bit <= 1 && N >= 1 && N < 64); + if (bit == 0) { + return 0; + } else { + /* Careful. This won't work for N == 64. */ + return (1ULL << N) - 1; + } +} + +static ULong Replicate32x2 ( ULong bits32 ) +{ + vassert(0 == (bits32 & ~0xFFFFFFFFULL)); + return (bits32 << 32) | bits32; +} + +static ULong Replicate16x4 ( ULong bits16 ) +{ + vassert(0 == (bits16 & ~0xFFFFULL)); + return Replicate32x2((bits16 << 16) | bits16); +} + +static ULong Replicate8x8 ( ULong bits8 ) +{ + vassert(0 == (bits8 & ~0xFFULL)); + return Replicate16x4((bits8 << 8) | bits8); +} + +/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of + |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N + is 64. In the former case, the upper 32 bits of the returned value + are guaranteed to be zero. */ +static ULong VFPExpandImm ( ULong imm8, Int N ) +{ + vassert(imm8 <= 0xFF); + vassert(N == 32 || N == 64); + Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2. + Int F = N - E - 1; + ULong imm8_6 = (imm8 >> 6) & 1; + /* sign: 1 bit */ + /* exp: E bits */ + /* frac: F bits */ + ULong sign = (imm8 >> 7) & 1; + ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1); + ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6); + vassert(sign < (1ULL << 1)); + vassert(exp < (1ULL << E)); + vassert(frac < (1ULL << F)); + vassert(1 + E + F == N); + ULong res = (sign << (E+F)) | (exp << F) | frac; + return res; +} + +/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value. + This might fail, as indicated by the returned Bool. Page 2530 of + the manual. 
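+   Hand-worked illustrations, not taken from the manual: op=0,
+   cmode=0b0000, imm8=0x55 expands to Replicate32x2(0x55) =
+   0x0000005500000055; op=0, cmode=0b1110, imm8=0xAB expands to
+   Replicate8x8(0xAB) = 0xABABABABABABABAB; op=0, cmode=0b0010, imm8=0
+   fails, since a zero immediate is rejected for the shifted forms.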
*/ +static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res, + UInt op, UInt cmode, UInt imm8 ) +{ + vassert(op <= 1); + vassert(cmode <= 15); + vassert(imm8 <= 255); + + *res = 0; /* will overwrite iff returning True */ + + ULong imm64 = 0; + Bool testimm8 = False; + + switch (cmode >> 1) { + case 0: + testimm8 = False; imm64 = Replicate32x2(imm8); break; + case 1: + testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break; + case 2: + testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break; + case 3: + testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break; + case 4: + testimm8 = False; imm64 = Replicate16x4(imm8); break; + case 5: + testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break; + case 6: + testimm8 = True; + if ((cmode & 1) == 0) + imm64 = Replicate32x2((imm8 << 8) | 0xFF); + else + imm64 = Replicate32x2((imm8 << 16) | 0xFFFF); + break; + case 7: + testimm8 = False; + if ((cmode & 1) == 0 && op == 0) + imm64 = Replicate8x8(imm8); + if ((cmode & 1) == 0 && op == 1) { + imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00; + imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00; + imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00; + imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00; + imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00; + imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00; + imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00; + imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00; + } + if ((cmode & 1) == 1 && op == 0) { + ULong imm8_7 = (imm8 >> 7) & 1; + ULong imm8_6 = (imm8 >> 6) & 1; + ULong imm8_50 = imm8 & 63; + ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19)) + | ((imm8_6 ^ 1) << (5 + 6 + 19)) + | (Replicate(imm8_6, 5) << (6 + 19)) + | (imm8_50 << 19); + imm64 = Replicate32x2(imm32); + } + if ((cmode & 1) == 1 && op == 1) { + // imm64 = imm8<7>:NOT(imm8<6>) + // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48); + ULong imm8_7 = (imm8 >> 7) & 1; + ULong imm8_6 = (imm8 >> 6) & 1; + ULong imm8_50 = imm8 & 63; + imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62) + | (Replicate(imm8_6, 8) << 54) + | (imm8_50 << 48); + } + break; + default: + vassert(0); + } + + if (testimm8 && imm8 == 0) + return False; + + *res = imm64; + return True; +} + + +/* Help a bit for decoding laneage for vector operations that can be + of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q + and SZ bits, typically for vector floating point. */ +static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF, + /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper, + /*OUT*/const HChar** arrSpec, + Bool bitQ, Bool bitSZ ) +{ + vassert(bitQ == True || bitQ == False); + vassert(bitSZ == True || bitSZ == False); + if (bitQ && bitSZ) { // 2x64 + if (tyI) *tyI = Ity_I64; + if (tyF) *tyF = Ity_F64; + if (nLanes) *nLanes = 2; + if (zeroUpper) *zeroUpper = False; + if (arrSpec) *arrSpec = "2d"; + return True; + } + if (bitQ && !bitSZ) { // 4x32 + if (tyI) *tyI = Ity_I32; + if (tyF) *tyF = Ity_F32; + if (nLanes) *nLanes = 4; + if (zeroUpper) *zeroUpper = False; + if (arrSpec) *arrSpec = "4s"; + return True; + } + if (!bitQ && !bitSZ) { // 2x32 + if (tyI) *tyI = Ity_I32; + if (tyF) *tyF = Ity_F32; + if (nLanes) *nLanes = 2; + if (zeroUpper) *zeroUpper = True; + if (arrSpec) *arrSpec = "2s"; + return True; + } + // Else impliedly 1x64, which isn't allowed. + return False; +} + +/* Helper for decoding laneage for simple vector operations, + eg integer add. 
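+   For example, szBlg2=2 with Q=1 selects the "4s" arrangement with no
+   upper-half zeroing, Q=0 selects "2s" with the upper 64 bits of the
+   destination zeroed, and szBlg2=3 with Q=0 (the 1x64 case) is
+   rejected.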
*/ +static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper, + /*OUT*/const HChar** arrSpec, + Bool bitQ, UInt szBlg2 ) +{ + vassert(bitQ == True || bitQ == False); + vassert(szBlg2 < 4); + Bool zu = False; + const HChar* as = NULL; + switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) { + case 0: zu = True; as = "8b"; break; + case 1: zu = False; as = "16b"; break; + case 2: zu = True; as = "4h"; break; + case 3: zu = False; as = "8h"; break; + case 4: zu = True; as = "2s"; break; + case 5: zu = False; as = "4s"; break; + case 6: return False; // impliedly 1x64 + case 7: zu = False; as = "2d"; break; + default: vassert(0); + } + vassert(as); + if (arrSpec) *arrSpec = as; + if (zeroUpper) *zeroUpper = zu; + return True; +} + + +/* Helper for decoding laneage for shift-style vector operations + that involve an immediate shift amount. */ +static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2, + UInt immh, UInt immb ) +{ + vassert(immh < (1<<4)); + vassert(immb < (1<<3)); + UInt immhb = (immh << 3) | immb; + if (immh & 8) { + if (shift) *shift = 128 - immhb; + if (szBlg2) *szBlg2 = 3; + return True; + } + if (immh & 4) { + if (shift) *shift = 64 - immhb; + if (szBlg2) *szBlg2 = 2; + return True; + } + if (immh & 2) { + if (shift) *shift = 32 - immhb; + if (szBlg2) *szBlg2 = 1; + return True; + } + if (immh & 1) { + if (shift) *shift = 16 - immhb; + if (szBlg2) *szBlg2 = 0; + return True; + } + return False; +} + + +/* Generate IR to fold all lanes of the V128 value in 'src' as + characterised by the operator 'op', and return the result in the + bottom bits of a V128, with all other bits set to zero. */ +static IRTemp math_MINMAXV ( IRTemp src, IROp op ) +{ + /* The basic idea is to use repeated applications of Iop_CatEven* + and Iop_CatOdd* operators to 'src' so as to clone each lane into + a complete vector. Then fold all those vectors with 'op' and + zero out all but the least significant lane. */ + switch (op) { + case Iop_Min8Sx16: case Iop_Min8Ux16: + case Iop_Max8Sx16: case Iop_Max8Ux16: { + /* NB: temp naming here is misleading -- the naming is for 8 + lanes of 16 bit, whereas what is being operated on is 16 + lanes of 8 bits. 
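+            For instance, x76547654 is the upper 64-bit half of the source
+            duplicated into both halves -- 16-bit lanes 7..4 under the
+            naming scheme, but really byte lanes 15..8.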
*/ + IRTemp x76543210 = src; + IRTemp x76547654 = newTemp(Ity_V128); + IRTemp x32103210 = newTemp(Ity_V128); + assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); + assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210)); + IRTemp x76767676 = newTemp(Ity_V128); + IRTemp x54545454 = newTemp(Ity_V128); + IRTemp x32323232 = newTemp(Ity_V128); + IRTemp x10101010 = newTemp(Ity_V128); + assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); + assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); + assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); + assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); + IRTemp x77777777 = newTemp(Ity_V128); + IRTemp x66666666 = newTemp(Ity_V128); + IRTemp x55555555 = newTemp(Ity_V128); + IRTemp x44444444 = newTemp(Ity_V128); + IRTemp x33333333 = newTemp(Ity_V128); + IRTemp x22222222 = newTemp(Ity_V128); + IRTemp x11111111 = newTemp(Ity_V128); + IRTemp x00000000 = newTemp(Ity_V128); + assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); + assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); + assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); + assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); + assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); + assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); + assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); + assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); + /* Naming not misleading after here. */ + IRTemp xAllF = newTemp(Ity_V128); + IRTemp xAllE = newTemp(Ity_V128); + IRTemp xAllD = newTemp(Ity_V128); + IRTemp xAllC = newTemp(Ity_V128); + IRTemp xAllB = newTemp(Ity_V128); + IRTemp xAllA = newTemp(Ity_V128); + IRTemp xAll9 = newTemp(Ity_V128); + IRTemp xAll8 = newTemp(Ity_V128); + IRTemp xAll7 = newTemp(Ity_V128); + IRTemp xAll6 = newTemp(Ity_V128); + IRTemp xAll5 = newTemp(Ity_V128); + IRTemp xAll4 = newTemp(Ity_V128); + IRTemp xAll3 = newTemp(Ity_V128); + IRTemp xAll2 = newTemp(Ity_V128); + IRTemp xAll1 = newTemp(Ity_V128); + IRTemp xAll0 = newTemp(Ity_V128); + assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777)); + assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777)); + assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666)); + assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666)); + assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555)); + assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555)); + assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444)); + assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444)); + assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333)); + assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333)); + assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222)); + assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222)); + assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111)); + assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111)); + assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000)); + assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000)); + IRTemp maxFE = newTemp(Ity_V128); + IRTemp maxDC = newTemp(Ity_V128); + IRTemp maxBA = newTemp(Ity_V128); + IRTemp max98 = newTemp(Ity_V128); + IRTemp max76 = newTemp(Ity_V128); + IRTemp max54 = newTemp(Ity_V128); + IRTemp max32 = newTemp(Ity_V128); + IRTemp max10 = newTemp(Ity_V128); + assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE))); + assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC))); + assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA))); + assign(max98, binop(op, 
mkexpr(xAll9), mkexpr(xAll8))); + assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6))); + assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4))); + assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2))); + assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0))); + IRTemp maxFEDC = newTemp(Ity_V128); + IRTemp maxBA98 = newTemp(Ity_V128); + IRTemp max7654 = newTemp(Ity_V128); + IRTemp max3210 = newTemp(Ity_V128); + assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC))); + assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98))); + assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); + assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); + IRTemp maxFEDCBA98 = newTemp(Ity_V128); + IRTemp max76543210 = newTemp(Ity_V128); + assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98))); + assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); + IRTemp maxAllLanes = newTemp(Ity_V128); + assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98), + mkexpr(max76543210))); + IRTemp res = newTemp(Ity_V128); + assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes))); + return res; + } + case Iop_Min16Sx8: case Iop_Min16Ux8: + case Iop_Max16Sx8: case Iop_Max16Ux8: { + IRTemp x76543210 = src; + IRTemp x76547654 = newTemp(Ity_V128); + IRTemp x32103210 = newTemp(Ity_V128); + assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); + assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210)); + IRTemp x76767676 = newTemp(Ity_V128); + IRTemp x54545454 = newTemp(Ity_V128); + IRTemp x32323232 = newTemp(Ity_V128); + IRTemp x10101010 = newTemp(Ity_V128); + assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); + assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); + assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); + assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); + IRTemp x77777777 = newTemp(Ity_V128); + IRTemp x66666666 = newTemp(Ity_V128); + IRTemp x55555555 = newTemp(Ity_V128); + IRTemp x44444444 = newTemp(Ity_V128); + IRTemp x33333333 = newTemp(Ity_V128); + IRTemp x22222222 = newTemp(Ity_V128); + IRTemp x11111111 = newTemp(Ity_V128); + IRTemp x00000000 = newTemp(Ity_V128); + assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); + assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); + assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); + assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); + assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); + assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); + assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); + assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); + IRTemp max76 = newTemp(Ity_V128); + IRTemp max54 = newTemp(Ity_V128); + IRTemp max32 = newTemp(Ity_V128); + IRTemp max10 = newTemp(Ity_V128); + assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666))); + assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444))); + assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222))); + assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000))); + IRTemp max7654 = newTemp(Ity_V128); + IRTemp max3210 = newTemp(Ity_V128); + assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); + assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); + IRTemp max76543210 = newTemp(Ity_V128); + assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); + IRTemp res = newTemp(Ity_V128); + assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210))); + return res; + } + case Iop_Min32Sx4: case Iop_Min32Ux4: + case 
Iop_Max32Sx4: case Iop_Max32Ux4: { + IRTemp x3210 = src; + IRTemp x3232 = newTemp(Ity_V128); + IRTemp x1010 = newTemp(Ity_V128); + assign(x3232, mk_CatOddLanes64x2 (x3210, x3210)); + assign(x1010, mk_CatEvenLanes64x2(x3210, x3210)); + IRTemp x3333 = newTemp(Ity_V128); + IRTemp x2222 = newTemp(Ity_V128); + IRTemp x1111 = newTemp(Ity_V128); + IRTemp x0000 = newTemp(Ity_V128); + assign(x3333, mk_CatOddLanes32x4 (x3232, x3232)); + assign(x2222, mk_CatEvenLanes32x4(x3232, x3232)); + assign(x1111, mk_CatOddLanes32x4 (x1010, x1010)); + assign(x0000, mk_CatEvenLanes32x4(x1010, x1010)); + IRTemp max32 = newTemp(Ity_V128); + IRTemp max10 = newTemp(Ity_V128); + assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222))); + assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000))); + IRTemp max3210 = newTemp(Ity_V128); + assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); + IRTemp res = newTemp(Ity_V128); + assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210))); + return res; + } + default: + vassert(0); + } +} + + +/* Generate IR for TBL and TBX. This deals with the 128 bit case + only. */ +static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src, + IRTemp oor_values ) +{ + vassert(len >= 0 && len <= 3); + + /* Generate some useful constants as concisely as possible. */ + IRTemp half15 = newTemp(Ity_I64); + assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL)); + IRTemp half16 = newTemp(Ity_I64); + assign(half16, mkU64(0x1010101010101010ULL)); + + /* A zero vector */ + IRTemp allZero = newTemp(Ity_V128); + assign(allZero, mkV128(0x0000)); + /* A vector containing 15 in each 8-bit lane */ + IRTemp all15 = newTemp(Ity_V128); + assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15))); + /* A vector containing 16 in each 8-bit lane */ + IRTemp all16 = newTemp(Ity_V128); + assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16))); + /* A vector containing 32 in each 8-bit lane */ + IRTemp all32 = newTemp(Ity_V128); + assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16))); + /* A vector containing 48 in each 8-bit lane */ + IRTemp all48 = newTemp(Ity_V128); + assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32))); + /* A vector containing 64 in each 8-bit lane */ + IRTemp all64 = newTemp(Ity_V128); + assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32))); + + /* Group the 16/32/48/64 vectors so as to be indexable. */ + IRTemp allXX[4] = { all16, all32, all48, all64 }; + + /* Compute the result for each table vector, with zeroes in places + where the index values are out of range, and OR them into the + running vector. */ + IRTemp running_result = newTemp(Ity_V128); + assign(running_result, mkV128(0)); + + UInt tabent; + for (tabent = 0; tabent <= len; tabent++) { + vassert(tabent >= 0 && tabent < 4); + IRTemp bias = newTemp(Ity_V128); + assign(bias, + mkexpr(tabent == 0 ? 
allZero : allXX[tabent-1])); + IRTemp biased_indices = newTemp(Ity_V128); + assign(biased_indices, + binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias))); + IRTemp valid_mask = newTemp(Ity_V128); + assign(valid_mask, + binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices))); + IRTemp safe_biased_indices = newTemp(Ity_V128); + assign(safe_biased_indices, + binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15))); + IRTemp results_or_junk = newTemp(Ity_V128); + assign(results_or_junk, + binop(Iop_Perm8x16, mkexpr(tab[tabent]), + mkexpr(safe_biased_indices))); + IRTemp results_or_zero = newTemp(Ity_V128); + assign(results_or_zero, + binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask))); + /* And OR that into the running result. */ + IRTemp tmp = newTemp(Ity_V128); + assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero), + mkexpr(running_result))); + running_result = tmp; + } + + /* So now running_result holds the overall result where the indices + are in range, and zero in out-of-range lanes. Now we need to + compute an overall validity mask and use this to copy in the + lanes in the oor_values for out of range indices. This is + unnecessary for TBL but will get folded out by iropt, so we lean + on that and generate the same code for TBL and TBX here. */ + IRTemp overall_valid_mask = newTemp(Ity_V128); + assign(overall_valid_mask, + binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src))); + IRTemp result = newTemp(Ity_V128); + assign(result, + binop(Iop_OrV128, + mkexpr(running_result), + binop(Iop_AndV128, + mkexpr(oor_values), + unop(Iop_NotV128, mkexpr(overall_valid_mask))))); + return result; +} + + +static +Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn) +{ +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + + /* ---------------- FMOV (general) ---------------- */ + /* case 30 23 20 18 15 9 4 + (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn + (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn + (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn + + (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn + (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn + (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1] + */ + if (INSN(30,24) == BITS7(0,0,1,1,1,1,0) + && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) { + UInt sf = INSN(31,31); + UInt ty = INSN(23,22); // type + UInt rm = INSN(20,19); // rmode + UInt op = INSN(18,16); // opcode + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + UInt ix = 0; // case + if (sf == 0) { + if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1)) + ix = 1; + else + if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0)) + ix = 4; + } else { + vassert(sf == 1); + if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1)) + ix = 2; + else + if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0)) + ix = 5; + else + if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1)) + ix = 3; + else + if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0)) + ix = 6; + } + if (ix > 0) { + switch (ix) { + case 1: + putQReg128(dd, mkV128(0)); + putQRegLO(dd, getIReg32orZR(nn)); + DIP("fmov s%u, w%u\n", dd, nn); + break; + case 2: + putQReg128(dd, mkV128(0)); + putQRegLO(dd, getIReg64orZR(nn)); + DIP("fmov d%u, x%u\n", dd, nn); + break; + case 3: + putQRegHI64(dd, getIReg64orZR(nn)); + DIP("fmov v%u.d[1], x%u\n", dd, nn); + break; + case 4: + putIReg32orZR(dd, getQRegLO(nn, Ity_I32)); + DIP("fmov w%u, s%u\n", dd, nn); + break; + case 5: + putIReg64orZR(dd, getQRegLO(nn, Ity_I64)); + 
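
math_TBL_TBX needs the per-register bias/compare/mask sequence because Iop_Perm8x16 can only index within a single 128-bit register, so each table register is consulted separately and the partial results are ORed together. Functionally the whole thing is just a byte table lookup with out-of-range handling; a scalar reference (illustrative helper, not in the tree):

   #include <stdint.h>

   /* nTab = len+1 table registers (1..4); 'tab' holds their bytes
      concatenated, lowest register first.  For TBL, oor[] is all
      zeroes; for TBX it is the previous destination value. */
   static void tbl_tbx_ref ( uint8_t dst[16],
                             const uint8_t tab[64], unsigned nTab,
                             const uint8_t idx[16], const uint8_t oor[16] )
   {
      for (int i = 0; i < 16; i++) {
         unsigned ix = idx[i];
         dst[i] = (ix < 16u * nTab) ? tab[ix] : oor[i];
      }
   }
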
DIP("fmov x%u, d%u\n", dd, nn); + break; + case 6: + putIReg64orZR(dd, getQRegHI64(nn)); + DIP("fmov x%u, v%u.d[1]\n", dd, nn); + break; + default: + vassert(0); + } + return True; + } + /* undecodable; fall through */ + } + + /* -------------- FMOV (scalar, immediate) -------------- */ + /* 31 28 23 20 12 9 4 + 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm + 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm + */ + if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) + && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) { + Bool isD = INSN(22,22) == 1; + UInt imm8 = INSN(20,13); + UInt dd = INSN(4,0); + ULong imm = VFPExpandImm(imm8, isD ? 64 : 32); + if (!isD) { + vassert(0 == (imm & 0xFFFFFFFF00000000ULL)); + } + putQReg128(dd, mkV128(0)); + putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL)); + DIP("fmov %s, #0x%llx\n", + nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm); + return True; + } + + /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */ + /* 31 28 18 15 11 9 4 + 0q op 01111 00000 abc cmode 01 defgh d MOV Dd, #imm (q=0) + MOV Vd.2d #imm (q=1) + Allowable op:cmode + FMOV = 1:1111 + MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, 11110 + */ + if (INSN(31,31) == 0 + && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0) + && INSN(11,10) == BITS2(0,1)) { + UInt bitQ = INSN(30,30); + UInt bitOP = INSN(29,29); + UInt cmode = INSN(15,12); + UInt imm8 = (INSN(18,16) << 5) | INSN(9,5); + UInt dd = INSN(4,0); + ULong imm64lo = 0; + UInt op_cmode = (bitOP << 4) | cmode; + Bool ok = False; + switch (op_cmode) { + case BITS5(1,1,1,1,1): // 1:1111 + case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0): + case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00 + case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00 + case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0 + case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x + case BITS5(1,1,1,1,0): // 1:1110 + ok = True; break; + default: + break; + } + if (ok) { + ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8); + } + if (ok) { + ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo; + putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo))); + DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo); + return True; + } + /* else fall through */ + } + + /* -------------- {S,U}CVTF (scalar, integer) -------------- */ + /* 31 28 23 21 20 18 15 9 4 ix + 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 0 + 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 1 + 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 2 + 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 3 + + 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 4 + 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 5 + 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 6 + 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 7 + + These are signed/unsigned conversion from integer registers to + FP registers, all 4 32/64-bit combinations, rounded per FPCR. + */ + if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1) + && INSN(15,10) == BITS6(0,0,0,0,0,0)) { + Bool isI64 = INSN(31,31) == 1; + Bool isF64 = INSN(22,22) == 1; + Bool isU = INSN(16,16) == 1; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0); + const IROp ops[8] + = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64, + Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 }; + IRExpr* src = getIRegOrZR(isI64, nn); + IRExpr* res = (isF64 && !isI64) + ? 
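
VFPExpandImm itself is defined elsewhere in the tree; for orientation, here is a sketch of the usual ARM ARM bit layout it is assumed to implement (illustrative only, returning the raw bit pattern just as the decode above expects; the single-precision result sits in the low 32 bits, consistent with the vassert on the upper half):

   #include <stdint.h>

   static uint64_t vfp_expand_imm_sketch ( uint32_t imm8, int is64 )
   {
      uint64_t sign = (imm8 >> 7) & 1;
      uint64_t b6   = (imm8 >> 6) & 1;
      uint64_t b54  = (imm8 >> 4) & 3;          /* imm8<5:4> */
      uint64_t frac =  imm8 & 0xF;              /* imm8<3:0> */
      if (is64) {
         /* exp = NOT(b6) : Replicate(b6,8) : imm8<5:4>,  frac : Zeros(48) */
         uint64_t exp = ((b6 ^ 1) << 10) | ((b6 ? 0xFFull : 0) << 2) | b54;
         return (sign << 63) | (exp << 52) | (frac << 48);
      } else {
         /* exp = NOT(b6) : Replicate(b6,5) : imm8<5:4>,  frac : Zeros(19) */
         uint64_t exp = ((b6 ^ 1) << 7) | ((b6 ? 0x1Full : 0) << 2) | b54;
         return (sign << 31) | (exp << 23) | (frac << 19);
      }
   }

For example, imm8 == 0x70 expands to 0x3FF0000000000000 in the double case, i.e. 1.0.
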
unop(ops[ix], src) + : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src); + putQReg128(dd, mkV128(0)); + putQRegLO(dd, res); + DIP("%ccvtf %s, %s\n", + isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32), + nameIRegOrZR(isI64, nn)); + return True; + } + + /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */ + /* 31 23 20 15 11 9 4 + ---------------- 0000 ------ FMUL -------- + 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm + 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm + ---------------- 0010 ------ FADD -------- + ---------------- 0011 ------ FSUB -------- + ---------------- 1000 ------ FNMUL -------- + */ + if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) + && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { + Bool isD = INSN(22,22) == 1; + UInt mm = INSN(20,16); + UInt op = INSN(15,12); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IROp iop = Iop_INVALID; + IRType ty = isD ? Ity_F64 : Ity_F32; + Bool neg = False; + const HChar* nm = "???"; + switch (op) { + case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break; + case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break; + case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break; + case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break; + case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty); + neg = True; break; + default: return False; + } + vassert(iop != Iop_INVALID); + IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()), + getQRegLO(nn, ty), getQRegLO(mm, ty)); + IRTemp res = newTemp(ty); + assign(res, neg ? unop(mkNEGF(ty),resE) : resE); + putQReg128(dd, mkV128(0)); + putQRegLO(dd, mkexpr(res)); + DIP("%s %s, %s, %s\n", + nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty)); + return True; + } + + /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */ + /* 31 23 21 16 14 9 4 + 000 11110 00 10000 00 10000 n d FMOV Sd, Sn + 000 11110 01 10000 00 10000 n d FMOV Dd, Dn + ------------------ 01 --------- FABS ------ + ------------------ 10 --------- FNEG ------ + ------------------ 11 --------- FSQRT ----- + */ + if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) + && INSN(21,17) == BITS5(1,0,0,0,0) + && INSN(14,10) == BITS5(1,0,0,0,0)) { + Bool isD = INSN(22,22) == 1; + UInt opc = INSN(16,15); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRType ty = isD ? Ity_F64 : Ity_F32; + IRTemp res = newTemp(ty); + if (opc == BITS2(0,0)) { + assign(res, getQRegLO(nn, ty)); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("fmov %s, %s\n", + nameQRegLO(dd, ty), nameQRegLO(nn, ty)); + return True; + } + if (opc == BITS2(1,0) || opc == BITS2(0,1)) { + Bool isAbs = opc == BITS2(0,1); + IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty); + assign(res, unop(op, getQRegLO(nn, ty))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("%s %s, %s\n", isAbs ? 
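
The (isF64 && !isI64) special case exists because a 32-bit integer always converts exactly to an F64 (a double's 53-bit significand holds any 32-bit value), so Iop_I32StoF64 / Iop_I32UtoF64 are unops with no rounding-mode argument, while the other six conversions can round and therefore take one. A quick standalone check of that claim (not VEX code):

   #include <stdio.h>
   #include <stdint.h>

   int main ( void )
   {
      int32_t x = 0x7FFFFFFF;
      printf("%.1f\n", (double)x);             /* 2147483647.0 -- exact    */
      printf("%.1f\n", (double)(float)x);      /* 2147483648.0 -- rounded  */
      int64_t y = (1LL << 53) + 1;             /* not representable in F64 */
      printf("%lld\n", (long long)(double)y);  /* 9007199254740992 -- rounded */
      return 0;
   }
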
"fabs" : "fneg", + nameQRegLO(dd, ty), nameQRegLO(nn, ty)); + return True; + } + if (opc == BITS2(1,1)) { + assign(res, + binop(mkSQRTF(ty), + mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty)); + return True; + } + /* else fall through; other cases are ATC */ + } + + /* ---------------- F{ABS,NEG} (vector) ---------------- */ + /* 31 28 22 21 16 9 4 + 0q0 01110 1 sz 10000 01111 10 n d FABS Vd.T, Vn.T + 0q1 01110 1 sz 10000 01111 10 n d FNEG Vd.T, Vn.T + */ + if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1) + && INSN(21,17) == BITS5(1,0,0,0,0) + && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) { + UInt bitQ = INSN(30,30); + UInt bitSZ = INSN(22,22); + Bool isFNEG = INSN(29,29) == 1; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + const HChar* ar = "??"; + IRType tyF = Ity_INVALID; + Bool zeroHI = False; + Bool ok = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar, + (Bool)bitQ, (Bool)bitSZ); + if (ok) { + vassert(tyF == Ity_F64 || tyF == Ity_F32); + IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2) + : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4); + IRTemp res = newTemp(Ity_V128); + assign(res, unop(op, getQReg128(nn))); + putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res)) + : mkexpr(res)); + DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs", + nameQReg128(dd), ar, nameQReg128(nn), ar); + return True; + } + /* else fall through */ + } + + /* -------------------- FCMP,FCMPE -------------------- */ + /* 31 23 20 15 9 4 + 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm + 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0 + 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm + 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0 + + 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm + 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0 + 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm + 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0 + + FCMPE generates Invalid Operation exn if either arg is any kind + of NaN. FCMP generates Invalid Operation exn if either arg is a + signalling NaN. We ignore this detail here and produce the same + IR for both. + */ + if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1 + && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) { + Bool isD = INSN(22,22) == 1; + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + Bool isCMPE = INSN(4,4) == 1; + Bool cmpZero = INSN(3,3) == 1; + IRType ty = isD ? Ity_F64 : Ity_F32; + Bool valid = True; + if (cmpZero && mm != 0) valid = False; + if (valid) { + IRTemp argL = newTemp(ty); + IRTemp argR = newTemp(ty); + IRTemp irRes = newTemp(Ity_I32); + assign(argL, getQRegLO(nn, ty)); + assign(argR, + cmpZero + ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0))) + : getQRegLO(mm, ty)); + assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32, + mkexpr(argL), mkexpr(argR))); + IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes); + IRTemp nzcv_28x0 = newTemp(Ity_I64); + assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28))); + setFlags_COPY(nzcv_28x0); + DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty), + cmpZero ? 
"#0.0" : nameQRegLO(mm, ty)); + return True; + } + } + + /* -------------------- F{N}M{ADD,SUB} -------------------- */ + /* 31 22 20 15 14 9 4 ix + 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa + 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa + 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa + 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa + where Fx=Dx when sz=1, Fx=Sx when sz=0 + + -----SPEC------ ----IMPL---- + fmadd a + n * m a + n * m + fmsub a + (-n) * m a - n * m + fnmadd (-a) + (-n) * m -(a + n * m) + fnmsub (-a) + n * m -(a - n * m) + */ + if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) { + Bool isD = INSN(22,22) == 1; + UInt mm = INSN(20,16); + UInt aa = INSN(14,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + UInt ix = (INSN(21,21) << 1) | INSN(15,15); + IRType ty = isD ? Ity_F64 : Ity_F32; + IROp opADD = mkADDF(ty); + IROp opSUB = mkSUBF(ty); + IROp opMUL = mkMULF(ty); + IROp opNEG = mkNEGF(ty); + IRTemp res = newTemp(ty); + IRExpr* eA = getQRegLO(aa, ty); + IRExpr* eN = getQRegLO(nn, ty); + IRExpr* eM = getQRegLO(mm, ty); + IRExpr* rm = mkexpr(mk_get_IR_rounding_mode()); + IRExpr* eNxM = triop(opMUL, rm, eN, eM); + switch (ix) { + case 0: assign(res, triop(opADD, rm, eA, eNxM)); break; + case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break; + case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break; + case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break; + default: vassert(0); + } + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" }; + DIP("%s %s, %s, %s, %s\n", + names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty), + nameQRegLO(mm, ty), nameQRegLO(aa, ty)); + return True; + } + + /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */ + /* 30 23 20 18 15 9 4 + sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to + sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest) + ---------------- 01 -------------- FCVTP-------- (round to +inf) + ---------------- 10 -------------- FCVTM-------- (round to -inf) + ---------------- 11 -------------- FCVTZ-------- (round to zero) + + Rd is Xd when sf==1, Wd when sf==0 + Fn is Dn when x==1, Sn when x==0 + 20:19 carry the rounding mode, using the same encoding as FPCR + */ + if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1 + && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) { + Bool isI64 = INSN(31,31) == 1; + Bool isF64 = INSN(22,22) == 1; + UInt rm = INSN(20,19); + Bool isU = INSN(16,16) == 1; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + /* Decide on the IR rounding mode to use. */ + IRRoundingMode irrm = 8; /*impossible*/ + HChar ch = '?'; + switch (rm) { + case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break; + case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break; + case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break; + case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break; + default: vassert(0); + } + vassert(irrm != 8); + /* Decide on the conversion primop, based on the source size, + dest size and signedness (8 possibilities). Case coding: + F32 ->s I32 0 + F32 ->u I32 1 + F32 ->s I64 2 + F32 ->u I64 3 + F64 ->s I32 4 + F64 ->u I32 5 + F64 ->s I64 6 + F64 ->u I64 7 + */ + UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 
1 : 0); + vassert(ix < 8); + const IROp ops[8] + = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U, + Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U }; + IROp op = ops[ix]; + // A bit of ATCery: bounce all cases we haven't seen an example of. + if (/* F32toI32S */ + (op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */ + || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */ + || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */ + /* F32toI32U */ + || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */ + || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */ + /* F32toI64S */ + || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */ + /* F32toI64U */ + || (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */ + /* F64toI32S */ + || (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */ + || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */ + || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */ + /* F64toI32U */ + || (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */ + || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */ + || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */ + /* F64toI64S */ + || (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */ + || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */ + || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */ + /* F64toI64U */ + || (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */ + || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */ + ) { + /* validated */ + } else { + return False; + } + IRType srcTy = isF64 ? Ity_F64 : Ity_F32; + IRType dstTy = isI64 ? Ity_I64 : Ity_I32; + IRTemp src = newTemp(srcTy); + IRTemp dst = newTemp(dstTy); + assign(src, getQRegLO(nn, srcTy)); + assign(dst, binop(op, mkU32(irrm), mkexpr(src))); + putIRegOrZR(isI64, dd, mkexpr(dst)); + DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's', + nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy)); + return True; + } + + /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */ + /* 30 23 20 18 15 9 4 + 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn + 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn + Fn is Dn when x==1, Sn when x==0 + */ + if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) + && INSN(21,16) == BITS6(1,0,0,1,0,0) + && INSN(15,10) == BITS6(0,0,0,0,0,0)) { + Bool isI64 = INSN(31,31) == 1; + Bool isF64 = INSN(22,22) == 1; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + /* Decide on the IR rounding mode to use. */ + /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */ + IRRoundingMode irrm = Irrm_NEAREST; + /* Decide on the conversion primop. */ + IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S) + : (isF64 ? Iop_F64toI32S : Iop_F32toI32S); + IRType srcTy = isF64 ? Ity_F64 : Ity_F32; + IRType dstTy = isI64 ? 
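
On the F{N}M{ADD,SUB} block above: the SPEC/IMPL table is the key detail, since the implementation negates the whole sum rather than negating the addend and the product separately. A scalar restatement of the IMPL column (the real instructions are fused, i.e. single-rounding; this mirrors the separate multiply-then-add the decoder currently emits):

   /* Double-precision restatement of the IMPL column. */
   static double fmadd_impl  ( double n, double m, double a ) { return   a + n * m;  }
   static double fmsub_impl  ( double n, double m, double a ) { return   a - n * m;  }
   static double fnmadd_impl ( double n, double m, double a ) { return -(a + n * m); }
   static double fnmsub_impl ( double n, double m, double a ) { return -(a - n * m); }
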
Ity_I64 : Ity_I32; + IRTemp src = newTemp(srcTy); + IRTemp dst = newTemp(dstTy); + assign(src, getQRegLO(nn, srcTy)); + assign(dst, binop(op, mkU32(irrm), mkexpr(src))); + putIRegOrZR(isI64, dd, mkexpr(dst)); + DIP("fcvtas %s, %s (KLUDGED)\n", + nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy)); + return True; + } + + /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */ + /* 31 23 21 17 14 9 4 + 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR) + rm + x==0 => S-registers, x==1 => D-registers + rm (17:15) encodings: + 111 per FPCR (FRINTI) + 001 +inf (FRINTP) + 010 -inf (FRINTM) + 011 zero (FRINTZ) + 000 tieeven + 100 tieaway (FRINTA) -- !! FIXME KLUDGED !! + 110 per FPCR + "exact = TRUE" + 101 unallocated + */ + if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) + && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) { + Bool isD = INSN(22,22) == 1; + UInt rm = INSN(17,15); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRType ty = isD ? Ity_F64 : Ity_F32; + IRExpr* irrmE = NULL; + UChar ch = '?'; + switch (rm) { + case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break; + case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break; + case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break; + // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0 + case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break; + default: break; + } + if (irrmE) { + IRTemp src = newTemp(ty); + IRTemp dst = newTemp(ty); + assign(src, getQRegLO(nn, ty)); + assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, + irrmE, mkexpr(src))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(dst)); + DIP("frint%c %s, %s\n", + ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty)); + return True; + } + /* else unhandled rounding mode case -- fall through */ + } + + /* ------------------ FCVT (scalar) ------------------ */ + /* 31 23 21 16 14 9 4 + 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp) + --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp) + --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp) + --------- 00 ----- 01 --------- FCVT Dd, Sn + --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp) + --------- 01 ----- 00 --------- FCVT Sd, Dn + Rounding, when dst is smaller than src, is per the FPCR. 
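
The rm field in the FCVT{N,P,M,Z}{S,U} decode above maps onto the same four rounding directions C exposes through fenv.h, so the intended results are easy to sanity-check on the host (standalone demo, not VEX code):

   #include <fenv.h>
   #include <math.h>
   #include <stdio.h>

   #pragma STDC FENV_ACCESS ON

   int main ( void )
   {
      const double x = -2.5;
      const struct { int mode; const char* nm; } rms[4] = {
         { FE_TONEAREST,  "fcvtns" },   /* ties to even -> -2 */
         { FE_UPWARD,     "fcvtps" },   /* toward +inf  -> -2 */
         { FE_DOWNWARD,   "fcvtms" },   /* toward -inf  -> -3 */
         { FE_TOWARDZERO, "fcvtzs" }    /* toward zero  -> -2 */
      };
      for (int i = 0; i < 4; i++) {
         fesetround(rms[i].mode);
         printf("%s(%g) = %ld\n", rms[i].nm, x, lrint(x));
      }
      fesetround(FE_TONEAREST);
      return 0;
   }
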
+ */ + if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0) + && INSN(21,17) == BITS5(1,0,0,0,1) + && INSN(14,10) == BITS5(1,0,0,0,0)) { + UInt b2322 = INSN(23,22); + UInt b1615 = INSN(16,15); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) { + /* Convert S to D */ + IRTemp res = newTemp(Ity_F64); + assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("fcvt %s, %s\n", + nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32)); + return True; + } + if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) { + /* Convert D to S */ + IRTemp res = newTemp(Ity_F32); + assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()), + getQRegLO(nn, Ity_F64))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("fcvt %s, %s\n", + nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64)); + return True; + } + /* else unhandled */ + } + + /* ------------------ FABD (scalar) ------------------ */ + /* 31 23 20 15 9 4 + 011 11110 111 m 110101 n d FABD Dd, Dn, Dm + 011 11110 101 m 110101 n d FABD Sd, Sn, Sm + */ + if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1 + && INSN(15,10) == BITS6(1,1,0,1,0,1)) { + Bool isD = INSN(22,22) == 1; + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRType ty = isD ? Ity_F64 : Ity_F32; + IRTemp res = newTemp(ty); + assign(res, unop(mkABSF(ty), + triop(mkSUBF(ty), + mkexpr(mk_get_IR_rounding_mode()), + getQRegLO(nn,ty), getQRegLO(mm,ty)))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("fabd %s, %s, %s\n", + nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty)); + return True; + } + + /* -------------- {S,U}CVTF (vector, integer) -------------- */ + /* 31 28 22 21 15 9 4 + 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn + 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn + with laneage: + case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D + */ + if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0) + && INSN(21,16) == BITS6(1,0,0,0,0,1) + && INSN(15,10) == BITS6(1,1,0,1,1,0)) { + Bool isQ = INSN(30,30) == 1; + Bool isU = INSN(29,29) == 1; + Bool isF64 = INSN(22,22) == 1; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + if (isQ || !isF64) { + IRType tyF = Ity_INVALID, tyI = Ity_INVALID; + UInt nLanes = 0; + Bool zeroHI = False; + const HChar* arrSpec = NULL; + Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec, + isQ, isF64 ); + IROp op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32) + : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32); + IRTemp rm = mk_get_IR_rounding_mode(); + UInt i; + vassert(ok); /* the 'if' above should ensure this */ + for (i = 0; i < nLanes; i++) { + putQRegLane(dd, i, + binop(op, mkexpr(rm), getQRegLane(nn, i, tyI))); + } + if (zeroHI) { + putQRegLane(dd, 1, mkU64(0)); + } + DIP("%ccvtf %s.%s, %s.%s\n", isU ? 
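
Regarding the two KLUDGE notes above (FCVTAS and the FRINTA case of FRINT): the architected rounding is ties-away-from-zero, and substituting Irrm_NEAREST (ties-to-even) only goes wrong on exact halfway cases. C's round() vs nearbyint() shows exactly which inputs are affected:

   #include <math.h>
   #include <stdio.h>

   int main ( void )
   {
      const double xs[5] = { 0.5, 1.5, 2.5, -0.5, -2.5 };
      for (int i = 0; i < 5; i++) {
         printf("x=%5.1f  ties-away=%5.1f  ties-even=%5.1f\n",
                xs[i],
                round(xs[i]),        /* what FCVTAS/FRINTA should do */
                nearbyint(xs[i]));   /* what the kludge does, assuming the
                                        default rounding mode */
      }
      return 0;
   }
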
'u' : 's', + nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); + return True; + } + /* else fall through */ + } + + /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */ + /* 31 28 22 21 20 15 9 4 case + 0q0 01110 0 sz 1 m 110101 n d FADD Vd,Vn,Vm 1 + 0q0 01110 1 sz 1 m 110101 n d FSUB Vd,Vn,Vm 2 + 0q1 01110 0 sz 1 m 110111 n d FMUL Vd,Vn,Vm 3 + 0q1 01110 0 sz 1 m 111111 n d FDIV Vd,Vn,Vm 4 + 0q0 01110 0 sz 1 m 110011 n d FMLA Vd,Vn,Vm 5 + 0q0 01110 1 sz 1 m 110011 n d FMLS Vd,Vn,Vm 6 + 0q1 01110 1 sz 1 m 110101 n d FABD Vd,Vn,Vm 7 + */ + if (INSN(31,31) == 0 + && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) { + Bool isQ = INSN(30,30) == 1; + UInt b29 = INSN(29,29); + UInt b23 = INSN(23,23); + Bool isF64 = INSN(22,22) == 1; + UInt mm = INSN(20,16); + UInt b1510 = INSN(15,10); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + UInt ix = 0; + /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1; + else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2; + else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3; + else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4; + else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5; + else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6; + else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7; + IRType laneTy = Ity_INVALID; + Bool zeroHI = False; + const HChar* arr = "??"; + Bool ok + = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64); + /* Skip MLA/MLS for the time being */ + if (ok && ix >= 1 && ix <= 4) { + const IROp ops64[4] + = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 }; + const IROp ops32[4] + = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 }; + const HChar* names[4] + = { "fadd", "fsub", "fmul", "fdiv" }; + IROp op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1]; + IRTemp rm = mk_get_IR_rounding_mode(); + IRTemp t1 = newTemp(Ity_V128); + IRTemp t2 = newTemp(Ity_V128); + assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm))); + assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1)) + : mkexpr(t1)); + putQReg128(dd, mkexpr(t2)); + DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1], + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); + return True; + } + if (ok && ix >= 5 && ix <= 6) { + IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4; + IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4; + IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4; + IRTemp rm = mk_get_IR_rounding_mode(); + IRTemp t1 = newTemp(Ity_V128); + IRTemp t2 = newTemp(Ity_V128); + // FIXME: double rounding; use FMA primops instead + assign(t1, triop(opMUL, + mkexpr(rm), getQReg128(nn), getQReg128(mm))); + assign(t2, triop(ix == 5 ? opADD : opSUB, + mkexpr(rm), getQReg128(dd), mkexpr(t1))); + putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2)) + : mkexpr(t2)); + DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls", + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); + return True; + } + if (ok && ix == 7) { + IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4; + IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; + IRTemp rm = mk_get_IR_rounding_mode(); + IRTemp t1 = newTemp(Ity_V128); + IRTemp t2 = newTemp(Ity_V128); + // FIXME: use Abd primop instead? + assign(t1, triop(opSUB, + mkexpr(rm), getQReg128(nn), getQReg128(mm))); + assign(t2, unop(opABS, mkexpr(t1))); + putQReg128(dd, zeroHI ? 
unop(Iop_ZeroHI64ofV128, mkexpr(t2)) + : mkexpr(t2)); + DIP("fabd %s.%s, %s.%s, %s.%s\n", + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); + return True; + } + } + + /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */ + /* 31 28 22 20 15 9 4 case + 0q1 01110 0 sz 1 m 111011 n d FACGE Vd, Vn, Vm + 0q1 01110 1 sz 1 m 111011 n d FACGT Vd, Vn, Vm + 0q0 01110 0 sz 1 m 111001 n d FCMEQ Vd, Vn, Vm + 0q1 01110 0 sz 1 m 111001 n d FCMGE Vd, Vn, Vm + 0q1 01110 1 sz 1 m 111001 n d FCMGT Vd, Vn, Vm + */ + if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1 + && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) { + Bool isQ = INSN(30,30) == 1; + UInt U = INSN(29,29); + UInt E = INSN(23,23); + Bool isF64 = INSN(22,22) == 1; + UInt ac = INSN(11,11); + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + /* */ + UInt EUac = (E << 2) | (U << 1) | ac; + IROp opABS = Iop_INVALID; + IROp opCMP = Iop_INVALID; + IRType laneTy = Ity_INVALID; + Bool zeroHI = False; + Bool swap = True; + const HChar* arr = "??"; + const HChar* nm = "??"; + Bool ok + = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64); + if (ok) { + vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32)); + switch (EUac) { + case BITS3(0,0,0): + nm = "fcmeq"; + opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4; + swap = False; + break; + case BITS3(0,1,0): + nm = "fcmge"; + opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; + break; + case BITS3(0,1,1): + nm = "facge"; + opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; + opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; + break; + case BITS3(1,1,0): + nm = "fcmgt"; + opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; + break; + case BITS3(1,1,1): + nm = "fcagt"; + opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; + opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; + break; + default: + break; + } + } + if (opCMP != Iop_INVALID) { + IRExpr* argN = getQReg128(nn); + IRExpr* argM = getQReg128(mm); + if (opABS != Iop_INVALID) { + argN = unop(opABS, argN); + argM = unop(opABS, argM); + } + IRExpr* res = swap ? binop(opCMP, argM, argN) + : binop(opCMP, argN, argM); + if (zeroHI) { + res = unop(Iop_ZeroHI64ofV128, res); + } + putQReg128(dd, res); + DIP("%s %s.%s, %s.%s, %s.%s\n", nm, + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); + return True; + } + /* else fall through */ + } + + /* -------------------- FCVTN -------------------- */ + /* 31 28 23 20 15 9 4 + 0q0 01110 0s1 00001 011010 n d FCVTN Vd, Vn + where case q:s of 00: 16Fx4(lo) <- 32Fx4 + 01: 32Fx2(lo) <- 64Fx2 + 10: 16Fx4(hi) <- 32Fx4 + 11: 32Fx2(hi) <- 64Fx2 + Only deals with the 32Fx2 <- 64Fx2 version (s==1) + */ + if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0) + && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) { + UInt bQ = INSN(30,30); + UInt bS = INSN(22,22); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + if (bS == 1) { + IRTemp rm = mk_get_IR_rounding_mode(); + IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64); + IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64); + putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo)); + putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi)); + if (bQ == 0) { + putQRegLane(dd, 1, mkU64(0)); + } + DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "", + nameQReg128(dd), bQ ? 
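
On the "FIXME: double rounding; use FMA primops instead" note in the FMLA/FMLS case above: a concrete input where multiply-then-add (what is currently emitted) and a genuinely fused multiply-add (what the hardware does) disagree:

   #include <float.h>
   #include <math.h>
   #include <stdio.h>

   int main ( void )
   {
      double x = 1.0 + DBL_EPSILON;           /* 1 + 2^-52 */
      double y = 1.0 + DBL_EPSILON;
      double a = -(1.0 + 2.0 * DBL_EPSILON);
      /* Exact value of x*y + a is 2^-104.  The first rounding of x*y
         throws it away in the unfused form. */
      printf("mul-then-add : %g\n", x * y + a);      /* 0         */
      printf("fused fma    : %g\n", fma(x, y, a));   /* ~4.9e-32  */
      return 0;
   }
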
"4s" : "2s", nameQReg128(nn)); + return True; + } + /* else fall through */ + } + + /* ---------------- ADD/SUB (vector) ---------------- */ + /* 31 28 23 21 20 15 9 4 + 0q0 01110 size 1 m 100001 n d ADD Vd.T, Vn.T, Vm.T + 0q1 01110 size 1 m 100001 n d SUB Vd.T, Vn.T, Vm.T + */ + if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) + && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) { + Bool isQ = INSN(30,30) == 1; + UInt szBlg2 = INSN(23,22); + Bool isSUB = INSN(29,29) == 1; + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool zeroHI = False; + const HChar* arrSpec = ""; + Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); + if (ok) { + const IROp opsADD[4] + = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; + const IROp opsSUB[4] + = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; + vassert(szBlg2 < 4); + IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2]; + IRTemp t = newTemp(Ity_V128); + assign(t, binop(op, getQReg128(nn), getQReg128(mm))); + putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t)) + : mkexpr(t)); + const HChar* nm = isSUB ? "sub" : "add"; + DIP("%s %s.%s, %s.%s, %s.%s\n", nm, + nameQReg128(dd), arrSpec, + nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); + return True; + } + /* else fall through */ + } + + /* ---------------- ADD/SUB (scalar) ---------------- */ + /* 31 28 23 21 20 15 9 4 + 010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm + 011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm + */ + if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1) + && INSN(15,10) == BITS6(1,0,0,0,0,1)) { + Bool isSUB = INSN(29,29) == 1; + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp res = newTemp(Ity_I64); + assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64, + getQRegLane(nn, 0, Ity_I64), + getQRegLane(mm, 0, Ity_I64))); + putQRegLane(dd, 0, mkexpr(res)); + putQRegLane(dd, 1, mkU64(0)); + DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add", + nameQRegLO(dd, Ity_I64), + nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); + return True; + } + + /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */ + /* 31 28 23 21 20 15 9 4 + 0q0 01110 size 1 m 100111 n d MUL Vd.T, Vn.T, Vm.T B/H/S only + 0q1 01110 size 1 m 100111 n d PMUL Vd.T, Vn.T, Vm.T B only + 0q0 01110 size 1 m 100101 n d MLA Vd.T, Vn.T, Vm.T B/H/S only + 0q1 01110 size 1 m 100101 n d MLS Vd.T, Vn.T, Vm.T B/H/S only + */ + if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) + && INSN(21,21) == 1 + && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) { + Bool isQ = INSN(30,30) == 1; + UInt szBlg2 = INSN(23,22); + UInt bit29 = INSN(29,29); + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool isMLAS = INSN(11,11) == 0; + const IROp opsADD[4] + = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID }; + const IROp opsSUB[4] + = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID }; + const IROp opsMUL[4] + = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID }; + const IROp opsPMUL[4] + = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID }; + /* Set opMUL and, if necessary, opACC. A result value of + Iop_INVALID for opMUL indicates that the instruction is + invalid. */ + Bool zeroHI = False; + const HChar* arrSpec = ""; + Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); + vassert(szBlg2 < 4); + IROp opACC = Iop_INVALID; + IROp opMUL = Iop_INVALID; + if (ok) { + opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2] + : opsMUL[szBlg2]; + opACC = isMLAS ? (bit29 == 1 ? 
opsSUB[szBlg2] : opsADD[szBlg2]) + : Iop_INVALID; + } + if (ok && opMUL != Iop_INVALID) { + IRTemp t1 = newTemp(Ity_V128); + assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm))); + IRTemp t2 = newTemp(Ity_V128); + assign(t2, opACC == Iop_INVALID + ? mkexpr(t1) + : binop(opACC, getQReg128(dd), mkexpr(t1))); + putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2)) + : mkexpr(t2)); + const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla") + : (bit29 == 1 ? "pmul" : "mul"); + DIP("%s %s.%s, %s.%s, %s.%s\n", nm, + nameQReg128(dd), arrSpec, + nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); + return True; + } + /* else fall through */ + } + + /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */ + /* 31 28 23 21 20 15 9 4 + 0q0 01110 size 1 m 011011 n d SMIN Vd.T, Vn.T, Vm.T + 0q1 01110 size 1 m 011011 n d UMIN Vd.T, Vn.T, Vm.T + 0q0 01110 size 1 m 011001 n d SMAX Vd.T, Vn.T, Vm.T + 0q1 01110 size 1 m 011001 n d UMAX Vd.T, Vn.T, Vm.T + */ + if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) + && INSN(21,21) == 1 + && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) { + Bool isQ = INSN(30,30) == 1; + Bool isU = INSN(29,29) == 1; + UInt szBlg2 = INSN(23,22); + Bool isMAX = INSN(11,11) == 0; + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool zeroHI = False; + const HChar* arrSpec = ""; + Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); + if (ok) { + const IROp opMINS[4] + = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 }; + const IROp opMINU[4] + = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 }; + const IROp opMAXS[4] + = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 }; + const IROp opMAXU[4] + = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 }; + vassert(szBlg2 < 4); + IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2]) + : (isU ? opMINU[szBlg2] : opMINS[szBlg2]); + IRTemp t = newTemp(Ity_V128); + assign(t, binop(op, getQReg128(nn), getQReg128(mm))); + putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t)) + : mkexpr(t)); + const HChar* nm = isMAX ? (isU ? "umax" : "smax") + : (isU ? "umin" : "smin"); + DIP("%s %s.%s, %s.%s, %s.%s\n", nm, + nameQReg128(dd), arrSpec, + nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); + return True; + } + /* else fall through */ + } + + /* -------------------- {S,U}{MIN,MAX}V -------------------- */ + /* 31 28 23 21 16 15 9 4 + 0q0 01110 size 11000 1 101010 n d SMINV Vd, Vn.T + 0q1 01110 size 11000 1 101010 n d UMINV Vd, Vn.T + 0q0 01110 size 11000 0 101010 n d SMAXV Vd, Vn.T + 0q1 01110 size 11000 0 101010 n d UMAXV Vd, Vn.T + */ + if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) + && INSN(21,17) == BITS5(1,1,0,0,0) + && INSN(15,10) == BITS6(1,0,1,0,1,0)) { + Bool isQ = INSN(30,30) == 1; + Bool isU = INSN(29,29) == 1; + UInt szBlg2 = INSN(23,22); + Bool isMAX = INSN(16,16) == 0; + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool zeroHI = False; + const HChar* arrSpec = ""; + Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2); + if (ok) { + if (szBlg2 == 3) ok = False; + if (szBlg2 == 2 && !isQ) ok = False; + } + if (ok) { + const IROp opMINS[3] + = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 }; + const IROp opMINU[3] + = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 }; + const IROp opMAXS[3] + = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 }; + const IROp opMAXU[3] + = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 }; + vassert(szBlg2 < 3); + IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2]) + : (isU ? 
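
For the PMUL case above (Iop_PolynomialMul8x16): "polynomial" multiplication is carry-less multiplication over GF(2), with each 8-bit lane keeping only the low 8 bits of its 15-bit product. One lane of that, as a plain-C reference (illustrative only):

   #include <stdint.h>

   static uint8_t pmul8_ref ( uint8_t a, uint8_t b )
   {
      uint16_t acc = 0;
      for (int i = 0; i < 8; i++)
         if ((b >> i) & 1)
            acc ^= (uint16_t)a << i;   /* xor instead of add: no carries */
      return (uint8_t)acc;             /* PMUL truncates to the lane width */
   }
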
opMINU[szBlg2] : opMINS[szBlg2]); + IRTemp tN1 = newTemp(Ity_V128); + assign(tN1, getQReg128(nn)); + /* If Q == 0, we're just folding lanes in the lower half of + the value. In which case, copy the lower half of the + source into the upper half, so we can then treat it the + same as the full width case. */ + IRTemp tN2 = newTemp(Ity_V128); + assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1)); + IRTemp res = math_MINMAXV(tN2, op); + if (res == IRTemp_INVALID) + return False; /* means math_MINMAXV + doesn't handle this case yet */ + putQReg128(dd, mkexpr(res)); + const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv") + : (isU ? "uminv" : "sminv"); + const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 }; + IRType laneTy = tys[szBlg2]; + DIP("%s %s, %s.%s\n", nm, + nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec); + return True; + } + /* else fall through */ + } + + /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */ + /* 31 28 23 20 15 9 4 + 0q0 01110 001 m 000111 n d AND Vd.T, Vn.T, Vm.T + 0q0 01110 011 m 000111 n d BIC Vd.T, Vn.T, Vm.T + 0q0 01110 101 m 000111 n d ORR Vd.T, Vn.T, Vm.T + 0q0 01110 111 m 000111 n d ORN Vd.T, Vn.T, Vm.T + T is 16b when q==1, 8b when q==0 + */ + if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) + && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) { + Bool isQ = INSN(30,30) == 1; + Bool isORR = INSN(23,23) == 1; + Bool invert = INSN(22,22) == 1; + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp res = newTemp(Ity_V128); + assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128, + getQReg128(nn), + invert ? unop(Iop_NotV128, getQReg128(mm)) + : getQReg128(mm))); + putQReg128(dd, isQ ? mkexpr(res) + : unop(Iop_ZeroHI64ofV128, mkexpr(res))); + const HChar* names[4] = { "and", "bic", "orr", "orn" }; + const HChar* ar = isQ ? 
"16b" : "8b"; + DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)], + nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar); + return True; + } + + /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */ + /* 31 28 23 21 15 9 4 ix + 0q1 01110 size 1 m 100011 n d CMEQ Vd.T, Vn.T, Vm.T (1) == + 0q0 01110 size 1 m 100011 n d CMTST Vd.T, Vn.T, Vm.T (2) &, != 0 + + 0q1 01110 size 1 m 001101 n d CMHI Vd.T, Vn.T, Vm.T (3) >u + 0q0 01110 size 1 m 001101 n d CMGT Vd.T, Vn.T, Vm.T (4) >s + + 0q1 01110 size 1 m 001111 n d CMHS Vd.T, Vn.T, Vm.T (5) >=u + 0q0 01110 size 1 m 001111 n d CMGE Vd.T, Vn.T, Vm.T (6) >=s + + 0q1 01110 size 100000 100010 n d CMGE Vd.T, Vn.T, #0 (7) >=s 0 + 0q0 01110 size 100000 100010 n d CMGT Vd.T, Vn.T, #0 (8) >s 0 + + 0q1 01110 size 100000 100110 n d CMLE Vd.T, Vn.T, #0 (9) <=s 0 + 0q0 01110 size 100000 100110 n d CMEQ Vd.T, Vn.T, #0 (10) == 0 + + 0q0 01110 size 100000 101010 n d CMLT Vd.T, Vn.T, #0 (11) y can be expressed directly + x < y == y > x + x <= y == not (x > y) + x >= y == not (y > x) + */ + switch (ix) { + case 1: res = binop(opsEQ[szBlg2], argL, argR); break; + case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2], + binop(Iop_AndV128, argL, argR), + mkV128(0x0000))); + break; + case 3: res = binop(opsGTU[szBlg2], argL, argR); break; + case 4: res = binop(opsGTS[szBlg2], argL, argR); break; + case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL)); + break; + case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL)); + break; + case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL)); + break; + case 8: res = binop(opsGTS[szBlg2], argL, argR); break; + case 9: res = unop(Iop_NotV128, + binop(opsGTS[szBlg2], argL, argR)); + break; + case 10: res = binop(opsEQ[szBlg2], argL, argR); break; + case 11: res = binop(opsGTS[szBlg2], argR, argL); break; + default: vassert(0); + } + vassert(res); + putQReg128(dd, zeroHI ? 
unop(Iop_ZeroHI64ofV128, res) : res); + const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge", + "ge", "gt", "le", "eq", "lt" }; + if (ix <= 6) { + DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1], + nameQReg128(dd), arrSpec, + nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); + } else { + DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1], + nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); + } + return True; + } + /* else fall through */ + } + + /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */ + /* 31 28 23 20 15 9 4 + 0q1 01110 00 1 m 000111 n d EOR Vd.T, Vm.T, Vn.T + 0q1 01110 01 1 m 000111 n d BSL Vd.T, Vm.T, Vn.T + 0q1 01110 10 1 m 000111 n d BIT Vd.T, Vm.T, Vn.T + 0q1 01110 11 1 m 000111 n d BIF Vd.T, Vm.T, Vn.T + */ + if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0) + && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) { + Bool isQ = INSN(30,30) == 1; + UInt op = INSN(23,22); + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp argD = newTemp(Ity_V128); + IRTemp argN = newTemp(Ity_V128); + IRTemp argM = newTemp(Ity_V128); + assign(argD, getQReg128(dd)); + assign(argN, getQReg128(nn)); + assign(argM, getQReg128(mm)); + const IROp opXOR = Iop_XorV128; + const IROp opAND = Iop_AndV128; + const IROp opNOT = Iop_NotV128; + IRExpr* res = NULL; + switch (op) { + case BITS2(0,0): /* EOR */ + res = binop(opXOR, mkexpr(argM), mkexpr(argN)); + break; + case BITS2(0,1): /* BSL */ + res = binop(opXOR, mkexpr(argM), + binop(opAND, + binop(opXOR, mkexpr(argM), mkexpr(argN)), + mkexpr(argD))); + break; + case BITS2(1,0): /* BIT */ + res = binop(opXOR, mkexpr(argD), + binop(opAND, + binop(opXOR, mkexpr(argD), mkexpr(argN)), + mkexpr(argM))); + break; + case BITS2(1,1): /* BIF */ + res = binop(opXOR, mkexpr(argD), + binop(opAND, + binop(opXOR, mkexpr(argD), mkexpr(argN)), + unop(opNOT, mkexpr(argM)))); + break; + default: + vassert(0); + } + vassert(res); + putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res)); + const HChar* nms[4] = { "eor", "bsl", "bit", "bif" }; + const HChar* arr = isQ ? "16b" : "8b"; + vassert(op < 4); + DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op], + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); + return True; + } + + /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */ + /* 31 28 22 18 15 9 4 + 0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #shift (1) + 0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #shift (2) + 0q0 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #shift (3) + laneTy, shift = case immh:immb of + 0001:xxx -> B, SHR:8-xxx, SHL:xxx + 001x:xxx -> H, SHR:16-xxxx SHL:xxxx + 01xx:xxx -> S, SHR:32-xxxxx SHL:xxxxx + 1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx + other -> invalid + As usual the case laneTy==D && q==0 is not allowed. 
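
The CM* cases above rely on two conventions: a vector compare writes an all-ones mask into each lane where the relation holds and all-zeroes where it does not, and only ==, signed > and unsigned > exist as primops, so <, <= and >= are synthesised by swapping operands and/or inverting the mask, as the identities in the comment describe. One 8-bit lane of that scheme, in plain C (reference only):

   #include <stdint.h>

   static uint8_t cm_gt_s ( int8_t x, int8_t y ) { return x >  y ? 0xFF : 0x00; }
   static uint8_t cm_eq   ( int8_t x, int8_t y ) { return x == y ? 0xFF : 0x00; }

   static uint8_t cm_lt_s ( int8_t x, int8_t y )
   {  return cm_gt_s(y, x);                 /* x <  y  ==  y > x       */ }

   static uint8_t cm_ge_s ( int8_t x, int8_t y )
   {  return (uint8_t)~cm_gt_s(y, x);       /* x >= y  ==  not (y > x) */ }

   static uint8_t cm_le_s ( int8_t x, int8_t y )
   {  return (uint8_t)~cm_gt_s(x, y);       /* x <= y  ==  not (x > y) */ }
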
+ */ + if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0) + && INSN(10,10) == 1) { + UInt ix = 0; + /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1; + else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2; + else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3; + if (ix > 0) { + Bool isQ = INSN(30,30) == 1; + UInt immh = INSN(22,19); + UInt immb = INSN(18,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + const IROp opsSHRN[4] + = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 }; + const IROp opsSARN[4] + = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 }; + const IROp opsSHLN[4] + = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 }; + UInt szBlg2 = 0; + UInt shift = 0; + Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb); + if (ix == 3) { + /* The shift encoding has opposite sign for the leftwards + case. Adjust shift to compensate. */ + shift = (8 << szBlg2) - shift; + } + if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2) + && !(szBlg2 == 3/*64bit*/ && !isQ)) { + IROp op = Iop_INVALID; + const HChar* nm = NULL; + switch (ix) { + case 1: op = opsSHRN[szBlg2]; nm = "ushr"; break; + case 2: op = opsSARN[szBlg2]; nm = "sshr"; break; + case 3: op = opsSHLN[szBlg2]; nm = "shl"; break; + default: vassert(0); + } + IRExpr* src = getQReg128(nn); + IRExpr* res = binop(op, src, mkU8(shift)); + putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res)); + HChar laneCh = "bhsd"[szBlg2]; + UInt nLanes = (isQ ? 128 : 64) / (8 << szBlg2); + DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm, + nameQReg128(dd), nLanes, laneCh, + nameQReg128(nn), nLanes, laneCh, shift); + return True; + } + /* else fall through */ + } + } + + /* -------------------- {U,S}SHLL{,2} -------------------- */ + /* 31 28 22 18 15 9 4 + 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh + 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh + where Ta,Tb,sh + = case immh of 1xxx -> invalid + 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31) + 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15) + 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7) + 0000 -> AdvSIMD modified immediate (???) + */ + if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0) + && INSN(15,10) == BITS6(1,0,1,0,0,1)) { + Bool isQ = INSN(30,30) == 1; + Bool isU = INSN(29,29) == 1; + UInt immh = INSN(22,19); + UInt immb = INSN(18,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + UInt immhb = (immh << 3) | immb; + IRTemp src = newTemp(Ity_V128); + IRTemp zero = newTemp(Ity_V128); + IRExpr* res = NULL; + UInt sh = 0; + const HChar* ta = "??"; + const HChar* tb = "??"; + assign(src, getQReg128(nn)); + assign(zero, mkV128(0x0000)); + if (immh & 8) { + /* invalid; don't assign to res */ + } + else if (immh & 4) { + sh = immhb - 32; + vassert(sh < 32); /* so 32-sh is 1..32 */ + ta = "2d"; + tb = isQ ? "4s" : "2s"; + IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero) + : mk_InterleaveLO32x4(src, zero); + res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh)); + } + else if (immh & 2) { + sh = immhb - 16; + vassert(sh < 16); /* so 16-sh is 1..16 */ + ta = "4s"; + tb = isQ ? "8h" : "4h"; + IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero) + : mk_InterleaveLO16x8(src, zero); + res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh)); + } + else if (immh & 1) { + sh = immhb - 8; + vassert(sh < 8); /* so 8-sh is 1..8 */ + ta = "8h"; + tb = isQ ? "16b" : "8b"; + IRExpr* tmp = isQ ? 
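
The xor/and expressions in the {EOR,BSL,BIT,BIF} case above are the standard branch-free bit-select identities. Restated on plain 64-bit words (reference only), with sel(mask,x,y) meaning "take bits of x where mask is 1, else bits of y":

   #include <stdint.h>

   static uint64_t sel ( uint64_t mask, uint64_t x, uint64_t y )
   {  return (x & mask) | (y & ~mask);  }

   static uint64_t bsl ( uint64_t d, uint64_t n, uint64_t m )
   {  return m ^ ((m ^ n) & d);    /* == sel(d, n, m): old dest selects   */ }

   static uint64_t bit ( uint64_t d, uint64_t n, uint64_t m )
   {  return d ^ ((d ^ n) & m);    /* == sel(m, n, d): insert n where m=1 */ }

   static uint64_t bif ( uint64_t d, uint64_t n, uint64_t m )
   {  return d ^ ((d ^ n) & ~m);   /* == sel(m, d, n): insert n where m=0 */ }
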
mk_InterleaveHI8x16(src, zero) + : mk_InterleaveLO8x16(src, zero); + res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh)); + } else { + vassert(immh == 0); + /* invalid; don't assign to res */ + } + /* */ + if (res) { + putQReg128(dd, res); + DIP("%cshll%s %s.%s, %s.%s, #%d\n", + isU ? 'u' : 's', isQ ? "2" : "", + nameQReg128(dd), ta, nameQReg128(nn), tb, sh); + return True; + } + /* else fall through */ + } + + /* -------------------- XTN{,2} -------------------- */ + /* 31 28 23 21 15 9 4 XTN{,2} Vd.Tb, Vn.Ta + 0q0 01110 size 100001 001010 n d + */ + if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) + && INSN(21,16) == BITS6(1,0,0,0,0,1) + && INSN(15,10) == BITS6(0,0,1,0,1,0)) { + Bool isQ = INSN(30,30) == 1; + UInt size = INSN(23,22); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IROp op = Iop_INVALID; + const HChar* tb = NULL; + const HChar* ta = NULL; + switch ((size << 1) | (isQ ? 1 : 0)) { + case 0: tb = "8b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break; + case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break; + case 2: tb = "4h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break; + case 3: tb = "8h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break; + case 4: tb = "2s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break; + case 5: tb = "4s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break; + case 6: break; + case 7: break; + default: vassert(0); + } + if (op != Iop_INVALID) { + if (!isQ) { + putQRegLane(dd, 1, mkU64(0)); + } + putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn))); + DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "", + nameQReg128(dd), tb, nameQReg128(nn), ta); + return True; + } + /* else fall through */ + } + + /* ---------------- DUP (element, vector) ---------------- */ + /* 31 28 20 15 9 4 + 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index] + */ + if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) + && INSN(15,10) == BITS6(0,0,0,0,0,1)) { + Bool isQ = INSN(30,30) == 1; + UInt imm5 = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp w0 = newTemp(Ity_I64); + const HChar* arT = "??"; + const HChar* arTs = "??"; + IRType laneTy = Ity_INVALID; + UInt laneNo = 16; /* invalid */ + if (imm5 & 1) { + arT = isQ ? "16b" : "8b"; + arTs = "b"; + laneNo = (imm5 >> 1) & 15; + laneTy = Ity_I8; + assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy))); + } + else if (imm5 & 2) { + arT = isQ ? "8h" : "4h"; + arTs = "h"; + laneNo = (imm5 >> 2) & 7; + laneTy = Ity_I16; + assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy))); + } + else if (imm5 & 4) { + arT = isQ ? "4s" : "2s"; + arTs = "s"; + laneNo = (imm5 >> 3) & 3; + laneTy = Ity_I32; + assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy))); + } + else if ((imm5 & 8) && isQ) { + arT = "2d"; + arTs = "d"; + laneNo = (imm5 >> 4) & 1; + laneTy = Ity_I64; + assign(w0, getQRegLane(nn, laneNo, laneTy)); + } + else { + /* invalid; leave laneTy unchanged. */ + } + /* */ + if (laneTy != Ity_INVALID) { + vassert(laneNo < 16); + IRTemp w1 = math_DUP_TO_64(w0, laneTy); + putQReg128(dd, binop(Iop_64HLtoV128, + isQ ? 
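
The {U,S}SHLL implementation above widens by interleaving the source with zero, which parks each source lane in the top half of a double-width lane, and then shifts right by (laneWidth - sh): logically for the unsigned form, arithmetically for the signed one. The net effect is "widen, then shift left by sh". One 8-to-16-bit lane of that, in plain C (reference only, and relying on the usual arithmetic behaviour of >> on signed values):

   #include <stdint.h>

   static uint16_t ushll_lane8 ( uint8_t x, unsigned sh )   /* sh in 0..7 */
   {
      uint16_t t = (uint16_t)(x << 8);      /* interleave with zero       */
      return (uint16_t)(t >> (8 - sh));     /* Iop_ShrN16x8: zero-extend  */
   }

   static int16_t sshll_lane8 ( uint8_t x, unsigned sh )
   {
      int16_t t = (int16_t)(uint16_t)(x << 8);
      return (int16_t)(t >> (8 - sh));      /* Iop_SarN16x8: sign-extend  */
   }
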
mkexpr(w1) : mkU64(0), mkexpr(w1))); + DIP("dup %s.%s, %s.%s[%u]\n", + nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo); + return True; + } + /* else fall through */ + } + + /* ---------------- DUP (general, vector) ---------------- */ + /* 31 28 23 20 15 9 4 + 0q0 01110 000 imm5 000011 n d DUP Vd.T, Rn + Q=0 writes 64, Q=1 writes 128 + imm5: xxxx1 8B(q=0) or 16b(q=1), R=W + xxx10 4H(q=0) or 8H(q=1), R=W + xx100 2S(q=0) or 4S(q=1), R=W + x1000 Invalid(q=0) or 2D(q=1), R=X + x0000 Invalid(q=0) or Invalid(q=1) + */ + if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) + && INSN(15,10) == BITS6(0,0,0,0,1,1)) { + Bool isQ = INSN(30,30) == 1; + UInt imm5 = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + IRTemp w0 = newTemp(Ity_I64); + const HChar* arT = "??"; + IRType laneTy = Ity_INVALID; + if (imm5 & 1) { + arT = isQ ? "16b" : "8b"; + laneTy = Ity_I8; + assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn)))); + } + else if (imm5 & 2) { + arT = isQ ? "8h" : "4h"; + laneTy = Ity_I16; + assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn)))); + } + else if (imm5 & 4) { + arT = isQ ? "4s" : "2s"; + laneTy = Ity_I32; + assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn)))); + } + else if ((imm5 & 8) && isQ) { + arT = "2d"; + laneTy = Ity_I64; + assign(w0, getIReg64orZR(nn)); + } + else { + /* invalid; leave laneTy unchanged. */ + } + /* */ + if (laneTy != Ity_INVALID) { + IRTemp w1 = math_DUP_TO_64(w0, laneTy); + putQReg128(dd, binop(Iop_64HLtoV128, + isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1))); + DIP("dup %s.%s, %s\n", + nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn)); + return True; + } + /* else fall through */ + } + + /* ---------------------- {S,U}MOV ---------------------- */ + /* 31 28 20 15 9 4 + 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index] + 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index] + dest is Xd when q==1, Wd when q==0 + UMOV: + Ts,index,ops = case q:imm5 of + 0:xxxx1 -> B, xxxx, 8Uto64 + 1:xxxx1 -> invalid + 0:xxx10 -> H, xxx, 16Uto64 + 1:xxx10 -> invalid + 0:xx100 -> S, xx, 32Uto64 + 1:xx100 -> invalid + 1:x1000 -> D, x, copy64 + other -> invalid + SMOV: + Ts,index,ops = case q:imm5 of + 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32) + 1:xxxx1 -> B, xxxx, 8Sto64 + 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32) + 1:xxx10 -> H, xxx, 16Sto64 + 0:xx100 -> invalid + 1:xx100 -> S, xx, 32Sto64 + 1:x1000 -> invalid + other -> invalid + */ + if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) + && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) { + UInt bitQ = INSN(30,30) == 1; + UInt imm5 = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool isU = INSN(12,12) == 1; + const HChar* arTs = "??"; + UInt laneNo = 16; /* invalid */ + // Setting 'res' to non-NULL determines valid/invalid + IRExpr* res = NULL; + if (!bitQ && (imm5 & 1)) { // 0:xxxx1 + laneNo = (imm5 >> 1) & 15; + IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); + res = isU ? unop(Iop_8Uto64, lane) + : unop(Iop_32Uto64, unop(Iop_8Sto32, lane)); + arTs = "b"; + } + else if (bitQ && (imm5 & 1)) { // 1:xxxx1 + laneNo = (imm5 >> 1) & 15; + IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); + res = isU ? NULL + : unop(Iop_8Sto64, lane); + arTs = "b"; + } + else if (!bitQ && (imm5 & 2)) { // 0:xxx10 + laneNo = (imm5 >> 2) & 7; + IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); + res = isU ? 
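
math_DUP_TO_64 is defined earlier in the file; whatever IR it emits, the effect the two DUP cases above need is simply "replicate the lane value across a 64-bit word", which the 64HLtoV128 above then doubles up (or pairs with zero when Q==0). A plain-C statement of that effect, for reference only:

   #include <stdint.h>

   /* laneSzB is 1, 2, 4 or 8, matching Ity_I8/I16/I32/I64 above. */
   static uint64_t dup_to_64_ref ( uint64_t lane, unsigned laneSzB )
   {
      switch (laneSzB) {
         case 1: return (lane & 0xFFull)       * 0x0101010101010101ull;
         case 2: return (lane & 0xFFFFull)     * 0x0001000100010001ull;
         case 4: return (lane & 0xFFFFFFFFull) * 0x0000000100000001ull;
         case 8: return lane;
         default: return 0;   /* invalid */
      }
   }
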
unop(Iop_16Uto64, lane) + : unop(Iop_32Uto64, unop(Iop_16Sto32, lane)); + arTs = "h"; + } + else if (bitQ && (imm5 & 2)) { // 1:xxx10 + laneNo = (imm5 >> 2) & 7; + IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); + res = isU ? NULL + : unop(Iop_16Sto64, lane); + arTs = "h"; + } + else if (!bitQ && (imm5 & 4)) { // 0:xx100 + laneNo = (imm5 >> 3) & 3; + IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); + res = isU ? unop(Iop_32Uto64, lane) + : NULL; + arTs = "s"; + } + else if (bitQ && (imm5 & 4)) { // 1:xxx10 + laneNo = (imm5 >> 3) & 3; + IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); + res = isU ? NULL + : unop(Iop_32Sto64, lane); + arTs = "s"; + } + else if (bitQ && (imm5 & 8)) { // 1:x1000 + laneNo = (imm5 >> 4) & 1; + IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64); + res = isU ? lane + : NULL; + arTs = "d"; + } + /* */ + if (res) { + vassert(laneNo < 16); + putIReg64orZR(dd, res); + DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's', + nameIRegOrZR(bitQ == 1, dd), + nameQReg128(nn), arTs, laneNo); + return True; + } + /* else fall through */ + } + + /* -------------------- INS (general) -------------------- */ + /* 31 28 20 15 9 4 + 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn + where Ts,ix = case imm5 of xxxx1 -> B, xxxx + xxx10 -> H, xxx + xx100 -> S, xx + x1000 -> D, x + */ + if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0) + && INSN(15,10) == BITS6(0,0,0,1,1,1)) { + UInt imm5 = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + HChar ts = '?'; + UInt laneNo = 16; + IRExpr* src = NULL; + if (imm5 & 1) { + src = unop(Iop_64to8, getIReg64orZR(nn)); + laneNo = (imm5 >> 1) & 15; + ts = 'b'; + } + else if (imm5 & 2) { + src = unop(Iop_64to16, getIReg64orZR(nn)); + laneNo = (imm5 >> 2) & 7; + ts = 'h'; + } + else if (imm5 & 4) { + src = unop(Iop_64to32, getIReg64orZR(nn)); + laneNo = (imm5 >> 3) & 3; + ts = 's'; + } + else if (imm5 & 8) { + src = getIReg64orZR(nn); + laneNo = (imm5 >> 4) & 1; + ts = 'd'; + } + /* */ + if (src) { + vassert(laneNo < 16); + putQRegLane(dd, laneNo, src); + DIP("ins %s.%c[%u], %s\n", + nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn)); + return True; + } + /* else invalid; fall through */ + } + + /* -------------------- NEG (vector) -------------------- */ + /* 31 28 23 21 16 9 4 + 0q1 01110 sz 10000 0101110 n d NEG Vd, Vn + sz is laneSz, q:sz == 011 is disallowed, as usual + */ + if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0) + && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) { + Bool isQ = INSN(30,30) == 1; + UInt szBlg2 = INSN(23,22); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + Bool zeroHI = False; + const HChar* arrSpec = ""; + Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); + if (ok) { + const IROp opSUB[4] + = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; + IRTemp res = newTemp(Ity_V128); + vassert(szBlg2 < 4); + assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn))); + putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res)) + : mkexpr(res)); + DIP("neg %s.%s, %s.%s\n", + nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); + return True; + } + /* else fall through */ + } + + /* -------------------- TBL, TBX -------------------- */ + /* 31 28 20 15 14 12 9 4 + 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta + 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. 
V(n+len)%32}, Vm.Ta + where Ta = 16b(q=1) or 8b(q=0) + */ + if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) + && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) { + Bool isQ = INSN(30,30) == 1; + Bool isTBX = INSN(12,12) == 1; + UInt mm = INSN(20,16); + UInt len = INSN(14,13); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + /* The out-of-range values to use. */ + IRTemp oor_values = newTemp(Ity_V128); + assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0)); + /* src value */ + IRTemp src = newTemp(Ity_V128); + assign(src, getQReg128(mm)); + /* The table values */ + IRTemp tab[4]; + UInt i; + for (i = 0; i <= len; i++) { + vassert(i < 4); + tab[i] = newTemp(Ity_V128); + assign(tab[i], getQReg128((nn + i) % 32)); + } + IRTemp res = math_TBL_TBX(tab, len, src, oor_values); + putQReg128(dd, isQ ? mkexpr(res) + : unop(Iop_ZeroHI64ofV128, mkexpr(res)) ); + const HChar* Ta = isQ ? "16b" : "8b"; + const HChar* nm = isTBX ? "tbx" : "tbl"; + DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n", + nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta); + return True; + } + /* FIXME Temporary hacks to get through ld.so FIXME */ + + /* ------------------ movi vD.4s, #0x0 ------------------ */ + /* 0x4F 0x00 0x04 000 vD */ + if ((insn & 0xFFFFFFE0) == 0x4F000400) { + UInt vD = INSN(4,0); + putQReg128(vD, mkV128(0x0000)); + DIP("movi v%u.4s, #0x0\n", vD); + return True; + } + + /* ---------------- MOV vD.16b, vN.16b ---------------- */ + /* 31 23 20 15 9 4 + 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b + This only handles the N == M case. + */ + if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0) + && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) { + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + if (mm == nn) { + putQReg128(dd, getQReg128(nn)); + DIP("mov v%u.16b, v%u.16b\n", dd, nn); + return True; + } + /* else it's really an ORR; fall through. */ + } + + /* ---------------- CMEQ_d_d_#0 ---------------- */ + /* + 010 11110 11 10000 0100 110 n d CMEQ Dd, Dn, #0 + */ + if ((INSN(31,0) & 0xFFFFFC00) == 0x5EE09800) { + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + putQReg128(dd, unop(Iop_ZeroHI64ofV128, + binop(Iop_CmpEQ64x2, getQReg128(nn), + mkV128(0x0000)))); + DIP("cmeq d%u, d%u, #0\n", dd, nn); + return True; + } + + /* ---------------- SHL_d_d_#imm ---------------- */ + /* 31 22 21 18 15 9 4 + 010 111110 1 ih3 ib 010101 n d SHL Dd, Dn, #(ih3:ib) + */ + if (INSN(31,22) == BITS10(0,1,0,1,1,1,1,1,0,1) + && INSN(15,10) == BITS6(0,1,0,1,0,1)) { + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + UInt sh = INSN(21,16); + vassert(sh < 64); + putQReg128(dd, unop(Iop_ZeroHI64ofV128, + binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh)))); + DIP("shl d%u, d%u, #%u\n", dd, nn, sh); + return True; + } + + vex_printf("ARM64 front end: simd_and_fp\n"); + return False; +# undef INSN +} + + +/*------------------------------------------------------------*/ +/*--- Disassemble a single ARM64 instruction ---*/ +/*------------------------------------------------------------*/ + +/* Disassemble a single ARM64 instruction into IR. The instruction + has is located at |guest_instr| and has guest IP of + |guest_PC_curr_instr|, which will have been set before the call + here. Returns True iff the instruction was decoded, in which case + *dres will be set accordingly, or False, in which case *dres should + be ignored by the caller. 
*/ + +static +Bool disInstr_ARM64_WRK ( + /*MB_OUT*/DisResult* dres, + Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), + Bool resteerCisOk, + void* callback_opaque, + UChar* guest_instr, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo + ) +{ + // A macro to fish bits out of 'insn'. +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + +//ZZ DisResult dres; +//ZZ UInt insn; +//ZZ //Bool allow_VFP = False; +//ZZ //UInt hwcaps = archinfo->hwcaps; +//ZZ IRTemp condT; /* :: Ity_I32 */ +//ZZ UInt summary; +//ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text +//ZZ +//ZZ /* What insn variants are we supporting today? */ +//ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP)); +//ZZ // etc etc + + /* Set result defaults. */ + dres->whatNext = Dis_Continue; + dres->len = 4; + dres->continueAt = 0; + dres->jk_StopHere = Ijk_INVALID; + + /* At least this is simple on ARM64: insns are all 4 bytes long, and + 4-aligned. So just fish the whole thing out of memory right now + and have done. */ + UInt insn = getUIntLittleEndianly( guest_instr ); + + if (0) vex_printf("insn: 0x%x\n", insn); + + DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr); + + vassert(0 == (guest_PC_curr_instr & 3ULL)); + + /* ----------------------------------------------------------- */ + + /* Spot "Special" instructions (see comment at top of file). */ + { + UChar* code = (UChar*)guest_instr; + /* Spot the 16-byte preamble: + 93CC0D8C ror x12, x12, #3 + 93CC358C ror x12, x12, #13 + 93CCCD8C ror x12, x12, #51 + 93CCF58C ror x12, x12, #61 + */ + UInt word1 = 0x93CC0D8C; + UInt word2 = 0x93CC358C; + UInt word3 = 0x93CCCD8C; + UInt word4 = 0x93CCF58C; + if (getUIntLittleEndianly(code+ 0) == word1 && + getUIntLittleEndianly(code+ 4) == word2 && + getUIntLittleEndianly(code+ 8) == word3 && + getUIntLittleEndianly(code+12) == word4) { + /* Got a "Special" instruction preamble. Which one is it? */ + if (getUIntLittleEndianly(code+16) == 0xAA0A014A + /* orr x10,x10,x10 */) { + /* X3 = client_request ( X4 ) */ + DIP("x3 = client_request ( x4 )\n"); + putPC(mkU64( guest_PC_curr_instr + 20 )); + dres->jk_StopHere = Ijk_ClientReq; + dres->whatNext = Dis_StopHere; + return True; + } + else + if (getUIntLittleEndianly(code+16) == 0xAA0B016B + /* orr x11,x11,x11 */) { + /* X3 = guest_NRADDR */ + DIP("x3 = guest_NRADDR\n"); + dres->len = 20; + putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); + return True; + } + else + if (getUIntLittleEndianly(code+16) == 0xAA0C018C + /* orr x12,x12,x12 */) { + /* branch-and-link-to-noredir X8 */ + DIP("branch-and-link-to-noredir x8\n"); + putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20)); + putPC(getIReg64orZR(8)); + dres->jk_StopHere = Ijk_NoRedir; + dres->whatNext = Dis_StopHere; + return True; + } + else + if (getUIntLittleEndianly(code+16) == 0xAA090129 + /* orr x9,x9,x9 */) { + /* IR injection */ + DIP("IR injection\n"); + vex_inject_ir(irsb, Iend_LE); + // Invalidate the current insn. The reason is that the IRop we're + // injecting here can change. In which case the translation has to + // be redone. For ease of handling, we simply invalidate all the + // time. + stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr))); + stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20))); + putPC(mkU64( guest_PC_curr_instr + 20 )); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_InvalICache; + return True; + } + /* We don't know what it is. 
*/ + return False; + /*NOTREACHED*/ + } + } + + /* ----------------------------------------------------------- */ + + /* Main ARM64 instruction decoder starts here. */ + + Bool ok = False; + + /* insn[28:25] determines the top-level grouping, so let's start + off with that. + + For all of these dis_ARM64_ functions, we pass *dres with the + normal default results "insn OK, 4 bytes long, keep decoding" so + they don't need to change it. However, decodes of control-flow + insns may cause *dres to change. + */ + switch (INSN(28,25)) { + case BITS4(1,0,0,0): case BITS4(1,0,0,1): + // Data processing - immediate + ok = dis_ARM64_data_processing_immediate(dres, insn); + break; + case BITS4(1,0,1,0): case BITS4(1,0,1,1): + // Branch, exception generation and system instructions + ok = dis_ARM64_branch_etc(dres, insn, archinfo); + break; + case BITS4(0,1,0,0): case BITS4(0,1,1,0): + case BITS4(1,1,0,0): case BITS4(1,1,1,0): + // Loads and stores + ok = dis_ARM64_load_store(dres, insn); + break; + case BITS4(0,1,0,1): case BITS4(1,1,0,1): + // Data processing - register + ok = dis_ARM64_data_processing_register(dres, insn); + break; + case BITS4(0,1,1,1): case BITS4(1,1,1,1): + // Data processing - SIMD and floating point + ok = dis_ARM64_simd_and_fp(dres, insn); + break; + case BITS4(0,0,0,0): case BITS4(0,0,0,1): + case BITS4(0,0,1,0): case BITS4(0,0,1,1): + // UNALLOCATED + break; + default: + vassert(0); /* Can't happen */ + } + + /* If the next-level down decoders failed, make sure |dres| didn't + get changed. */ + if (!ok) { + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 4); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + } + + return ok; + +# undef INSN +} + + +/*------------------------------------------------------------*/ +/*--- Top-level fn ---*/ +/*------------------------------------------------------------*/ + +/* Disassemble a single instruction into IR. The instruction + is located in host memory at &guest_code[delta]. */ + +DisResult disInstr_ARM64 ( IRSB* irsb_IN, + Bool (*resteerOkFn) ( void*, Addr64 ), + Bool resteerCisOk, + void* callback_opaque, + UChar* guest_code_IN, + Long delta_IN, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian_IN, + Bool sigill_diag_IN ) +{ + DisResult dres; + vex_bzero(&dres, sizeof(dres)); + + /* Set globals (see top of this file) */ + vassert(guest_arch == VexArchARM64); + + irsb = irsb_IN; + host_is_bigendian = host_bigendian_IN; + guest_PC_curr_instr = (Addr64)guest_IP; + + /* Sanity checks */ + /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */ + vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15); + vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15); + + /* Try to decode */ + Bool ok = disInstr_ARM64_WRK( &dres, + resteerOkFn, resteerCisOk, callback_opaque, + (UChar*)&guest_code_IN[delta_IN], + archinfo, abiinfo ); + if (ok) { + /* All decode successes end up here. */ + vassert(dres.len == 4 || dres.len == 20); + switch (dres.whatNext) { + case Dis_Continue: + putPC( mkU64(dres.len + guest_PC_curr_instr) ); + break; + case Dis_ResteerU: + case Dis_ResteerC: + putPC(mkU64(dres.continueAt)); + break; + case Dis_StopHere: + break; + default: + vassert(0); + } + DIP("\n"); + } else { + /* All decode failures end up here. 
*/ + if (sigill_diag_IN) { + Int i, j; + UChar buf[64]; + UInt insn + = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] ); + vex_bzero(buf, sizeof(buf)); + for (i = j = 0; i < 32; i++) { + if (i > 0) { + if ((i & 7) == 0) buf[j++] = ' '; + else if ((i & 3) == 0) buf[j++] = '\''; + } + buf[j++] = (insn & (1<<(31-i))) ? '1' : '0'; + } + vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn); + vex_printf("disInstr(arm64): %s\n", buf); + } + + /* Tell the dispatcher that this insn cannot be decoded, and so + has not been executed, and (is currently) the next to be + executed. PC should be up-to-date since it is made so at the + start of each insn, but nevertheless be paranoid and update + it again right now. */ + putPC( mkU64(guest_PC_curr_instr) ); + dres.whatNext = Dis_StopHere; + dres.len = 0; + dres.continueAt = 0; + dres.jk_StopHere = Ijk_NoDecode; + } + return dres; +} + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +/* Spare code for doing reference implementations of various 128-bit + SIMD interleaves/deinterleaves/concatenation ops. For 64-bit + equivalents see the end of guest_arm_toIR.c. */ + +//////////////////////////////////////////////////////////////// +// 64x2 operations +// +static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) +{ + // returns a0 b0 + return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)), + unop(Iop_V128to64, mkexpr(b10))); +} + +static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) +{ + // returns a1 b1 + return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)), + unop(Iop_V128HIto64, mkexpr(b10))); +} + + +//////////////////////////////////////////////////////////////// +// 32x4 operations +// + +// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with +// the top halves guaranteed to be zero. +static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1, + IRTemp* out0, IRTemp v128 ) +{ + if (out3) *out3 = newTemp(Ity_I64); + if (out2) *out2 = newTemp(Ity_I64); + if (out1) *out1 = newTemp(Ity_I64); + if (out0) *out0 = newTemp(Ity_I64); + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) ); + assign(lo64, unop(Iop_V128to64, mkexpr(v128)) ); + if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32))); + if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF))); + if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32))); + if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF))); +} + +// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit +// IRTemp. 
+static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 ) +{ + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + assign(hi64, + binop(Iop_Or64, + binop(Iop_Shl64, mkexpr(in3), mkU8(32)), + binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF)))); + assign(lo64, + binop(Iop_Or64, + binop(Iop_Shl64, mkexpr(in1), mkU8(32)), + binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF)))); + IRTemp res = newTemp(Ity_V128); + assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64))); + return res; +} + +static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) +{ + // returns a2 a0 b2 b0 + IRTemp a2, a0, b2, b0; + breakV128to32s(NULL, &a2, NULL, &a0, a3210); + breakV128to32s(NULL, &b2, NULL, &b0, b3210); + return mkexpr(mkV128from32s(a2, a0, b2, b0)); +} + +static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) +{ + // returns a3 a1 b3 b1 + IRTemp a3, a1, b3, b1; + breakV128to32s(&a3, NULL, &a1, NULL, a3210); + breakV128to32s(&b3, NULL, &b1, NULL, b3210); + return mkexpr(mkV128from32s(a3, a1, b3, b1)); +} + +static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) +{ + // returns a1 b1 a0 b0 + IRTemp a1, a0, b1, b0; + breakV128to32s(NULL, NULL, &a1, &a0, a3210); + breakV128to32s(NULL, NULL, &b1, &b0, b3210); + return mkexpr(mkV128from32s(a1, b1, a0, b0)); +} + +static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) +{ + // returns a3 b3 a2 b2 + IRTemp a3, a2, b3, b2; + breakV128to32s(&a3, &a2, NULL, NULL, a3210); + breakV128to32s(&b3, &b2, NULL, NULL, b3210); + return mkexpr(mkV128from32s(a3, b3, a2, b2)); +} + +//////////////////////////////////////////////////////////////// +// 16x8 operations +// + +static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5, + IRTemp* out4, IRTemp* out3, IRTemp* out2, + IRTemp* out1,IRTemp* out0, IRTemp v128 ) +{ + if (out7) *out7 = newTemp(Ity_I64); + if (out6) *out6 = newTemp(Ity_I64); + if (out5) *out5 = newTemp(Ity_I64); + if (out4) *out4 = newTemp(Ity_I64); + if (out3) *out3 = newTemp(Ity_I64); + if (out2) *out2 = newTemp(Ity_I64); + if (out1) *out1 = newTemp(Ity_I64); + if (out0) *out0 = newTemp(Ity_I64); + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) ); + assign(lo64, unop(Iop_V128to64, mkexpr(v128)) ); + if (out7) + assign(*out7, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(48)), + mkU64(0xFFFF))); + if (out6) + assign(*out6, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(32)), + mkU64(0xFFFF))); + if (out5) + assign(*out5, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(16)), + mkU64(0xFFFF))); + if (out4) + assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF))); + if (out3) + assign(*out3, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(48)), + mkU64(0xFFFF))); + if (out2) + assign(*out2, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(32)), + mkU64(0xFFFF))); + if (out1) + assign(*out1, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(16)), + mkU64(0xFFFF))); + if (out0) + assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF))); +} + +static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4, + IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 ) +{ + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + assign(hi64, + binop(Iop_Or64, + binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)), + mkU8(48)), + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)), + mkU8(32))), + 
binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)), + mkU8(16)), + binop(Iop_And64, + mkexpr(in4), mkU64(0xFFFF))))); + assign(lo64, + binop(Iop_Or64, + binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)), + mkU8(48)), + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)), + mkU8(32))), + binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)), + mkU8(16)), + binop(Iop_And64, + mkexpr(in0), mkU64(0xFFFF))))); + IRTemp res = newTemp(Ity_V128); + assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64))); + return res; +} + +static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) +{ + // returns a6 a4 a2 a0 b6 b4 b2 b0 + IRTemp a6, a4, a2, a0, b6, b4, b2, b0; + breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210); + breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210); + return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0)); +} + +static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) +{ + // returns a7 a5 a3 a1 b7 b5 b3 b1 + IRTemp a7, a5, a3, a1, b7, b5, b3, b1; + breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210); + breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210); + return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1)); +} + +static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) +{ + // returns a3 b3 a2 b2 a1 b1 a0 b0 + IRTemp a3, b3, a2, b2, a1, a0, b1, b0; + breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210); + breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210); + return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0)); +} + +static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) +{ + // returns a7 b7 a6 b6 a5 b5 a4 b4 + IRTemp a7, b7, a6, b6, a5, b5, a4, b4; + breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210); + breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210); + return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4)); +} + +//////////////////////////////////////////////////////////////// +// 8x16 operations +// + +static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD, + IRTemp* outC, IRTemp* outB, IRTemp* outA, + IRTemp* out9, IRTemp* out8, + IRTemp* out7, IRTemp* out6, IRTemp* out5, + IRTemp* out4, IRTemp* out3, IRTemp* out2, + IRTemp* out1,IRTemp* out0, IRTemp v128 ) +{ + if (outF) *outF = newTemp(Ity_I64); + if (outE) *outE = newTemp(Ity_I64); + if (outD) *outD = newTemp(Ity_I64); + if (outC) *outC = newTemp(Ity_I64); + if (outB) *outB = newTemp(Ity_I64); + if (outA) *outA = newTemp(Ity_I64); + if (out9) *out9 = newTemp(Ity_I64); + if (out8) *out8 = newTemp(Ity_I64); + if (out7) *out7 = newTemp(Ity_I64); + if (out6) *out6 = newTemp(Ity_I64); + if (out5) *out5 = newTemp(Ity_I64); + if (out4) *out4 = newTemp(Ity_I64); + if (out3) *out3 = newTemp(Ity_I64); + if (out2) *out2 = newTemp(Ity_I64); + if (out1) *out1 = newTemp(Ity_I64); + if (out0) *out0 = newTemp(Ity_I64); + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) ); + assign(lo64, unop(Iop_V128to64, mkexpr(v128)) ); + if (outF) + assign(*outF, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(56)), + mkU64(0xFF))); + if (outE) + assign(*outE, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(48)), + mkU64(0xFF))); + if (outD) + assign(*outD, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), 
mkU8(40)), + mkU64(0xFF))); + if (outC) + assign(*outC, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(32)), + mkU64(0xFF))); + if (outB) + assign(*outB, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(24)), + mkU64(0xFF))); + if (outA) + assign(*outA, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(16)), + mkU64(0xFF))); + if (out9) + assign(*out9, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(8)), + mkU64(0xFF))); + if (out8) + assign(*out8, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(hi64), mkU8(0)), + mkU64(0xFF))); + if (out7) + assign(*out7, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(56)), + mkU64(0xFF))); + if (out6) + assign(*out6, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(48)), + mkU64(0xFF))); + if (out5) + assign(*out5, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(40)), + mkU64(0xFF))); + if (out4) + assign(*out4, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(32)), + mkU64(0xFF))); + if (out3) + assign(*out3, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(24)), + mkU64(0xFF))); + if (out2) + assign(*out2, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(16)), + mkU64(0xFF))); + if (out1) + assign(*out1, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(8)), + mkU64(0xFF))); + if (out0) + assign(*out0, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(lo64), mkU8(0)), + mkU64(0xFF))); +} + +static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC, + IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8, + IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4, + IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 ) +{ + IRTemp vFE = newTemp(Ity_I64); + IRTemp vDC = newTemp(Ity_I64); + IRTemp vBA = newTemp(Ity_I64); + IRTemp v98 = newTemp(Ity_I64); + IRTemp v76 = newTemp(Ity_I64); + IRTemp v54 = newTemp(Ity_I64); + IRTemp v32 = newTemp(Ity_I64); + IRTemp v10 = newTemp(Ity_I64); + assign(vFE, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(inE), mkU64(0xFF)))); + assign(vDC, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(inC), mkU64(0xFF)))); + assign(vBA, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(inA), mkU64(0xFF)))); + assign(v98, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(in8), mkU64(0xFF)))); + assign(v76, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(in6), mkU64(0xFF)))); + assign(v54, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(in4), mkU64(0xFF)))); + assign(v32, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(in2), mkU64(0xFF)))); + assign(v10, binop(Iop_Or64, + binop(Iop_Shl64, + binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)), + binop(Iop_And64, mkexpr(in0), mkU64(0xFF)))); + return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10); +} + +static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210, + IRTemp bFEDCBA9876543210 ) +{ + // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0 + IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0; + breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8, + NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, + aFEDCBA9876543210); 
+ breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8, + NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, + bFEDCBA9876543210); + return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0, + bE, bC, bA, b8, b6, b4, b2, b0)); +} + +static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210, + IRTemp bFEDCBA9876543210 ) +{ + // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1 + IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1; + breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL, + &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, + aFEDCBA9876543210); + + breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL, + &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, + aFEDCBA9876543210); + + return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1, + bF, bD, bB, b9, b7, b5, b3, b1)); +} + +static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210, + IRTemp bFEDCBA9876543210 ) +{ + // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0 + IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0; + breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0, + aFEDCBA9876543210); + breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0, + bFEDCBA9876543210); + return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4, + a3, b3, a2, b2, a1, b1, a0, b0)); +} + +static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210, + IRTemp bFEDCBA9876543210 ) +{ + // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8 + IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8; + breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + aFEDCBA9876543210); + breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + bFEDCBA9876543210); + return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC, + aB, bB, aA, bA, a9, b9, a8, b8)); +} + +/*--------------------------------------------------------------------*/ +/*--- end guest_arm64_toIR.c ---*/ +/*--------------------------------------------------------------------*/ Index: priv/guest_arm_helpers.c =================================================================== --- priv/guest_arm_helpers.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_arm_helpers.c (.../trunk) (revision 2863) @@ -981,8 +981,8 @@ vex_state->guest_GEFLAG3 = 0; vex_state->guest_EMNOTE = EmNote_NONE; - vex_state->guest_TISTART = 0; - vex_state->guest_TILEN = 0; + vex_state->guest_CMSTART = 0; + vex_state->guest_CMLEN = 0; vex_state->guest_NRADDR = 0; vex_state->guest_IP_AT_SYSCALL = 0; @@ -1030,10 +1030,6 @@ vex_state->guest_ITSTATE = 0; vex_state->padding1 = 0; - vex_state->padding2 = 0; - vex_state->padding3 = 0; - vex_state->padding4 = 0; - vex_state->padding5 = 0; } @@ -1130,8 +1126,8 @@ /* 1 */ ALWAYSDEFD(guest_CC_OP), /* 2 */ ALWAYSDEFD(guest_CC_NDEP), /* 3 */ ALWAYSDEFD(guest_EMNOTE), - /* 4 */ ALWAYSDEFD(guest_TISTART), - /* 5 */ ALWAYSDEFD(guest_TILEN), + /* 4 */ ALWAYSDEFD(guest_CMSTART), + /* 5 */ ALWAYSDEFD(guest_CMLEN), /* 6 */ ALWAYSDEFD(guest_NRADDR), /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL), /* 8 */ ALWAYSDEFD(guest_TPIDRURO), Index: priv/guest_arm_toIR.c =================================================================== --- priv/guest_arm_toIR.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_arm_toIR.c (.../trunk) (revision 2863) @@ -485,8 +485,8 @@ #define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2) #define 
OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3) -#define OFFB_TISTART offsetof(VexGuestARMState,guest_TISTART) -#define OFFB_TILEN offsetof(VexGuestARMState,guest_TILEN) +#define OFFB_CMSTART offsetof(VexGuestARMState,guest_CMSTART) +#define OFFB_CMLEN offsetof(VexGuestARMState,guest_CMLEN) /* ---------------- Integer registers ---------------- */ @@ -2875,6 +2875,31 @@ return True; } +/* Generate specific vector FP binary ops, possibly with a fake + rounding mode as required by the primop. */ +static +IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR ) +{ + switch (op) { + case Iop_Add32Fx4: + case Iop_Sub32Fx4: + case Iop_Mul32Fx4: + return triop(op, get_FAKE_roundingmode(), argL, argR ); + case Iop_Add32x4: case Iop_Add16x8: + case Iop_Sub32x4: case Iop_Sub16x8: + case Iop_Mul32x4: case Iop_Mul16x8: + case Iop_Mul32x2: case Iop_Mul16x4: + case Iop_Add32Fx2: + case Iop_Sub32Fx2: + case Iop_Mul32Fx2: + case Iop_PwAdd32Fx2: + return binop(op, argL, argR); + default: + ppIROp(op); + vassert(0); + } +} + /* VTBL, VTBX */ static Bool dis_neon_vtb ( UInt theInstr, IRTemp condT ) @@ -4601,7 +4626,8 @@ /* VABD */ if (Q) { assign(res, unop(Iop_Abs32Fx4, - binop(Iop_Sub32Fx4, + triop(Iop_Sub32Fx4, + get_FAKE_roundingmode(), mkexpr(arg_n), mkexpr(arg_m)))); } else { @@ -4616,7 +4642,7 @@ break; } } - assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); + assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m))); } else { if (U == 0) { /* VMLA, VMLS */ @@ -4641,9 +4667,11 @@ default: vassert(0); } } - assign(res, binop(op2, - Q ? getQReg(dreg) : getDRegI64(dreg), - binop(op, mkexpr(arg_n), mkexpr(arg_m)))); + assign(res, binop_w_fake_RM( + op2, + Q ? getQReg(dreg) : getDRegI64(dreg), + binop_w_fake_RM(op, mkexpr(arg_n), + mkexpr(arg_m)))); DIP("vml%c.f32 %c%u, %c%u, %c%u\n", P ? 's' : 'a', Q ? 'q' : 'd', @@ -4654,7 +4682,7 @@ if ((C >> 1) != 0) return False; op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ; - assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); + assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m))); DIP("vmul.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); @@ -5318,10 +5346,10 @@ } } op2 = INSN(10,10) ? 
sub : add; - assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); + assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m))); if (Q) - putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), - condT); + putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)), + condT); else putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)), condT); @@ -5548,7 +5576,7 @@ vassert(0); } } - assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); + assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m))); if (Q) putQReg(dreg, mkexpr(res), condT); else @@ -13499,6 +13527,27 @@ condT); DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM); goto decode_success_vfp; + case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */ + /* XXXROUNDINGFIXME look up ARM reference for fused + multiply-add rounding */ + putDReg(dD, triop(Iop_AddF64, rm, + getDReg(dD), + triop(Iop_MulF64, rm, getDReg(dN), + getDReg(dM))), + condT); + DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM); + goto decode_success_vfp; + case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */ + /* XXXROUNDINGFIXME look up ARM reference for fused + multiply-add rounding */ + putDReg(dD, triop(Iop_AddF64, rm, + getDReg(dD), + triop(Iop_MulF64, rm, + unop(Iop_NegF64, getDReg(dN)), + getDReg(dM))), + condT); + DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM); + goto decode_success_vfp; default: break; } @@ -13963,6 +14012,27 @@ condT); DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM); goto decode_success_vfp; + case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */ + /* XXXROUNDINGFIXME look up ARM reference for fused + multiply-add rounding */ + putFReg(fD, triop(Iop_AddF32, rm, + getFReg(fD), + triop(Iop_MulF32, rm, getFReg(fN), + getFReg(fM))), + condT); + DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM); + goto decode_success_vfp; + case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */ + /* XXXROUNDINGFIXME look up ARM reference for fused + multiply-add rounding */ + putFReg(fD, triop(Iop_AddF32, rm, + getFReg(fD), + triop(Iop_MulF32, rm, + unop(Iop_NegF32, getFReg(fN)), + getFReg(fM))), + condT); + DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM); + goto decode_success_vfp; default: break; } @@ -14577,11 +14647,11 @@ // injecting here can change. In which case the translation has to // be redone. For ease of handling, we simply invalidate all the // time. - stmt(IRStmt_Put(OFFB_TISTART, mkU32(guest_R15_curr_instr_notENC))); - stmt(IRStmt_Put(OFFB_TILEN, mkU32(20))); + stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC))); + stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20))); llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 )); dres.whatNext = Dis_StopHere; - dres.jk_StopHere = Ijk_TInval; + dres.jk_StopHere = Ijk_InvalICache; goto decode_success; } /* We don't know what it is. Set opc1/opc2 so decode_failure @@ -17422,11 +17492,11 @@ // injecting here can change. In which case the translation has to // be redone. For ease of handling, we simply invalidate all the // time. - stmt(IRStmt_Put(OFFB_TISTART, mkU32(guest_R15_curr_instr_notENC))); - stmt(IRStmt_Put(OFFB_TILEN, mkU32(20))); + stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC))); + stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20))); llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 )); dres.whatNext = Dis_StopHere; - dres.jk_StopHere = Ijk_TInval; + dres.jk_StopHere = Ijk_InvalICache; goto decode_success; } /* We don't know what it is. 
Set insn0 so decode_failure @@ -18203,9 +18273,9 @@ condT = IRTemp_INVALID; // now uncond /* non-interworking branch */ - irsb->next = binop(Iop_Or32, mkexpr(res), mkU32(1)); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1))); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; } DIP("add(hi) r%u, r%u\n", rD, rM); goto decode_success; @@ -20250,7 +20320,7 @@ /* --------------- LD/ST reg+imm12 --------------- */ /* Loads and stores of the form: - op Rt, [Rn, +#imm12] + op Rt, [Rn, #+-imm12] where op is one of ldrb ldrh ldr ldrsb ldrsh strb strh str @@ -20257,27 +20327,25 @@ */ if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) { Bool valid = True; - Bool syned = False; + Bool syned = INSN0(8,8) == 1; Bool isST = False; IRType ty = Ity_I8; + UInt bU = INSN0(7,7); // 1: +imm 0: -imm + // -imm is only supported by literal versions const HChar* nm = "???"; - switch (INSN0(8,4)) { - case BITS5(0,1,0,0,0): // strb + switch (INSN0(6,4)) { + case BITS3(0,0,0): // strb nm = "strb"; isST = True; break; - case BITS5(0,1,0,0,1): // ldrb - nm = "ldrb"; break; - case BITS5(1,1,0,0,1): // ldrsb - nm = "ldrsb"; syned = True; break; - case BITS5(0,1,0,1,0): // strh + case BITS3(0,0,1): // ldrb + nm = syned ? "ldrsb" : "ldrb"; break; + case BITS3(0,1,0): // strh nm = "strh"; ty = Ity_I16; isST = True; break; - case BITS5(0,1,0,1,1): // ldrh - nm = "ldrh"; ty = Ity_I16; break; - case BITS5(1,1,0,1,1): // ldrsh - nm = "ldrsh"; ty = Ity_I16; syned = True; break; - case BITS5(0,1,1,0,0): // str + case BITS3(0,1,1): // ldrh + nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break; + case BITS3(1,0,0): // str nm = "str"; ty = Ity_I32; isST = True; break; - case BITS5(0,1,1,0,1): + case BITS3(1,0,1): nm = "ldr"; ty = Ity_I32; break; // ldr default: valid = False; break; @@ -20288,25 +20356,27 @@ UInt imm12 = INSN1(11,0); Bool loadsPC = False; - if (ty == Ity_I8 || ty == Ity_I16) { - /* all 8- and 16-bit load and store cases have the - same exclusion set. */ - if (rN == 15 || isBadRegT(rT)) + if (rN != 15 && bU == 0) { + // only pc supports #-imm12 + valid = False; + } + + if (isST) { + if (syned) valid = False; + if (rN == 15 || rT == 15) valid = False; } else { - vassert(ty == Ity_I32); - if (isST) { - if (rN == 15 || rT == 15) + /* For a 32-bit load, rT == 15 is only allowable if we are not + in an IT block, or are the last in it. Need to insert + a dynamic check for that. Also, in this particular + case, rN == 15 is allowable. In this case however, the + value obtained for rN is (apparently) + "word-align(address of current insn + 4)". */ + if (rT == 15) { + if (ty == Ity_I32) + loadsPC = True; + else // Can't do it for B/H loads valid = False; - } else { - /* For a 32-bit load, rT == 15 is only allowable if we not - in an IT block, or are the last in it. Need to insert - a dynamic check for that. Also, in this particular - case, rN == 15 is allowable. In this case however, the - value obtained for rN is (apparently) - "word-align(address of current insn + 4)". */ - if (rT == 15) - loadsPC = True; } } @@ -20324,8 +20394,8 @@ IRTemp rNt = newTemp(Ity_I32); if (rN == 15) { - vassert(ty == Ity_I32 && !isST); - assign(rNt, binop(Iop_And32, getIRegT(rN), mkU32(~3))); + vassert(!isST); + assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3))); } else { assign(rNt, getIRegT(rN)); } @@ -20332,7 +20402,8 @@ IRTemp transAddr = newTemp(Ity_I32); assign(transAddr, - binop( Iop_Add32, mkexpr(rNt), mkU32(imm12) )); + binop(bU == 1 ? 
Iop_Add32 : Iop_Sub32, + mkexpr(rNt), mkU32(imm12))); IRTemp oldRt = newTemp(Ity_I32); assign(oldRt, getIRegT(rT)); @@ -20387,9 +20458,8 @@ vassert(rT == 15); vassert(condT == IRTemp_INVALID); /* due to check above */ llPutIReg(15, mkexpr(newRt)); - irsb->next = mkexpr(newRt); - irsb->jumpkind = Ijk_Boring; /* or _Ret ? */ - dres.whatNext = Dis_StopHere; + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; } } @@ -20401,7 +20471,7 @@ /* -------------- LDRD/STRD reg+/-#imm8 -------------- */ /* Doubleword loads and stores of the form: - ldrd/strd Rt, Rt2, [Rn, #-imm8] or + ldrd/strd Rt, Rt2, [Rn, #+/-imm8] or ldrd/strd Rt, Rt2, [Rn], #+/-imm8 or ldrd/strd Rt, Rt2, [Rn, #+/-imm8]! */ @@ -20419,12 +20489,17 @@ if (bP == 0 && bW == 0) valid = False; if (bW == 1 && (rN == rT || rN == rT2)) valid = False; if (isBadRegT(rT) || isBadRegT(rT2)) valid = False; - if (rN == 15) valid = False; if (bL == 1 && rT == rT2) valid = False; + /* It's OK to use PC as the base register only in the + following case: ldrd Rt, Rt2, [PC, #+/-imm8] */ + if (rN == 15 && (bL == 0/*store*/ + || bW == 1/*wb*/)) valid = False; if (valid) { IRTemp preAddr = newTemp(Ity_I32); - assign(preAddr, getIRegT(rN)); + assign(preAddr, 15 == rN + ? binop(Iop_And32, getIRegT(15), mkU32(~3U)) + : getIRegT(rN)); IRTemp postAddr = newTemp(Ity_I32); assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32, Index: priv/guest_generic_bb_to_IR.c =================================================================== --- priv/guest_generic_bb_to_IR.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_generic_bb_to_IR.c (.../trunk) (revision 2863) @@ -131,8 +131,8 @@ not to disassemble any instructions into it; this is indicated by the callback returning True. - offB_TIADDR and offB_TILEN are the offsets of guest_TIADDR and - guest_TILEN. Since this routine has to work for any guest state, + offB_CMADDR and offB_CMLEN are the offsets of guest_CMADDR and + guest_CMLEN. Since this routine has to work for any guest state, without knowing what it is, those offsets have to passed in. callback_opaque is a caller-supplied pointer to data which the @@ -194,8 +194,8 @@ /*IN*/ IRType guest_word_type, /*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*), /*IN*/ Bool (*preamble_function)(void*,IRSB*), - /*IN*/ Int offB_GUEST_TISTART, - /*IN*/ Int offB_GUEST_TILEN, + /*IN*/ Int offB_GUEST_CMSTART, + /*IN*/ Int offB_GUEST_CMLEN, /*IN*/ Int offB_GUEST_IP, /*IN*/ Int szB_GUEST_IP ) @@ -663,7 +663,7 @@ vassert(!nm_spec); } - /* Set TISTART and TILEN. These will describe to the despatcher + /* Set CMSTART and CMLEN. These will describe to the despatcher the area of guest code to invalidate should we exit with a self-check failure. */ @@ -684,10 +684,10 @@ = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) ); irsb->stmts[selfcheck_idx + i * 5 + 2] - = IRStmt_Put( offB_GUEST_TISTART, IRExpr_RdTmp(tistart_tmp) ); + = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) ); irsb->stmts[selfcheck_idx + i * 5 + 3] - = IRStmt_Put( offB_GUEST_TILEN, IRExpr_RdTmp(tilen_tmp) ); + = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) ); /* Generate the entry point descriptors */ if (abiinfo_both->host_ppc_calls_use_fndescrs) { @@ -737,7 +737,7 @@ ? IRExpr_Const(IRConst_U64(expectedhW)) : IRExpr_Const(IRConst_U32(expectedhW)) ), - Ijk_TInval, + Ijk_InvalICache, /* Where we must restart if there's a failure: at the first extent, regardless of which extent the failure actually happened in. 
*/ Index: priv/guest_generic_bb_to_IR.h =================================================================== --- priv/guest_generic_bb_to_IR.h (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_generic_bb_to_IR.h (.../trunk) (revision 2863) @@ -184,8 +184,8 @@ /*IN*/ IRType guest_word_type, /*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*), /*IN*/ Bool (*preamble_function)(void*,IRSB*), - /*IN*/ Int offB_GUEST_TISTART, - /*IN*/ Int offB_GUEST_TILEN, + /*IN*/ Int offB_GUEST_CMSTART, + /*IN*/ Int offB_GUEST_CMLEN, /*IN*/ Int offB_GUEST_IP, /*IN*/ Int szB_GUEST_IP ); Index: priv/guest_mips_defs.h =================================================================== --- priv/guest_mips_defs.h (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_mips_defs.h (.../trunk) (revision 2863) @@ -85,7 +85,8 @@ TRUNCWS, TRUNCWD, TRUNCLS, TRUNCLD, CVTDS, CVTDW, CVTSD, CVTSW, CVTWS, CVTWD, CVTDL, CVTLS, - CVTLD, CVTSL + CVTLD, CVTSL, ADDS, ADDD, + SUBS, SUBD, DIVS } flt_op; extern UInt mips32_dirtyhelper_mfc0 ( UInt rd, UInt sel ); @@ -98,8 +99,12 @@ extern ULong mips64_dirtyhelper_rdhwr ( ULong rt, ULong rd ); #endif -extern UInt mips_dirtyhelper_calculate_FCSR ( void* guest_state, UInt fs, - flt_op op ); +/* Calculate FCSR in fp32 mode. */ +extern UInt mips_dirtyhelper_calculate_FCSR_fp32 ( void* guest_state, UInt fs, + UInt ft, flt_op op ); +/* Calculate FCSR in fp64 mode. */ +extern UInt mips_dirtyhelper_calculate_FCSR_fp64 ( void* guest_state, UInt fs, + UInt ft, flt_op op ); /*---------------------------------------------------------*/ /*--- Condition code stuff ---*/ Index: priv/guest_mips_helpers.c =================================================================== --- priv/guest_mips_helpers.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_mips_helpers.c (.../trunk) (revision 2863) @@ -44,7 +44,7 @@ these functions are generated by the back end. 
*/ -#define ALWAYSDEFD32(field) \ +#define ALWAYSDEFD32(field) \ { offsetof(VexGuestMIPS32State, field), \ (sizeof ((VexGuestMIPS32State*)0)->field) } @@ -105,38 +105,38 @@ vex_state->guest_LO = 0; /* Multiply and divide register lower result */ /* FPU Registers */ - vex_state->guest_f0 = 0x7ff80000; /* Floting point general purpose registers */ - vex_state->guest_f1 = 0x7ff80000; - vex_state->guest_f2 = 0x7ff80000; - vex_state->guest_f3 = 0x7ff80000; - vex_state->guest_f4 = 0x7ff80000; - vex_state->guest_f5 = 0x7ff80000; - vex_state->guest_f6 = 0x7ff80000; - vex_state->guest_f7 = 0x7ff80000; - vex_state->guest_f8 = 0x7ff80000; - vex_state->guest_f9 = 0x7ff80000; - vex_state->guest_f10 = 0x7ff80000; - vex_state->guest_f11 = 0x7ff80000; - vex_state->guest_f12 = 0x7ff80000; - vex_state->guest_f13 = 0x7ff80000; - vex_state->guest_f14 = 0x7ff80000; - vex_state->guest_f15 = 0x7ff80000; - vex_state->guest_f16 = 0x7ff80000; - vex_state->guest_f17 = 0x7ff80000; - vex_state->guest_f18 = 0x7ff80000; - vex_state->guest_f19 = 0x7ff80000; - vex_state->guest_f20 = 0x7ff80000; - vex_state->guest_f21 = 0x7ff80000; - vex_state->guest_f22 = 0x7ff80000; - vex_state->guest_f23 = 0x7ff80000; - vex_state->guest_f24 = 0x7ff80000; - vex_state->guest_f25 = 0x7ff80000; - vex_state->guest_f26 = 0x7ff80000; - vex_state->guest_f27 = 0x7ff80000; - vex_state->guest_f28 = 0x7ff80000; - vex_state->guest_f29 = 0x7ff80000; - vex_state->guest_f30 = 0x7ff80000; - vex_state->guest_f31 = 0x7ff80000; + vex_state->guest_f0 = 0x7ff800007ff80000ULL; /* Floting point GP registers */ + vex_state->guest_f1 = 0x7ff800007ff80000ULL; + vex_state->guest_f2 = 0x7ff800007ff80000ULL; + vex_state->guest_f3 = 0x7ff800007ff80000ULL; + vex_state->guest_f4 = 0x7ff800007ff80000ULL; + vex_state->guest_f5 = 0x7ff800007ff80000ULL; + vex_state->guest_f6 = 0x7ff800007ff80000ULL; + vex_state->guest_f7 = 0x7ff800007ff80000ULL; + vex_state->guest_f8 = 0x7ff800007ff80000ULL; + vex_state->guest_f9 = 0x7ff800007ff80000ULL; + vex_state->guest_f10 = 0x7ff800007ff80000ULL; + vex_state->guest_f11 = 0x7ff800007ff80000ULL; + vex_state->guest_f12 = 0x7ff800007ff80000ULL; + vex_state->guest_f13 = 0x7ff800007ff80000ULL; + vex_state->guest_f14 = 0x7ff800007ff80000ULL; + vex_state->guest_f15 = 0x7ff800007ff80000ULL; + vex_state->guest_f16 = 0x7ff800007ff80000ULL; + vex_state->guest_f17 = 0x7ff800007ff80000ULL; + vex_state->guest_f18 = 0x7ff800007ff80000ULL; + vex_state->guest_f19 = 0x7ff800007ff80000ULL; + vex_state->guest_f20 = 0x7ff800007ff80000ULL; + vex_state->guest_f21 = 0x7ff800007ff80000ULL; + vex_state->guest_f22 = 0x7ff800007ff80000ULL; + vex_state->guest_f23 = 0x7ff800007ff80000ULL; + vex_state->guest_f24 = 0x7ff800007ff80000ULL; + vex_state->guest_f25 = 0x7ff800007ff80000ULL; + vex_state->guest_f26 = 0x7ff800007ff80000ULL; + vex_state->guest_f27 = 0x7ff800007ff80000ULL; + vex_state->guest_f28 = 0x7ff800007ff80000ULL; + vex_state->guest_f29 = 0x7ff800007ff80000ULL; + vex_state->guest_f30 = 0x7ff800007ff80000ULL; + vex_state->guest_f31 = 0x7ff800007ff80000ULL; vex_state->guest_FIR = 0; /* FP implementation and revision register */ vex_state->guest_FCCR = 0; /* FP condition codes register */ @@ -150,8 +150,8 @@ vex_state->guest_EMNOTE = 0; /* For clflush: record start and length of area to invalidate */ - vex_state->guest_TISTART = 0; - vex_state->guest_TILEN = 0; + vex_state->guest_CMSTART = 0; + vex_state->guest_CMLEN = 0; vex_state->host_EvC_COUNTER = 0; vex_state->host_EvC_FAILADDR = 0; @@ -212,38 +212,38 @@ vex_state->guest_LO = 0; /* Multiply and divide 
register lower result */ /* FPU Registers */ - vex_state->guest_f0 = 0xffffffffffffffffULL; /* Floting point registers */ - vex_state->guest_f1 = 0xffffffffffffffffULL; - vex_state->guest_f2 = 0xffffffffffffffffULL; - vex_state->guest_f3 = 0xffffffffffffffffULL; - vex_state->guest_f4 = 0xffffffffffffffffULL; - vex_state->guest_f5 = 0xffffffffffffffffULL; - vex_state->guest_f6 = 0xffffffffffffffffULL; - vex_state->guest_f7 = 0xffffffffffffffffULL; - vex_state->guest_f8 = 0xffffffffffffffffULL; - vex_state->guest_f9 = 0xffffffffffffffffULL; - vex_state->guest_f10 = 0xffffffffffffffffULL; - vex_state->guest_f11 = 0xffffffffffffffffULL; - vex_state->guest_f12 = 0xffffffffffffffffULL; - vex_state->guest_f13 = 0xffffffffffffffffULL; - vex_state->guest_f14 = 0xffffffffffffffffULL; - vex_state->guest_f15 = 0xffffffffffffffffULL; - vex_state->guest_f16 = 0xffffffffffffffffULL; - vex_state->guest_f17 = 0xffffffffffffffffULL; - vex_state->guest_f18 = 0xffffffffffffffffULL; - vex_state->guest_f19 = 0xffffffffffffffffULL; - vex_state->guest_f20 = 0xffffffffffffffffULL; - vex_state->guest_f21 = 0xffffffffffffffffULL; - vex_state->guest_f22 = 0xffffffffffffffffULL; - vex_state->guest_f23 = 0xffffffffffffffffULL; - vex_state->guest_f24 = 0xffffffffffffffffULL; - vex_state->guest_f25 = 0xffffffffffffffffULL; - vex_state->guest_f26 = 0xffffffffffffffffULL; - vex_state->guest_f27 = 0xffffffffffffffffULL; - vex_state->guest_f28 = 0xffffffffffffffffULL; - vex_state->guest_f29 = 0xffffffffffffffffULL; - vex_state->guest_f30 = 0xffffffffffffffffULL; - vex_state->guest_f31 = 0xffffffffffffffffULL; + vex_state->guest_f0 = 0x7ff800007ff80000ULL; /* Floting point registers */ + vex_state->guest_f1 = 0x7ff800007ff80000ULL; + vex_state->guest_f2 = 0x7ff800007ff80000ULL; + vex_state->guest_f3 = 0x7ff800007ff80000ULL; + vex_state->guest_f4 = 0x7ff800007ff80000ULL; + vex_state->guest_f5 = 0x7ff800007ff80000ULL; + vex_state->guest_f6 = 0x7ff800007ff80000ULL; + vex_state->guest_f7 = 0x7ff800007ff80000ULL; + vex_state->guest_f8 = 0x7ff800007ff80000ULL; + vex_state->guest_f9 = 0x7ff800007ff80000ULL; + vex_state->guest_f10 = 0x7ff800007ff80000ULL; + vex_state->guest_f11 = 0x7ff800007ff80000ULL; + vex_state->guest_f12 = 0x7ff800007ff80000ULL; + vex_state->guest_f13 = 0x7ff800007ff80000ULL; + vex_state->guest_f14 = 0x7ff800007ff80000ULL; + vex_state->guest_f15 = 0x7ff800007ff80000ULL; + vex_state->guest_f16 = 0x7ff800007ff80000ULL; + vex_state->guest_f17 = 0x7ff800007ff80000ULL; + vex_state->guest_f18 = 0x7ff800007ff80000ULL; + vex_state->guest_f19 = 0x7ff800007ff80000ULL; + vex_state->guest_f20 = 0x7ff800007ff80000ULL; + vex_state->guest_f21 = 0x7ff800007ff80000ULL; + vex_state->guest_f22 = 0x7ff800007ff80000ULL; + vex_state->guest_f23 = 0x7ff800007ff80000ULL; + vex_state->guest_f24 = 0x7ff800007ff80000ULL; + vex_state->guest_f25 = 0x7ff800007ff80000ULL; + vex_state->guest_f26 = 0x7ff800007ff80000ULL; + vex_state->guest_f27 = 0x7ff800007ff80000ULL; + vex_state->guest_f28 = 0x7ff800007ff80000ULL; + vex_state->guest_f29 = 0x7ff800007ff80000ULL; + vex_state->guest_f30 = 0x7ff800007ff80000ULL; + vex_state->guest_f31 = 0x7ff800007ff80000ULL; vex_state->guest_FIR = 0; /* FP implementation and revision register */ vex_state->guest_FCCR = 0; /* FP condition codes register */ @@ -258,8 +258,8 @@ vex_state->guest_EMNOTE = 0; /* For clflush: record start and length of area to invalidate */ - vex_state->guest_TISTART = 0; - vex_state->guest_TILEN = 0; + vex_state->guest_CMSTART = 0; + vex_state->guest_CMLEN = 0; 
vex_state->host_EvC_COUNTER = 0; vex_state->host_EvC_FAILADDR = 0; @@ -375,8 +375,8 @@ /* 0 */ ALWAYSDEFD32(guest_r0), /* 1 */ ALWAYSDEFD32(guest_r1), /* 2 */ ALWAYSDEFD32(guest_EMNOTE), - /* 3 */ ALWAYSDEFD32(guest_TISTART), - /* 4 */ ALWAYSDEFD32(guest_TILEN), + /* 3 */ ALWAYSDEFD32(guest_CMSTART), + /* 4 */ ALWAYSDEFD32(guest_CMLEN), /* 5 */ ALWAYSDEFD32(guest_r29), /* 6 */ ALWAYSDEFD32(guest_r31), /* 7 */ ALWAYSDEFD32(guest_ULR) @@ -402,8 +402,8 @@ .alwaysDefd = { /* 0 */ ALWAYSDEFD64 (guest_r0), /* 1 */ ALWAYSDEFD64 (guest_EMNOTE), - /* 2 */ ALWAYSDEFD64 (guest_TISTART), - /* 3 */ ALWAYSDEFD64 (guest_TILEN), + /* 2 */ ALWAYSDEFD64 (guest_CMSTART), + /* 3 */ ALWAYSDEFD64 (guest_CMLEN), /* 4 */ ALWAYSDEFD64 (guest_r29), /* 5 */ ALWAYSDEFD64 (guest_r31), /* 6 */ ALWAYSDEFD64 (guest_ULR) @@ -1107,145 +1107,324 @@ } #endif -#define ASM_VOLATILE_ROUND32(fs, inst) \ - __asm__ volatile("cfc1 $t0, $31" "\n\t" \ - "ctc1 %2, $31" "\n\t" \ - "mtc1 %1, $f0" "\n\t" \ - ""#inst" $f0, $f0" "\n\t" \ - "cfc1 %0, $31" "\n\t" \ - "ctc1 $t0, $31" "\n\t" \ +#define ASM_VOLATILE_UNARY32(inst) \ + __asm__ volatile("cfc1 $t0, $31" "\n\t" \ + "ctc1 %2, $31" "\n\t" \ + "mtc1 %1, $f20" "\n\t" \ + #inst" $f20, $f20" "\n\t" \ + "cfc1 %0, $31" "\n\t" \ + "ctc1 $t0, $31" "\n\t" \ : "=r" (ret) \ - : "r" (loVal), "r" (fcsr) \ - : "t0", "$f0", "$f1" \ + : "r" (loFsVal), "r" (fcsr) \ + : "t0", "$f20" \ ); -#define ASM_VOLATILE_ROUND32_DOUBLE(fs, inst) \ - __asm__ volatile("cfc1 $t0, $31" "\n\t" \ - "ctc1 %3, $31" "\n\t" \ - "mtc1 %1, $f0" "\n\t" \ - "mtc1 %2, $f1" "\n\t" \ - ""#inst" $f0, $f0" "\n\t" \ - "cfc1 %0, $31" "\n\t" \ - "ctc1 $t0, $31" "\n\t" \ +#define ASM_VOLATILE_UNARY32_DOUBLE(inst) \ + __asm__ volatile("cfc1 $t0, $31" "\n\t" \ + "ctc1 %3, $31" "\n\t" \ + "mtc1 %1, $f20" "\n\t" \ + "mtc1 %2, $f21" "\n\t" \ + #inst" $f20, $f20" "\n\t" \ + "cfc1 %0, $31" "\n\t" \ + "ctc1 $t0, $31" "\n\t" \ : "=r" (ret) \ - : "r" (loVal), "r" (hiVal), "r" (fcsr) \ - : "t0", "$f0", "$f1" \ + : "r" (loFsVal), "r" (hiFsVal), "r" (fcsr) \ + : "t0", "$f20", "$f21" \ ); -#define ASM_VOLATILE_ROUND64(fs, inst) \ - __asm__ volatile("cfc1 $t0, $31" "\n\t" \ - "ctc1 %2, $31" "\n\t" \ - "dmtc1 %1, $f0" "\n\t" \ - ""#inst" $f0, $f0" "\n\t" \ - "cfc1 %0, $31" "\n\t" \ - "ctc1 $t0, $31" "\n\t" \ +#define ASM_VOLATILE_UNARY64(inst) \ + __asm__ volatile("cfc1 $t0, $31" "\n\t" \ + "ctc1 %2, $31" "\n\t" \ + "ldc1 $f24, 0(%1)" "\n\t" \ + #inst" $f24, $f24" "\n\t" \ + "cfc1 %0, $31" "\n\t" \ + "ctc1 $t0, $31" "\n\t" \ : "=r" (ret) \ - : "r" (addr[fs]), "r" (fcsr) \ - : "t0", "$f0" \ + : "r" (&(addr[fs])), "r" (fcsr) \ + : "t0", "$f24" \ ); +#define ASM_VOLATILE_BINARY32(inst) \ + __asm__ volatile("cfc1 $t0, $31" "\n\t" \ + "ctc1 %3, $31" "\n\t" \ + "mtc1 %1, $f20" "\n\t" \ + "mtc1 %2, $f22" "\n\t" \ + #inst" $f20, $f20, $f22" "\n\t" \ + "cfc1 %0, $31" "\n\t" \ + "ctc1 $t0, $31" "\n\t" \ + : "=r" (ret) \ + : "r" (loFsVal), "r" (loFtVal), "r" (fcsr) \ + : "t0", "$f20", "$f22" \ + ); + +#define ASM_VOLATILE_BINARY32_DOUBLE(inst) \ + __asm__ volatile("cfc1 $t0, $31" "\n\t" \ + "ctc1 %5, $31" "\n\t" \ + "mtc1 %1, $f20" "\n\t" \ + "mtc1 %2, $f21" "\n\t" \ + "mtc1 %3, $f22" "\n\t" \ + "mtc1 %4, $f23" "\n\t" \ + #inst" $f20, $f20, $f22" "\n\t" \ + "cfc1 %0, $31" "\n\t" \ + "ctc1 $t0, $31" "\n\t" \ + : "=r" (ret) \ + : "r" (loFsVal), "r" (hiFsVal), "r" (loFtVal), \ + "r" (hiFtVal), "r" (fcsr) \ + : "t0", "$f20", "$f21", "$f22", "$f23" \ + ); + +#define ASM_VOLATILE_BINARY64(inst) \ + __asm__ volatile("cfc1 $t0, $31" "\n\t" \ + "ctc1 %3, $31" "\n\t" \ + 
"ldc1 $f24, 0(%1)" "\n\t" \ + "ldc1 $f26, 0(%2)" "\n\t" \ + #inst" $f24, $f24, $f26" "\n\t" \ + "cfc1 %0, $31" "\n\t" \ + "ctc1 $t0, $31" "\n\t" \ + : "=r" (ret) \ + : "r" (&(addr[fs])), "r" (&(addr[ft])), "r" (fcsr) \ + : "t0", "$f24", "$f26" \ + ); + /* TODO: Add cases for all fpu instructions because all fpu instructions are change the value of FCSR register. */ -extern UInt mips_dirtyhelper_calculate_FCSR ( void* gs, UInt fs, flt_op inst ) +extern UInt mips_dirtyhelper_calculate_FCSR_fp32 ( void* gs, UInt fs, UInt ft, + flt_op inst ) { UInt ret = 0; #if defined(__mips__) + VexGuestMIPS32State* guest_state = (VexGuestMIPS32State*)gs; + UInt loFsVal, hiFsVal, loFtVal, hiFtVal; +#if defined (_MIPSEL) + ULong *addr = (ULong *)&guest_state->guest_f0; + loFsVal = (UInt)addr[fs]; + hiFsVal = (UInt)addr[fs+1]; + loFtVal = (UInt)addr[ft]; + hiFtVal = (UInt)addr[ft+1]; +#elif defined (_MIPSEB) + UInt *addr = (UInt *)&guest_state->guest_f0; + loFsVal = (UInt)addr[fs*2]; + hiFsVal = (UInt)addr[fs*2+2]; + loFtVal = (UInt)addr[ft*2]; + hiFtVal = (UInt)addr[ft*2+2]; +#endif + UInt fcsr = guest_state->guest_FCSR; + switch (inst) { + case ROUNDWD: + ASM_VOLATILE_UNARY32_DOUBLE(round.w.d) + break; + case FLOORWS: + ASM_VOLATILE_UNARY32(floor.w.s) + break; + case FLOORWD: + ASM_VOLATILE_UNARY32_DOUBLE(floor.w.d) + break; + case TRUNCWS: + ASM_VOLATILE_UNARY32(trunc.w.s) + break; + case TRUNCWD: + ASM_VOLATILE_UNARY32_DOUBLE(trunc.w.d) + break; + case CEILWS: + ASM_VOLATILE_UNARY32(ceil.w.s) + break; + case CEILWD: + ASM_VOLATILE_UNARY32_DOUBLE(ceil.w.d) + break; + case CVTDS: + ASM_VOLATILE_UNARY32(cvt.d.s) + break; + case CVTDW: + ASM_VOLATILE_UNARY32(cvt.d.w) + break; + case CVTSW: + ASM_VOLATILE_UNARY32(cvt.s.w) + break; + case CVTSD: + ASM_VOLATILE_UNARY32_DOUBLE(cvt.s.d) + break; + case CVTWS: + ASM_VOLATILE_UNARY32(cvt.w.s) + break; + case CVTWD: + ASM_VOLATILE_UNARY32_DOUBLE(cvt.w.d) + break; + case ROUNDWS: + ASM_VOLATILE_UNARY32(round.w.s) + break; +#if ((__mips == 32) && defined(__mips_isa_rev) && (__mips_isa_rev >= 2)) \ + || (__mips == 64) + case CEILLS: + ASM_VOLATILE_UNARY32(ceil.l.s) + break; + case CEILLD: + ASM_VOLATILE_UNARY32_DOUBLE(ceil.l.d) + break; + case CVTDL: + ASM_VOLATILE_UNARY32_DOUBLE(cvt.d.l) + break; + case CVTLS: + ASM_VOLATILE_UNARY32(cvt.l.s) + break; + case CVTLD: + ASM_VOLATILE_UNARY32_DOUBLE(cvt.l.d) + break; + case CVTSL: + ASM_VOLATILE_UNARY32_DOUBLE(cvt.s.l) + break; + case FLOORLS: + ASM_VOLATILE_UNARY32(floor.l.s) + break; + case FLOORLD: + ASM_VOLATILE_UNARY32_DOUBLE(floor.l.d) + break; + case ROUNDLS: + ASM_VOLATILE_UNARY32(round.l.s) + break; + case ROUNDLD: + ASM_VOLATILE_UNARY32_DOUBLE(round.l.d) + break; + case TRUNCLS: + ASM_VOLATILE_UNARY32(trunc.l.s) + break; + case TRUNCLD: + ASM_VOLATILE_UNARY32_DOUBLE(trunc.l.d) + break; +#endif + case ADDS: + ASM_VOLATILE_BINARY32(add.s) + break; + case ADDD: + ASM_VOLATILE_BINARY32_DOUBLE(add.d) + break; + case SUBS: + ASM_VOLATILE_BINARY32(sub.s) + break; + case SUBD: + ASM_VOLATILE_BINARY32_DOUBLE(sub.d) + break; + case DIVS: + ASM_VOLATILE_BINARY32(div.s) + break; + default: + vassert(0); + break; + } +#endif + return ret; +} + +/* TODO: Add cases for all fpu instructions because all fpu instructions are + change the value of FCSR register. 
*/ +extern UInt mips_dirtyhelper_calculate_FCSR_fp64 ( void* gs, UInt fs, UInt ft, + flt_op inst ) +{ + UInt ret = 0; +#if defined(__mips__) #if defined(VGA_mips32) VexGuestMIPS32State* guest_state = (VexGuestMIPS32State*)gs; - UInt *addr = (UInt *)&guest_state->guest_f0; - UInt loVal = addr[fs]; - UInt hiVal = addr[fs+1]; -#define ASM_VOLATILE_ROUND(fs, inst) ASM_VOLATILE_ROUND32(fs, inst) -#define ASM_VOLATILE_ROUND_DOUBLE(fs, inst) ASM_VOLATILE_ROUND32_DOUBLE(fs, inst) #else VexGuestMIPS64State* guest_state = (VexGuestMIPS64State*)gs; +#endif ULong *addr = (ULong *)&guest_state->guest_f0; -#define ASM_VOLATILE_ROUND(fs, inst) ASM_VOLATILE_ROUND64(fs, inst) -#define ASM_VOLATILE_ROUND_DOUBLE(fs, inst) ASM_VOLATILE_ROUND64(fs, inst) -#endif - UInt fcsr = guest_state->guest_FCSR; + UInt fcsr = guest_state->guest_FCSR; switch (inst) { case ROUNDWD: - ASM_VOLATILE_ROUND_DOUBLE(fs, round.w.d) + ASM_VOLATILE_UNARY64(round.w.d) break; case FLOORWS: - ASM_VOLATILE_ROUND(fs, floor.w.s) + ASM_VOLATILE_UNARY64(floor.w.s) break; case FLOORWD: - ASM_VOLATILE_ROUND_DOUBLE(fs, floor.w.d) + ASM_VOLATILE_UNARY64(floor.w.d) break; case TRUNCWS: - ASM_VOLATILE_ROUND(fs, trunc.w.s) + ASM_VOLATILE_UNARY64(trunc.w.s) break; case TRUNCWD: - ASM_VOLATILE_ROUND_DOUBLE(fs, trunc.w.d) + ASM_VOLATILE_UNARY64(trunc.w.d) break; case CEILWS: - ASM_VOLATILE_ROUND(fs, ceil.w.s) + ASM_VOLATILE_UNARY64(ceil.w.s) break; case CEILWD: - ASM_VOLATILE_ROUND_DOUBLE(fs, ceil.w.d) + ASM_VOLATILE_UNARY64(ceil.w.d) break; case CVTDS: - ASM_VOLATILE_ROUND(fs, cvt.d.s) + ASM_VOLATILE_UNARY64(cvt.d.s) break; case CVTDW: - ASM_VOLATILE_ROUND(fs, cvt.d.w) + ASM_VOLATILE_UNARY64(cvt.d.w) break; case CVTSW: - ASM_VOLATILE_ROUND(fs, cvt.s.w) + ASM_VOLATILE_UNARY64(cvt.s.w) break; case CVTSD: - ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.s.d) + ASM_VOLATILE_UNARY64(cvt.s.d) break; case CVTWS: - ASM_VOLATILE_ROUND(fs, cvt.w.s) + ASM_VOLATILE_UNARY64(cvt.w.s) break; case CVTWD: - ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.w.d) + ASM_VOLATILE_UNARY64(cvt.w.d) break; case ROUNDWS: - ASM_VOLATILE_ROUND(fs, round.w.s) + ASM_VOLATILE_UNARY64(round.w.s) break; #if ((__mips == 32) && defined(__mips_isa_rev) && (__mips_isa_rev >= 2)) \ || (__mips == 64) case CEILLS: - ASM_VOLATILE_ROUND(fs, ceil.l.s) + ASM_VOLATILE_UNARY64(ceil.l.s) break; case CEILLD: - ASM_VOLATILE_ROUND_DOUBLE(fs, ceil.l.d) + ASM_VOLATILE_UNARY64(ceil.l.d) break; case CVTDL: - ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.d.l) + ASM_VOLATILE_UNARY64(cvt.d.l) break; case CVTLS: - ASM_VOLATILE_ROUND(fs, cvt.l.s) + ASM_VOLATILE_UNARY64(cvt.l.s) break; case CVTLD: - ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.l.d) + ASM_VOLATILE_UNARY64(cvt.l.d) break; case CVTSL: - ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.s.l) + ASM_VOLATILE_UNARY64(cvt.s.l) break; case FLOORLS: - ASM_VOLATILE_ROUND(fs, floor.l.s) + ASM_VOLATILE_UNARY64(floor.l.s) break; case FLOORLD: - ASM_VOLATILE_ROUND_DOUBLE(fs, floor.l.d) + ASM_VOLATILE_UNARY64(floor.l.d) break; case ROUNDLS: - ASM_VOLATILE_ROUND(fs, round.l.s) + ASM_VOLATILE_UNARY64(round.l.s) break; case ROUNDLD: - ASM_VOLATILE_ROUND_DOUBLE(fs, round.l.d) + ASM_VOLATILE_UNARY64(round.l.d) break; case TRUNCLS: - ASM_VOLATILE_ROUND(fs, trunc.l.s) + ASM_VOLATILE_UNARY64(trunc.l.s) break; case TRUNCLD: - ASM_VOLATILE_ROUND_DOUBLE(fs, trunc.l.d) + ASM_VOLATILE_UNARY64(trunc.l.d) break; #endif + case ADDS: + ASM_VOLATILE_BINARY64(add.s) + break; + case ADDD: + ASM_VOLATILE_BINARY64(add.d) + break; + case SUBS: + ASM_VOLATILE_BINARY64(sub.s) + break; + case SUBD: + ASM_VOLATILE_BINARY64(sub.d) + break; + case 
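/* Editor's sketch (not part of the patch): the *_calculate_FCSR_* dirty
   helpers here work by running the floating-point instruction natively
   under the guest's FCSR and reading the resulting status word back.  A
   rough portable analogue of that idea using C99 <fenv.h> instead of the
   MIPS inline assembly above; the helper name and the use of host
   exception flags as a stand-in for the FCSR cause/flag bits are
   assumptions made purely for illustration. */
#include <fenv.h>

static int sketch_native_fp_status ( float a, float b, float* res )
{
   /* Start from a clean exception state, run the operation on the host
      FPU, then harvest whatever flags it raised. */
   feclearexcept(FE_ALL_EXCEPT);
   *res = a + b;                        /* e.g. the add.s case */
   return fetestexcept(FE_ALL_EXCEPT);
}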
DIVS: + ASM_VOLATILE_BINARY64(div.s) + break; default: vassert(0); break; Index: priv/guest_mips_toIR.c =================================================================== --- priv/guest_mips_toIR.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_mips_toIR.c (.../trunk) (revision 2863) @@ -72,6 +72,9 @@ disInstr_MIPS below. */ static Bool mode64 = False; +/* CPU has FPU and 32 dbl. prec. FP registers. */ +static Bool fp_mode64 = False; + /* Define 1.0 in single and double precision. */ #define ONE_SINGLE 0x3F800000 #define ONE_DOUBLE 0x3FF0000000000000ULL @@ -466,6 +469,13 @@ assign(t1, binop(Iop_Add64, getIReg(rs), \ mkU64(extend_s_16to64(imm)))); \ +#define LOADX_STORE_PATTERN \ + t1 = newTemp(mode64 ? Ity_I64 : Ity_I32); \ + if(!mode64) \ + assign(t1, binop(Iop_Add32, getIReg(regRs), getIReg(regRt))); \ + else \ + assign(t1, binop(Iop_Add64, getIReg(regRs), getIReg(regRt))); + #define LWX_SWX_PATTERN64 \ t2 = newTemp(Ity_I64); \ assign(t2, binop(Iop_And64, mkexpr(t1), mkU64(0xFFFFFFFFFFFFFFFCULL))); \ @@ -534,6 +544,11 @@ binop(Iop_Shr32, getFCSR(), mkU8(24+cc))), \ mkU32(0x1))); +#define ILLEGAL_INSTRUCTON \ + putPC(mkU32(guest_PC_curr_instr + 4)); \ + dres.jk_StopHere = Ijk_SigILL; \ + dres.whatNext = Dis_StopHere; + /*------------------------------------------------------------*/ /*--- Field helpers ---*/ /*------------------------------------------------------------*/ @@ -1090,38 +1105,84 @@ /* fs - fpu source register number. inst - fpu instruction that needs to be executed. - sz32 - size of source register. */ -static void calculateFCSR(UInt fs, UInt inst, Bool sz32) + sz32 - size of source register. + opN - number of operads: + 1 - unary operation. + 2 - binary operation. */ +static void calculateFCSR(UInt fs, UInt ft, UInt inst, Bool sz32, UInt opN) { IRDirty *d; IRTemp fcsr = newTemp(Ity_I32); - /* IRExpr_BBPTR() => Need to pass pointer to guest - state to helper. */ - d = unsafeIRDirty_1_N(fcsr, 0, - "mips_dirtyhelper_calculate_FCSR", - &mips_dirtyhelper_calculate_FCSR, - mkIRExprVec_3(IRExpr_BBPTR(), - mkU32(fs), - mkU32(inst))); - - /* Declare we're reading guest state. */ - if (!mode64 && !sz32) - d->nFxState = 2; + /* IRExpr_BBPTR() => Need to pass pointer to guest state to helper. */ + if (fp_mode64) + d = unsafeIRDirty_1_N(fcsr, 0, + "mips_dirtyhelper_calculate_FCSR_fp64", + &mips_dirtyhelper_calculate_FCSR_fp64, + mkIRExprVec_4(IRExpr_BBPTR(), + mkU32(fs), + mkU32(ft), + mkU32(inst))); else - d->nFxState = 1; - vex_bzero(&d->fxState, sizeof(d->fxState)); + d = unsafeIRDirty_1_N(fcsr, 0, + "mips_dirtyhelper_calculate_FCSR_fp32", + &mips_dirtyhelper_calculate_FCSR_fp32, + mkIRExprVec_4(IRExpr_BBPTR(), + mkU32(fs), + mkU32(ft), + mkU32(inst))); - d->fxState[0].fx = Ifx_Read; /* read */ - d->fxState[0].offset = floatGuestRegOffset(fs); - if (mode64) - d->fxState[0].size = sizeof(ULong); - else + if (opN == 1) { /* Unary operation. */ + /* Declare we're reading guest state. 
*/ + if (sz32 || fp_mode64) + d->nFxState = 2; + else + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + + d->fxState[0].fx = Ifx_Read; /* read */ + if (mode64) + d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_FCSR); + else + d->fxState[0].offset = offsetof(VexGuestMIPS32State, guest_FCSR); d->fxState[0].size = sizeof(UInt); + d->fxState[1].fx = Ifx_Read; /* read */ + d->fxState[1].offset = floatGuestRegOffset(fs); + d->fxState[1].size = sizeof(ULong); - if (!mode64 && !sz32) { + if (!(sz32 || fp_mode64)) { + d->fxState[2].fx = Ifx_Read; /* read */ + d->fxState[2].offset = floatGuestRegOffset(fs+1); + d->fxState[2].size = sizeof(ULong); + } + } else if (opN == 2) { /* Binary operation. */ + /* Declare we're reading guest state. */ + if (sz32 || fp_mode64) + d->nFxState = 3; + else + d->nFxState = 5; + vex_bzero(&d->fxState, sizeof(d->fxState)); + + d->fxState[0].fx = Ifx_Read; /* read */ + if (mode64) + d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_FCSR); + else + d->fxState[0].offset = offsetof(VexGuestMIPS32State, guest_FCSR); + d->fxState[0].size = sizeof(UInt); d->fxState[1].fx = Ifx_Read; /* read */ - d->fxState[1].offset = floatGuestRegOffset(fs+1); - d->fxState[1].size = sizeof(UInt); + d->fxState[1].offset = floatGuestRegOffset(fs); + d->fxState[1].size = sizeof(ULong); + d->fxState[2].fx = Ifx_Read; /* read */ + d->fxState[2].offset = floatGuestRegOffset(ft); + d->fxState[2].size = sizeof(ULong); + + if (!(sz32 || fp_mode64)) { + d->fxState[3].fx = Ifx_Read; /* read */ + d->fxState[3].offset = floatGuestRegOffset(fs+1); + d->fxState[3].size = sizeof(ULong); + d->fxState[4].fx = Ifx_Read; /* read */ + d->fxState[4].offset = floatGuestRegOffset(ft+1); + d->fxState[4].size = sizeof(ULong); + } } stmt(IRStmt_Dirty(d)); @@ -1146,6 +1207,12 @@ stmt(IRStmt_Put(integerGuestRegOffset(archreg), e)); } +static IRExpr *mkNarrowTo32(IRType ty, IRExpr * src) +{ + vassert(ty == Ity_I32 || ty == Ity_I64); + return ty == Ity_I64 ? unop(Iop_64to32, src) : src; +} + static void putLO(IRExpr * e) { if (mode64) { @@ -1239,12 +1306,6 @@ return 0; } -static IRExpr *mkNarrowTo32(IRType ty, IRExpr * src) -{ - vassert(ty == Ity_I32 || ty == Ity_I64); - return ty == Ity_I64 ? unop(Iop_64to32, src) : src; -} - static IRExpr *getLoFromF64(IRType ty, IRExpr * src) { vassert(ty == Ity_F32 || ty == Ity_F64); @@ -1340,21 +1401,21 @@ (UInt) branch_offset), OFFB_PC); } -static IRExpr *getFReg(UInt dregNo) +static IRExpr *getFReg(UInt fregNo) { - vassert(dregNo < 32); - IRType ty = mode64 ? Ity_F64 : Ity_F32; - return IRExpr_Get(floatGuestRegOffset(dregNo), ty); + vassert(fregNo < 32); + IRType ty = fp_mode64 ? Ity_F64 : Ity_F32; + return IRExpr_Get(floatGuestRegOffset(fregNo), ty); } static IRExpr *getDReg(UInt dregNo) { - if (mode64) { - vassert(dregNo < 32); - IRType ty = Ity_F64; - return IRExpr_Get(floatGuestRegOffset(dregNo), ty); + vassert(dregNo < 32); + if (fp_mode64) { + return IRExpr_Get(floatGuestRegOffset(dregNo), Ity_F64); } else { - vassert(dregNo < 32); + /* Read a floating point register pair and combine their contents into a + 64-bit value */ IRTemp t0 = newTemp(Ity_F32); IRTemp t1 = newTemp(Ity_F32); IRTemp t2 = newTemp(Ity_F64); @@ -1377,7 +1438,7 @@ static void putFReg(UInt dregNo, IRExpr * e) { vassert(dregNo < 32); - IRType ty = mode64 ? Ity_F64 : Ity_F32; + IRType ty = fp_mode64 ? 
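/* Editor's sketch (not part of the patch): when fp_mode64 is false the
   guest has 32-bit FPRs, so getDReg above assembles a 64-bit double from
   an even/odd register pair.  The same recombination in plain C,
   assuming the usual FR=0 pairing where the even register holds the low
   word; the helper name is hypothetical. */
#include <stdint.h>
#include <string.h>

static double sketch_pair_to_double ( uint32_t even_fpr, uint32_t odd_fpr )
{
   uint64_t bits = ((uint64_t)odd_fpr << 32) | even_fpr;
   double   d;
   memcpy(&d, &bits, sizeof d);   /* bit-for-bit reinterpretation */
   return d;
}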
Ity_F64 : Ity_F32; vassert(typeOfIRExpr(irsb->tyenv, e) == ty); stmt(IRStmt_Put(floatGuestRegOffset(dregNo), e)); } @@ -1384,7 +1445,7 @@ static void putDReg(UInt dregNo, IRExpr * e) { - if (mode64) { + if (fp_mode64) { vassert(dregNo < 32); IRType ty = Ity_F64; vassert(typeOfIRExpr(irsb->tyenv, e) == ty); @@ -1642,22 +1703,22 @@ static const HChar* showCondCode(UInt code) { const HChar* ret; switch (code) { - case 0: ret = "F"; break; - case 1: ret = "UN"; break; - case 2: ret = "EQ"; break; - case 3: ret = "UEQ"; break; - case 4: ret = "OLT"; break; - case 5: ret = "ULT"; break; - case 6: ret = "OLE"; break; - case 7: ret = "ULE"; break; - case 8: ret = "SF"; break; - case 9: ret = "NGLE"; break; - case 10: ret = "SEQ"; break; - case 11: ret = "NGL"; break; - case 12: ret = "LT"; break; - case 13: ret = "NGE"; break; - case 14: ret = "LE"; break; - case 15: ret = "NGT"; break; + case 0: ret = "f"; break; + case 1: ret = "un"; break; + case 2: ret = "eq"; break; + case 3: ret = "ueq"; break; + case 4: ret = "olt"; break; + case 5: ret = "ult"; break; + case 6: ret = "ole"; break; + case 7: ret = "ule"; break; + case 8: ret = "sf"; break; + case 9: ret = "ngle"; break; + case 10: ret = "seq"; break; + case 11: ret = "ngl"; break; + case 12: ret = "lt"; break; + case 13: ret = "nge"; break; + case 14: ret = "le"; break; + case 15: ret = "ngt"; break; default: vpanic("showCondCode"); break; } return ret; @@ -1678,8 +1739,8 @@ UInt fpc_cc = get_fpc_cc(cins); switch (fmt) { case 0x10: { /* C.cond.S */ - DIP("C.%s.S %d, f%d, f%d", showCondCode(cond), fpc_cc, fs, ft); - if (mode64) { + DIP("c.%s.s %d, f%d, f%d", showCondCode(cond), fpc_cc, fs, ft); + if (fp_mode64) { t0 = newTemp(Ity_I32); t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); @@ -1694,7 +1755,8 @@ getFReg(ft)))); assign(ccIR, binop(Iop_CmpF64, mkexpr(tmp5), mkexpr(tmp6))); - putHI(mkWidenFrom32(Ity_I64, mkexpr(ccIR), True)); + putHI(mkWidenFrom32(mode64 ? Ity_I64: Ity_I32, + mkexpr(ccIR), True)); /* Map compare result from IR to MIPS FP cmp result | MIPS | IR -------------------------- @@ -1711,7 +1773,8 @@ binop(Iop_And32, binop(Iop_Xor32, mkexpr(ccIR), binop(Iop_Shr32, mkexpr(ccIR), mkU8(6))), mkU32(1)))))); - putLO(mkWidenFrom32(Ity_I64, mkexpr(ccMIPS), True)); + putLO(mkWidenFrom32(mode64 ? Ity_I64: Ity_I32, + mkexpr(ccMIPS), True)); /* UN */ assign(t0, binop(Iop_And32, mkexpr(ccMIPS), mkU32(0x1))); @@ -1885,7 +1948,7 @@ break; case 0x11: { /* C.cond.D */ - DIP("C.%s.D %d, f%d, f%d", showCondCode(cond), fpc_cc, fs, ft); + DIP("c.%s.d %d, f%d, f%d", showCondCode(cond), fpc_cc, fs, ft); t0 = newTemp(Ity_I32); t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); @@ -2110,6 +2173,7 @@ static Bool dis_instr_CVM ( UInt theInstr ) { UChar opc2 = get_function(theInstr); + UChar opc1 = get_opcode(theInstr); UChar regRs = get_rs(theInstr); UChar regRt = get_rt(theInstr); UChar regRd = get_rd(theInstr); @@ -2120,99 +2184,184 @@ IRTemp tmp = newTemp(ty); IRTemp tmpRs = newTemp(ty); IRTemp tmpRt = newTemp(ty); + IRTemp t1 = newTemp(ty); UInt size; assign(tmpRs, getIReg(regRs)); - switch(opc2) { - case 0x03: { /* DMUL rd, rs, rt */ - DIP("dmul r%d, r%d, r%d", regRd, regRs, regRt); - IRType t0 = newTemp(Ity_I128); - assign(t0, binop(Iop_MullU64, getIReg(regRs), getIReg(regRt))); - putIReg(regRd, unop(Iop_128to64, mkexpr(t0))); - break; - } - case 0x32: /* 5. 
CINS rd, rs, p, lenm1 */ - DIP("cins r%u, r%u, %d, %d\n", regRt, regRs, p, lenM1); - assign ( tmp , binop(Iop_Shl64, mkexpr(tmpRs), mkU8(64-( lenM1+1 )))); - assign ( tmpRt, binop(Iop_Shr64, mkexpr( tmp ), mkU8(64-(p+lenM1+1)))); - putIReg( regRt, mkexpr(tmpRt)); - break; + switch(opc1){ + case 0x1C: { + switch(opc2) { + case 0x03: { /* DMUL rd, rs, rt */ + DIP("dmul r%d, r%d, r%d", regRd, regRs, regRt); + IRType t0 = newTemp(Ity_I128); + assign(t0, binop(Iop_MullU64, getIReg(regRs), getIReg(regRt))); + putIReg(regRd, unop(Iop_128to64, mkexpr(t0))); + break; + } - case 0x33: /* 6. CINS32 rd, rs, p+32, lenm1 */ - DIP("cins32 r%u, r%u, %d, %d\n", regRt, regRs, p+32, lenM1); - assign ( tmp , binop(Iop_Shl64, mkexpr(tmpRs), mkU8(64-( lenM1+1 )))); - assign ( tmpRt, binop(Iop_Shr64, mkexpr( tmp ), mkU8(32-(p+lenM1+1)))); - putIReg( regRt, mkexpr(tmpRt)); - break; + case 0x32: /* 5. CINS rd, rs, p, lenm1 */ + DIP("cins r%u, r%u, %d, %d\n", regRt, regRs, p, lenM1); + assign ( tmp , binop(Iop_Shl64, mkexpr(tmpRs), + mkU8(64-( lenM1+1 )))); + assign ( tmpRt, binop(Iop_Shr64, mkexpr( tmp ), + mkU8(64-(p+lenM1+1)))); + putIReg( regRt, mkexpr(tmpRt)); + break; - case 0x3A: /* 3. EXTS rt, rs, p len */ - DIP("exts r%u, r%u, %d, %d\n", regRt, regRs, p, lenM1); - size = lenM1 + 1; /* lenm1+1 */ - UChar lsAmt = 64 - (p + size); /* p+lenm1+1 */ - UChar rsAmt = 64 - size; /* lenm1+1 */ - tmp = newTemp(Ity_I64); - assign(tmp, binop(Iop_Shl64, mkexpr(tmpRs), mkU8(lsAmt))); - putIReg(regRt, binop(Iop_Sar64, mkexpr(tmp), mkU8(rsAmt))); - break; + case 0x33: /* 6. CINS32 rd, rs, p+32, lenm1 */ + DIP("cins32 r%u, r%u, %d, %d\n", regRt, regRs, p+32, lenM1); + assign ( tmp , binop(Iop_Shl64, mkexpr(tmpRs), + mkU8(64-( lenM1+1 )))); + assign ( tmpRt, binop(Iop_Shr64, mkexpr( tmp ), + mkU8(32-(p+lenM1+1)))); + putIReg( regRt, mkexpr(tmpRt)); + break; - case 0x3B: /* 4. EXTS32 rt, rs, p len */ - DIP("exts32 r%u, r%u, %d, %d\n", regRt, regRs, p, lenM1); - assign ( tmp , binop(Iop_Shl64, mkexpr(tmpRs), mkU8(32-(p+lenM1+1)))); - assign ( tmpRt, binop(Iop_Sar64, mkexpr(tmp) , mkU8(64-(lenM1+1))) ); - putIReg( regRt, mkexpr(tmpRt)); - break; + case 0x3A: /* 3. EXTS rt, rs, p len */ + DIP("exts r%u, r%u, %d, %d\n", regRt, regRs, p, lenM1); + size = lenM1 + 1; /* lenm1+1 */ + UChar lsAmt = 64 - (p + size); /* p+lenm1+1 */ + UChar rsAmt = 64 - size; /* lenm1+1 */ + tmp = newTemp(Ity_I64); + assign(tmp, binop(Iop_Shl64, mkexpr(tmpRs), mkU8(lsAmt))); + putIReg(regRt, binop(Iop_Sar64, mkexpr(tmp), mkU8(rsAmt))); + break; - case 0x2B: /* 20. SNE rd, rs, rt */ - DIP("sne r%d, r%d, r%d", regRd,regRs, regRt); - if (mode64) - putIReg(regRd, unop(Iop_1Uto64, binop(Iop_CmpNE64, getIReg(regRs), - getIReg(regRt)))); - else - putIReg(regRd,unop(Iop_1Uto32, binop(Iop_CmpNE32, getIReg(regRs), - getIReg(regRt)))); - break; + case 0x3B: /* 4. EXTS32 rt, rs, p len */ + DIP("exts32 r%u, r%u, %d, %d\n", regRt, regRs, p, lenM1); + assign ( tmp , binop(Iop_Shl64, mkexpr(tmpRs), + mkU8(32-(p+lenM1+1)))); + assign ( tmpRt, binop(Iop_Sar64, mkexpr(tmp), + mkU8(64-(lenM1+1))) ); + putIReg( regRt, mkexpr(tmpRt)); + break; - case 0x2A: /* Set Equals - SEQ; Cavium OCTEON */ - DIP("seq r%d, r%d, %d", regRd, regRs, regRt); - if (mode64) - putIReg(regRd, unop(Iop_1Uto64, - binop(Iop_CmpEQ64, getIReg(regRs), - getIReg(regRt)))); - else - putIReg(regRd, unop(Iop_1Uto32, - binop(Iop_CmpEQ32, getIReg(regRs), - getIReg(regRt)))); - break; + case 0x2B: /* 20. 
SNE rd, rs, rt */ + DIP("sne r%d, r%d, r%d", regRd,regRs, regRt); + if (mode64) + putIReg(regRd, unop(Iop_1Uto64, binop(Iop_CmpNE64, + getIReg(regRs), + getIReg(regRt)))); + else + putIReg(regRd,unop(Iop_1Uto32, binop(Iop_CmpNE32, + getIReg(regRs), + getIReg(regRt)))); + break; - case 0x2E: /* Set Equals Immediate - SEQI; Cavium OCTEON */ - DIP("seqi r%d, r%d, %d", regRt, regRs, imm); - if (mode64) - putIReg(regRt, unop(Iop_1Uto64, - binop(Iop_CmpEQ64, getIReg(regRs), - mkU64(extend_s_10to64(imm))))); - else - putIReg(regRt, unop(Iop_1Uto32, - binop(Iop_CmpEQ32, getIReg(regRs), - mkU32(extend_s_10to32(imm))))); - break; + case 0x2A: /* Set Equals - SEQ; Cavium OCTEON */ + DIP("seq r%d, r%d, %d", regRd, regRs, regRt); + if (mode64) + putIReg(regRd, unop(Iop_1Uto64, + binop(Iop_CmpEQ64, getIReg(regRs), + getIReg(regRt)))); + else + putIReg(regRd, unop(Iop_1Uto32, + binop(Iop_CmpEQ32, getIReg(regRs), + getIReg(regRt)))); + break; - case 0x2F: /* Set Not Equals Immediate - SNEI; Cavium OCTEON */ - DIP("snei r%d, r%d, %d", regRt, regRs, imm); - if (mode64) - putIReg(regRt, unop(Iop_1Uto64, - binop(Iop_CmpNE64, - getIReg(regRs), - mkU64(extend_s_10to64(imm))))); - else - putIReg(regRt, unop(Iop_1Uto32, - binop(Iop_CmpNE32, - getIReg(regRs), - mkU32(extend_s_10to32(imm))))); + case 0x2E: /* Set Equals Immediate - SEQI; Cavium OCTEON */ + DIP("seqi r%d, r%d, %d", regRt, regRs, imm); + if (mode64) + putIReg(regRt, unop(Iop_1Uto64, + binop(Iop_CmpEQ64, getIReg(regRs), + mkU64(extend_s_10to64(imm))))); + else + putIReg(regRt, unop(Iop_1Uto32, + binop(Iop_CmpEQ32, getIReg(regRs), + mkU32(extend_s_10to32(imm))))); + break; + + case 0x2F: /* Set Not Equals Immediate - SNEI; Cavium OCTEON */ + DIP("snei r%d, r%d, %d", regRt, regRs, imm); + if (mode64) + putIReg(regRt, unop(Iop_1Uto64, + binop(Iop_CmpNE64, + getIReg(regRs), + mkU64(extend_s_10to64(imm))))); + else + putIReg(regRt, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + getIReg(regRs), + mkU32(extend_s_10to32(imm))))); + break; + + default: + return False; + } break; - + } /* opc1 0x1C ends here*/ + case 0x1F:{ + switch(opc2) { + case 0x0A: { // lx - Load indexed instructions + switch (get_sa(theInstr)) { + case 0x00: { // LWX rd, index(base) + DIP("lwx r%d, r%d(r%d)", regRd, regRt, regRs); + LOADX_STORE_PATTERN; + putIReg(regRd, mkWidenFrom32(ty, load(Ity_I32, mkexpr(t1)), + True)); + break; + } + case 0x08: { // LDX rd, index(base) + DIP("ldx r%d, r%d(r%d)", regRd, regRt, regRs); + vassert(mode64); /* Currently Implemented only for n64 */ + LOADX_STORE_PATTERN; + putIReg(regRd, load(Ity_I64, mkexpr(t1))); + break; + } + case 0x06: { // LBUX rd, index(base) + DIP("lbux r%d, r%d(r%d)", regRd, regRt, regRs); + LOADX_STORE_PATTERN; + if (mode64) + putIReg(regRd, unop(Iop_8Uto64, load(Ity_I8, + mkexpr(t1)))); + else + putIReg(regRd, unop(Iop_8Uto32, load(Ity_I8, + mkexpr(t1)))); + break; + } + case 0x10: { // LWUX rd, index(base) (Cavium OCTEON) + DIP("lwux r%d, r%d(r%d)", regRd, regRt, regRs); + LOADX_STORE_PATTERN; /* same for both 32 and 64 modes*/ + putIReg(regRd, mkWidenFrom32(ty, load(Ity_I32, mkexpr(t1)), + False)); + break; + } + case 0x14: { // LHUX rd, index(base) (Cavium OCTEON) + DIP("lhux r%d, r%d(r%d)", regRd, regRt, regRs); + LOADX_STORE_PATTERN; + if (mode64) + putIReg(regRd, + unop(Iop_16Uto64, load(Ity_I16, mkexpr(t1)))); + else + putIReg(regRd, + unop(Iop_16Uto32, load(Ity_I16, mkexpr(t1)))); + break; + } + case 0x16: { // LBX rd, index(base) (Cavium OCTEON) + DIP("lbx r%d, r%d(r%d)", regRd, regRs, regRt); + LOADX_STORE_PATTERN; + if 
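/* Editor's sketch (not part of the patch): the Octeon LWX/LDX/LBUX/LHUX/LBX
   cases here all follow LOADX_STORE_PATTERN -- the effective address is
   simply GPR[base] + GPR[index], and the loaded value is then zero- or
   sign-extended to the register width.  A hypothetical C rendering,
   assuming a 64-bit guest: */
#include <stdint.h>

static int64_t sketch_lbx ( const int8_t* mem, uint64_t base, uint64_t index )
{
   uint64_t ea = base + index;    /* LOADX_STORE_PATTERN                */
   return (int64_t)mem[ea];       /* LBX: signed byte, sign-extended    */
}

static uint64_t sketch_lbux ( const uint8_t* mem, uint64_t base, uint64_t index )
{
   uint64_t ea = base + index;
   return (uint64_t)mem[ea];      /* LBUX: unsigned byte, zero-extended */
}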
(mode64) + putIReg(regRd, + unop(Iop_8Sto64, load(Ity_I8, mkexpr(t1)))); + else + putIReg(regRd, + unop(Iop_8Sto32, load(Ity_I8, mkexpr(t1)))); + break; + } + default: + vex_printf("\nUnhandled LX instruction opc3 = %x\n", + get_sa(theInstr)); + return False; + } + break; + } + } /* opc1 = 0x1F & opc2 = 0xA (LX) ends here*/ + break; + } /* opc1 = 0x1F ends here*/ default: - return False; - } + return False; + } /* main opc1 switch ends here */ return True; } @@ -2223,7 +2372,7 @@ static UInt disDSPInstr_MIPS_WRK ( UInt cins ) { IRTemp t0, t1 = 0, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, - t15, t16, t17, t18; + t15, t16, t17; UInt opcode, rs, rt, rd, sa, function, ac, ac_mfhilo, rddsp_mask, wrdsp_mask, dsp_imm, shift; @@ -2875,40 +3024,123 @@ t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I32); - t3 = newTemp(Ity_I32); - t4 = newTemp(Ity_I32); + t3 = newTemp(Ity_I1); + t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I1); + t7 = newTemp(Ity_I32); + t8 = newTemp(Ity_I64); + t9 = newTemp(Ity_I64); + t10 = newTemp(Ity_I1); + t11 = newTemp(Ity_I1); + t12 = newTemp(Ity_I1); + t13 = newTemp(Ity_I1); + t14 = newTemp(Ity_I32); assign(t0, getAcc(ac)); - assign(t1, binop(Iop_Sar64, mkexpr(t0), mkU8(rs))); - putIReg(rt, unop(Iop_64to32, mkexpr(t1))); + if (0 == rs) { + assign(t1, mkexpr(t0)); + } else { + assign(t1, binop(Iop_Sar64, mkexpr(t0), mkU8(rs))); + } + /* Check if bits 63..31 of the result in t1 aren't 0. */ + assign(t3, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0))); + assign(t4, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0))); + /* Check if bits 63..31 of the result in t1 aren't + 0x1ffffffff. */ + assign(t5, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0xffffffff))); + assign(t6, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0x80000000))); + /* If bits 63..31 aren't 0 nor 0x1ffffffff, set DSP + control register. */ + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t3)), + unop(Iop_1Sto32, mkexpr(t4))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t5)), + unop(Iop_1Sto32, mkexpr(t6))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t7), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); - assign(t2, binop(Iop_Or32, - getDSPControl(), mkU32(0x00800000))); + /* If the last discarded bit is 1, there would be carry + when rounding, otherwise there wouldn't. We use that + fact and just add the value of the last discarded bit + to the least sifgnificant bit of the shifted value + from acc. */ + if (0 == rs) { + assign(t8, mkU64(0x0ULL)); + } else { + assign(t8, binop(Iop_And64, + binop(Iop_Shr64, + mkexpr(t0), + mkU8(rs-1)), + mkU64(0x1ULL))); + } + assign(t9, binop(Iop_Add64, mkexpr(t1), mkexpr(t8))); - /* Check if signOut == signIn */ - assign(t3, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t0)), - mkU32(0x80000000)), - binop(Iop_And32, - getIReg(rt), - mkU32(0x80000000))), - getDSPControl(), - mkexpr(t2))); + /* Repeat previous steps for the rounded value. 
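/* Editor's sketch (not part of the patch): the reworked EXTR/EXTRV family
   here repeats one pattern -- shift the 64-bit accumulator right, flag an
   overflow in DSPControl (bit 23, 0x00800000) unless bits 63..31 of the
   result are all zeros or all ones, and form the rounded variant by adding
   back the last bit that was shifted out.  The same logic in plain C
   (hypothetical helper; shift amount assumed already masked to 0..31 and
   arithmetic right shift assumed for signed values): */
#include <stdint.h>

static int sketch_fits_in_32 ( int64_t v )
{
   /* True iff bits 63..31 are all 0 or all 1, i.e. v is representable
      as a signed 32-bit value. */
   return (v >> 31) == 0 || (v >> 31) == -1;
}

static int32_t sketch_extr_r_w ( int64_t acc, unsigned shift, uint32_t* dspctl )
{
   int64_t shifted = shift ? (acc >> shift) : acc;
   int64_t lastbit = shift ? ((acc >> (shift - 1)) & 1) : 0;
   int64_t rounded = shifted + lastbit;
   if (!sketch_fits_in_32(shifted) || !sketch_fits_in_32(rounded))
      *dspctl |= 0x00800000;      /* overflow flag, as in the IR here   */
   /* Plain EXTR.W returns the low 32 bits of `shifted`; the _R variant
      returns the rounded value. */
   return (int32_t)rounded;
}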
*/ + assign(t10, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0))); + assign(t11, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0))); - assign(t4, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0xffffffff)), - mkexpr(t2), - mkexpr(t3)), - mkexpr(t3))); - putDSPControl(mkexpr(t4)); + assign(t12, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0xffffffff))); + assign(t13, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0x80000000))); + + assign(t14, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t10)), + unop(Iop_1Sto32, mkexpr(t11))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t12)), + unop(Iop_1Sto32, mkexpr(t13))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t14), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); + if (0 == rs) { + putIReg(rt, unop(Iop_64to32, mkexpr(t0))); + } else { + putIReg(rt, unop(Iop_64to32, mkexpr(t1))); + } break; } case 0x1: { /* EXTRV.W */ @@ -2917,43 +3149,133 @@ t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I32); - t3 = newTemp(Ity_I32); + t3 = newTemp(Ity_I1); t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I1); + t7 = newTemp(Ity_I32); + t8 = newTemp(Ity_I64); + t9 = newTemp(Ity_I64); + t10 = newTemp(Ity_I1); + t11 = newTemp(Ity_I1); + t12 = newTemp(Ity_I1); + t13 = newTemp(Ity_I1); + t14 = newTemp(Ity_I32); + t15 = newTemp(Ity_I8); + assign(t15, unop(Iop_32to8, + binop(Iop_And32, + getIReg(rs), + mkU32(0x1f)))); assign(t0, getAcc(ac)); - assign(t1, binop(Iop_Sar64, - mkexpr(t0), - unop(Iop_32to8, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f))))); - putIReg(rt, unop(Iop_64to32, mkexpr(t1))); + assign(t1, binop(Iop_Sar64, mkexpr(t0), mkexpr(t15))); + putIReg(rt, IRExpr_ITE(binop(Iop_CmpEQ32, + unop(Iop_8Uto32, + mkexpr(t15)), + mkU32(0)), + unop(Iop_64to32, mkexpr(t0)), + unop(Iop_64to32, mkexpr(t1)))); - assign(t2, binop(Iop_Or32, - getDSPControl(), mkU32(0x00800000))); - - /* Check if signOut == signIn */ - assign(t3, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t0)), - mkU32(0x80000000)), - binop(Iop_And32, - getIReg(rt), - mkU32(0x80000000))), - getDSPControl(), - mkexpr(t2))); + /* Check if bits 63..31 of the result in t1 aren't 0. */ + assign(t3, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0))); assign(t4, binop(Iop_CmpNE32, - unop(Iop_64HIto32, mkexpr(t1)), + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0))); + /* Check if bits 63..31 of the result in t1 aren't + 0x1ffffffff. */ + assign(t5, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), mkU32(0xffffffff))); + assign(t6, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0x80000000))); + /* If bits 63..31 aren't 0 nor 0x1ffffffff, set DSP + control register. 
*/ + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t3)), + unop(Iop_1Sto32, mkexpr(t4))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t5)), + unop(Iop_1Sto32, mkexpr(t6))))); putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0x0)), - IRExpr_ITE(mkexpr(t4), - mkexpr(t2), - mkexpr(t3)), - mkexpr(t3))); + mkexpr(t7), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); + + /* If the last discarded bit is 1, there would be carry + when rounding, otherwise there wouldn't. We use that + fact and just add the value of the last discarded bit + to the least sifgnificant bit of the shifted value + from acc. */ + assign(t8, + IRExpr_ITE(binop(Iop_CmpEQ32, + unop(Iop_8Uto32, + mkexpr(t15)), + mkU32(0)), + mkU64(0x0ULL), + binop(Iop_And64, + binop(Iop_Shr64, + mkexpr(t0), + unop(Iop_32to8, + binop(Iop_Sub32, + unop(Iop_8Uto32, + mkexpr(t15)), + mkU32(1)))), + mkU64(0x1ULL)))); + + assign(t9, binop(Iop_Add64, mkexpr(t1), mkexpr(t8))); + + /* Repeat previous steps for the rounded value. */ + assign(t10, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0))); + assign(t11, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0))); + + assign(t12, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0xffffffff))); + assign(t13, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0x80000000))); + + assign(t14, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t10)), + unop(Iop_1Sto32, mkexpr(t11))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t12)), + unop(Iop_1Sto32, mkexpr(t13))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t14), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); break; } case 0x2: { /* EXTP */ @@ -3140,61 +3462,131 @@ t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I32); - t4 = newTemp(Ity_I32); - t5 = newTemp(Ity_I64); - t6 = newTemp(Ity_I64); + t3 = newTemp(Ity_I1); + t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I1); + t7 = newTemp(Ity_I32); + t8 = newTemp(Ity_I64); + t9 = newTemp(Ity_I64); + t10 = newTemp(Ity_I1); + t11 = newTemp(Ity_I1); + t12 = newTemp(Ity_I1); + t13 = newTemp(Ity_I1); + t14 = newTemp(Ity_I32); + t15 = newTemp(Ity_I64); + t16 = newTemp(Ity_I1); assign(t0, getAcc(ac)); - if (0 == rs) { - putIReg(rt, unop(Iop_64to32, mkexpr(t0))); - } else { - assign(t1, binop(Iop_Sar64, mkexpr(t0), mkU8(rs))); + assign(t16, binop(Iop_CmpEQ32, + mkU32(rs), + mkU32(0))); + assign(t1, IRExpr_ITE(mkexpr(t16), + mkexpr(t0), + binop(Iop_Sar64, + mkexpr(t0), + mkU8(rs)))); + /* If the last discarded bit is 1, there would be carry + when rounding, otherwise there wouldn't. We use that + fact and just add the value of the last discarded bit + to the least significant bit of the shifted value + from acc. 
*/ + assign(t15, binop(Iop_Shr64, + mkexpr(t0), + unop(Iop_32to8, + binop(Iop_Sub32, + binop(Iop_And32, + mkU32(rs), + mkU32(0x1f)), + mkU32(1))))); - assign(t2, binop(Iop_Or32, - getDSPControl(), mkU32(0x800000))); + assign(t8, + IRExpr_ITE(mkexpr(t16), + mkU64(0x0ULL), + binop(Iop_And64, + mkexpr(t15), + mkU64(0x0000000000000001ULL)))); + assign(t9, binop(Iop_Add64, mkexpr(t1), mkexpr(t8))); + putIReg(rt, unop(Iop_64to32, mkexpr(t9))); - putDSPControl(IRExpr_ITE( - binop(Iop_CmpNE32, - unop(Iop_64HIto32, mkexpr(t1)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0xffffffff)), - mkexpr(t2), - getDSPControl()), - getDSPControl())); + /* Check if bits 63..31 of the result in t1 aren't 0. */ + assign(t3, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0))); + assign(t4, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0))); - assign(t4, binop(Iop_Or32, - getDSPControl(), mkU32(0x800000))); - /* If the last discarded bit is 1, there would be carry - when rounding, otherwise there wouldn't. We use that - fact and just add the value of the last discarded bit - to the least sifgnificant bit of the shifted value - from acc. */ - assign(t5, binop(Iop_Shr64, - binop(Iop_And64, - mkexpr(t0), - binop(Iop_Shl64, - mkU64(0x1ULL), - mkU8(rs-1))), - mkU8(rs-1))); + /* Check if bits 63..31 of the result in t1 aren't + 0x1ffffffff. */ + assign(t5, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0xffffffff))); + assign(t6, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0x80000000))); + /* If bits 63..31 aren't 0 nor 0x1ffffffff, set DSP + control register. */ + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t3)), + unop(Iop_1Sto32, mkexpr(t4))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t5)), + unop(Iop_1Sto32, mkexpr(t6))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t7), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); - assign(t6, binop(Iop_Add64, mkexpr(t1), mkexpr(t5))); + /* Repeat previous steps for the rounded value. 
*/ + assign(t10, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0))); + assign(t11, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0))); - putDSPControl(IRExpr_ITE( - binop(Iop_CmpNE32, - unop(Iop_64HIto32, mkexpr(t6)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0xffffffff)), - mkexpr(t4), - getDSPControl()), - getDSPControl())); - putIReg(rt, unop(Iop_64to32, mkexpr(t6))); - } + assign(t12, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0xffffffff))); + assign(t13, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0x80000000))); + + assign(t14, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t10)), + unop(Iop_1Sto32, mkexpr(t11))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t12)), + unop(Iop_1Sto32, mkexpr(t13))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t14), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); break; } case 0x5: { /* EXTRV_R.W */ @@ -3203,79 +3595,129 @@ t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I32); - t4 = newTemp(Ity_I32); - t5 = newTemp(Ity_I64); - t6 = newTemp(Ity_I64); + t3 = newTemp(Ity_I1); + t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I1); + t7 = newTemp(Ity_I32); + t8 = newTemp(Ity_I64); + t9 = newTemp(Ity_I64); + t10 = newTemp(Ity_I1); + t11 = newTemp(Ity_I1); + t12 = newTemp(Ity_I1); + t13 = newTemp(Ity_I1); + t14 = newTemp(Ity_I32); + t15 = newTemp(Ity_I8); + assign(t15, unop(Iop_32to8, + binop(Iop_And32, + getIReg(rs), + mkU32(0x1f)))); assign(t0, getAcc(ac)); + assign(t1, binop(Iop_Sar64, mkexpr(t0), mkexpr(t15))); - assign(t1, binop(Iop_Sar64, - mkexpr(t0), - unop(Iop_32to8, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f))))); + /* Check if bits 63..31 of the result in t1 aren't 0. */ + assign(t3, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0))); + assign(t4, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0))); + /* Check if bits 63..31 of the result in t1 aren't + 0x1ffffffff. */ + assign(t5, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0xffffffff))); + assign(t6, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0x80000000))); + /* If bits 63..31 aren't 0 nor 0x1ffffffff, set DSP + control register. */ + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t3)), + unop(Iop_1Sto32, mkexpr(t4))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t5)), + unop(Iop_1Sto32, mkexpr(t6))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t7), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); - assign(t2, binop(Iop_Or32, - getDSPControl(), mkU32(0x00800000))); - - putDSPControl(IRExpr_ITE( - binop(Iop_CmpNE32, - unop(Iop_64HIto32, mkexpr(t1)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0xffffffff)), - mkexpr(t2), - getDSPControl()), - getDSPControl())); - - assign(t4, binop(Iop_Or32, - getDSPControl(), mkU32(0x00800000))); /* If the last discarded bit is 1, there would be carry when rounding, otherwise there wouldn't. We use that - fact and just add the value of the last discarded bit to - the least sifgnificant bit of the shifted value from - acc. 
*/ - assign(t5, binop(Iop_Shr64, - binop(Iop_And64, - mkexpr(t0), - binop(Iop_Shl64, - mkU64(0x1ULL), - unop(Iop_32to8, - binop(Iop_Sub32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f)), - mkU32(0x1))))), - unop(Iop_32to8, - binop(Iop_Sub32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f)), - mkU32(0x1))))); + fact and just add the value of the last discarded bit + to the least sifgnificant bit of the shifted value + from acc. */ + assign(t8, + IRExpr_ITE(binop(Iop_CmpEQ32, + unop(Iop_8Uto32, + mkexpr(t15)), + mkU32(0)), + mkU64(0x0ULL), + binop(Iop_And64, + binop(Iop_Shr64, + mkexpr(t0), + unop(Iop_32to8, + binop(Iop_Sub32, + unop(Iop_8Uto32, + mkexpr(t15)), + mkU32(1)))), + mkU64(0x1ULL)))); - assign(t6, binop(Iop_Add64, mkexpr(t1), mkexpr(t5))); + assign(t9, binop(Iop_Add64, mkexpr(t1), mkexpr(t8))); + /* Put rounded value in destination register. */ + putIReg(rt, unop(Iop_64to32, mkexpr(t9))); - putDSPControl(IRExpr_ITE( - binop(Iop_CmpNE32, - unop(Iop_64HIto32, mkexpr(t6)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0xffffffff)), - mkexpr(t4), - getDSPControl()), - getDSPControl())); - putIReg(rt, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f)), - mkU32(0x0)), - unop(Iop_64to32, mkexpr(t0)), - unop(Iop_64to32, mkexpr(t6)))); + /* Repeat previous steps for the rounded value. */ + assign(t10, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0))); + assign(t11, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0))); + + assign(t12, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0xffffffff))); + assign(t13, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0x80000000))); + + assign(t14, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t10)), + unop(Iop_1Sto32, mkexpr(t11))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t12)), + unop(Iop_1Sto32, mkexpr(t13))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t14), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); break; } case 0x6: { /* EXTR_RS.W */ @@ -3283,81 +3725,136 @@ vassert(!mode64); t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I64); - t2 = newTemp(Ity_I64); - t3 = newTemp(Ity_I32); - t4 = newTemp(Ity_I32); - t5 = newTemp(Ity_I32); - t6 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + t3 = newTemp(Ity_I1); + t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I1); + t7 = newTemp(Ity_I32); + t8 = newTemp(Ity_I64); + t9 = newTemp(Ity_I64); + t10 = newTemp(Ity_I1); + t11 = newTemp(Ity_I1); + t12 = newTemp(Ity_I1); + t13 = newTemp(Ity_I1); + t14 = newTemp(Ity_I32); + t16 = newTemp(Ity_I32); - if (0 != rs) { - assign(t0, getAcc(ac)); + assign(t0, getAcc(ac)); + if (0 == rs) { + assign(t1, mkexpr(t0)); + } else { assign(t1, binop(Iop_Sar64, mkexpr(t0), mkU8(rs))); - putDSPControl(IRExpr_ITE( - binop(Iop_CmpNE32, - unop(Iop_64HIto32, mkexpr(t1)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0xffffffff)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x00800000)), - getDSPControl()), - getDSPControl())); - /* If the last discarded bit is 1, there would be carry - when rounding, otherwise there wouldn't. We use that - fact and just add the value of the last discarded bit - to the least sifgnificant bit of the shifted value - from acc. 
*/ - assign(t2, binop(Iop_Add64, - mkexpr(t1), - binop(Iop_Shr64, - binop(Iop_And64, - mkexpr(t0), - binop(Iop_Shl64, - mkU64(0x1ULL), - unop(Iop_32to8, - mkU32(rs-1)))), - unop(Iop_32to8, mkU32(rs-1))))); - assign(t6, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t2)), - mkU32(0xffffffff)), + } + + /* Check if bits 63..31 of the result in t1 aren't 0. */ + assign(t3, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0))); + assign(t4, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0))); + /* Check if bits 63..31 of the result in t1 aren't + 0x1ffffffff. */ + assign(t5, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0xffffffff))); + assign(t6, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0x80000000))); + /* If bits 63..31 aren't 0 nor 0x1ffffffff, set DSP + control register. */ + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t3)), + unop(Iop_1Sto32, mkexpr(t4))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t5)), + unop(Iop_1Sto32, mkexpr(t6))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t7), + mkU32(0)), binop(Iop_Or32, getDSPControl(), mkU32(0x00800000)), getDSPControl())); - putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t2)), - mkU32(0x0)), - mkexpr(t6), - getDSPControl())); - assign(t3, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t2)), - mkU32(0x80000000)), - mkU32(0x0)), - mkU32(0x7fffffff), - mkU32(0x80000000))); - assign(t4, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t2)), - mkU32(0xffffffff)), - mkexpr(t3), - unop(Iop_64to32, mkexpr(t2)))); - assign(t5, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t2)), - mkU32(0x0)), - mkexpr(t4), - unop(Iop_64to32, mkexpr(t2)))); - putIReg(rt, mkexpr(t5)); + + /* If the last discarded bit is 1, there would be carry + when rounding, otherwise there wouldn't. We use that + fact and just add the value of the last discarded bit + to the least sifgnificant bit of the shifted value + from acc. */ + if (0 == rs) { + assign(t8, mkU64(0x0ULL)); } else { - putIReg(rt, unop(Iop_64to32, getAcc(ac))); + assign(t8, binop(Iop_And64, + binop(Iop_Shr64, + mkexpr(t0), + mkU8(rs-1)), + mkU64(0x1ULL))); } + + assign(t9, binop(Iop_Add64, mkexpr(t1), mkexpr(t8))); + + /* Repeat previous steps for the rounded value. 
*/ + assign(t10, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0))); + assign(t11, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0))); + + assign(t12, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0xffffffff))); + assign(t13, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0x80000000))); + + assign(t14, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t10)), + unop(Iop_1Sto32, mkexpr(t11))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t12)), + unop(Iop_1Sto32, mkexpr(t13))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t14), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); + + assign(t16, binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0x80000000))); + putIReg(rt, IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t14), + mkU32(0)), + IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t16), + mkU32(0)), + mkU32(0x7fffffff), + mkU32(0x80000000)), + unop(Iop_64to32, mkexpr(t9)))); break; } case 0x7: { /* EXTRV_RS.W */ @@ -3366,104 +3863,146 @@ t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I32); - t4 = newTemp(Ity_I32); - t5 = newTemp(Ity_I64); - t6 = newTemp(Ity_I64); + t3 = newTemp(Ity_I1); + t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I1); t7 = newTemp(Ity_I32); - t8 = newTemp(Ity_I32); - t9 = newTemp(Ity_I32); - t10 = newTemp(Ity_I32); + t8 = newTemp(Ity_I64); + t9 = newTemp(Ity_I64); + t10 = newTemp(Ity_I1); + t11 = newTemp(Ity_I1); + t12 = newTemp(Ity_I1); + t13 = newTemp(Ity_I1); + t14 = newTemp(Ity_I32); + t15 = newTemp(Ity_I32); + t16 = newTemp(Ity_I32); + t17 = newTemp(Ity_I1); + assign(t15, binop(Iop_And32, + getIReg(rs), + mkU32(0x1f))); + assign(t17, binop(Iop_CmpEQ32, + mkexpr(t15), + mkU32(0))); assign(t0, getAcc(ac)); + assign(t1, IRExpr_ITE(mkexpr(t17), + mkexpr(t0), + binop(Iop_Sar64, + mkexpr(t0), + unop(Iop_32to8, + mkexpr(t15))))); - assign(t1, binop(Iop_Sar64, - mkexpr(t0), - unop(Iop_32to8, binop(Iop_And32, - getIReg(rs), - mkU32(0x1f))))); + /* Check if bits 63..31 of the result in t1 aren't 0. */ + assign(t3, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0))); + assign(t4, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0))); + /* Check if bits 63..31 of the result in t1 aren't + 0x1ffffffff. */ + assign(t5, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t1)), + mkU32(0xffffffff))); + assign(t6, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000)), + mkU32(0x80000000))); + /* If bits 63..31 aren't 0 nor 0x1ffffffff, set DSP + control register. */ + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t3)), + unop(Iop_1Sto32, mkexpr(t4))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t5)), + unop(Iop_1Sto32, mkexpr(t6))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t7), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); - assign(t2, binop(Iop_Or32, - getDSPControl(), mkU32(0x00800000))); + /* If the last discarded bit is 1, there would be carry + when rounding, otherwise there wouldn't. We use that + fact and just add the value of the last discarded bit + to the least sifgnificant bit of the shifted value + from acc. 
*/ + assign(t8, + IRExpr_ITE(mkexpr(t17), + mkU64(0x0ULL), + binop(Iop_And64, + binop(Iop_Shr64, + mkexpr(t0), + unop(Iop_32to8, + binop(Iop_Sub32, + mkexpr(t15), + mkU32(1)))), + mkU64(0x1ULL)))); - assign(t10, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t1)), - mkU32(0xffffffff)), - mkexpr(t2), - getDSPControl()), - getDSPControl())); + assign(t9, binop(Iop_Add64, mkexpr(t1), mkexpr(t8))); - putDSPControl(mkexpr(t10)); + /* Repeat previous steps for the rounded value. */ + assign(t10, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0))); + assign(t11, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0))); - assign(t4, binop(Iop_Or32, - getDSPControl(), mkU32(0x00800000))); - /* If the last discarded bit is 1, there would be carry - when rounding, otherwise there wouldn't. We use that - fact and just add the value of the last discarded bit to - the least sifgnificant bit of the shifted value from - acc. */ - assign(t5, binop(Iop_Shr64, - binop(Iop_And64, - mkexpr(t0), - binop(Iop_Shl64, - mkU64(0x1ULL), - unop(Iop_32to8, - binop(Iop_Sub32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f)), - mkU32(0x1))))), - unop(Iop_32to8, - binop(Iop_Sub32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f)), - mkU32(0x1))))); + assign(t12, binop(Iop_CmpNE32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0xffffffff))); + assign(t13, binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t9)), + mkU32(0x80000000)), + mkU32(0x80000000))); - assign(t6, binop(Iop_Add64, mkexpr(t1), mkexpr(t5))); - - assign(t8, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0xffffffff)), - mkexpr(t4), - getDSPControl())); + assign(t14, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t10)), + unop(Iop_1Sto32, mkexpr(t11))), + binop(Iop_Or32, + unop(Iop_1Sto32, mkexpr(t12)), + unop(Iop_1Sto32, mkexpr(t13))))); putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0x0)), - mkexpr(t8), + mkexpr(t14), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), getDSPControl())); - assign(t9, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t6)), + + assign(t16, binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t9)), + mkU32(0x80000000))); + putIReg(rt, IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t14), + mkU32(0)), + IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t16), + mkU32(0)), + mkU32(0x7fffffff), mkU32(0x80000000)), - mkU32(0x0)), - mkU32(0x7fffffff), - mkU32(0x80000000))); - assign(t7, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, mkexpr(t6)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0xffffffff)), - mkexpr(t9), - unop(Iop_64to32, - mkexpr(t6))), - unop(Iop_64to32, mkexpr(t6)))); - putIReg(rt, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1f)), - mkU32(0x0)), - unop(Iop_64to32, mkexpr(t0)), - mkexpr(t7))); + unop(Iop_64to32, mkexpr(t9)))); break; } case 0xA: { /* EXTPDP */ @@ -3678,9 +4217,7 @@ t5 = newTemp(Ity_I32); t6 = newTemp(Ity_I64); t7 = newTemp(Ity_I32); - t8 = newTemp(Ity_I32); t9 = newTemp(Ity_I32); - t10 = newTemp(Ity_I1); assign(t0, getAcc(ac)); @@ -3689,12 +4226,10 @@ assign(t2, binop(Iop_Or32, getDSPControl(), mkU32(0x00800000))); - assign(t9, binop(Iop_Shl32, - binop(Iop_And32, - unop(Iop_64to32, - mkexpr(t1)), - mkU32(0x00008000)), - mkU8(16))); + assign(t9, 
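/* Editor's sketch (not part of the patch): the saturating extract cases in
   this region (EXTR_S.H and its EXTRV_S.H variant) clamp the shifted
   accumulator to the signed 16-bit range and set DSPControl bit 23
   whenever clamping occurs.  The comparisons against
   0x0000000000007fff and 0xffffffffffff8000 in the IR are a 64-bit way of
   asking "is the value above 0x7fff / below -0x8000".  Plain C version
   (hypothetical helper): */
#include <stdint.h>

static int32_t sketch_extr_s_h ( int64_t shifted, uint32_t* dspctl )
{
   if (shifted > 0x7fffLL) {
      *dspctl |= 0x00800000;
      return 0x00007fff;               /* saturate upwards   */
   }
   if (shifted < -0x8000LL) {
      *dspctl |= 0x00800000;
      return (int32_t)0xffff8000;      /* saturate downwards */
   }
   return (int32_t)shifted;            /* low 32 bits, unchanged */
}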
binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000))); putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, mkexpr(t9), binop(Iop_And32, @@ -3711,120 +4246,79 @@ assign(t3, binop(Iop_Sub64, mkexpr(t1), mkU64(0x0000000000007fffULL))); - assign(t4, binop(Iop_Or32, - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t3)), - mkU32(0x7fffffff)))), - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, + assign(t4, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), + binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t3)), + mkU32(0x7fffffff)))), + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), unop(Iop_64to32, - mkexpr(t3)), - mkU32(0xffffffff)))))); - - assign(t5, IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t3)), - mkU32(0x80000000)), - mkU8(31))), - unop(Iop_64to32, mkexpr(t1)), - IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t4), - mkU32(0x0)), - mkU32(0x7fff), - unop(Iop_64to32, - mkexpr(t1))))); - - assign(t10, unop(Iop_32to1, - binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t3)), - mkU32(0x80000000)), - mkU8(31)))); - putDSPControl(IRExpr_ITE(mkexpr(t10), - getDSPControl(), - IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t4), - mkU32(0x0)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x00800000)), - getDSPControl()))); - + mkexpr(t3))))), + unop(Iop_1Sto32, + binop(Iop_CmpEQ32, + binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t3)), + mkU32(0x80000000)), + mkU32(0))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t4)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); /* Check if t1<0xffffffffffff8000 (0xffffffffffff8000-t1)>0 - 1. subtract t1 from 0x7fff + 1. subtract t1 from 0xffffffffffff8000 2. if the resulting number is positive (sign bit = 0) and any of the other bits is 1, the value is > 0 */ assign(t6, binop(Iop_Sub64, mkU64(0xffffffffffff8000ULL), mkexpr(t1))); - - assign(t7, binop(Iop_Or32, - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0x7fffffff)))), - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, - unop(Iop_64to32, - mkexpr(t6)), - mkU32(0xffffffff)))))); - - assign(t8, IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0x80000000)), - mkU8(31))), - unop(Iop_64to32, mkexpr(t1)), - IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t7), - mkU32(0x0)), - mkU32(0xffff8000), - unop(Iop_64to32, - mkexpr(t1))))); - putDSPControl(IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0x80000000)), - mkU8(31))), - getDSPControl(), - IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t7), - mkU32(0x0)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x00800000)), - getDSPControl()))); - - /* If the shifted value is positive, it can only be >0x7fff - and the final result is the value stored in t5, - otherwise, the final result is in t8. 
*/ - putIReg(rt, IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), binop(Iop_And32, unop(Iop_64HIto32, - mkexpr(t1)), + mkexpr(t6)), + mkU32(0x7fffffff)))), + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), + unop(Iop_64to32, + mkexpr(t6))))), + unop(Iop_1Sto32, + binop(Iop_CmpEQ32, + binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t6)), mkU32(0x80000000)), - mkU8(31))), - mkexpr(t8), - mkexpr(t5))); + mkU32(0))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t7)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); + putIReg(rt, IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t4)), + mkU32(0x00007fff), + IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t7)), + mkU32(0xffff8000), + unop(Iop_64to32, + mkexpr(t1))))); break; } case 0xF: { /* EXTRV_S.H */ @@ -3838,10 +4332,7 @@ t5 = newTemp(Ity_I32); t6 = newTemp(Ity_I64); t7 = newTemp(Ity_I32); - t8 = newTemp(Ity_I32); t9 = newTemp(Ity_I32); - t10 = newTemp(Ity_I32); - t11 = newTemp(Ity_I32); assign(t0, getAcc(ac)); @@ -3855,12 +4346,10 @@ assign(t2, binop(Iop_Or32, getDSPControl(), mkU32(0x00800000))); - assign(t9, binop(Iop_Shl32, - binop(Iop_And32, - unop(Iop_64to32, - mkexpr(t1)), - mkU32(0x00008000)), - mkU8(16))); + assign(t9, binop(Iop_And32, + unop(Iop_64to32, + mkexpr(t1)), + mkU32(0x80000000))); putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, mkexpr(t9), binop(Iop_And32, @@ -3873,127 +4362,83 @@ /* Check if t1 > 0x7fff ((t1 - 0x7fff) > 0) 1. subtract 0x7fff from t1 2. if the resulting number is positive (sign bit = 0) - and any of the other bits is 1, the value is > 0 */ + and any of the other bits is 1, the value is > 0. */ assign(t3, binop(Iop_Sub64, mkexpr(t1), mkU64(0x0000000000007fffULL))); - assign(t4, binop(Iop_Or32, - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t3)), - mkU32(0x7fffffff)))), - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, + assign(t4, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), + binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t3)), + mkU32(0x7fffffff)))), + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), unop(Iop_64to32, - mkexpr(t3)), - mkU32(0xffffffff)))))); - - assign(t5, IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t3)), - mkU32(0x80000000)), - mkU8(31))), - unop(Iop_64to32, mkexpr(t1)), - IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t4), - mkU32(0x0)), - mkU32(0x7fff), - unop(Iop_64to32, - mkexpr(t1))))); - - assign(t10, binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t3)), - mkU32(0x80000000)), - mkU8(31))); - assign(t11, IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t4), - mkU32(0x0)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x00800000)), - getDSPControl())); - putDSPControl(IRExpr_ITE(unop(Iop_32to1, - mkexpr(t10)), - getDSPControl(), - mkexpr(t11))); - - /* Check if t1<0xffffffffffff8000 - 1. subtract t1 from 0x7fff - 2. if the resulting number is positive (sign bit == 0) + mkexpr(t3))))), + unop(Iop_1Sto32, + binop(Iop_CmpEQ32, + binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t3)), + mkU32(0x80000000)), + mkU32(0))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t4)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); + /* Check if t1<0xffffffffffff8000 (0xffffffffffff8000-t1)>0 + 1. 
subtract t1 from 0xffffffffffff8000 + 2. if the resulting number is positive (sign bit = 0) and any of the other bits is 1, the value is > 0 */ assign(t6, binop(Iop_Sub64, - mkU64(0xffffffffffff8000ULL), - mkexpr(t1))); - - assign(t7, binop(Iop_Or32, - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0x7fffffff)))), - unop(Iop_1Uto32, - binop(Iop_CmpNE32, - mkU32(0), - binop(Iop_And32, - unop(Iop_64to32, - mkexpr(t6)), - mkU32(0xffffffff)))))); - - assign(t8, IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0x80000000)), - mkU8(31))), - unop(Iop_64to32, mkexpr(t1)), - IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t7), - mkU32(0x0)), - mkU32(0xffff8000), - unop(Iop_64to32, - mkexpr(t1))))); - putDSPControl(IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t6)), - mkU32(0x80000000) - ), - mkU8(31))), - getDSPControl(), - IRExpr_ITE(binop(Iop_CmpNE32, - mkexpr(t7), - mkU32(0x0)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x00800000) - ), - getDSPControl()))); - - /* If the shifted value is positive, it can only be >0x7fff - and the final result is the value stored in t5, - otherwise, the final result is in t8. */ - putIReg(rt, IRExpr_ITE(unop(Iop_32to1, - binop(Iop_Shr32, + mkU64(0xffffffffffff8000ULL), + mkexpr(t1))); + assign(t7, binop(Iop_And32, + binop(Iop_Or32, + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), binop(Iop_And32, unop(Iop_64HIto32, - mkexpr(t1)), + mkexpr(t6)), + mkU32(0x7fffffff)))), + unop(Iop_1Sto32, + binop(Iop_CmpNE32, + mkU32(0), + unop(Iop_64to32, + mkexpr(t6))))), + unop(Iop_1Sto32, + binop(Iop_CmpEQ32, + binop(Iop_And32, + unop(Iop_64HIto32, + mkexpr(t6)), mkU32(0x80000000)), - mkU8(31))), - mkexpr(t8), - mkexpr(t5))); + mkU32(0))))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t7)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x00800000)), + getDSPControl())); + putIReg(rt, IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t4)), + mkU32(0x00007fff), + IRExpr_ITE(binop(Iop_CmpNE32, + mkU32(0), + mkexpr(t7)), + mkU32(0xffff8000), + unop(Iop_64to32, + mkexpr(t1))))); break; } case 0x12: { /* RDDSP*/ @@ -4192,38 +4637,38 @@ DIP("shilov ac%d, r%d", ac, rs); vassert(!mode64); t0 = newTemp(Ity_I64); - t1 = newTemp(Ity_I64); - t2 = newTemp(Ity_I32); - t3 = newTemp(Ity_I1); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I1); + t3 = newTemp(Ity_I64); t4 = newTemp(Ity_I64); - t5 = newTemp(Ity_I64); assign(t0, getAcc(ac)); - assign(t2, binop(Iop_And32, getIReg(rs), mkU32(0x3f))); - assign(t3, binop(Iop_CmpEQ32, mkexpr(t2), mkU32(0x20))); - - assign(t4, binop(Iop_Shl64, + assign(t1, binop(Iop_And32, getIReg(rs), mkU32(0x3f))); + assign(t2, binop(Iop_CmpEQ32, mkexpr(t1), mkU32(0x20))); + assign(t3, binop(Iop_Shl64, mkexpr(t0), unop(Iop_32to8, binop(Iop_Add32, unop(Iop_Not32, - mkexpr(t2)), + mkexpr(t1)), mkU32(0x1))))); - assign(t5, binop(Iop_Shr64, + assign(t4, binop(Iop_Shr64, mkexpr(t0), unop(Iop_32to8, - mkexpr(t2)))); - putAcc(ac, IRExpr_ITE(mkexpr(t3), - binop(Iop_32HLto64, - unop(Iop_64to32, mkexpr(t0)), - mkU32(0x0)), - IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - mkexpr(t2), - mkU32(0x20)), - mkU32(0x20)), - mkexpr(t4), - mkexpr(t5)))); + mkexpr(t1)))); + + putAcc(ac, + IRExpr_ITE(mkexpr(t2), + binop(Iop_32HLto64, + unop(Iop_64to32, mkexpr(t0)), + mkU32(0x0)), + IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + mkexpr(t1), + mkU32(0x20)), + mkU32(0x20)), + mkexpr(t3), + 
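/* Editor's sketch (not part of the patch): SHILOV here interprets the low
   six bits of rs as a two's-complement shift count for the 64-bit
   accumulator -- a non-negative count shifts right, a negative one shifts
   left by its magnitude, so the special value 0x20 (-32) becomes a left
   shift by 32.  A hypothetical C rendering of that intent: */
#include <stdint.h>

static uint64_t sketch_shilov ( uint64_t acc, uint32_t rs )
{
   int shift = (int)(rs & 0x3f);
   if (shift >= 0x20)
      shift -= 0x40;                   /* sign-extend the 6-bit field */
   if (shift >= 0)
      return acc >> shift;             /* logical right shift         */
   return acc << (-shift);             /* left shift when negative    */
}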
mkexpr(t4)))); break; } case 0x1F: { /* MTHLIP */ @@ -7201,160 +7646,113 @@ t0 = newTemp(Ity_I32); t1 = newTemp(Ity_I1); t2 = newTemp(Ity_I1); - t3 = newTemp(Ity_I1); - t4 = newTemp(Ity_I32); - t5 = newTemp(Ity_I32); - t6 = newTemp(Ity_I1); + t3 = newTemp(Ity_I32); + t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I32); t7 = newTemp(Ity_I1); t8 = newTemp(Ity_I1); - t9 = newTemp(Ity_I32); - t10 = newTemp(Ity_I32); - t11 = newTemp(Ity_I1); - t12 = newTemp(Ity_I1); - t13 = newTemp(Ity_I1); - t14 = newTemp(Ity_I32); - t15 = newTemp(Ity_I32); - t16 = newTemp(Ity_I1); - t17 = newTemp(Ity_I1); - t18 = newTemp(Ity_I32); + t9 = newTemp(Ity_I1); + t10 = newTemp(Ity_I1); if (0 == rs) { putIReg(rd, getIReg(rt)); } else { - /* Shift bits 7..0. */ + /* Shift bits 7..0 and 23..16. */ assign(t0, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_32to8, getIReg(rt))), - unop(Iop_32to8, - binop(Iop_And32, - mkU32(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. */ + binop(Iop_And32, + getIReg(rt), + mkU32(0x00ff00ff)), + mkU8(rs))); assign(t1, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t0)))), - mkU32(0x00000000))); + binop(Iop_And32, + mkexpr(t0), + mkU32(0xff000000)), + mkU32(0x00000000))); assign(t2, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t0)))), - mkU32(0x000000ff))); - assign(t4, binop(Iop_Or32, - getDSPControl(), mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t1), - IRExpr_ITE(mkexpr(t2), - mkexpr(t4), - getDSPControl()), - getDSPControl())); - - /* Shift bits 15..8. */ - assign(t5, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - getIReg(rt)))), - unop(Iop_32to8, - binop(Iop_And32, - mkU32(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. */ - assign(t6, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t5)))), - mkU32(0x00000000))); + binop(Iop_And32, + mkexpr(t0), + mkU32(0xff000000)), + mkU32(0xff000000))); assign(t7, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t5)))), - mkU32(0x000000ff))); - assign(t9, binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t6), - IRExpr_ITE(mkexpr(t7), - mkexpr(t9), - getDSPControl()), - getDSPControl())); - - /* Shift bits 23..16. */ - assign(t10, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_16to8, - unop(Iop_32HIto16, - getIReg(rt)))), - unop(Iop_32to8, + binop(Iop_And32, + mkexpr(t0), + mkU32(0x0000ff00)), + mkU32(0x00000000))); + assign(t8, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t0), + mkU32(0x0000ff00)), + mkU32(0x000ff00))); + /* Shift bits 15..8 and 31..24. */ + assign(t3, binop(Iop_Shl32, + binop(Iop_Shr32, binop(Iop_And32, - mkU32(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. 
*/ - assign(t11, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t10)))), - mkU32(0x00000000))); - assign(t12, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t10)))), - mkU32(0x000000ff))); + getIReg(rt), + mkU32(0xff00ff00)), + mkU8(8)), + mkU8(rs))); + assign(t4, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0xff000000)), + mkU32(0x00000000))); + assign(t5, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0xff000000)), + mkU32(0xff000000))); + assign(t9, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0x0000ff00)), + mkU32(0x00000000))); + assign(t10, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0x0000ff00)), + mkU32(0x0000ff00))); - assign(t14, binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t11), - IRExpr_ITE(mkexpr(t12), - mkexpr(t14), - getDSPControl()), - getDSPControl())); - - /* Shift bits 31..24. */ - assign(t15, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32HIto16, - getIReg(rt)))), - unop(Iop_32to8, + assign(t6, binop(Iop_Or32, + binop(Iop_Or32, binop(Iop_And32, - mkU32(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. */ - assign(t16, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t15)))), - mkU32(0x00000000))); - assign(t17, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t15)))), - mkU32(0x000000ff))); + unop(Iop_1Uto32, + mkexpr(t1)), + unop(Iop_1Uto32, + mkexpr(t2))), + binop(Iop_And32, + unop(Iop_1Uto32, + mkexpr(t7)), + unop(Iop_1Uto32, + mkexpr(t8)))), + binop(Iop_Or32, + binop(Iop_And32, + unop(Iop_1Uto32, + mkexpr(t4)), + unop(Iop_1Uto32, + mkexpr(t5))), + binop(Iop_And32, + unop(Iop_1Uto32, + mkexpr(t9)), + unop(Iop_1Uto32, + mkexpr(t10)))))); - assign(t18, binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t16), - IRExpr_ITE(mkexpr(t17), - mkexpr(t18), - getDSPControl()), + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t6), + mkU32(0x0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), getDSPControl())); - - putIReg(rd, binop(Iop_16HLto32, - binop(Iop_8HLto16, - unop(Iop_32to8, mkexpr(t15)), - unop(Iop_32to8, mkexpr(t10))), - binop(Iop_8HLto16, - unop(Iop_32to8, mkexpr(t5)), - unop(Iop_32to8, mkexpr(t0))))); + putIReg(rd, binop(Iop_Or32, + binop(Iop_Shl32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0x00ff00ff)), + mkU8(8)), + binop(Iop_And32, + mkexpr(t0), + mkU32(0x00ff00ff)))); } break; } @@ -7422,165 +7820,119 @@ t0 = newTemp(Ity_I32); t1 = newTemp(Ity_I1); t2 = newTemp(Ity_I1); - t3 = newTemp(Ity_I1); - t4 = newTemp(Ity_I32); - t5 = newTemp(Ity_I32); - t6 = newTemp(Ity_I1); + t3 = newTemp(Ity_I32); + t4 = newTemp(Ity_I1); + t5 = newTemp(Ity_I1); + t6 = newTemp(Ity_I32); t7 = newTemp(Ity_I1); t8 = newTemp(Ity_I1); - t9 = newTemp(Ity_I32); - t10 = newTemp(Ity_I32); - t11 = newTemp(Ity_I1); - t12 = newTemp(Ity_I1); - t13 = newTemp(Ity_I1); - t14 = newTemp(Ity_I32); - t15 = newTemp(Ity_I32); - t16 = newTemp(Ity_I1); - t17 = newTemp(Ity_I1); - t18 = newTemp(Ity_I32); + t9 = newTemp(Ity_I1); + t10 = newTemp(Ity_I1); + t11 = newTemp(Ity_I8); - /* Shift bits 7..0. */ + assign(t11, unop(Iop_32to8, + binop(Iop_And32, + getIReg(rs), + mkU32(0x7)))); + /* Shift bits 7..0 and 23..16. 
*/ assign(t0, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_32to8, getIReg(rt))), - unop(Iop_32to8, - binop(Iop_And32, - getIReg(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. */ + binop(Iop_And32, + getIReg(rt), + mkU32(0x00ff00ff)), + mkexpr(t11))); assign(t1, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, mkexpr(t0)))), - mkU32(0x00000000))); + binop(Iop_And32, + mkexpr(t0), + mkU32(0xff000000)), + mkU32(0x00000000))); assign(t2, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, mkexpr(t0)))), - mkU32(0x000000ff))); - - assign(t4, binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t1), - IRExpr_ITE(mkexpr(t2), - mkexpr(t4), - getDSPControl()), - getDSPControl())); - - /* Shift bits 15..8. */ - assign(t5, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, getIReg(rt)))), - unop(Iop_32to8, - binop(Iop_And32, - getIReg(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. */ - assign(t6, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, mkexpr(t5)))), - mkU32(0x00000000))); + binop(Iop_And32, + mkexpr(t0), + mkU32(0xff000000)), + mkU32(0xff000000))); assign(t7, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, mkexpr(t5)))), - mkU32(0x000000ff))); - - assign(t9, binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t6), - IRExpr_ITE(mkexpr(t7), - mkexpr(t9), - getDSPControl()), - getDSPControl())); - - /* Shift bits 23..16. */ - assign(t10, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_16to8, - unop(Iop_32HIto16, - getIReg(rt)))), - unop(Iop_32to8, + binop(Iop_And32, + mkexpr(t0), + mkU32(0x0000ff00)), + mkU32(0x00000000))); + assign(t8, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t0), + mkU32(0x0000ff00)), + mkU32(0x000ff00))); + /* Shift bits 15..8 and 31..24. */ + assign(t3, binop(Iop_Shl32, + binop(Iop_Shr32, binop(Iop_And32, - getIReg(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. */ - assign(t11, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t10)))), - mkU32(0x00000000))); - assign(t12, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t10)))), - mkU32(0x000000ff))); + getIReg(rt), + mkU32(0xff00ff00)), + mkU8(8)), + mkexpr(t11))); + assign(t4, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0xff000000)), + mkU32(0x00000000))); + assign(t5, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0xff000000)), + mkU32(0xff000000))); + assign(t9, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0x0000ff00)), + mkU32(0x00000000))); + assign(t10, binop(Iop_CmpNE32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0x0000ff00)), + mkU32(0x0000ff00))); - assign(t14, binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t11), - IRExpr_ITE(mkexpr(t12), - mkexpr(t14), - getDSPControl()), - getDSPControl())); - - /* Shift bits 31..24. */ - assign(t15, binop(Iop_Shl32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32HIto16, - getIReg(rt)))), - unop(Iop_32to8, + assign(t6, binop(Iop_Or32, + binop(Iop_Or32, binop(Iop_And32, - getIReg(rs), - mkU32(0x7))))); - /* Check if discard isn't 0x0 and 0xffffffff. 
*/ - assign(t16, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t15)))), - mkU32(0x00000000))); - assign(t17, binop(Iop_CmpNE32, - unop(Iop_8Uto32, - unop(Iop_16HIto8, - unop(Iop_32to16, - mkexpr(t15)))), - mkU32(0x000000ff))); + unop(Iop_1Uto32, + mkexpr(t1)), + unop(Iop_1Uto32, + mkexpr(t2))), + binop(Iop_And32, + unop(Iop_1Uto32, + mkexpr(t7)), + unop(Iop_1Uto32, + mkexpr(t8)))), + binop(Iop_Or32, + binop(Iop_And32, + unop(Iop_1Uto32, + mkexpr(t4)), + unop(Iop_1Uto32, + mkexpr(t5))), + binop(Iop_And32, + unop(Iop_1Uto32, + mkexpr(t9)), + unop(Iop_1Uto32, + mkexpr(t10)))))); - assign(t18, binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000))); - putDSPControl(IRExpr_ITE(mkexpr(t16), - IRExpr_ITE(mkexpr(t17), - mkexpr(t18), - getDSPControl()), + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + mkexpr(t6), + mkU32(0x0)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), getDSPControl())); - putIReg(rd, IRExpr_ITE(binop(Iop_CmpEQ32, + unop(Iop_8Uto32, mkexpr(t11)), + mkU32(0)), + getIReg(rt), + binop(Iop_Or32, + binop(Iop_Shl32, + binop(Iop_And32, + mkexpr(t3), + mkU32(0xff00ff)), + mkU8(8)), binop(Iop_And32, - getIReg(rs), - mkU32(0x7)), - mkU32(0x0)), - getIReg(rt), - binop(Iop_16HLto32, - binop(Iop_8HLto16, - unop(Iop_32to8, - mkexpr(t15)), - unop(Iop_32to8, - mkexpr(t10))), - binop(Iop_8HLto16, - unop(Iop_32to8, - mkexpr(t5)), - unop(Iop_32to8, - mkexpr(t0)))))); + mkexpr(t0), + mkU32(0x00ff00ff))))); break; } case 0x1: { /* SHRLV.QB */ @@ -8075,7 +8427,10 @@ t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); t3 = newTemp(Ity_I32); - t4 = newTemp(Ity_I1); + t4 = newTemp(Ity_I32); + t5 = newTemp(Ity_I32); + t6 = newTemp(Ity_I32); + t7 = newTemp(Ity_I32); if (0 == rs) { putIReg(rd, getIReg(rt)); @@ -8086,21 +8441,27 @@ unop(Iop_32to16, getIReg(rt))), mkU8(rs))); - assign(t2, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t0))), - mkU32(0xffffffff)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000)), - getDSPControl())); - putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t0))), - mkU32(0x00000000)), - mkexpr(t2), + assign(t1, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t0), + mkU8(16)), + mkU32(0)))); + assign(t2, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t0), + mkU8(16)), + mkU32(0xffffffff)))); + assign(t3, binop(Iop_And32, + mkexpr(t1), + mkexpr(t2))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t3), + mkU32(0x1)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), getDSPControl())); putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, binop(Iop_And32, @@ -8115,46 +8476,56 @@ getDSPControl(), mkU32(0x400000)))); /* Shift higher 16 bits. 
*/ - assign(t1, binop(Iop_Shl32, + assign(t4, binop(Iop_Shl32, unop(Iop_16Sto32, unop(Iop_32HIto16, getIReg(rt))), mkU8(rs))); - assign(t3, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t1))), - mkU32(0xffffffff)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000)), - getDSPControl())); - putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t1))), - mkU32(0x00000000)), - mkexpr(t3), - getDSPControl())); - assign(t4, binop(Iop_CmpEQ32, - binop(Iop_Shr32, - binop(Iop_And32, - getIReg(rt), - mkU32(0x80000000)), - mkU8(31)), - binop(Iop_Shr32, - binop(Iop_And32, - mkexpr(t1), - mkU32(0x00008000)), - mkU8(15)))); - putDSPControl(IRExpr_ITE(mkexpr(t4), + assign(t5, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t4), + mkU8(16)), + mkU32(0)))); + assign(t6, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t4), + mkU8(16)), + mkU32(0xffffffff)))); + assign(t7, binop(Iop_And32, + mkexpr(t5), + mkexpr(t6))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t7), + mkU32(0x1)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), + getDSPControl())); + putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t7), + mkU32(0x1)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), + getDSPControl())); + putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x80000000)), + binop(Iop_Shl32, + binop(Iop_And32, + mkexpr(t4), + mkU32(0x00008000)), + mkU8(16)) + ), getDSPControl(), binop(Iop_Or32, getDSPControl(), mkU32(0x400000)))); - putIReg(rd, binop(Iop_16HLto32, - unop(Iop_32to16, mkexpr(t1)), + unop(Iop_32to16, mkexpr(t4)), unop(Iop_32to16, mkexpr(t0)))); } break; @@ -8323,18 +8694,20 @@ DIP("shll_s.ph r%d, r%d, %d", rd, rt, rs); vassert(!mode64); t0 = newTemp(Ity_I32); - t1 = newTemp(Ity_I16); - t2 = newTemp(Ity_I16); - t3 = newTemp(Ity_I16); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + t3 = newTemp(Ity_I32); t4 = newTemp(Ity_I32); - t5 = newTemp(Ity_I16); - t6 = newTemp(Ity_I16); - t7 = newTemp(Ity_I16); + t5 = newTemp(Ity_I32); + t6 = newTemp(Ity_I32); + t7 = newTemp(Ity_I32); t8 = newTemp(Ity_I32); t9 = newTemp(Ity_I32); - t10 = newTemp(Ity_I1); - t11 = newTemp(Ity_I16); - t12 = newTemp(Ity_I16); + t10 = newTemp(Ity_I32); + t11 = newTemp(Ity_I32); + t12 = newTemp(Ity_I32); + t13 = newTemp(Ity_I32); + t14 = newTemp(Ity_I32); if (0 == rs) { putIReg(rd, getIReg(rt)); @@ -8345,69 +8718,70 @@ unop(Iop_32to16, getIReg(rt))), mkU8(rs))); - assign(t1, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - getIReg(rt), - mkU32(0x00008000)), - mkU32(0x0)), - mkU16(0x7fff), - mkU16(0x8000))); - assign(t2, - IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_Shr32, - binop(Iop_And32, - getIReg(rt), - mkU32(0x00008000)), - mkU8(15)), - binop(Iop_Shr32, - binop(Iop_And32, - mkexpr(t0), - mkU32(0x00008000)), - mkU8(15))), - unop(Iop_32to16, mkexpr(t0)), - mkexpr(t1))); - assign(t11, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t0))), - mkU32(0xffffffff)), - mkexpr(t1), - mkexpr(t2))); - assign(t3, - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t0))), - mkU32(0x00000000)), - mkexpr(t11), - mkexpr(t2))); - assign(t8, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t0))), - mkU32(0xffffffff)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000)), - getDSPControl())); - putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - 
mkexpr(t0))), - mkU32(0x00000000)), - mkexpr(t8), - getDSPControl())); + assign(t1, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t0), + mkU8(16)), + mkU32(0)))); + assign(t2, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t0), + mkU8(16)), + mkU32(0xffffffff)))); + assign(t3, binop(Iop_And32, + mkexpr(t1), + mkexpr(t2))); putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t3), + mkU32(0x1)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), + getDSPControl())); + putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, binop(Iop_And32, getIReg(rt), mkU32(0x00008000)), binop(Iop_And32, - mkexpr(t0), - mkU32(0x00008000))), + mkexpr(t0), + mkU32(0x00008000)) + ), getDSPControl(), binop(Iop_Or32, getDSPControl(), mkU32(0x400000)))); + assign(t8, + IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t3), + mkU32(0x1)), + IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x8000)), + mkU32(0)), + mkU32(0x00007fff), + mkU32(0x00008000)), + binop(Iop_And32, + mkexpr(t0), + mkU32(0x0000ffff)))); + assign(t10, + IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x00008000)), + binop(Iop_And32, + mkexpr(t0), + mkU32(0x00008000))), + mkexpr(t8), + IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x8000)), + mkU32(0)), + mkU32(0x00007fff), + mkU32(0x00008000)))); /* Shift higher 16 bits. */ assign(t4, binop(Iop_Shl32, unop(Iop_16Sto32, @@ -8414,77 +8788,88 @@ unop(Iop_32HIto16, getIReg(rt))), mkU8(rs))); - assign(t5, IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - getIReg(rt), - mkU32(0x80000000)), - mkU32(0x0)), - mkU16(0x7fff), - mkU16(0x8000))); - assign(t6, - IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_Shr32, - binop(Iop_And32, - getIReg(rt), - mkU32(0x80000000)), - mkU8(31)), - binop(Iop_Shr32, - binop(Iop_And32, - mkexpr(t4), - mkU32(0x00008000)), - mkU8(15))), - unop(Iop_32to16, mkexpr(t4)), - mkexpr(t5))); - assign(t12, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t4))), - mkU32(0xffffffff)), - mkexpr(t5), - mkexpr(t6))); - assign(t7, - IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t4))), - mkU32(0x00000000)), - mkexpr(t12), - mkexpr(t6))); - assign(t9, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t4))), - mkU32(0xffffffff)), - binop(Iop_Or32, - getDSPControl(), - mkU32(0x400000)), - getDSPControl())); - putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_16Sto32, - unop(Iop_32HIto16, - mkexpr(t4))), - mkU32(0x00000000)), - mkexpr(t9), + assign(t5, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t4), + mkU8(16)), + mkU32(0)))); + assign(t6, unop(Iop_1Uto32, + binop(Iop_CmpNE32, + binop(Iop_Sar32, + mkexpr(t4), + mkU8(16)), + mkU32(0xffffffff)))); + assign(t7, binop(Iop_And32, + mkexpr(t5), + mkexpr(t6))); + putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t7), + mkU32(0x1)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), getDSPControl())); - assign(t10, binop(Iop_CmpEQ32, - binop(Iop_Shr32, - binop(Iop_And32, - getIReg(rt), - mkU32(0x80000000)), - mkU8(31)), - binop(Iop_Shr32, - binop(Iop_And32, - mkexpr(t4), - mkU32(0x00008000)), - mkU8(15)))); - putDSPControl(IRExpr_ITE(mkexpr(t10), + putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t7), + mkU32(0x1)), + binop(Iop_Or32, + getDSPControl(), + mkU32(0x400000)), + getDSPControl())); + assign(t12, binop(Iop_Shl32, + binop(Iop_And32, + mkexpr(t4), + mkU32(0x8000)), + mkU8(16))); + 
putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x80000000)), + mkexpr(t12)), getDSPControl(), binop(Iop_Or32, getDSPControl(), mkU32(0x400000)))); - - putIReg(rd, binop(Iop_16HLto32, - mkexpr(t7), mkexpr(t3))); + assign(t13, IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x80000000)), + mkU32(0)), + mkU32(0x7fff0000), + mkU32(0x80000000))); + assign(t9, + IRExpr_ITE(binop(Iop_CmpEQ32, + mkexpr(t7), + mkU32(0x1)), + mkexpr(t13), + binop(Iop_Shl32, + binop(Iop_And32, + mkexpr(t4), + mkU32(0x0000ffff)), + mkU8(16)))); + assign(t14, IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x80000000)), + mkU32(0)), + mkU32(0x7fff0000), + mkU32(0x80000000))); + assign(t11, + IRExpr_ITE(binop(Iop_CmpEQ32, + binop(Iop_And32, + getIReg(rt), + mkU32(0x80000000)), + binop(Iop_Shl32, + binop(Iop_And32, + mkexpr(t4), + mkU32(0x00008000)), + mkU8(16))), + mkexpr(t9), + mkexpr(t14))); + putIReg(rd, binop(Iop_Or32, + mkexpr(t10), + mkexpr(t11))); } break; } @@ -10831,10 +11216,9 @@ t8 = newTemp(Ity_I64); t9 = newTemp(Ity_I64); t10 = newTemp(Ity_I32); - t11 = newTemp(Ity_I32); assign(t0, getAcc(ac)); - /* Calculate first cross dot product and saturate if + /* Calculate the first cross dot product and saturate if needed. */ assign(t1, unop(Iop_32Sto64, binop(Iop_Shl32, @@ -10859,23 +11243,28 @@ unop(Iop_32to16, getIReg(rt))), mkU32(0x00008000))); - assign(t4, - IRExpr_ITE(mkexpr(t2), - IRExpr_ITE(mkexpr(t3), - mkU64(0x000000007fffffffULL), - mkexpr(t1)), - mkexpr(t1))); + assign(t4, IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t2)), + unop(Iop_1Sto32, + mkexpr(t3))), + mkU32(0)), + mkU64(0x000000007fffffffULL), + mkexpr(t1))); - putDSPControl(IRExpr_ITE(mkexpr(t2), - IRExpr_ITE(mkexpr(t3), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16) - ) - ), - getDSPControl()), + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t2)), + unop(Iop_1Sto32, + mkexpr(t3))), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))), getDSPControl())); /* Calculate second cross dot product and saturate if needed. */ @@ -10902,29 +11291,35 @@ unop(Iop_32HIto16, getIReg(rt))), mkU32(0x00008000))); - assign(t8, - IRExpr_ITE(mkexpr(t6), - IRExpr_ITE(mkexpr(t7), - mkU64(0x000000007fffffffULL), - mkexpr(t5)), - mkexpr(t5))); + assign(t8, IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t6)), + unop(Iop_1Sto32, + mkexpr(t7))), + mkU32(0)), + mkU64(0x000000007fffffffULL), + mkexpr(t5))); - putDSPControl(IRExpr_ITE(mkexpr(t6), - IRExpr_ITE(mkexpr(t7), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16) - ) - ), - getDSPControl()), + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t6)), + unop(Iop_1Sto32, + mkexpr(t7))), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))), getDSPControl())); - /* Add intermediate products with value in the + /* Subtract intermediate products from value in the accumulator. 
*/ - assign(t9, binop(Iop_Add64, - mkexpr(t0), - binop(Iop_Add64, mkexpr(t8), mkexpr(t4)))); + assign(t9, + binop(Iop_Add64, + mkexpr(t0), + binop(Iop_Add64, mkexpr(t8), mkexpr(t4)))); putAcc(ac, IRExpr_ITE(binop(Iop_CmpEQ32, @@ -10949,38 +11344,28 @@ mkU32(0xffffffff)), mkU64(0xffffffff80000000ULL), mkexpr(t9)))); - assign(t10, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - binop(Iop_Shl64, - mkexpr(t9), - mkU8(1))), - mkU32(0x0)), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16))), - getDSPControl())); - assign(t11, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - binop(Iop_Shl64, - mkexpr(t9), - mkU8(1))), - mkU32(0xffffffff)), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16))), - getDSPControl())); + assign(t10, IRExpr_ITE(binop(Iop_CmpEQ32, + unop(Iop_64to32, + mkexpr(t9)), + unop(Iop_64to32, + getAcc(ac))), + getDSPControl(), + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))))); putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t9)), - mkU32(0x80000000)), - mkU32(0x0)), + unop(Iop_64HIto32, + mkexpr(t9)), + unop(Iop_64HIto32, + getAcc(ac))), mkexpr(t10), - mkexpr(t11))); + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))))); break; } case 0x1B: { /* DPSQX_SA.W.PH */ @@ -10997,10 +11382,9 @@ t8 = newTemp(Ity_I64); t9 = newTemp(Ity_I64); t10 = newTemp(Ity_I32); - t11 = newTemp(Ity_I32); assign(t0, getAcc(ac)); - /* Calculate first cross dot product and saturate if + /* Calculate the first cross dot product and saturate if needed. */ assign(t1, unop(Iop_32Sto64, binop(Iop_Shl32, @@ -11025,23 +11409,28 @@ unop(Iop_32to16, getIReg(rt))), mkU32(0x00008000))); - assign(t4, - IRExpr_ITE(mkexpr(t2), - IRExpr_ITE(mkexpr(t3), - mkU64(0x000000007fffffffULL), - mkexpr(t1)), - mkexpr(t1))); + assign(t4, IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t2)), + unop(Iop_1Sto32, + mkexpr(t3))), + mkU32(0)), + mkU64(0x000000007fffffffULL), + mkexpr(t1))); - putDSPControl(IRExpr_ITE(mkexpr(t2), - IRExpr_ITE(mkexpr(t3), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16) - ) - ), - getDSPControl()), + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t2)), + unop(Iop_1Sto32, + mkexpr(t3))), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))), getDSPControl())); /* Calculate second cross dot product and saturate if needed. */ @@ -11060,31 +11449,36 @@ intermediate product and write to DSPControl register. 
*/ assign(t6, binop(Iop_CmpEQ32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x0000ffff)), + unop(Iop_16Uto32, + unop(Iop_32to16, getIReg(rs))), mkU32(0x00008000))); assign(t7, binop(Iop_CmpEQ32, - binop(Iop_And32, - getIReg(rt), - mkU32(0xffff0000)), - mkU32(0x80000000))); + unop(Iop_16Uto32, + unop(Iop_32HIto16, getIReg(rt))), + mkU32(0x00008000))); - assign(t8, - IRExpr_ITE(mkexpr(t6), - IRExpr_ITE(mkexpr(t7), - mkU64(0x000000007fffffffULL), - mkexpr(t5)), - mkexpr(t5))); + assign(t8, IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t6)), + unop(Iop_1Sto32, + mkexpr(t7))), + mkU32(0)), + mkU64(0x000000007fffffffULL), + mkexpr(t5))); - putDSPControl(IRExpr_ITE(mkexpr(t6), - IRExpr_ITE(mkexpr(t7), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16))), - getDSPControl()), + putDSPControl(IRExpr_ITE(binop(Iop_CmpNE32, + binop(Iop_And32, + unop(Iop_1Sto32, + mkexpr(t6)), + unop(Iop_1Sto32, + mkexpr(t7))), + mkU32(0)), + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))), getDSPControl())); /* Subtract intermediate products from value in the accumulator. */ @@ -11116,38 +11510,28 @@ mkU32(0xffffffff)), mkU64(0xffffffff80000000ULL), mkexpr(t9)))); - assign(t10, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - binop(Iop_Shl64, - mkexpr(t9), - mkU8(1))), - mkU32(0x0)), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16))), - getDSPControl())); - assign(t11, IRExpr_ITE(binop(Iop_CmpNE32, - unop(Iop_64HIto32, - binop(Iop_Shl64, - mkexpr(t9), - mkU8(1))), - mkU32(0xffffffff)), - binop(Iop_Or32, - getDSPControl(), - binop(Iop_Shl32, - mkU32(0x1), - mkU8(ac+16))), - getDSPControl())); + assign(t10, IRExpr_ITE(binop(Iop_CmpEQ32, + unop(Iop_64to32, + mkexpr(t9)), + unop(Iop_64to32, + getAcc(ac))), + getDSPControl(), + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))))); putDSPControl(IRExpr_ITE(binop(Iop_CmpEQ32, - binop(Iop_And32, - unop(Iop_64HIto32, - mkexpr(t9)), - mkU32(0x80000000)), - mkU32(0x0)), + unop(Iop_64HIto32, + mkexpr(t9)), + unop(Iop_64HIto32, + getAcc(ac))), mkexpr(t10), - mkexpr(t11))); + binop(Iop_Or32, + getDSPControl(), + binop(Iop_Shl32, + mkU32(0x1), + mkU8(ac+16))))); break; } default: @@ -11417,22 +11801,22 @@ vex_inject_ir(irsb, Iend_BE); #endif if (mode64) { - stmt(IRStmt_Put(offsetof(VexGuestMIPS64State, guest_TISTART), + stmt(IRStmt_Put(offsetof(VexGuestMIPS64State, guest_CMSTART), mkU64(guest_PC_curr_instr))); - stmt(IRStmt_Put(offsetof(VexGuestMIPS64State, guest_TILEN), + stmt(IRStmt_Put(offsetof(VexGuestMIPS64State, guest_CMLEN), mkU64(20))); putPC(mkU64(guest_PC_curr_instr + 20)); } else { - stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_TISTART), + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_CMSTART), mkU32(guest_PC_curr_instr))); - stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_TILEN), + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_CMLEN), mkU32(20))); putPC(mkU32(guest_PC_curr_instr + 20)); } dres.whatNext = Dis_StopHere; - dres.jk_StopHere = Ijk_TInval; + dres.jk_StopHere = Ijk_InvalICache; dres.len = 20; delta += 20; goto decode_success; @@ -11462,7 +11846,7 @@ trap_code = get_code(cins); function = get_function(cins); IRType ty = mode64 ? Ity_I64 : Ity_I32; - IRType tyF = mode64 ? Ity_F64 : Ity_F32; + IRType tyF = fp_mode64 ? 
Ity_F64 : Ity_F32; ac = get_acNo(cins); @@ -11495,102 +11879,112 @@ lastn = mkexpr(t0); break; - case 0x11: /* COP1 */ - { + case 0x11: { /* COP1 */ + if (fmt == 0x3 && fd == 0 && function == 0) { /* MFHC1 */ + DIP("mfhc1 r%d, f%d", rt, fs); + if (fp_mode64) { + t0 = newTemp(Ity_I64); + t1 = newTemp(Ity_I32); + assign(t0, unop(Iop_ReinterpF64asI64, getDReg(fs))); + assign(t1, unop(Iop_64HIto32, mkexpr(t0))); + putIReg(rt, mkWidenFrom32(ty, mkexpr(t1), True)); + } else { + ILLEGAL_INSTRUCTON; + } + break; + } else if (fmt == 0x7 && fd == 0 && function == 0) { /* MTHC1 */ + DIP("mthc1 r%d, f%d", rt, fs); + if (fp_mode64) { + t0 = newTemp(Ity_I64); + assign(t0, binop(Iop_32HLto64, getIReg(rt), + unop(Iop_ReinterpF32asI32, + getLoFromF64(Ity_F64 /* 32FPR mode. */, + getDReg(fs))))); + putDReg(fs, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + } else { + ILLEGAL_INSTRUCTON; + } + break; + } else if (fmt == 0x8) { /* BC */ + /* FcConditionalCode(bc1_cc) */ UInt bc1_cc = get_bc1_cc(cins); - if (0x08 == fmt) { - switch (fmt) { - case 0x08: /* BC */ - { - DIP("tf: %d, nd: %d", tf, nd); - /* FcConditionalCode(bc1_cc) */ - t1 = newTemp(Ity_I1); - t2 = newTemp(Ity_I32); - t3 = newTemp(Ity_I1); + t1 = newTemp(Ity_I1); + t2 = newTemp(Ity_I32); + t3 = newTemp(Ity_I1); - assign(t1, binop(Iop_CmpEQ32, mkU32(0), mkU32(bc1_cc))); - assign(t2, IRExpr_ITE(mkexpr(t1), - binop(Iop_And32, - binop(Iop_Shr32, getFCSR(), - mkU8(23)), - mkU32(0x1)), - binop(Iop_And32, - binop(Iop_Shr32, getFCSR(), - mkU8(24 + bc1_cc)), - mkU32(0x1)) - )); + assign(t1, binop(Iop_CmpEQ32, mkU32(0), mkU32(bc1_cc))); + assign(t2, IRExpr_ITE(mkexpr(t1), + binop(Iop_And32, + binop(Iop_Shr32, getFCSR(), mkU8(23)), + mkU32(0x1)), + binop(Iop_And32, + binop(Iop_Shr32, getFCSR(), + mkU8(24 + bc1_cc)), + mkU32(0x1)))); - if (tf == 1 && nd == 0) { - /* branch on true */ - DIP("bc1t %d, %d", bc1_cc, imm); - assign(t3, binop(Iop_CmpEQ32, mkU32(1), mkexpr(t2))); - dis_branch(False, mkexpr(t3), imm, &bstmt); + if (tf == 1 && nd == 0) { + /* branch on true */ + DIP("bc1t %d, %d", bc1_cc, imm); + assign(t3, binop(Iop_CmpEQ32, mkU32(1), mkexpr(t2))); + dis_branch(False, mkexpr(t3), imm, &bstmt); + break; + } else if (tf == 0 && nd == 0) { + /* branch on false */ + DIP("bc1f %d, %d", bc1_cc, imm); + assign(t3, binop(Iop_CmpEQ32, mkU32(0), mkexpr(t2))); + dis_branch(False, mkexpr(t3), imm, &bstmt); + break; + } else if (nd == 1 && tf == 0) { + DIP("bc1fl %d, %d", bc1_cc, imm); + lastn = dis_branch_likely(binop(Iop_CmpNE32, mkexpr(t2), + mkU32(0x0)), imm); + break; + } else if (nd == 1 && tf == 1) { + DIP("bc1tl %d, %d", bc1_cc, imm); + lastn = dis_branch_likely(binop(Iop_CmpEQ32, mkexpr(t2), + mkU32(0x0)), imm); + break; + } else + goto decode_failure; + } else { + switch (function) { + case 0x4: { /* SQRT.fmt */ + switch (fmt) { + case 0x10: { /* S */ + IRExpr *rm = get_IR_roundingmode(); + putFReg(fd, mkWidenFromF32(tyF, binop(Iop_SqrtF32, rm, + getLoFromF64(tyF, getFReg(fs))))); break; - } else if (tf == 0 && nd == 0) { - /* branch on false */ - DIP("bc1f %d, %d", bc1_cc, imm); - assign(t3, binop(Iop_CmpEQ32, mkU32(0), mkexpr(t2))); - dis_branch(False, mkexpr(t3), imm, &bstmt); + } + case 0x11: { /* D */ + IRExpr *rm = get_IR_roundingmode(); + putDReg(fd, binop(Iop_SqrtF64, rm, getDReg(fs))); break; - } else if (nd == 1 && tf == 0) { - DIP("bc1fl %d, %d", bc1_cc, imm); - lastn = dis_branch_likely(binop(Iop_CmpNE32, mkexpr(t2), - mkU32(0x0)), imm); - break; - } else if (nd == 1 && tf == 1) { - DIP("bc1tl %d, %d", bc1_cc, imm); - lastn = 
dis_branch_likely(binop(Iop_CmpEQ32, mkexpr(t2), - mkU32(0x0)), imm); - break; - } else + } + default: goto decode_failure; - } - - default: - goto decode_failure; - } - } else { - switch (function) { - - case 0x4: /* SQRT.fmt */ - { - switch (fmt) { - case 0x10: /* S */ - { - IRExpr *rm = get_IR_roundingmode(); - putFReg(fd, mkWidenFromF32(tyF, binop(Iop_SqrtF32, rm, - getLoFromF64(tyF, getFReg(fs))))); - } - break; - case 0x11: /* D */ - { - IRExpr *rm = get_IR_roundingmode(); - putDReg(fd, binop(Iop_SqrtF64, rm, getDReg(fs))); - } - break; } } break; case 0x5: /* abs.fmt */ switch (fmt) { - case 0x10: /* S */ - DIP("abs.s f%d, f%d", fd, fs); - putFReg(fd, mkWidenFromF32(tyF, unop(Iop_AbsF32, - getLoFromF64(tyF, getFReg(fs))))); - break; - case 0x11: /* D */ - DIP("abs.d f%d, f%d", fd, fs); - putDReg(fd, unop(Iop_AbsF64, getDReg(fs))); - break; - default: - goto decode_failure; + case 0x10: /* S */ + DIP("abs.s f%d, f%d", fd, fs); + putFReg(fd, mkWidenFromF32(tyF, unop(Iop_AbsF32, + getLoFromF64(tyF, getFReg(fs))))); + break; + case 0x11: /* D */ + DIP("abs.d f%d, f%d", fd, fs); + putDReg(fd, unop(Iop_AbsF64, getDReg(fs))); + break; + default: + goto decode_failure; } break; /* case 0x5 */ case 0x02: /* MUL.fmt */ switch (fmt) { - case 0x11: /* D */ - { + case 0x11: { /* D */ DIP("mul.d f%d, f%d, f%d", fd, fs, ft); IRExpr *rm = get_IR_roundingmode(); putDReg(fd, triop(Iop_MulF64, rm, getDReg(fs), @@ -11597,8 +11991,7 @@ getDReg(ft))); break; } - case 0x10: /* S */ - { + case 0x10: { /* S */ DIP("mul.s f%d, f%d, f%d", fd, fs, ft); IRExpr *rm = get_IR_roundingmode(); putFReg(fd, mkWidenFromF32(tyF, triop(Iop_MulF32, rm, @@ -11606,15 +11999,14 @@ getLoFromF64(tyF, getFReg(ft))))); break; } - default: - goto decode_failure; + default: + goto decode_failure; } break; /* MUL.fmt */ case 0x03: /* DIV.fmt */ switch (fmt) { - case 0x11: /* D */ - { + case 0x11: { /* D */ DIP("div.d f%d, f%d, f%d", fd, fs, ft); IRExpr *rm = get_IR_roundingmode(); putDReg(fd, triop(Iop_DivF64, rm, getDReg(fs), @@ -11621,9 +12013,9 @@ getDReg(ft))); break; } - case 0x10: /* S */ - { + case 0x10: { /* S */ DIP("div.s f%d, f%d, f%d", fd, fs, ft); + calculateFCSR(fs, ft, DIVS, False, 2); IRExpr *rm = get_IR_roundingmode(); putFReg(fd, mkWidenFromF32(tyF, triop(Iop_DivF32, rm, getLoFromF64(tyF, getFReg(fs)), @@ -11630,24 +12022,24 @@ getLoFromF64(tyF, getFReg(ft))))); break; } - default: - goto decode_failure; + default: + goto decode_failure; } break; /* DIV.fmt */ case 0x01: /* SUB.fmt */ switch (fmt) { - case 0x11: /* D */ - { + case 0x11: { /* D */ DIP("sub.d f%d, f%d, f%d", fd, fs, ft); + calculateFCSR(fs, ft, SUBD, False, 2); IRExpr *rm = get_IR_roundingmode(); putDReg(fd, triop(Iop_SubF64, rm, getDReg(fs), getDReg(ft))); break; } - case 0x10: /* S */ - { + case 0x10: { /* S */ DIP("sub.s f%d, f%d, f%d", fd, fs, ft); + calculateFCSR(fs, ft, SUBS, True, 2); IRExpr *rm = get_IR_roundingmode(); putFReg(fd, mkWidenFromF32(tyF, triop(Iop_SubF32, rm, getLoFromF64(tyF, getFReg(fs)), @@ -11654,8 +12046,8 @@ getLoFromF64(tyF, getFReg(ft))))); break; } - default: - goto decode_failure; + default: + goto decode_failure; } break; /* SUB.fmt */ @@ -11663,8 +12055,8 @@ switch (fmt) { case 0x11: /* D */ DIP("mov.d f%d, f%d", fd, fs); - if (mode64) { - putFReg(fd, getFReg(fs)); + if (fp_mode64) { + putDReg(fd, getDReg(fs)); } else { putFReg(fd, getFReg(fs)); putFReg(fd + 1, getFReg(fs + 1)); @@ -11699,19 +12091,27 @@ switch (fmt) { case 0x10: /* S */ DIP("round.l.s f%d, f%d", fd, fs); - calculateFCSR(fs, ROUNDLS, True); - t0 = 
newTemp(Ity_I64); + if (fp_mode64) { + calculateFCSR(fs, 0, ROUNDLS, True, 1); + t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_F32toI64S, mkU32(0x0), - getLoFromF64(Ity_F64, getFReg(fs)))); + assign(t0, binop(Iop_F32toI64S, mkU32(0x0), + getLoFromF64(Ity_F64, getFReg(fs)))); - putFReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); - break; + putDReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + } else { + ILLEGAL_INSTRUCTON; + } + break; case 0x11: /* D */ DIP("round.l.d f%d, f%d", fd, fs); - calculateFCSR(fs, ROUNDLD, False); - putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x0), - getFReg(fs))); + if (fp_mode64) { + calculateFCSR(fs, 0, ROUNDLD, False, 1); + putDReg(fd, binop(Iop_RoundF64toInt, mkU32(0x0), + getDReg(fs))); + } else { + ILLEGAL_INSTRUCTON; + } break; default: goto decode_failure; @@ -11723,18 +12123,26 @@ switch (fmt) { case 0x10: /* S */ DIP("trunc.l.s f%d, f%d", fd, fs); - calculateFCSR(fs, TRUNCLS, True); - t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_F32toI64S, mkU32(0x3), - getLoFromF64(Ity_F64, getFReg(fs)))); + if (fp_mode64) { + calculateFCSR(fs, 0, TRUNCLS, True, 1); + t0 = newTemp(Ity_I64); + assign(t0, binop(Iop_F32toI64S, mkU32(0x3), + getLoFromF64(Ity_F64, getFReg(fs)))); - putFReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + putDReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + } else { + ILLEGAL_INSTRUCTON; + } break; case 0x11: /* D */ DIP("trunc.l.d f%d, f%d", fd, fs); - calculateFCSR(fs, TRUNCLD, False); - putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x3), - getFReg(fs))); + if (fp_mode64) { + calculateFCSR(fs, 0, TRUNCLD, False, 1); + putDReg(fd, binop(Iop_RoundF64toInt, mkU32(0x3), + getDReg(fs))); + } else { + ILLEGAL_INSTRUCTON; + } break; default: goto decode_failure; @@ -11771,7 +12179,6 @@ switch (fmt) { case 0x10: /* S */ DIP("movn.s f%d, f%d, r%d", fd, fs, rt); - t1 = newTemp(Ity_F64); t2 = newTemp(Ity_F64); t3 = newTemp(Ity_I1); @@ -11781,13 +12188,19 @@ assign(t2, getFReg(fd)); assign(t3, binop(Iop_CmpNE64, mkU64(0), getIReg(rt))); } else { - assign(t1, unop(Iop_F32toF64, getFReg(fs))); - assign(t2, unop(Iop_F32toF64, getFReg(fd))); - assign(t3, binop(Iop_CmpNE32, mkU32(0), getIReg(rt))); + if (fp_mode64) { + assign(t1, getFReg(fs)); + assign(t2, getFReg(fd)); + assign(t3, binop(Iop_CmpNE32, mkU32(0), getIReg(rt))); + } else { + assign(t1, unop(Iop_F32toF64, getFReg(fs))); + assign(t2, unop(Iop_F32toF64, getFReg(fd))); + assign(t3, binop(Iop_CmpNE32, mkU32(0), getIReg(rt))); + } } assign(t4, IRExpr_ITE(mkexpr(t3), mkexpr(t1), mkexpr(t2))); - if (mode64) { + if (fp_mode64) { IRTemp f = newTemp(Ity_F64); IRTemp fd_hi = newTemp(Ity_I32); t5 = newTemp(Ity_I64); @@ -11795,7 +12208,7 @@ assign(fd_hi, unop(Iop_64HIto32, unop(Iop_ReinterpF64asI64, mkexpr(f)))); - assign(t5, mkWidenFrom32(ty, unop(Iop_64to32, + assign(t5, mkWidenFrom32(Ity_I64, unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(t4))), True)); putFReg(fd, unop (Iop_ReinterpI64asF64, mkexpr(t5))); @@ -11830,10 +12243,13 @@ t2 = newTemp(Ity_F64); t3 = newTemp(Ity_I1); t4 = newTemp(Ity_F64); - if (mode64) { + if (fp_mode64) { assign(t1, getFReg(fs)); assign(t2, getFReg(fd)); - assign(t3, binop(Iop_CmpEQ64, mkU64(0), getIReg(rt))); + if (mode64) + assign(t3, binop(Iop_CmpEQ64, mkU64(0), getIReg(rt))); + else + assign(t3, binop(Iop_CmpEQ32, mkU32(0), getIReg(rt))); } else { assign(t1, unop(Iop_F32toF64, getFReg(fs))); assign(t2, unop(Iop_F32toF64, getFReg(fd))); @@ -11841,7 +12257,7 @@ } assign(t4, IRExpr_ITE(mkexpr(t3), mkexpr(t1), mkexpr(t2))); - if (mode64) { + if (fp_mode64) { IRTemp f = 
newTemp(Ity_F64); IRTemp fd_hi = newTemp(Ity_I32); t7 = newTemp(Ity_I64); @@ -11848,7 +12264,7 @@ assign(f, getFReg(fd)); assign(fd_hi, unop(Iop_64HIto32, unop(Iop_ReinterpF64asI64, mkexpr(f)))); - assign(t7, mkWidenFrom32(ty, unop(Iop_64to32, + assign(t7, mkWidenFrom32(Ity_I64, unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(t4))), True)); putFReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t7))); @@ -11911,7 +12327,7 @@ t6 = newTemp(Ity_F64); t7 = newTemp(Ity_I64); - if (mode64) { + if (fp_mode64) { assign(t5, getFReg(fs)); assign(t6, getFReg(fd)); } else { @@ -11935,13 +12351,13 @@ assign(t4, IRExpr_ITE(mkexpr(t3), mkexpr(t5), mkexpr(t6))); - if (mode64) { + if (fp_mode64) { IRTemp f = newTemp(Ity_F64); IRTemp fd_hi = newTemp(Ity_I32); assign(f, getFReg(fd)); assign(fd_hi, unop(Iop_64HIto32, unop(Iop_ReinterpF64asI64, mkexpr(f)))); - assign(t7, mkWidenFrom32(ty, unop(Iop_64to32, + assign(t7, mkWidenFrom32(Ity_I64, unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(t4))), True)); @@ -11991,7 +12407,7 @@ t5 = newTemp(Ity_F64); t6 = newTemp(Ity_F64); - if (mode64) { + if (fp_mode64) { assign(t5, getFReg(fs)); assign(t6, getFReg(fd)); } else { @@ -12015,7 +12431,7 @@ assign(t4, IRExpr_ITE(mkexpr(t3), mkexpr(t5), mkexpr(t6))); - if (mode64) { + if (fp_mode64) { IRTemp f = newTemp(Ity_F64); IRTemp fd_hi = newTemp(Ity_I32); t7 = newTemp(Ity_I64); @@ -12022,7 +12438,7 @@ assign(f, getFReg(fd)); assign(fd_hi, unop(Iop_64HIto32, unop(Iop_ReinterpF64asI64, mkexpr(f)))); - assign(t7, mkWidenFrom32(ty, unop(Iop_64to32, + assign(t7, mkWidenFrom32(Ity_I64, unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(t4))), True)); @@ -12040,17 +12456,18 @@ case 0x0: /* add.fmt */ switch (fmt) { - case 0x10: /* S */ - { - DIP("add.s f%d, f%d, f%d", fd, fs, ft); - IRExpr *rm = get_IR_roundingmode(); - putFReg(fd, mkWidenFromF32(tyF, triop(Iop_AddF32, rm, - getLoFromF64(tyF, getFReg(fs)), - getLoFromF64(tyF, getFReg(ft))))); - break; - } + case 0x10: { /* S */ + DIP("add.s f%d, f%d, f%d", fd, fs, ft); + calculateFCSR(fs, ft, ADDS, True, 2); + IRExpr *rm = get_IR_roundingmode(); + putFReg(fd, mkWidenFromF32(tyF, triop(Iop_AddF32, rm, + getLoFromF64(tyF, getFReg(fs)), + getLoFromF64(tyF, getFReg(ft))))); + break; + } case 0x11: { /* D */ DIP("add.d f%d, f%d, f%d", fd, fs, ft); + calculateFCSR(fs, ft, ADDD, False, 2); IRExpr *rm = get_IR_roundingmode(); putDReg(fd, triop(Iop_AddF64, rm, getDReg(fs), getDReg(ft))); break; @@ -12058,10 +12475,10 @@ case 0x4: /* MTC1 (Move Word to Floating Point) */ DIP("mtc1 r%d, f%d", rt, fs); - if (mode64) { + if (fp_mode64) { t0 = newTemp(Ity_I32); t1 = newTemp(Ity_F32); - assign(t0, unop(Iop_64to32, getIReg(rt))); + assign(t0, mkNarrowTo32(ty, getIReg(rt))); assign(t1, unop(Iop_ReinterpI32asF32, mkexpr(t0))); putFReg(fs, mkWidenFromF32(tyF, mkexpr(t1))); @@ -12077,7 +12494,7 @@ case 0x0: /* MFC1 */ DIP("mfc1 r%d, f%d", rt, fs); - if (mode64) { + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); assign(t0, unop(Iop_ReinterpF64asI64, getFReg(fs))); @@ -12200,8 +12617,8 @@ switch (fmt) { case 0x10: /* S */ DIP("cvt.d.s f%d, f%d", fd, fs); - calculateFCSR(fs, CVTDS, True); - if (mode64) { + calculateFCSR(fs, 0, CVTDS, True, 1); + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); t3 = newTemp(Ity_F32); @@ -12220,8 +12637,8 @@ case 0x14: DIP("cvt.d.w %d, %d", fd, fs); - calculateFCSR(fs, CVTDW, True); - if (mode64) { + calculateFCSR(fs, 0, CVTDW, True, 1); + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); t3 = newTemp(Ity_F32); @@ -12240,9 +12657,9 
@@ } case 0x15: { /* L */ - if (mode64) { + if (fp_mode64) { DIP("cvt.d.l %d, %d", fd, fs); - calculateFCSR(fs, CVTDL, False); + calculateFCSR(fs, 0, CVTDL, False, 1); t0 = newTemp(Ity_I64); assign(t0, unop(Iop_ReinterpF64asI64, getFReg(fs))); @@ -12261,8 +12678,8 @@ switch (fmt) { case 0x14: /* W */ DIP("cvt.s.w %d, %d", fd, fs); - calculateFCSR(fs, CVTSW, True); - if (mode64) { + calculateFCSR(fs, 0, CVTSW, True, 1); + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); t3 = newTemp(Ity_F32); @@ -12283,20 +12700,16 @@ case 0x11: /* D */ DIP("cvt.s.d %d, %d", fd, fs); - calculateFCSR(fs, CVTSD, False); - if (mode64) { - t0 = newTemp(Ity_F32); - assign(t0, binop(Iop_F64toF32, get_IR_roundingmode(), - getFReg(fs))); - putFReg(fd, mkWidenFromF32(tyF, mkexpr(t0))); - } else - putFReg(fd, binop(Iop_F64toF32, get_IR_roundingmode(), - getDReg(fs))); + calculateFCSR(fs, 0, CVTSD, False, 1); + t0 = newTemp(Ity_F32); + assign(t0, binop(Iop_F64toF32, get_IR_roundingmode(), + getDReg(fs))); + putFReg(fd, mkWidenFromF32(tyF, mkexpr(t0))); break; case 0x15: /* L */ DIP("cvt.s.l %d, %d", fd, fs); - calculateFCSR(fs, CVTSL, False); + calculateFCSR(fs, 0, CVTSL, False, 1); t0 = newTemp(Ity_I64); assign(t0, unop(Iop_ReinterpF64asI64, getFReg(fs))); @@ -12313,34 +12726,24 @@ switch (fmt) { case 0x10: /* S */ DIP("cvt.w.s %d, %d", fd, fs); - calculateFCSR(fs, CVTWS, True); - if (mode64) { - putFReg(fd, mkWidenFromF32(tyF, binop(Iop_RoundF32toInt, - get_IR_roundingmode(), getLoFromF64(tyF, - getFReg(fs))))); - } else - putFReg(fd, binop(Iop_RoundF32toInt, get_IR_roundingmode(), - getFReg(fs))); + calculateFCSR(fs, 0, CVTWS, True, 1); + putFReg(fd, + mkWidenFromF32(tyF, + binop(Iop_RoundF32toInt, + get_IR_roundingmode(), + getLoFromF64(tyF, getFReg(fs)))) + ); break; case 0x11: DIP("cvt.w.d %d, %d", fd, fs); - calculateFCSR(fs, CVTWD, False); - if (mode64) { - t0 = newTemp(Ity_I32); - t1 = newTemp(Ity_F32); - assign(t0, binop(Iop_F64toI32S, get_IR_roundingmode(), - getFReg(fs))); - assign(t1, unop(Iop_ReinterpI32asF32, mkexpr(t0))); - putFReg(fd, mkWidenFromF32(tyF, mkexpr(t1))); - } else { - t0 = newTemp(Ity_I32); - - assign(t0, binop(Iop_F64toI32S, get_IR_roundingmode(), - getDReg(fs))); - - putFReg(fd, unop(Iop_ReinterpI32asF32, mkexpr(t0))); - } + calculateFCSR(fs, 0, CVTWD, False, 1); + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_F32); + assign(t0, binop(Iop_F64toI32S, get_IR_roundingmode(), + getDReg(fs))); + assign(t1, unop(Iop_ReinterpI32asF32, mkexpr(t0))); + putFReg(fd, mkWidenFromF32(tyF, mkexpr(t1))); break; default: @@ -12353,20 +12756,28 @@ switch (fmt) { case 0x10: /* S */ DIP("cvt.l.s %d, %d", fd, fs); - calculateFCSR(fs, CVTLS, True); - t0 = newTemp(Ity_I64); + if (fp_mode64) { + calculateFCSR(fs, 0, CVTLS, True, 1); + t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_F32toI64S, get_IR_roundingmode(), - getLoFromF64(Ity_F64, getFReg(fs)))); + assign(t0, binop(Iop_F32toI64S, get_IR_roundingmode(), + getLoFromF64(tyF, getFReg(fs)))); - putFReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + putDReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + } else { + ILLEGAL_INSTRUCTON; + } break; case 0x11: { /* D */ DIP("cvt.l.d %d, %d", fd, fs); - calculateFCSR(fs, CVTLD, False); - putFReg(fd, binop(Iop_RoundF64toInt, - get_IR_roundingmode(), getFReg(fs))); + if (fp_mode64) { + calculateFCSR(fs, 0, CVTLD, False, 1); + putDReg(fd, binop(Iop_RoundF64toInt, + get_IR_roundingmode(), getDReg(fs))); + } else { + ILLEGAL_INSTRUCTON; + } break; } @@ -12379,20 +12790,28 @@ switch (fmt) { case 0x10: /* S */ 
DIP("floor.l.s %d, %d", fd, fs); - calculateFCSR(fs, FLOORLS, True); - t0 = newTemp(Ity_I64); + if (fp_mode64) { + calculateFCSR(fs, 0, FLOORLS, True, 1); + t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_F32toI64S, mkU32(0x1), - getLoFromF64(Ity_F64, getFReg(fs)))); + assign(t0, binop(Iop_F32toI64S, mkU32(0x1), + getLoFromF64(tyF, getFReg(fs)))); - putFReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + putDReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + } else { + ILLEGAL_INSTRUCTON; + } break; case 0x11: /* D */ DIP("floor.l.d %d, %d", fd, fs); - calculateFCSR(fs, FLOORLD, False); - putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x1), - getFReg(fs))); + if (fp_mode64) { + calculateFCSR(fs, 0, FLOORLD, False, 1); + putDReg(fd, binop(Iop_RoundF64toInt, mkU32(0x1), + getDReg(fs))); + } else { + ILLEGAL_INSTRUCTON; + } break; default: goto decode_failure; @@ -12403,8 +12822,8 @@ switch (fmt) { case 0x10: /* S */ DIP("round.w.s f%d, f%d", fd, fs); - calculateFCSR(fs, ROUNDWS, True); - if (mode64) { + calculateFCSR(fs, 0, ROUNDWS, True, 1); + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); t3 = newTemp(Ity_F32); @@ -12427,8 +12846,8 @@ case 0x11: /* D */ DIP("round.w.d f%d, f%d", fd, fs); - calculateFCSR(fs, ROUNDWD, False); - if (mode64) { + calculateFCSR(fs, 0, ROUNDWD, False, 1); + if (fp_mode64) { t0 = newTemp(Ity_I32); assign(t0, binop(Iop_F64toI32S, mkU32(0x0), getDReg(fs))); @@ -12453,8 +12872,8 @@ switch (fmt) { case 0x10: /* S */ DIP("floor.w.s f%d, f%d", fd, fs); - calculateFCSR(fs, FLOORWS, True); - if (mode64) { + calculateFCSR(fs, 0, FLOORWS, True, 1); + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); t3 = newTemp(Ity_F32); @@ -12477,8 +12896,8 @@ case 0x11: /* D */ DIP("floor.w.d f%d, f%d", fd, fs); - calculateFCSR(fs, FLOORWD, False); - if (mode64) { + calculateFCSR(fs, 0, FLOORWD, False, 1); + if (fp_mode64) { t0 = newTemp(Ity_I32); assign(t0, binop(Iop_F64toI32S, mkU32(0x1), getDReg(fs))); @@ -12504,8 +12923,8 @@ switch (fmt) { case 0x10: /* S */ DIP("trunc.w.s %d, %d", fd, fs); - calculateFCSR(fs, TRUNCWS, True); - if (mode64) { + calculateFCSR(fs, 0, TRUNCWS, True, 1); + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); t3 = newTemp(Ity_F32); @@ -12527,8 +12946,8 @@ break; case 0x11: /* D */ DIP("trunc.w.d %d, %d", fd, fs); - calculateFCSR(fs, TRUNCWD, False); - if (mode64) { + calculateFCSR(fs, 0, TRUNCWD, False, 1); + if (fp_mode64) { t0 = newTemp(Ity_I32); assign(t0, binop(Iop_F64toI32S, mkU32(0x3), @@ -12555,8 +12974,8 @@ switch (fmt) { case 0x10: /* S */ DIP("ceil.w.s %d, %d", fd, fs); - calculateFCSR(fs, CEILWS, True); - if (mode64) { + calculateFCSR(fs, 0, CEILWS, True, 1); + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); t3 = newTemp(Ity_F32); @@ -12579,8 +12998,8 @@ case 0x11: /* D */ DIP("ceil.w.d %d, %d", fd, fs); - calculateFCSR(fs, CEILWD, False); - if (!mode64) { + calculateFCSR(fs, 0, CEILWD, False, 1); + if (!fp_mode64) { t0 = newTemp(Ity_I32); assign(t0, binop(Iop_F64toI32S, mkU32(0x2), getDReg(fs))); @@ -12603,20 +13022,28 @@ switch (fmt) { case 0x10: /* S */ DIP("ceil.l.s %d, %d", fd, fs); - calculateFCSR(fs, CEILLS, True); - t0 = newTemp(Ity_I64); + if (fp_mode64) { + calculateFCSR(fs, 0, CEILLS, True, 1); + t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_F32toI64S, mkU32(0x2), - getLoFromF64(Ity_F64, getFReg(fs)))); + assign(t0, binop(Iop_F32toI64S, mkU32(0x2), + getLoFromF64(tyF, getFReg(fs)))); - putFReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t0))); + putFReg(fd, unop(Iop_ReinterpI64asF64, 
mkexpr(t0))); + } else { + ILLEGAL_INSTRUCTON; + } break; case 0x11: /* D */ DIP("ceil.l.d %d, %d", fd, fs); - calculateFCSR(fs, CEILLD, False); - putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x2), - getFReg(fs))); + if (fp_mode64) { + calculateFCSR(fs, 0, CEILLD, False, 1); + putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x2), + getFReg(fs))); + } else { + ILLEGAL_INSTRUCTON; + } break; default: @@ -12692,17 +13119,24 @@ case 0x31: /* LWC1 */ /* Load Word to Floating Point - LWC1 (MIPS32) */ DIP("lwc1 f%d, %d(r%d)", ft, imm, rs); - if (mode64) { - t0 = newTemp(Ity_I64); + if (fp_mode64) { t1 = newTemp(Ity_F32); t2 = newTemp(Ity_I64); - /* new LO */ - assign(t0, binop(Iop_Add64, getIReg(rs), - mkU64(extend_s_16to64(imm)))); + if (mode64) { + t0 = newTemp(Ity_I64); + /* new LO */ + assign(t0, binop(Iop_Add64, getIReg(rs), + mkU64(extend_s_16to64(imm)))); + } else { + t0 = newTemp(Ity_I32); + /* new LO */ + assign(t0, binop(Iop_Add32, getIReg(rs), + mkU32(extend_s_16to32(imm)))); + } assign(t1, load(Ity_F32, mkexpr(t0))); - assign(t2, mkWidenFrom32(ty, unop(Iop_ReinterpF32asI32, - mkexpr(t1)), True)); - putFReg(ft, unop(Iop_ReinterpI64asF64, mkexpr(t2))); + assign(t2, mkWidenFrom32(Ity_I64, unop(Iop_ReinterpF32asI32, + mkexpr(t1)), True)); + putDReg(ft, unop(Iop_ReinterpI64asF64, mkexpr(t2))); } else { t0 = newTemp(Ity_I32); assign(t0, binop(Iop_Add32, getIReg(rs), @@ -12713,7 +13147,7 @@ case 0x39: /* SWC1 */ DIP("swc1 f%d, %d(r%d)", ft, imm, rs); - if (mode64) { + if (fp_mode64) { t0 = newTemp(Ity_I64); t2 = newTemp(Ity_I32); LOAD_STORE_PATTERN; @@ -12732,22 +13166,16 @@ case 0x35: /* Load Doubleword to Floating Point - LDC1 (MIPS32) */ + DIP("ldc1 f%d, %d(%d)", rt, imm, rs); LOAD_STORE_PATTERN; - if (mode64) - putFReg(ft, load(Ity_F64, mkexpr(t1))); - else - putDReg(ft, load(Ity_F64, mkexpr(t1))); - DIP("ldc1 f%d, %d(%d)", rt, imm, rs); + putDReg(ft, load(Ity_F64, mkexpr(t1))); break; case 0x3D: /* Store Doubleword from Floating Point - SDC1 */ + DIP("sdc1 f%d, %d(%d)", ft, imm, rs); LOAD_STORE_PATTERN; - if (mode64) - store(mkexpr(t1), getFReg(ft)); - else - store(mkexpr(t1), getDReg(ft)); - DIP("sdc1 f%d, %d(%d)", ft, imm, rs); + store(mkexpr(t1), getDReg(ft)); break; case 0x23: /* LW */ @@ -12806,19 +13234,20 @@ case 0x0: { /* LWXC1 */ /* Load Word Indexed to Floating Point - LWXC1 (MIPS32r2) */ DIP("lwxc1 f%d, r%d(r%d)", fd, rt, rs); - if (mode64) { + if (fp_mode64) { t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); - t2 = newTemp(Ity_I64); t3 = newTemp(Ity_F32); t4 = newTemp(Ity_I64); + t2 = newTemp(ty); /* new LO */ - assign(t2, binop(Iop_Add64, getIReg(rs), getIReg(rt))); + assign(t2, binop(mode64 ? Iop_Add64 : Iop_Add32, getIReg(rs), + getIReg(rt))); assign(t3, load(Ity_F32, mkexpr(t2))); - assign(t4, mkWidenFrom32(ty, unop(Iop_ReinterpF32asI32, - mkexpr(t3)), True)); + assign(t4, mkWidenFrom32(Ity_I64, unop(Iop_ReinterpF32asI32, + mkexpr(t3)), True)); putFReg(fd, unop(Iop_ReinterpI64asF64, mkexpr(t4))); } else { @@ -12832,10 +13261,11 @@ case 0x1: { /* LDXC1 */ /* Load Doubleword Indexed to Floating Point LDXC1 (MIPS32r2 and MIPS64) */ - if (mode64) { + if (fp_mode64) { DIP("ldxc1 f%d, r%d(r%d)", fd, rt, rs); - t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_Add64, getIReg(rs), getIReg(rt))); + t0 = newTemp(ty); + assign(t0, binop(mode64 ? 
Iop_Add64 : Iop_Add32, getIReg(rs), + getIReg(rt))); putFReg(fd, load(Ity_F64, mkexpr(t0))); break; } else { @@ -12869,10 +13299,10 @@ case 0x8: { /* Store Word Indexed from Floating Point - SWXC1 */ DIP("swxc1 f%d, r%d(r%d)", ft, rt, rs); - if (mode64) { - t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_Add64, getIReg(rs), getIReg(rt))); - + if (fp_mode64) { + t0 = newTemp(ty); + assign(t0, binop(mode64 ? Iop_Add64 : Iop_Add32, getIReg(rs), + getIReg(rt))); store(mkexpr(t0), getLoFromF64(tyF, getFReg(fs))); } else { @@ -12885,9 +13315,10 @@ } case 0x9: { /* Store Doubleword Indexed from Floating Point - SDXC1 */ DIP("sdc1 f%d, %d(%d)", ft, imm, rs); - if (mode64) { - t0 = newTemp(Ity_I64); - assign(t0, binop(Iop_Add64, getIReg(rs), getIReg(rt))); + if (fp_mode64) { + t0 = newTemp(ty); + assign(t0, binop(mode64 ? Iop_Add64 : Iop_Add32, getIReg(rs), + getIReg(rt))); store(mkexpr(t0), getFReg(fs)); } else { t0 = newTemp(Ity_I32); @@ -13692,6 +14123,7 @@ /* Cavium Specific instructions */ case 0x03: case 0x32: case 0x33: /* DMUL, CINS , CINS32 */ case 0x3A: case 0x3B: case 0x2B: /* EXT, EXT32, SNE */ + /* CVM Compare Instructions */ case 0x2A: case 0x2E: case 0x2F: /* SEQ, SEQI, SNEI */ if (VEX_MIPS_COMP_ID(archinfo->hwcaps) == VEX_PRID_COMP_CAVIUM) { if (dis_instr_CVM(cins)) @@ -14528,8 +14960,13 @@ } break; /* BSHFL */ - /* -------- MIPS32(r2) DSP ASE(r2) instructions -------- */ + /* --- MIPS32(r2) DSP ASE(r2) / Cavium Specfic (LX) instructions --- */ case 0xA: /* LX */ + if (VEX_MIPS_COMP_ID(archinfo->hwcaps) == VEX_PRID_COMP_CAVIUM) { + if (dis_instr_CVM(cins)) + break; + goto decode_failure; + } case 0xC: /* INSV */ case 0x38: { /* EXTR.W */ if (VEX_MIPS_PROC_DSP(archinfo->hwcaps)) { @@ -16318,8 +16755,8 @@ decode_failure_dsp: vex_printf("Error occured while trying to decode MIPS32 DSP " - "instruction.\nYour platform probably doesn't support " - "MIPS32 DSP ASE.\n"); + "instruction.\nYour platform probably doesn't support " + "MIPS32 DSP ASE.\n"); decode_failure: /* All decode failures end up here. */ if (sigill_diag) @@ -16422,7 +16859,6 @@ /* Disassemble a single instruction into IR. The instruction is located in host memory at &guest_code[delta]. 
*/ - DisResult disInstr_MIPS( IRSB* irsb_IN, Bool (*resteerOkFn) ( void *, Addr64 ), Bool resteerCisOk, @@ -16441,6 +16877,10 @@ vassert(guest_arch == VexArchMIPS32 || guest_arch == VexArchMIPS64); mode64 = guest_arch != VexArchMIPS32; +#if (__mips_fpr==64) + fp_mode64 = ((VEX_MIPS_REV(archinfo->hwcaps) == VEX_PRID_CPU_32FPR) + || guest_arch == VexArchMIPS64); +#endif guest_code = guest_code_IN; irsb = irsb_IN; Index: priv/guest_ppc_helpers.c =================================================================== --- priv/guest_ppc_helpers.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_ppc_helpers.c (.../trunk) (revision 2863) @@ -498,8 +498,8 @@ vex_state->guest_EMNOTE = EmNote_NONE; - vex_state->guest_TISTART = 0; - vex_state->guest_TILEN = 0; + vex_state->guest_CMSTART = 0; + vex_state->guest_CMLEN = 0; vex_state->guest_NRADDR = 0; vex_state->guest_NRADDR_GPR2 = 0; @@ -665,8 +665,8 @@ vex_state->padding = 0; - vex_state->guest_TISTART = 0; - vex_state->guest_TILEN = 0; + vex_state->guest_CMSTART = 0; + vex_state->guest_CMLEN = 0; vex_state->guest_NRADDR = 0; vex_state->guest_NRADDR_GPR2 = 0; @@ -808,8 +808,8 @@ .alwaysDefd = { /* 0 */ ALWAYSDEFD32(guest_CIA), /* 1 */ ALWAYSDEFD32(guest_EMNOTE), - /* 2 */ ALWAYSDEFD32(guest_TISTART), - /* 3 */ ALWAYSDEFD32(guest_TILEN), + /* 2 */ ALWAYSDEFD32(guest_CMSTART), + /* 3 */ ALWAYSDEFD32(guest_CMLEN), /* 4 */ ALWAYSDEFD32(guest_VSCR), /* 5 */ ALWAYSDEFD32(guest_FPROUND), /* 6 */ ALWAYSDEFD32(guest_NRADDR), @@ -849,8 +849,8 @@ .alwaysDefd = { /* 0 */ ALWAYSDEFD64(guest_CIA), /* 1 */ ALWAYSDEFD64(guest_EMNOTE), - /* 2 */ ALWAYSDEFD64(guest_TISTART), - /* 3 */ ALWAYSDEFD64(guest_TILEN), + /* 2 */ ALWAYSDEFD64(guest_CMSTART), + /* 3 */ ALWAYSDEFD64(guest_CMLEN), /* 4 */ ALWAYSDEFD64(guest_VSCR), /* 5 */ ALWAYSDEFD64(guest_FPROUND), /* 6 */ ALWAYSDEFD64(guest_NRADDR), Index: priv/guest_ppc_toIR.c =================================================================== --- priv/guest_ppc_toIR.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_ppc_toIR.c (.../trunk) (revision 2863) @@ -69,6 +69,12 @@ unconditional calls and returns (bl, blr). They should also be emitted for conditional calls and returns, but we don't have a way to express that right now. Ah well. + + - Uses of Iop_{Add,Sub,Mul}32Fx4: the backend (host_ppc_isel.c) + ignores the rounding mode, and generates code that assumes + round-to-nearest. This means V will compute incorrect results + for uses of these IROps when the rounding mode (first) arg is + not mkU32(Irrm_NEAREST). */ /* "Special" instructions. @@ -78,9 +84,9 @@ concerned) but have meaning for supporting Valgrind. 
A special instruction is flagged by a 16-byte preamble: - 32-bit mode: 54001800 54006800 5400E800 54009800 - (rlwinm 0,0,3,0,0; rlwinm 0,0,13,0,0; - rlwinm 0,0,29,0,0; rlwinm 0,0,19,0,0) + 32-bit mode: 5400183E 5400683E 5400E83E 5400983E + (rlwinm 0,0,3,0,31; rlwinm 0,0,13,0,31; + rlwinm 0,0,29,0,31; rlwinm 0,0,19,0,31) 64-bit mode: 78001800 78006800 7800E802 78009802 (rotldi 0,0,3; rotldi 0,0,13; @@ -228,8 +234,8 @@ #define OFFB_VRSAVE offsetofPPCGuestState(guest_VRSAVE) #define OFFB_VSCR offsetofPPCGuestState(guest_VSCR) #define OFFB_EMNOTE offsetofPPCGuestState(guest_EMNOTE) -#define OFFB_TISTART offsetofPPCGuestState(guest_TISTART) -#define OFFB_TILEN offsetofPPCGuestState(guest_TILEN) +#define OFFB_CMSTART offsetofPPCGuestState(guest_CMSTART) +#define OFFB_CMLEN offsetofPPCGuestState(guest_CMLEN) #define OFFB_NRADDR offsetofPPCGuestState(guest_NRADDR) #define OFFB_NRADDR_GPR2 offsetofPPCGuestState(guest_NRADDR_GPR2) #define OFFB_TFHAR offsetofPPCGuestState(guest_TFHAR) @@ -377,8 +383,8 @@ PPC_GST_VRSAVE, // Vector Save/Restore Register PPC_GST_VSCR, // Vector Status and Control Register PPC_GST_EMWARN, // Emulation warnings - PPC_GST_TISTART,// For icbi: start of area to invalidate - PPC_GST_TILEN, // For icbi: length of area to invalidate + PPC_GST_CMSTART,// For icbi: start of area to invalidate + PPC_GST_CMLEN, // For icbi: length of area to invalidate PPC_GST_IP_AT_SYSCALL, // the CIA of the most recently executed SC insn PPC_GST_SPRG3_RO, // SPRG3 PPC_GST_TFHAR, // Transactional Failure Handler Address Register @@ -2781,14 +2787,14 @@ stmt( IRStmt_Put( OFFB_EMNOTE,src) ); break; - case PPC_GST_TISTART: + case PPC_GST_CMSTART: vassert( ty_src == ty ); - stmt( IRStmt_Put( OFFB_TISTART, src) ); + stmt( IRStmt_Put( OFFB_CMSTART, src) ); break; - case PPC_GST_TILEN: + case PPC_GST_CMLEN: vassert( ty_src == ty ); - stmt( IRStmt_Put( OFFB_TILEN, src) ); + stmt( IRStmt_Put( OFFB_CMLEN, src) ); break; case PPC_GST_TEXASR: @@ -5233,6 +5239,7 @@ Int simm16 = extend_s_16to32(uimm16); IRType ty = mode64 ? Ity_I64 : Ity_I32; + IROp mkAdd = mode64 ? Iop_Add64 : Iop_Add32; IRTemp EA = newTemp(ty); UInt r = 0; UInt ea_off = 0; @@ -5248,7 +5255,7 @@ } DIP("lmw r%u,%d(r%u)\n", rD_addr, simm16, rA_addr); for (r = rD_addr; r <= 31; r++) { - irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off)); + irx_addr = binop(mkAdd, mkexpr(EA), mode64 ? mkU64(ea_off) : mkU32(ea_off)); putIReg( r, mkWidenFrom32(ty, loadBE(Ity_I32, irx_addr ), False) ); ea_off += 4; @@ -5258,7 +5265,7 @@ case 0x2F: // stmw (Store Multiple Word, PPC32 p527) DIP("stmw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); for (r = rS_addr; r <= 31; r++) { - irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off)); + irx_addr = binop(mkAdd, mkexpr(EA), mode64 ? mkU64(ea_off) : mkU32(ea_off)); storeBE( irx_addr, mkNarrowTo32(ty, getIReg(r)) ); ea_off += 4; } @@ -7256,14 +7263,14 @@ assign( addr, binop( mkSzOp(ty, Iop_And8), mkexpr(EA), mkSzImm(ty, ~(((ULong)lineszB)-1) )) ); - putGST( PPC_GST_TISTART, mkexpr(addr) ); - putGST( PPC_GST_TILEN, mkSzImm(ty, lineszB) ); + putGST( PPC_GST_CMSTART, mkexpr(addr) ); + putGST( PPC_GST_CMLEN, mkSzImm(ty, lineszB) ); /* be paranoid ... 
*/ stmt( IRStmt_MBE(Imbe_Fence) ); putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr())); - dres->jk_StopHere = Ijk_TInval; + dres->jk_StopHere = Ijk_InvalICache; dres->whatNext = Dis_StopHere; break; } @@ -12980,17 +12987,23 @@ switch (opc2) { case 0x100: // xvaddsp (VSX Vector Add Single-Precision) DIP("xvaddsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); - putVSReg( XT, binop(Iop_Add32Fx4, getVSReg( XA ), getVSReg( XB )) ); + // WARNING: BOGUS! The backend ignores rm on Iop_Add32Fx4 + putVSReg( XT, triop(Iop_Add32Fx4, rm, + getVSReg( XA ), getVSReg( XB )) ); break; case 0x140: // xvmulsp (VSX Vector Multiply Single-Precision) DIP("xvmulsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); - putVSReg( XT, binop(Iop_Mul32Fx4, getVSReg( XA ), getVSReg( XB )) ); + // WARNING: BOGUS! The backend ignores rm on Iop_Mul32Fx4 + putVSReg( XT, triop(Iop_Mul32Fx4, rm, + getVSReg( XA ), getVSReg( XB )) ); break; case 0x120: // xvsubsp (VSX Vector Subtract Single-Precision) DIP("xvsubsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); - putVSReg( XT, binop(Iop_Sub32Fx4, getVSReg( XA ), getVSReg( XB )) ); + // WARNING: BOGUS! The backend ignores rm on Iop_Sub32Fx4 + putVSReg( XT, triop(Iop_Sub32Fx4, rm, + getVSReg( XA ), getVSReg( XB )) ); break; case 0x160: // xvdivsp (VSX Vector Divide Single-Precision) @@ -17774,6 +17787,9 @@ return False; } + IRTemp rm = newTemp(Ity_I32); + assign(rm, get_IR_roundingmode()); + opc2 = IFIELD( theInstr, 0, 6 ); switch (opc2) { case 0x2E: // vmaddfp (Multiply Add FP, AV p177) @@ -17780,8 +17796,10 @@ DIP("vmaddfp v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vC_addr, vB_addr); putVReg( vD_addr, - binop(Iop_Add32Fx4, mkexpr(vB), - binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) ); + triop(Iop_Add32Fx4, mkU32(Irrm_NEAREST), + mkexpr(vB), + triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST), + mkexpr(vA), mkexpr(vC))) ); return True; case 0x2F: { // vnmsubfp (Negative Multiply-Subtract FP, AV p215) @@ -17788,9 +17806,10 @@ DIP("vnmsubfp v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vC_addr, vB_addr); putVReg( vD_addr, - binop(Iop_Sub32Fx4, + triop(Iop_Sub32Fx4, mkU32(Irrm_NEAREST), mkexpr(vB), - binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) ); + triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST), + mkexpr(vA), mkexpr(vC))) ); return True; } @@ -17802,12 +17821,14 @@ switch (opc2) { case 0x00A: // vaddfp (Add FP, AV p137) DIP("vaddfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_Add32Fx4, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, triop(Iop_Add32Fx4, + mkU32(Irrm_NEAREST), mkexpr(vA), mkexpr(vB)) ); return True; case 0x04A: // vsubfp (Subtract FP, AV p261) DIP("vsubfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_Sub32Fx4, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, triop(Iop_Sub32Fx4, + mkU32(Irrm_NEAREST), mkexpr(vA), mkexpr(vB)) ); return True; case 0x40A: // vmaxfp (Maximum FP, AV p178) @@ -17924,8 +17945,9 @@ binop(Iop_CmpLE32Fx4, mkexpr(vA), mkexpr(vB))) ); assign( lt, unop(Iop_NotV128, binop(Iop_CmpGE32Fx4, mkexpr(vA), - binop(Iop_Sub32Fx4, mkexpr(zeros), - mkexpr(vB)))) ); + triop(Iop_Sub32Fx4, mkU32(Irrm_NEAREST), + mkexpr(zeros), + mkexpr(vB)))) ); // finally, just shift gt,lt to correct position assign( vD, binop(Iop_ShlN32x4, @@ -17986,7 +18008,7 @@ switch (opc2) { case 0x30A: // vcfux (Convert from Unsigned Fixed-Point W, AV p156) DIP("vcfux v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); - putVReg( vD_addr, binop(Iop_Mul32Fx4, + putVReg( vD_addr, triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST), unop(Iop_I32UtoFx4, mkexpr(vB)), mkexpr(vInvScale)) ); return 
True; @@ -17994,7 +18016,7 @@ case 0x34A: // vcfsx (Convert from Signed Fixed-Point W, AV p155) DIP("vcfsx v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); - putVReg( vD_addr, binop(Iop_Mul32Fx4, + putVReg( vD_addr, triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST), unop(Iop_I32StoFx4, mkexpr(vB)), mkexpr(vInvScale)) ); return True; @@ -18003,7 +18025,8 @@ DIP("vctuxs v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); putVReg( vD_addr, unop(Iop_QFtoI32Ux4_RZ, - binop(Iop_Mul32Fx4, mkexpr(vB), mkexpr(vScale))) ); + triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST), + mkexpr(vB), mkexpr(vScale))) ); return True; case 0x3CA: // vctsxs (Convert to Signed Fixed-Point W Saturate, AV p171) @@ -18010,7 +18033,8 @@ DIP("vctsxs v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); putVReg( vD_addr, unop(Iop_QFtoI32Sx4_RZ, - binop(Iop_Mul32Fx4, mkexpr(vB), mkexpr(vScale))) ); + triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST), + mkexpr(vB), mkexpr(vScale))) ); return True; default: @@ -18522,10 +18546,10 @@ UChar* code = (UChar*)(guest_code + delta); /* Spot the 16-byte preamble: 32-bit mode: - 54001800 rlwinm 0,0,3,0,0 - 54006800 rlwinm 0,0,13,0,0 - 5400E800 rlwinm 0,0,29,0,0 - 54009800 rlwinm 0,0,19,0,0 + 5400183E rlwinm 0,0,3,0,31 + 5400683E rlwinm 0,0,13,0,31 + 5400E83E rlwinm 0,0,29,0,31 + 5400983E rlwinm 0,0,19,0,31 64-bit mode: 78001800 rotldi 0,0,3 78006800 rotldi 0,0,13 @@ -18532,10 +18556,10 @@ 7800E802 rotldi 0,0,61 78009802 rotldi 0,0,51 */ - UInt word1 = mode64 ? 0x78001800 : 0x54001800; - UInt word2 = mode64 ? 0x78006800 : 0x54006800; - UInt word3 = mode64 ? 0x7800E802 : 0x5400E800; - UInt word4 = mode64 ? 0x78009802 : 0x54009800; + UInt word1 = mode64 ? 0x78001800 : 0x5400183E; + UInt word2 = mode64 ? 0x78006800 : 0x5400683E; + UInt word3 = mode64 ? 0x7800E802 : 0x5400E83E; + UInt word4 = mode64 ? 0x78009802 : 0x5400983E; if (getUIntBigendianly(code+ 0) == word1 && getUIntBigendianly(code+ 4) == word2 && getUIntBigendianly(code+ 8) == word3 && @@ -18593,12 +18617,12 @@ // be redone. For ease of handling, we simply invalidate all the // time. - stmt(IRStmt_Put(OFFB_TISTART, mkSzImm(ty, guest_CIA_curr_instr))); - stmt(IRStmt_Put(OFFB_TILEN, mkSzImm(ty, 20))); + stmt(IRStmt_Put(OFFB_CMSTART, mkSzImm(ty, guest_CIA_curr_instr))); + stmt(IRStmt_Put(OFFB_CMLEN, mkSzImm(ty, 20))); putGST( PPC_GST_CIA, mkSzImm( ty, guest_CIA_bbstart + delta )); dres.whatNext = Dis_StopHere; - dres.jk_StopHere = Ijk_TInval; + dres.jk_StopHere = Ijk_InvalICache; goto decode_success; } /* We don't know what it is. 
Set opc1/opc2 so decode_failure Index: priv/guest_s390_helpers.c =================================================================== --- priv/guest_s390_helpers.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_s390_helpers.c (.../trunk) (revision 2863) @@ -128,8 +128,8 @@ /*------------------------------------------------------------*/ state->guest_NRADDR = 0; - state->guest_TISTART = 0; - state->guest_TILEN = 0; + state->guest_CMSTART = 0; + state->guest_CMLEN = 0; state->guest_IP_AT_SYSCALL = 0; state->guest_EMNOTE = EmNote_NONE; state->host_EvC_COUNTER = 0; @@ -225,8 +225,8 @@ /* 0 */ ALWAYSDEFD(guest_CC_OP), /* generic */ /* 1 */ ALWAYSDEFD(guest_CC_NDEP), /* generic */ /* 2 */ ALWAYSDEFD(guest_EMNOTE), /* generic */ - /* 3 */ ALWAYSDEFD(guest_TISTART), /* generic */ - /* 4 */ ALWAYSDEFD(guest_TILEN), /* generic */ + /* 3 */ ALWAYSDEFD(guest_CMSTART), /* generic */ + /* 4 */ ALWAYSDEFD(guest_CMLEN), /* generic */ /* 5 */ ALWAYSDEFD(guest_IP_AT_SYSCALL), /* generic */ /* 6 */ ALWAYSDEFD(guest_IA), /* control reg */ /* 7 */ ALWAYSDEFD(guest_fpc), /* control reg */ Index: priv/guest_s390_toIR.c =================================================================== --- priv/guest_s390_toIR.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_s390_toIR.c (.../trunk) (revision 2863) @@ -417,7 +417,8 @@ { vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1); - stmt(IRStmt_Exit(condition, Ijk_TInval, IRConst_U64(guest_IA_curr_instr), + stmt(IRStmt_Exit(condition, Ijk_InvalICache, + IRConst_U64(guest_IA_curr_instr), S390X_GUEST_OFFSET(guest_IA))); } @@ -7606,7 +7607,7 @@ put_gpr_dw0(r1, binop(Iop_And64, mkexpr(op2), mkU64(mask))); } assign(result, get_gpr_dw0(r1)); - s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, op2); + s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, result); return "risbg"; } @@ -10862,9 +10863,9 @@ stmt(IRStmt_Dirty(d)); /* and restart */ - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMSTART), mkU64(guest_IA_curr_instr))); - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4))); + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMLEN), mkU64(4))); restart_if(mkexpr(cond)); ss.bytes = last_execute_target; @@ -10893,15 +10894,15 @@ mkIRExprVec_1(load(Ity_I64, mkexpr(addr2)))); stmt(IRStmt_Dirty(d)); /* and restart */ - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMSTART), mkU64(guest_IA_curr_instr))); - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4))); + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMLEN), mkU64(4))); restart_if(IRExpr_Const(IRConst_U1(True))); /* we know that this will be invalidated */ put_IA(mkaddr_expr(guest_IA_next_instr)); dis_res->whatNext = Dis_StopHere; - dis_res->jk_StopHere = Ijk_TInval; + dis_res->jk_StopHere = Ijk_InvalICache; break; } @@ -10967,8 +10968,8 @@ stmt(IRStmt_Dirty(d)); /* and restart */ - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), mkU64(guest_IA_curr_instr))); - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4))); + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMSTART), mkU64(guest_IA_curr_instr))); + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMLEN), mkU64(4))); restart_if(mkexpr(cond)); /* Now comes the actual translation */ @@ -16362,9 +16363,9 @@ injecting here can change. In which case the translation has to be redone. For ease of handling, we simply invalidate all the time. 
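Throughout these frontends the old TISTART/TILEN pair becomes CMSTART/CMLEN: a guest address range whose cached translations must be thrown away, signalled by exiting the block with Ijk_InvalICache. Purely as a conceptual model of what the dispatcher later does with such a range, here is a self-contained C sketch; the table layout and function names are invented for illustration and are not Valgrind's actual translation cache:

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

typedef struct { uint64_t start; uint64_t len; bool valid; } TransEntry;

/* Discard every cached translation overlapping [cmstart, cmstart+cmlen). */
static void invalidate_range(TransEntry* tab, int n,
                             uint64_t cmstart, uint64_t cmlen)
{
    for (int i = 0; i < n; i++) {
        uint64_t a0 = tab[i].start, a1 = tab[i].start + tab[i].len;
        if (tab[i].valid && a0 < cmstart + cmlen && cmstart < a1)
            tab[i].valid = false;
    }
}

int main(void)
{
    TransEntry tab[2] = { { 0x1000, 0x20, true }, { 0x2000, 0x20, true } };
    invalidate_range(tab, 2, 0x1010, 4);     /* e.g. CMSTART=0x1010, CMLEN=4 */
    printf("%d %d\n", (int)tab[0].valid, (int)tab[1].valid);  /* prints: 0 1 */
    return 0;
}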
*/ - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMSTART), mkU64(guest_IA_curr_instr))); - stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), + stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_CMLEN), mkU64(guest_IA_next_instr - guest_IA_curr_instr))); vassert(guest_IA_next_instr - guest_IA_curr_instr == S390_SPECIAL_OP_PREAMBLE_SIZE + S390_SPECIAL_OP_SIZE); @@ -16371,7 +16372,7 @@ put_IA(mkaddr_expr(guest_IA_next_instr)); dis_res->whatNext = Dis_StopHere; - dis_res->jk_StopHere = Ijk_TInval; + dis_res->jk_StopHere = Ijk_InvalICache; } else { /* We don't know what it is. */ return S390_DECODE_UNKNOWN_SPECIAL_INSN; Index: priv/guest_x86_helpers.c =================================================================== --- priv/guest_x86_helpers.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_x86_helpers.c (.../trunk) (revision 2863) @@ -2757,18 +2757,14 @@ vex_state->guest_EMNOTE = EmNote_NONE; /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */ - vex_state->guest_TISTART = 0; - vex_state->guest_TILEN = 0; + vex_state->guest_CMSTART = 0; + vex_state->guest_CMLEN = 0; vex_state->guest_NRADDR = 0; vex_state->guest_SC_CLASS = 0; vex_state->guest_IP_AT_SYSCALL = 0; - Int i; - for (i = 0; i < sizeof(vex_state->padding) - / sizeof(vex_state->padding[0]); i++) { - vex_state->padding[i] = 0; - } + vex_state->padding1 = 0; } @@ -2866,8 +2862,8 @@ /* 17 */ ALWAYSDEFD(guest_GDT), /* 18 */ ALWAYSDEFD(guest_EMNOTE), /* 19 */ ALWAYSDEFD(guest_SSEROUND), - /* 20 */ ALWAYSDEFD(guest_TISTART), - /* 21 */ ALWAYSDEFD(guest_TILEN), + /* 20 */ ALWAYSDEFD(guest_CMSTART), + /* 21 */ ALWAYSDEFD(guest_CMLEN), /* 22 */ ALWAYSDEFD(guest_SC_CLASS), /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) } Index: priv/guest_x86_toIR.c =================================================================== --- priv/guest_x86_toIR.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/guest_x86_toIR.c (.../trunk) (revision 2863) @@ -54,10 +54,6 @@ for float-to-float rounding. For all other operations, round-to-nearest is used, regardless. - * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the - simulation claims the argument is in-range (-2^63 <= arg <= 2^63) - even when it isn't. - * some of the FCOM cases could do with testing -- not convinced that the args are the right way round. @@ -279,8 +275,8 @@ #define OFFB_EMNOTE offsetof(VexGuestX86State,guest_EMNOTE) -#define OFFB_TISTART offsetof(VexGuestX86State,guest_TISTART) -#define OFFB_TILEN offsetof(VexGuestX86State,guest_TILEN) +#define OFFB_CMSTART offsetof(VexGuestX86State,guest_CMSTART) +#define OFFB_CMLEN offsetof(VexGuestX86State,guest_CMLEN) #define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR) #define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL) @@ -3603,6 +3599,42 @@ } +/* Given i, and some expression e, and a condition cond, generate IR + which has the same effect as put_ST(i,e) when cond is true and has + no effect when cond is false. Given the lack of proper + if-then-else in the IR, this is pretty tricky. 
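The update rule that maybe_put_ST (below) encodes with IRExpr_ITE is easier to see written out in plain C: the tag becomes FULL only if cond holds, and the value written is either a QNaN (the slot was already full) or the intended value; otherwise the slot is untouched. This is a behavioural model only, assuming an 8-bit tag where non-zero means FULL, not the IR itself:

#include <stdint.h>
#include <math.h>
#include <stdio.h>

typedef struct { uint8_t tag; double val; } STSlot;  /* tag: 0 = EMPTY, 1 = FULL */

/* Same effect as put_ST(i, value) when cond is true, no effect otherwise. */
static void maybe_put_ST_model(STSlot* st, int cond, double value)
{
    uint8_t new_tag = cond ? 1 : st->tag;
    double  new_val = cond ? (st->tag != 0 ? nan("")  /* overwriting a full slot */
                                           : value)
                           : st->val;
    st->val = new_val;
    st->tag = new_tag;
}

int main(void)
{
    STSlot s = { 0, 0.0 };
    maybe_put_ST_model(&s, 1, 1.0);   /* empty slot, cond true: stores 1.0    */
    printf("tag=%d val=%g\n", (int)s.tag, s.val);
    maybe_put_ST_model(&s, 1, 2.0);   /* slot now full: stores a QNaN instead */
    printf("tag=%d val=%g\n", (int)s.tag, s.val);
    return 0;
}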
+*/ + +static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) +{ + // new_tag = if cond then FULL else old_tag + // new_val = if cond then (if old_tag==FULL then NaN else val) + // else old_val + + IRTemp old_tag = newTemp(Ity_I8); + assign(old_tag, get_ST_TAG(i)); + IRTemp new_tag = newTemp(Ity_I8); + assign(new_tag, + IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); + + IRTemp old_val = newTemp(Ity_F64); + assign(old_val, get_ST_UNCHECKED(i)); + IRTemp new_val = newTemp(Ity_F64); + assign(new_val, + IRExpr_ITE(mkexpr(cond), + IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), + /* non-0 means full */ + mkQNaN64(), + /* 0 means empty */ + value), + mkexpr(old_val))); + + put_ST_UNCHECKED(i, mkexpr(new_val)); + // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So + // now set it to new_tag instead. + put_ST_TAG(i, mkexpr(new_tag)); +} + /* Adjust FTOP downwards by one register. */ static void fp_push ( void ) @@ -3610,6 +3642,14 @@ put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); } +/* Adjust FTOP downwards by one register when COND is 1:I1. Else + don't change it. */ + +static void maybe_fp_push ( IRTemp cond ) +{ + put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); +} + /* Adjust FTOP upwards by one register, and mark the vacated register as empty. */ @@ -3619,12 +3659,49 @@ put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); } -/* Clear the C2 bit of the FPU status register, for - sin/cos/tan/sincos. */ +/* Set the C2 bit of the FPU status register to e[0]. Assumes that + e[31:1] == 0. +*/ +static void set_C2 ( IRExpr* e ) +{ + IRExpr* cleared = binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)); + put_C3210( binop(Iop_Or32, + cleared, + binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) ); +} -static void clear_C2 ( void ) +/* Generate code to check that abs(d64) < 2^63 and is finite. This is + used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The + test is simple, but the derivation of it is not so simple. + + The exponent field for an IEEE754 double is 11 bits. That means it + can take values 0 through 0x7FF. If the exponent has value 0x7FF, + the number is either a NaN or an Infinity and so is not finite. + Furthermore, a finite value of exactly 2^63 is the smallest value + that has exponent value 0x43E. Hence, what we need to do is + extract the exponent, ignoring the sign bit and mantissa, and check + it is < 0x43E, or <= 0x43D. + + To make this easily applicable to 32- and 64-bit targets, a + roundabout approach is used. First the number is converted to I64, + then the top 32 bits are taken. Shifting them right by 20 bits + places the sign bit and exponent in the bottom 12 bits. Anding + with 0x7FF gets rid of the sign bit, leaving just the exponent + available for comparison. 
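The derivation above reduces the in-range test to a comparison of the biased exponent against 0x43D. That claim is easy to check on the host with a few lines of standalone C (not part of the patch), comparing the exponent test against the direct floating-point formulation:

#include <stdint.h>
#include <string.h>
#include <math.h>
#include <stdio.h>

/* Host-side version of the same test: take the high 32 bits of the
   IEEE754 image, shift the sign and exponent into the low 12 bits,
   mask off the sign, and compare the 11-bit exponent against 0x43D. */
static int in_range_and_finite(double d)
{
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);
    uint32_t exponent = ((uint32_t)(bits >> 32) >> 20) & 0x7FF;
    return exponent <= 0x43D;
}

int main(void)
{
    const double two63 = 9223372036854775808.0;     /* 2^63 */
    double tests[] = { 0.0, -1.5, two63 - 1024.0,   /* largest double < 2^63 */
                       two63, -two63, INFINITY, NAN };
    for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++) {
        double d = tests[i];
        int ref = isfinite(d) && fabs(d) < two63;
        printf("%g: exponent test=%d reference=%d\n",
               d, in_range_and_finite(d), ref);
    }
    return 0;
}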
+*/ +static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) { - put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) ); + IRTemp i64 = newTemp(Ity_I64); + assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); + IRTemp exponent = newTemp(Ity_I32); + assign(exponent, + binop(Iop_And32, + binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), + mkU32(0x7FF))); + IRTemp in_range_and_finite = newTemp(Ity_I1); + assign(in_range_and_finite, + binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); + return in_range_and_finite; } /* Invent a plausible-looking FPU status word value: @@ -4245,16 +4322,31 @@ fp_pop(); break; - case 0xF2: /* FPTAN */ - DIP("ftan\n"); - put_ST_UNCHECKED(0, - binop(Iop_TanF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - fp_push(); - put_ST(0, IRExpr_Const(IRConst_F64(1.0))); - clear_C2(); /* HACK */ + case 0xF2: { /* FPTAN */ + DIP("fptan\n"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_TanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push 1.0 on the stack, if the arg is + in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, + IRExpr_Const(IRConst_F64(1.0))); + set_C2( binop(Iop_Xor32, + unop(Iop_1Uto32, mkexpr(argOK)), + mkU32(1)) ); break; + } case 0xF3: /* FPATAN */ DIP("fpatan\n"); @@ -4368,19 +4460,30 @@ break; case 0xFB: { /* FSINCOS */ - IRTemp a1 = newTemp(Ity_F64); - assign( a1, get_ST(0) ); DIP("fsincos\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - fp_push(); - put_ST(0, + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push the cos value on the stack, if + the arg is in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, binop(Iop_CosF64, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - clear_C2(); /* HACK */ + mkexpr(argD))); + set_C2( binop(Iop_Xor32, + unop(Iop_1Uto32, mkexpr(argOK)), + mkU32(1)) ); break; } @@ -4399,24 +4502,29 @@ get_ST(1))); break; - case 0xFE: /* FSIN */ - DIP("fsin\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - clear_C2(); /* HACK */ + case 0xFE: /* FSIN */ + case 0xFF: { /* FCOS */ + Bool isSIN = modrm == 0xFE; + DIP("%s\n", isSIN ? "fsin" : "fcos"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(isSIN ? 
Iop_SinF64 : Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + set_C2( binop(Iop_Xor32, + unop(Iop_1Uto32, mkexpr(argOK)), + mkU32(1)) ); break; + } - case 0xFF: /* FCOS */ - DIP("fcos\n"); - put_ST_UNCHECKED(0, - binop(Iop_CosF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - clear_C2(); /* HACK */ - break; - default: goto decode_fail; } @@ -6856,6 +6964,27 @@ /*--- SSE/SSE2/SSE3 helpers ---*/ /*------------------------------------------------------------*/ +/* Indicates whether the op requires a rounding-mode argument. Note + that this covers only vector floating point arithmetic ops, and + omits the scalar ones that need rounding modes. Note also that + inconsistencies here will get picked up later by the IR sanity + checker, so this isn't correctness-critical. */ +static Bool requiresRMode ( IROp op ) +{ + switch (op) { + /* 128 bit ops */ + case Iop_Add32Fx4: case Iop_Sub32Fx4: + case Iop_Mul32Fx4: case Iop_Div32Fx4: + case Iop_Add64Fx2: case Iop_Sub64Fx2: + case Iop_Mul64Fx2: case Iop_Div64Fx2: + return True; + default: + break; + } + return False; +} + + /* Worker function; do not call directly. Handles full width G = G `op` E and G = (not G) `op` E. */ @@ -6874,9 +7003,15 @@ = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm))) : getXMMReg(gregOfRM(rm)); if (epartIsReg(rm)) { - putXMMReg( gregOfRM(rm), - binop(op, gpart, - getXMMReg(eregOfRM(rm))) ); + putXMMReg( + gregOfRM(rm), + requiresRMode(op) + ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + gpart, + getXMMReg(eregOfRM(rm))) + : binop(op, gpart, + getXMMReg(eregOfRM(rm))) + ); DIP("%s %s,%s\n", opname, nameXMMReg(eregOfRM(rm)), nameXMMReg(gregOfRM(rm)) ); @@ -6883,9 +7018,15 @@ return delta+1; } else { addr = disAMode ( &alen, sorb, delta, dis_buf ); - putXMMReg( gregOfRM(rm), - binop(op, gpart, - loadLE(Ity_V128, mkexpr(addr))) ); + putXMMReg( + gregOfRM(rm), + requiresRMode(op) + ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + gpart, + loadLE(Ity_V128, mkexpr(addr))) + : binop(op, gpart, + loadLE(Ity_V128, mkexpr(addr))) + ); DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(gregOfRM(rm)) ); @@ -8026,14 +8167,14 @@ // injecting here can change. In which case the translation has to // be redone. For ease of handling, we simply invalidate all the // time. - stmt(IRStmt_Put(OFFB_TISTART, mkU32(guest_EIP_curr_instr))); - stmt(IRStmt_Put(OFFB_TILEN, mkU32(14))); + stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_EIP_curr_instr))); + stmt(IRStmt_Put(OFFB_CMLEN, mkU32(14))); delta += 14; stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) ); dres.whatNext = Dis_StopHere; - dres.jk_StopHere = Ijk_TInval; + dres.jk_StopHere = Ijk_InvalICache; goto decode_success; } /* We don't know what it is. */ @@ -11613,14 +11754,14 @@ /* Round addr down to the start of the containing block. 
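The clflush case that follows rounds the flush address down to its cache-line boundary with addr & ~(lineszB-1) and reports the line size as CMLEN. A tiny worked example of that mask (standalone C, line size chosen arbitrarily for illustration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t lineszB = 64;                      /* illustrative line size */
    uint32_t addr    = 0x0040103Au;
    uint32_t start   = addr & ~(lineszB - 1);   /* CMSTART */
    printf("CMSTART=0x%08x CMLEN=%u\n", start, lineszB);   /* 0x00401000, 64 */
    return 0;
}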
*/ stmt( IRStmt_Put( - OFFB_TISTART, + OFFB_CMSTART, binop( Iop_And32, mkexpr(addr), mkU32( ~(lineszB-1) ))) ); - stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) ); + stmt( IRStmt_Put(OFFB_CMLEN, mkU32(lineszB) ) ); - jmp_lit(&dres, Ijk_TInval, (Addr32)(guest_EIP_bbstart+delta)); + jmp_lit(&dres, Ijk_InvalICache, (Addr32)(guest_EIP_bbstart+delta)); DIP("clflush %s\n", dis_buf); goto decode_success; @@ -11712,6 +11853,7 @@ IRTemp gV = newTemp(Ity_V128); IRTemp addV = newTemp(Ity_V128); IRTemp subV = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; modrm = insn[3]; @@ -11730,8 +11872,9 @@ assign( gV, getXMMReg(gregOfRM(modrm)) ); - assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) ); - assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); + assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); breakup128to32s( addV, &a3, &a2, &a1, &a0 ); breakup128to32s( subV, &s3, &s2, &s1, &s0 ); @@ -11748,6 +11891,7 @@ IRTemp subV = newTemp(Ity_V128); IRTemp a1 = newTemp(Ity_I64); IRTemp s0 = newTemp(Ity_I64); + IRTemp rm = newTemp(Ity_I32); modrm = insn[2]; if (epartIsReg(modrm)) { @@ -11765,8 +11909,9 @@ assign( gV, getXMMReg(gregOfRM(modrm)) ); - assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) ); - assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ + assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); + assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); assign( s0, unop(Iop_V128to64, mkexpr(subV) )); @@ -11785,6 +11930,7 @@ IRTemp gV = newTemp(Ity_V128); IRTemp leftV = newTemp(Ity_V128); IRTemp rightV = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); Bool isAdd = insn[2] == 0x7C; const HChar* str = isAdd ? "add" : "sub"; e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; @@ -11811,9 +11957,10 @@ assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ putXMMReg( gregOfRM(modrm), - binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, - mkexpr(leftV), mkexpr(rightV) ) ); + triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, + mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); goto decode_success; } @@ -11828,6 +11975,7 @@ IRTemp gV = newTemp(Ity_V128); IRTemp leftV = newTemp(Ity_V128); IRTemp rightV = newTemp(Ity_V128); + IRTemp rm = newTemp(Ity_I32); Bool isAdd = insn[1] == 0x7C; const HChar* str = isAdd ? "add" : "sub"; @@ -11855,9 +12003,10 @@ assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); + assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ putXMMReg( gregOfRM(modrm), - binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, - mkexpr(leftV), mkexpr(rightV) ) ); + triop(isAdd ? 
Iop_Add64Fx2 : Iop_Sub64Fx2, + mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); goto decode_success; } @@ -15181,6 +15330,14 @@ break; } + case 0x05: /* AMD's syscall */ + stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, + mkU32(guest_EIP_curr_instr) ) ); + jmp_lit(&dres, Ijk_Sys_syscall, ((Addr32)guest_EIP_bbstart)+delta); + vassert(dres.whatNext == Dis_StopHere); + DIP("syscall\n"); + break; + /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ default: Index: priv/host_amd64_defs.c =================================================================== --- priv/host_amd64_defs.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_amd64_defs.c (.../trunk) (revision 2863) @@ -2865,7 +2865,7 @@ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; @@ -3069,7 +3069,6 @@ case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break; - case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break; case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break; @@ -3078,7 +3077,24 @@ case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break; case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break; case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break; - default: goto bad; + case Afp_TAN: + /* fptan pushes 1.0 on the FP stack, except when the + argument is out of range. Hence we have to do the + instruction, then inspect C2 to see if there is an out + of range condition. If there is, we skip the fincstp + that is used by the in-range case to get rid of this + extra 1.0 value. */ + *p++ = 0xD9; *p++ = 0xF2; // fptan + *p++ = 0x50; // pushq %rax + *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax + *p++ = 0x66; *p++ = 0xA9; + *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax + *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp + *p++ = 0xD9; *p++ = 0xF7; // fincstp + *p++ = 0x58; // after_fincstp: popq %rax + break; + default: + goto bad; } goto done; Index: priv/host_amd64_isel.c =================================================================== --- priv/host_amd64_isel.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_amd64_isel.c (.../trunk) (revision 2863) @@ -3031,11 +3031,12 @@ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); /* XXXROUNDINGFIXME */ /* set roundingmode here */ + /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition + codes. I don't think that matters, since this insn + selector never generates such an instruction intervening + between an flag-setting instruction and a flag-using + instruction. 
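The Afp_TAN emission above keys off bit 10 of the x87 status word, which is condition flag C2: fptan sets it, and pushes nothing, when the operand is out of range, and that is exactly what the emitted testw $0x400,%ax checks before deciding whether a compensating fincstp is needed to get rid of the extra 1.0. A small standalone C model of that status-word test; the sample status-word values are arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

#define FSW_C2 0x0400u  /* bit 10 of the x87 status word (cf. testw $0x400,%ax) */

/* After fptan: if C2 is clear the instruction pushed an extra 1.0 which the
   in-range path drops with fincstp; if C2 is set nothing was pushed. */
static bool fptan_pushed_one(uint16_t fsw)
{
    return (fsw & FSW_C2) == 0;
}

int main(void)
{
    printf("in range:     pushed=%d\n", fptan_pushed_one(0x3800));  /* C2 clear */
    printf("out of range: pushed=%d\n", fptan_pushed_one(0x3C00));  /* C2 set   */
    return 0;
}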
*/ addInstr(env, AMD64Instr_A87FpOp(fpop)); - if (e->Iex.Binop.op==Iop_TanF64) { - /* get rid of the extra 1.0 that fptan pushes */ - addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); - } addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); return dst; @@ -3355,12 +3356,8 @@ case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4; case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4; case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4; - case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4; - case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4; case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4; case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4; - case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4; - case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4; do_32Fx4: { HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); @@ -3375,12 +3372,8 @@ case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2; case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2; case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2; - case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2; - case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2; case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2; case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2; - case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2; - case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2; do_64Fx2: { HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); @@ -3660,6 +3653,47 @@ } /* switch (e->Iex.Binop.op) */ } /* if (e->tag == Iex_Binop) */ + if (e->tag == Iex_Triop) { + IRTriop *triop = e->Iex.Triop.details; + switch (triop->op) { + + case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm; + case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm; + case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm; + case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm; + do_64Fx2_w_rm: + { + HReg argL = iselVecExpr(env, triop->arg2); + HReg argR = iselVecExpr(env, triop->arg3); + HReg dst = newVRegV(env); + addInstr(env, mk_vMOVsd_RR(argL, dst)); + /* XXXROUNDINGFIXME */ + /* set roundingmode here */ + addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst)); + return dst; + } + + case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm; + case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm; + case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm; + case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm; + do_32Fx4_w_rm: + { + HReg argL = iselVecExpr(env, triop->arg2); + HReg argR = iselVecExpr(env, triop->arg3); + HReg dst = newVRegV(env); + addInstr(env, mk_vMOVsd_RR(argL, dst)); + /* XXXROUNDINGFIXME */ + /* set roundingmode here */ + addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst)); + return dst; + } + + default: + break; + } /* switch (triop->op) */ + } /* if (e->tag == Iex_Triop) */ + if (e->tag == Iex_ITE) { // VFD HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue); HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse); @@ -3851,10 +3885,6 @@ if (e->tag == Iex_Binop) { switch (e->Iex.Binop.op) { - case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4; - case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4; - case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4; - case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4; case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4; case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4; do_64Fx4: @@ -3873,10 +3903,6 @@ return; } - case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8; - case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8; - case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8; - case Iop_Div32Fx8: op = Asse_DIVF; goto 
do_32Fx8; case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8; case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8; do_32Fx8: @@ -4145,6 +4171,60 @@ } /* switch (e->Iex.Binop.op) */ } /* if (e->tag == Iex_Binop) */ + if (e->tag == Iex_Triop) { + IRTriop *triop = e->Iex.Triop.details; + switch (triop->op) { + + case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm; + case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm; + case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm; + case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm; + do_64Fx4_w_rm: + { + HReg argLhi, argLlo, argRhi, argRlo; + iselDVecExpr(&argLhi, &argLlo, env, triop->arg2); + iselDVecExpr(&argRhi, &argRlo, env, triop->arg3); + HReg dstHi = newVRegV(env); + HReg dstLo = newVRegV(env); + addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); + addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); + /* XXXROUNDINGFIXME */ + /* set roundingmode here */ + addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi)); + addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo)); + *rHi = dstHi; + *rLo = dstLo; + return; + } + + case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm; + case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm; + case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm; + case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm; + do_32Fx8_w_rm: + { + HReg argLhi, argLlo, argRhi, argRlo; + iselDVecExpr(&argLhi, &argLlo, env, triop->arg2); + iselDVecExpr(&argRhi, &argRlo, env, triop->arg3); + HReg dstHi = newVRegV(env); + HReg dstLo = newVRegV(env); + addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); + addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); + /* XXXROUNDINGFIXME */ + /* set roundingmode here */ + addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi)); + addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo)); + *rHi = dstHi; + *rLo = dstLo; + return; + } + + default: + break; + } /* switch (triop->op) */ + } /* if (e->tag == Iex_Triop) */ + + if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) { HReg rsp = hregAMD64_RSP(); HReg vHi = newVRegV(env); @@ -4649,7 +4729,7 @@ case Ijk_SigSEGV: case Ijk_SigTRAP: case Ijk_Sys_syscall: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Yield: { HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); @@ -4744,7 +4824,7 @@ case Ijk_SigSEGV: case Ijk_SigTRAP: case Ijk_Sys_syscall: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Yield: { HReg r = iselIntExpr_R(env, next); AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); Index: priv/host_arm64_defs.c =================================================================== --- priv/host_arm64_defs.c (.../tags/VEX_3_9_0) (revision 0) +++ priv/host_arm64_defs.c (.../trunk) (revision 2863) @@ -0,0 +1,6609 @@ + +/*---------------------------------------------------------------*/ +/*--- begin host_arm64_defs.c ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2013-2013 OpenWorks + info@open-works.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex.h" +#include "libvex_trc_values.h" + +#include "main_util.h" +#include "host_generic_regs.h" +#include "host_arm64_defs.h" + +//ZZ UInt arm_hwcaps = 0; + + +/* --------- Registers. --------- */ + +/* The usual HReg abstraction. We use the following classes only: + X regs (64 bit int) + D regs (64 bit float, also used for 32 bit float) + Q regs (128 bit vector) +*/ + +void ppHRegARM64 ( HReg reg ) { + Int r; + /* Be generic for all virtual regs. */ + if (hregIsVirtual(reg)) { + ppHReg(reg); + return; + } + /* But specific for real regs. */ + switch (hregClass(reg)) { + case HRcInt64: + r = hregNumber(reg); + vassert(r >= 0 && r < 31); + vex_printf("x%d", r); + return; + case HRcFlt64: + r = hregNumber(reg); + vassert(r >= 0 && r < 32); + vex_printf("d%d", r); + return; + case HRcVec128: + r = hregNumber(reg); + vassert(r >= 0 && r < 32); + vex_printf("q%d", r); + return; + default: + vpanic("ppHRegARM64"); + } +} + +static void ppHRegARM64asSreg ( HReg reg ) { + ppHRegARM64(reg); + vex_printf("(S-reg)"); +} + +HReg hregARM64_X0 ( void ) { return mkHReg(0, HRcInt64, False); } +HReg hregARM64_X1 ( void ) { return mkHReg(1, HRcInt64, False); } +HReg hregARM64_X2 ( void ) { return mkHReg(2, HRcInt64, False); } +HReg hregARM64_X3 ( void ) { return mkHReg(3, HRcInt64, False); } +HReg hregARM64_X4 ( void ) { return mkHReg(4, HRcInt64, False); } +HReg hregARM64_X5 ( void ) { return mkHReg(5, HRcInt64, False); } +HReg hregARM64_X6 ( void ) { return mkHReg(6, HRcInt64, False); } +HReg hregARM64_X7 ( void ) { return mkHReg(7, HRcInt64, False); } +//ZZ HReg hregARM_R8 ( void ) { return mkHReg(8, HRcInt32, False); } +HReg hregARM64_X9 ( void ) { return mkHReg(9, HRcInt64, False); } +HReg hregARM64_X10 ( void ) { return mkHReg(10, HRcInt64, False); } +HReg hregARM64_X11 ( void ) { return mkHReg(11, HRcInt64, False); } +HReg hregARM64_X12 ( void ) { return mkHReg(12, HRcInt64, False); } +HReg hregARM64_X13 ( void ) { return mkHReg(13, HRcInt64, False); } +HReg hregARM64_X14 ( void ) { return mkHReg(14, HRcInt64, False); } +HReg hregARM64_X15 ( void ) { return mkHReg(15, HRcInt64, False); } +HReg hregARM64_X21 ( void ) { return mkHReg(21, HRcInt64, False); } +HReg hregARM64_X22 ( void ) { return mkHReg(22, HRcInt64, False); } +HReg hregARM64_X23 ( void ) { return mkHReg(23, HRcInt64, False); } +HReg hregARM64_X24 ( void ) { return mkHReg(24, HRcInt64, False); } +HReg hregARM64_X25 ( void ) { return mkHReg(25, HRcInt64, False); } +HReg hregARM64_X26 ( void ) { return mkHReg(26, HRcInt64, False); } +HReg hregARM64_X27 ( void ) { return mkHReg(27, HRcInt64, False); } +HReg hregARM64_X28 ( void ) { return mkHReg(28, HRcInt64, False); } + +// Should really use D8 .. 
D15 for class F64, since they are callee +// save +HReg hregARM64_D8 ( void ) { return mkHReg(8, HRcFlt64, False); } +HReg hregARM64_D9 ( void ) { return mkHReg(9, HRcFlt64, False); } +HReg hregARM64_D10 ( void ) { return mkHReg(10, HRcFlt64, False); } +HReg hregARM64_D11 ( void ) { return mkHReg(11, HRcFlt64, False); } +HReg hregARM64_D12 ( void ) { return mkHReg(12, HRcFlt64, False); } +HReg hregARM64_D13 ( void ) { return mkHReg(13, HRcFlt64, False); } +//ZZ HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); } +//ZZ HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); } +//ZZ HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); } +//ZZ HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); } +//ZZ HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); } +HReg hregARM64_Q16 ( void ) { return mkHReg(16, HRcVec128, False); } +HReg hregARM64_Q17 ( void ) { return mkHReg(17, HRcVec128, False); } +HReg hregARM64_Q18 ( void ) { return mkHReg(18, HRcVec128, False); } +//ZZ HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); } +//ZZ HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); } +//ZZ HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); } +//ZZ HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); } +//ZZ HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); } + +void getAllocableRegs_ARM64 ( Int* nregs, HReg** arr ) +{ + Int i = 0; + *nregs = 24; + *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); + + // callee saves ones (22 to 28) are listed first, since we prefer + // them if they're available + (*arr)[i++] = hregARM64_X22(); + (*arr)[i++] = hregARM64_X23(); + (*arr)[i++] = hregARM64_X24(); + (*arr)[i++] = hregARM64_X25(); + (*arr)[i++] = hregARM64_X26(); + (*arr)[i++] = hregARM64_X27(); + (*arr)[i++] = hregARM64_X28(); + + (*arr)[i++] = hregARM64_X0(); + (*arr)[i++] = hregARM64_X1(); + (*arr)[i++] = hregARM64_X2(); + (*arr)[i++] = hregARM64_X3(); + (*arr)[i++] = hregARM64_X4(); + (*arr)[i++] = hregARM64_X5(); + (*arr)[i++] = hregARM64_X6(); + (*arr)[i++] = hregARM64_X7(); + // X8 .. who knows. + // X9 is a chaining/spill temporary, not available to regalloc. + + // Do we really need all these? + //(*arr)[i++] = hregARM64_X10(); + //(*arr)[i++] = hregARM64_X11(); + //(*arr)[i++] = hregARM64_X12(); + //(*arr)[i++] = hregARM64_X13(); + //(*arr)[i++] = hregARM64_X14(); + //(*arr)[i++] = hregARM64_X15(); + // X21 is the guest state pointer, not available to regalloc. + + // vector regs. Unfortunately not callee-saved. 
+ (*arr)[i++] = hregARM64_Q16(); + (*arr)[i++] = hregARM64_Q17(); + (*arr)[i++] = hregARM64_Q18(); + + // F64 regs, all of which are callee-saved + (*arr)[i++] = hregARM64_D8(); + (*arr)[i++] = hregARM64_D9(); + (*arr)[i++] = hregARM64_D10(); + (*arr)[i++] = hregARM64_D11(); + (*arr)[i++] = hregARM64_D12(); + (*arr)[i++] = hregARM64_D13(); + + // unavail: x21 as GSP + // x9 is used as a spill/reload/chaining/call temporary + // x8 is unassigned + // x30 as LR + // x31 because dealing with the SP-vs-ZR overloading is too + // confusing, and we don't need to do so, so let's just avoid + // the problem + // + // Currently, we have 15 allocatable integer registers: + // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28 + // + // Hence for the allocatable integer registers we have: + // + // callee-saved: 22 23 24 25 26 27 28 + // caller-saved: 0 1 2 3 4 5 6 7 + // + // If the set of available registers changes or if the e/r status + // changes, be sure to re-check/sync the definition of + // getHRegUsage for ARMInstr_Call too. + vassert(i == *nregs); +} + + +/* --------- Condition codes, ARM64 encoding. --------- */ + +static const HChar* showARM64CondCode ( ARM64CondCode cond ) { + switch (cond) { + case ARM64cc_EQ: return "eq"; + case ARM64cc_NE: return "ne"; + case ARM64cc_CS: return "cs"; + case ARM64cc_CC: return "cc"; + case ARM64cc_MI: return "mi"; + case ARM64cc_PL: return "pl"; + case ARM64cc_VS: return "vs"; + case ARM64cc_VC: return "vc"; + case ARM64cc_HI: return "hi"; + case ARM64cc_LS: return "ls"; + case ARM64cc_GE: return "ge"; + case ARM64cc_LT: return "lt"; + case ARM64cc_GT: return "gt"; + case ARM64cc_LE: return "le"; + case ARM64cc_AL: return "al"; // default + case ARM64cc_NV: return "nv"; + default: vpanic("showARM64CondCode"); + } +} + + +/* --------- Memory address expressions (amodes). 
--------- */ + +ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) { + ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode)); + am->tag = ARM64am_RI9; + am->ARM64am.RI9.reg = reg; + am->ARM64am.RI9.simm9 = simm9; + vassert(-256 <= simm9 && simm9 <= 255); + return am; +} + +ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) { + ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode)); + am->tag = ARM64am_RI12; + am->ARM64am.RI12.reg = reg; + am->ARM64am.RI12.uimm12 = uimm12; + am->ARM64am.RI12.szB = szB; + vassert(uimm12 >= 0 && uimm12 <= 4095); + switch (szB) { + case 1: case 2: case 4: case 8: break; + default: vassert(0); + } + return am; +} + +ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) { + ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode)); + am->tag = ARM64am_RR; + am->ARM64am.RR.base = base; + am->ARM64am.RR.index = index; + return am; +} + +static void ppARM64AMode ( ARM64AMode* am ) { + switch (am->tag) { + case ARM64am_RI9: + vex_printf("%d(", am->ARM64am.RI9.simm9); + ppHRegARM64(am->ARM64am.RI9.reg); + vex_printf(")"); + break; + case ARM64am_RI12: + vex_printf("%u(", (UInt)am->ARM64am.RI12.szB + * (UInt)am->ARM64am.RI12.uimm12); + ppHRegARM64(am->ARM64am.RI12.reg); + vex_printf(")"); + break; + case ARM64am_RR: + vex_printf("("); + ppHRegARM64(am->ARM64am.RR.base); + vex_printf(","); + ppHRegARM64(am->ARM64am.RR.index); + vex_printf(")"); + break; + default: + vassert(0); + } +} + +static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) { + switch (am->tag) { + case ARM64am_RI9: + addHRegUse(u, HRmRead, am->ARM64am.RI9.reg); + return; + case ARM64am_RI12: + addHRegUse(u, HRmRead, am->ARM64am.RI12.reg); + return; + case ARM64am_RR: + addHRegUse(u, HRmRead, am->ARM64am.RR.base); + addHRegUse(u, HRmRead, am->ARM64am.RR.index); + return; + default: + vpanic("addRegUsage_ARM64Amode"); + } +} + +static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) { + switch (am->tag) { + case ARM64am_RI9: + am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg); + return; + case ARM64am_RI12: + am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg); + return; + case ARM64am_RR: + am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base); + am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index); + return; + default: + vpanic("mapRegs_ARM64Amode"); + } +} + + +//ZZ /* --------- Mem AModes: Addressing Mode 2 --------- */ +//ZZ +//ZZ ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) { +//ZZ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2)); +//ZZ am->tag = ARMam2_RI; +//ZZ am->ARMam2.RI.reg = reg; +//ZZ am->ARMam2.RI.simm9 = simm9; +//ZZ vassert(-255 <= simm9 && simm9 <= 255); +//ZZ return am; +//ZZ } +//ZZ ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) { +//ZZ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2)); +//ZZ am->tag = ARMam2_RR; +//ZZ am->ARMam2.RR.base = base; +//ZZ am->ARMam2.RR.index = index; +//ZZ return am; +//ZZ } +//ZZ +//ZZ void ppARMAMode2 ( ARMAMode2* am ) { +//ZZ switch (am->tag) { +//ZZ case ARMam2_RI: +//ZZ vex_printf("%d(", am->ARMam2.RI.simm9); +//ZZ ppHRegARM(am->ARMam2.RI.reg); +//ZZ vex_printf(")"); +//ZZ break; +//ZZ case ARMam2_RR: +//ZZ vex_printf("("); +//ZZ ppHRegARM(am->ARMam2.RR.base); +//ZZ vex_printf(","); +//ZZ ppHRegARM(am->ARMam2.RR.index); +//ZZ vex_printf(")"); +//ZZ break; +//ZZ default: +//ZZ vassert(0); +//ZZ } +//ZZ } +//ZZ +//ZZ static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) { +//ZZ switch (am->tag) { +//ZZ case ARMam2_RI: +//ZZ addHRegUse(u, HRmRead, am->ARMam2.RI.reg); +//ZZ 
return; +//ZZ case ARMam2_RR: +//ZZ // addHRegUse(u, HRmRead, am->ARMam2.RR.base); +//ZZ // addHRegUse(u, HRmRead, am->ARMam2.RR.index); +//ZZ // return; +//ZZ default: +//ZZ vpanic("addRegUsage_ARMAmode2"); +//ZZ } +//ZZ } +//ZZ +//ZZ static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) { +//ZZ switch (am->tag) { +//ZZ case ARMam2_RI: +//ZZ am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg); +//ZZ return; +//ZZ case ARMam2_RR: +//ZZ //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base); +//ZZ //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index); +//ZZ //return; +//ZZ default: +//ZZ vpanic("mapRegs_ARMAmode2"); +//ZZ } +//ZZ } +//ZZ +//ZZ +//ZZ /* --------- Mem AModes: Addressing Mode VFP --------- */ +//ZZ +//ZZ ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) { +//ZZ ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV)); +//ZZ vassert(simm11 >= -1020 && simm11 <= 1020); +//ZZ vassert(0 == (simm11 & 3)); +//ZZ am->reg = reg; +//ZZ am->simm11 = simm11; +//ZZ return am; +//ZZ } +//ZZ +//ZZ void ppARMAModeV ( ARMAModeV* am ) { +//ZZ vex_printf("%d(", am->simm11); +//ZZ ppHRegARM(am->reg); +//ZZ vex_printf(")"); +//ZZ } +//ZZ +//ZZ static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) { +//ZZ addHRegUse(u, HRmRead, am->reg); +//ZZ } +//ZZ +//ZZ static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) { +//ZZ am->reg = lookupHRegRemap(m, am->reg); +//ZZ } +//ZZ +//ZZ +//ZZ /* --------- Mem AModes: Addressing Mode Neon ------- */ +//ZZ +//ZZ ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) { +//ZZ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN)); +//ZZ am->tag = ARMamN_RR; +//ZZ am->ARMamN.RR.rN = rN; +//ZZ am->ARMamN.RR.rM = rM; +//ZZ return am; +//ZZ } +//ZZ +//ZZ ARMAModeN *mkARMAModeN_R ( HReg rN ) { +//ZZ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN)); +//ZZ am->tag = ARMamN_R; +//ZZ am->ARMamN.R.rN = rN; +//ZZ return am; +//ZZ } +//ZZ +//ZZ static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) { +//ZZ if (am->tag == ARMamN_R) { +//ZZ addHRegUse(u, HRmRead, am->ARMamN.R.rN); +//ZZ } else { +//ZZ addHRegUse(u, HRmRead, am->ARMamN.RR.rN); +//ZZ addHRegUse(u, HRmRead, am->ARMamN.RR.rM); +//ZZ } +//ZZ } +//ZZ +//ZZ static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) { +//ZZ if (am->tag == ARMamN_R) { +//ZZ am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN); +//ZZ } else { +//ZZ am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN); +//ZZ am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM); +//ZZ } +//ZZ } +//ZZ +//ZZ void ppARMAModeN ( ARMAModeN* am ) { +//ZZ vex_printf("["); +//ZZ if (am->tag == ARMamN_R) { +//ZZ ppHRegARM(am->ARMamN.R.rN); +//ZZ } else { +//ZZ ppHRegARM(am->ARMamN.RR.rN); +//ZZ } +//ZZ vex_printf("]"); +//ZZ if (am->tag == ARMamN_RR) { +//ZZ vex_printf(", "); +//ZZ ppHRegARM(am->ARMamN.RR.rM); +//ZZ } +//ZZ } + + +/* --------- Reg or uimm12<<{0,12} operands --------- */ + +ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) { + ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA)); + riA->tag = ARM64riA_I12; + riA->ARM64riA.I12.imm12 = imm12; + riA->ARM64riA.I12.shift = shift; + vassert(imm12 < 4096); + vassert(shift == 0 || shift == 12); + return riA; +} +ARM64RIA* ARM64RIA_R ( HReg reg ) { + ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA)); + riA->tag = ARM64riA_R; + riA->ARM64riA.R.reg = reg; + return riA; +} + +static void ppARM64RIA ( ARM64RIA* riA ) { + switch (riA->tag) { + case ARM64riA_I12: + vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12 + << riA->ARM64riA.I12.shift)); + break; + case ARM64riA_R: + 
ppHRegARM64(riA->ARM64riA.R.reg); + break; + default: + vassert(0); + } +} + +static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) { + switch (riA->tag) { + case ARM64riA_I12: + return; + case ARM64riA_R: + addHRegUse(u, HRmRead, riA->ARM64riA.R.reg); + return; + default: + vpanic("addRegUsage_ARM64RIA"); + } +} + +static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) { + switch (riA->tag) { + case ARM64riA_I12: + return; + case ARM64riA_R: + riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg); + return; + default: + vpanic("mapRegs_ARM64RIA"); + } +} + + +/* --------- Reg or "bitfield" (logic immediate) operands --------- */ + +ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) { + ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL)); + riL->tag = ARM64riL_I13; + riL->ARM64riL.I13.bitN = bitN; + riL->ARM64riL.I13.immR = immR; + riL->ARM64riL.I13.immS = immS; + vassert(bitN < 2); + vassert(immR < 64); + vassert(immS < 64); + return riL; +} +ARM64RIL* ARM64RIL_R ( HReg reg ) { + ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL)); + riL->tag = ARM64riL_R; + riL->ARM64riL.R.reg = reg; + return riL; +} + +static void ppARM64RIL ( ARM64RIL* riL ) { + switch (riL->tag) { + case ARM64riL_I13: + vex_printf("#nrs(%u,%u,%u)", + (UInt)riL->ARM64riL.I13.bitN, + (UInt)riL->ARM64riL.I13.immR, + (UInt)riL->ARM64riL.I13.immS); + break; + case ARM64riL_R: + ppHRegARM64(riL->ARM64riL.R.reg); + break; + default: + vassert(0); + } +} + +static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) { + switch (riL->tag) { + case ARM64riL_I13: + return; + case ARM64riL_R: + addHRegUse(u, HRmRead, riL->ARM64riL.R.reg); + return; + default: + vpanic("addRegUsage_ARM64RIL"); + } +} + +static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) { + switch (riL->tag) { + case ARM64riL_I13: + return; + case ARM64riL_R: + riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg); + return; + default: + vpanic("mapRegs_ARM64RIL"); + } +} + + +/* --------------- Reg or uimm6 operands --------------- */ + +ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) { + ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6)); + ri6->tag = ARM64ri6_I6; + ri6->ARM64ri6.I6.imm6 = imm6; + vassert(imm6 > 0 && imm6 < 64); + return ri6; +} +ARM64RI6* ARM64RI6_R ( HReg reg ) { + ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6)); + ri6->tag = ARM64ri6_R; + ri6->ARM64ri6.R.reg = reg; + return ri6; +} + +static void ppARM64RI6 ( ARM64RI6* ri6 ) { + switch (ri6->tag) { + case ARM64ri6_I6: + vex_printf("#%u", ri6->ARM64ri6.I6.imm6); + break; + case ARM64ri6_R: + ppHRegARM64(ri6->ARM64ri6.R.reg); + break; + default: + vassert(0); + } +} + +static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) { + switch (ri6->tag) { + case ARM64ri6_I6: + return; + case ARM64ri6_R: + addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg); + return; + default: + vpanic("addRegUsage_ARM64RI6"); + } +} + +static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) { + switch (ri6->tag) { + case ARM64ri6_I6: + return; + case ARM64ri6_R: + ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg); + return; + default: + vpanic("mapRegs_ARM64RI6"); + } +} + + +//ZZ /* -------- Neon Immediate operatnd --------- */ +//ZZ +//ZZ ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) { +//ZZ ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm)); +//ZZ i->type = type; +//ZZ i->imm8 = imm8; +//ZZ return i; +//ZZ } +//ZZ +//ZZ ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) { +//ZZ int i, j; +//ZZ ULong y, x = imm->imm8; +//ZZ switch (imm->type) { +//ZZ case 3: +//ZZ x = x << 8; /* 
fallthrough */ +//ZZ case 2: +//ZZ x = x << 8; /* fallthrough */ +//ZZ case 1: +//ZZ x = x << 8; /* fallthrough */ +//ZZ case 0: +//ZZ return (x << 32) | x; +//ZZ case 5: +//ZZ case 6: +//ZZ if (imm->type == 5) +//ZZ x = x << 8; +//ZZ else +//ZZ x = (x << 8) | x; +//ZZ /* fallthrough */ +//ZZ case 4: +//ZZ x = (x << 16) | x; +//ZZ return (x << 32) | x; +//ZZ case 8: +//ZZ x = (x << 8) | 0xFF; +//ZZ /* fallthrough */ +//ZZ case 7: +//ZZ x = (x << 8) | 0xFF; +//ZZ return (x << 32) | x; +//ZZ case 9: +//ZZ x = 0; +//ZZ for (i = 7; i >= 0; i--) { +//ZZ y = ((ULong)imm->imm8 >> i) & 1; +//ZZ for (j = 0; j < 8; j++) { +//ZZ x = (x << 1) | y; +//ZZ } +//ZZ } +//ZZ return x; +//ZZ case 10: +//ZZ x |= (x & 0x80) << 5; +//ZZ x |= (~x & 0x40) << 5; +//ZZ x &= 0x187F; /* 0001 1000 0111 1111 */ +//ZZ x |= (x & 0x40) << 4; +//ZZ x |= (x & 0x40) << 3; +//ZZ x |= (x & 0x40) << 2; +//ZZ x |= (x & 0x40) << 1; +//ZZ x = x << 19; +//ZZ x = (x << 32) | x; +//ZZ return x; +//ZZ default: +//ZZ vpanic("ARMNImm_to_Imm64"); +//ZZ } +//ZZ } +//ZZ +//ZZ ARMNImm* Imm64_to_ARMNImm ( ULong x ) { +//ZZ ARMNImm tmp; +//ZZ if ((x & 0xFFFFFFFF) == (x >> 32)) { +//ZZ if ((x & 0xFFFFFF00) == 0) +//ZZ return ARMNImm_TI(0, x & 0xFF); +//ZZ if ((x & 0xFFFF00FF) == 0) +//ZZ return ARMNImm_TI(1, (x >> 8) & 0xFF); +//ZZ if ((x & 0xFF00FFFF) == 0) +//ZZ return ARMNImm_TI(2, (x >> 16) & 0xFF); +//ZZ if ((x & 0x00FFFFFF) == 0) +//ZZ return ARMNImm_TI(3, (x >> 24) & 0xFF); +//ZZ if ((x & 0xFFFF00FF) == 0xFF) +//ZZ return ARMNImm_TI(7, (x >> 8) & 0xFF); +//ZZ if ((x & 0xFF00FFFF) == 0xFFFF) +//ZZ return ARMNImm_TI(8, (x >> 16) & 0xFF); +//ZZ if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) { +//ZZ if ((x & 0xFF00) == 0) +//ZZ return ARMNImm_TI(4, x & 0xFF); +//ZZ if ((x & 0x00FF) == 0) +//ZZ return ARMNImm_TI(5, (x >> 8) & 0xFF); +//ZZ if ((x & 0xFF) == ((x >> 8) & 0xFF)) +//ZZ return ARMNImm_TI(6, x & 0xFF); +//ZZ } +//ZZ if ((x & 0x7FFFF) == 0) { +//ZZ tmp.type = 10; +//ZZ tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80); +//ZZ if (ARMNImm_to_Imm64(&tmp) == x) +//ZZ return ARMNImm_TI(tmp.type, tmp.imm8); +//ZZ } +//ZZ } else { +//ZZ /* This can only be type 9. */ +//ZZ tmp.imm8 = (((x >> 56) & 1) << 7) +//ZZ | (((x >> 48) & 1) << 6) +//ZZ | (((x >> 40) & 1) << 5) +//ZZ | (((x >> 32) & 1) << 4) +//ZZ | (((x >> 24) & 1) << 3) +//ZZ | (((x >> 16) & 1) << 2) +//ZZ | (((x >> 8) & 1) << 1) +//ZZ | (((x >> 0) & 1) << 0); +//ZZ tmp.type = 9; +//ZZ if (ARMNImm_to_Imm64 (&tmp) == x) +//ZZ return ARMNImm_TI(tmp.type, tmp.imm8); +//ZZ } +//ZZ return NULL; +//ZZ } +//ZZ +//ZZ void ppARMNImm (ARMNImm* i) { +//ZZ ULong x = ARMNImm_to_Imm64(i); +//ZZ vex_printf("0x%llX%llX", x, x); +//ZZ } +//ZZ +//ZZ /* -- Register or scalar operand --- */ +//ZZ +//ZZ ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index) +//ZZ { +//ZZ ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS)); +//ZZ p->tag = tag; +//ZZ p->reg = reg; +//ZZ p->index = index; +//ZZ return p; +//ZZ } +//ZZ +//ZZ void ppARMNRS(ARMNRS *p) +//ZZ { +//ZZ ppHRegARM(p->reg); +//ZZ if (p->tag == ARMNRS_Scalar) { +//ZZ vex_printf("[%d]", p->index); +//ZZ } +//ZZ } + +/* --------- Instructions. 
--------- */ + +static const HChar* showARM64LogicOp ( ARM64LogicOp op ) { + switch (op) { + case ARM64lo_AND: return "and"; + case ARM64lo_OR: return "orr"; + case ARM64lo_XOR: return "eor"; + default: vpanic("showARM64LogicOp"); + } +} + +static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) { + switch (op) { + case ARM64sh_SHL: return "lsl"; + case ARM64sh_SHR: return "lsr"; + case ARM64sh_SAR: return "asr"; + default: vpanic("showARM64ShiftOp"); + } +} + +static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) { + switch (op) { + case ARM64un_NEG: return "neg"; + case ARM64un_NOT: return "not"; + case ARM64un_CLZ: return "clz"; + default: vpanic("showARM64UnaryOp"); + } +} + +static const HChar* showARM64MulOp ( ARM64MulOp op ) { + switch (op) { + case ARM64mul_PLAIN: return "mul "; + case ARM64mul_ZX: return "umulh"; + case ARM64mul_SX: return "smulh"; + default: vpanic("showARM64MulOp"); + } +} + +static void characteriseARM64CvtOp ( /*OUT*/HChar* syn, + /*OUT*/UInt* fszB, /*OUT*/UInt* iszB, + ARM64CvtOp op ) { + switch (op) { + case ARM64cvt_F32_I32S: + *syn = 's'; *fszB = 4; *iszB = 4; break; + case ARM64cvt_F64_I32S: + *syn = 's'; *fszB = 8; *iszB = 4; break; + case ARM64cvt_F32_I64S: + *syn = 's'; *fszB = 4; *iszB = 8; break; + case ARM64cvt_F64_I64S: + *syn = 's'; *fszB = 8; *iszB = 8; break; + case ARM64cvt_F32_I32U: + *syn = 'u'; *fszB = 4; *iszB = 4; break; + case ARM64cvt_F64_I32U: + *syn = 'u'; *fszB = 8; *iszB = 4; break; + case ARM64cvt_F32_I64U: + *syn = 'u'; *fszB = 4; *iszB = 8; break; + case ARM64cvt_F64_I64U: + *syn = 'u'; *fszB = 8; *iszB = 8; break; + default: + vpanic("characteriseARM64CvtOp"); + } +} + +static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) { + switch (op) { + case ARM64fpb_ADD: return "add"; + case ARM64fpb_SUB: return "sub"; + case ARM64fpb_MUL: return "mul"; + case ARM64fpb_DIV: return "div"; + default: vpanic("showARM64FpBinOp"); + } +} + +static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) { + switch (op) { + case ARM64fpu_NEG: return "neg "; + case ARM64fpu_ABS: return "abs "; + case ARM64fpu_SQRT: return "sqrt "; + case ARM64fpu_RINT: return "rinti"; + default: vpanic("showARM64FpUnaryOp"); + } +} + +static void showARM64VecBinOp(/*OUT*/const HChar** nm, + /*OUT*/const HChar** ar, ARM64VecBinOp op ) { + switch (op) { + case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return; + case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return; + case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return; + case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return; + case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return; + case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return; + case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return; + case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return; + case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return; + case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return; + case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return; + case ARM64vecb_FADD64x2: *nm = "fadd"; *ar = "2d"; return; + case ARM64vecb_FSUB64x2: *nm = "fsub"; *ar = "2d"; return; + case ARM64vecb_FMUL64x2: *nm = "fmul"; *ar = "2d"; return; + case ARM64vecb_FDIV64x2: *nm = "fdiv"; *ar = "2d"; return; + case ARM64vecb_FADD32x4: *nm = "fadd"; *ar = "4s"; return; + case ARM64vecb_FSUB32x4: *nm = "fsub"; *ar = "4s"; return; + case ARM64vecb_FMUL32x4: *nm = "fmul"; *ar = "4s"; return; + case ARM64vecb_FDIV32x4: *nm = "fdiv"; *ar = "4s"; return; + case ARM64vecb_UMAX32x4: *nm = "umax"; *ar = "4s"; return; + case 
ARM64vecb_UMAX16x8: *nm = "umax"; *ar = "8h"; return; + case ARM64vecb_UMAX8x16: *nm = "umax"; *ar = "16b"; return; + case ARM64vecb_UMIN32x4: *nm = "umin"; *ar = "4s"; return; + case ARM64vecb_UMIN16x8: *nm = "umin"; *ar = "8h"; return; + case ARM64vecb_UMIN8x16: *nm = "umin"; *ar = "16b"; return; + case ARM64vecb_SMAX32x4: *nm = "smax"; *ar = "4s"; return; + case ARM64vecb_SMAX16x8: *nm = "smax"; *ar = "8h"; return; + case ARM64vecb_SMAX8x16: *nm = "smax"; *ar = "16b"; return; + case ARM64vecb_SMIN32x4: *nm = "smin"; *ar = "4s"; return; + case ARM64vecb_SMIN16x8: *nm = "smin"; *ar = "8h"; return; + case ARM64vecb_SMIN8x16: *nm = "smin"; *ar = "16b"; return; + case ARM64vecb_AND: *nm = "and "; *ar = "all"; return; + case ARM64vecb_ORR: *nm = "orr "; *ar = "all"; return; + case ARM64vecb_XOR: *nm = "eor "; *ar = "all"; return; + case ARM64vecb_CMEQ64x2: *nm = "cmeq"; *ar = "2d"; return; + case ARM64vecb_CMEQ32x4: *nm = "cmeq"; *ar = "4s"; return; + case ARM64vecb_CMEQ16x8: *nm = "cmeq"; *ar = "8h"; return; + case ARM64vecb_CMEQ8x16: *nm = "cmeq"; *ar = "16b"; return; + case ARM64vecb_CMHI64x2: *nm = "cmhi"; *ar = "2d"; return; + case ARM64vecb_CMHI32x4: *nm = "cmhi"; *ar = "4s"; return; + case ARM64vecb_CMHI16x8: *nm = "cmhi"; *ar = "8h"; return; + case ARM64vecb_CMHI8x16: *nm = "cmhi"; *ar = "16b"; return; + case ARM64vecb_CMGT64x2: *nm = "cmgt"; *ar = "2d"; return; + case ARM64vecb_CMGT32x4: *nm = "cmgt"; *ar = "4s"; return; + case ARM64vecb_CMGT16x8: *nm = "cmgt"; *ar = "8h"; return; + case ARM64vecb_CMGT8x16: *nm = "cmgt"; *ar = "16b"; return; + case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d"; return; + case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s"; return; + case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d"; return; + case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return; + case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return; + case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return; + case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return; + default: vpanic("showARM64VecBinOp"); + } +} + +static void showARM64VecUnaryOp(/*OUT*/const HChar** nm, + /*OUT*/const HChar** ar, ARM64VecUnaryOp op ) +{ + switch (op) { + case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return; + case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return; + case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return; + case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return; + case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return; + default: vpanic("showARM64VecUnaryOp"); + } +} + +static void showARM64VecShiftOp(/*OUT*/const HChar** nm, + /*OUT*/const HChar** ar, + ARM64VecShiftOp op ) +{ + switch (op) { + case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return; + case ARM64vecsh_USHR32x4: *nm = "ushr "; *ar = "4s"; return; + case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return; + case ARM64vecsh_USHR8x16: *nm = "ushr "; *ar = "16b"; return; + case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return; + case ARM64vecsh_SSHR32x4: *nm = "sshr "; *ar = "4s"; return; + case ARM64vecsh_SSHR16x8: *nm = "sshr "; *ar = "8h"; return; + case ARM64vecsh_SSHR8x16: *nm = "sshr "; *ar = "16b"; return; + case ARM64vecsh_SHL64x2: *nm = "shl "; *ar = "2d"; return; + case ARM64vecsh_SHL32x4: *nm = "shl "; *ar = "4s"; return; + case ARM64vecsh_SHL16x8: *nm = "shl "; *ar = "8h"; return; + case ARM64vecsh_SHL8x16: *nm = "shl "; *ar = "16b"; return; + default: vpanic("showARM64VecShiftImmOp"); + } +} + +//ZZ const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) { +//ZZ switch (op) { 
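As an aside on how the (mnemonic, arrangement) pair returned by showARM64VecBinOp above is meant to be consumed: the printer emits the mnemonic once and appends the arrangement suffix to every vector operand. Below is a minimal sketch, assuming it sits in this same file so the static helper and vex_printf are in scope; the chosen opcode and register numbers are arbitrary illustration and not part of the patch.

   /* Illustrative sketch only, not part of this change.  Assumes it lives in
      host_arm64_defs.c so showARM64VecBinOp (static, above) and vex_printf
      are visible; the opcode and operand registers are arbitrary, and the
      function is never referenced. */
   static void ppDemo_VecBinOp ( void )
   {
      const HChar* nm = "??";
      const HChar* ar = "??";
      showARM64VecBinOp(&nm, &ar, ARM64vecb_ADD32x4);
      /* nm is now "add " and ar is "4s"; formatted the same way the
         ARM64in_VBinV case of ppARM64Instr does it, giving
         "add v0.4s, v1.4s, v2.4s". */
      vex_printf("%sv0.%s, v1.%s, v2.%s\n", nm, ar, ar, ar);
   }

The same two-string scheme is used by showARM64VecUnaryOp and showARM64VecShiftOp, so the VBinV, VUnaryV and VShiftImmV cases of the pretty-printer can all share this one formatting pattern.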
+//ZZ case ARMneon_VAND: return "vand"; +//ZZ case ARMneon_VORR: return "vorr"; +//ZZ case ARMneon_VXOR: return "veor"; +//ZZ case ARMneon_VADD: return "vadd"; +//ZZ case ARMneon_VRHADDS: return "vrhadd"; +//ZZ case ARMneon_VRHADDU: return "vrhadd"; +//ZZ case ARMneon_VADDFP: return "vadd"; +//ZZ case ARMneon_VPADDFP: return "vpadd"; +//ZZ case ARMneon_VABDFP: return "vabd"; +//ZZ case ARMneon_VSUB: return "vsub"; +//ZZ case ARMneon_VSUBFP: return "vsub"; +//ZZ case ARMneon_VMINU: return "vmin"; +//ZZ case ARMneon_VMINS: return "vmin"; +//ZZ case ARMneon_VMINF: return "vmin"; +//ZZ case ARMneon_VMAXU: return "vmax"; +//ZZ case ARMneon_VMAXS: return "vmax"; +//ZZ case ARMneon_VMAXF: return "vmax"; +//ZZ case ARMneon_VQADDU: return "vqadd"; +//ZZ case ARMneon_VQADDS: return "vqadd"; +//ZZ case ARMneon_VQSUBU: return "vqsub"; +//ZZ case ARMneon_VQSUBS: return "vqsub"; +//ZZ case ARMneon_VCGTU: return "vcgt"; +//ZZ case ARMneon_VCGTS: return "vcgt"; +//ZZ case ARMneon_VCGTF: return "vcgt"; +//ZZ case ARMneon_VCGEF: return "vcgt"; +//ZZ case ARMneon_VCGEU: return "vcge"; +//ZZ case ARMneon_VCGES: return "vcge"; +//ZZ case ARMneon_VCEQ: return "vceq"; +//ZZ case ARMneon_VCEQF: return "vceq"; +//ZZ case ARMneon_VPADD: return "vpadd"; +//ZZ case ARMneon_VPMINU: return "vpmin"; +//ZZ case ARMneon_VPMINS: return "vpmin"; +//ZZ case ARMneon_VPMINF: return "vpmin"; +//ZZ case ARMneon_VPMAXU: return "vpmax"; +//ZZ case ARMneon_VPMAXS: return "vpmax"; +//ZZ case ARMneon_VPMAXF: return "vpmax"; +//ZZ case ARMneon_VEXT: return "vext"; +//ZZ case ARMneon_VMUL: return "vmuli"; +//ZZ case ARMneon_VMULLU: return "vmull"; +//ZZ case ARMneon_VMULLS: return "vmull"; +//ZZ case ARMneon_VMULP: return "vmul"; +//ZZ case ARMneon_VMULFP: return "vmul"; +//ZZ case ARMneon_VMULLP: return "vmul"; +//ZZ case ARMneon_VQDMULH: return "vqdmulh"; +//ZZ case ARMneon_VQRDMULH: return "vqrdmulh"; +//ZZ case ARMneon_VQDMULL: return "vqdmull"; +//ZZ case ARMneon_VTBL: return "vtbl"; +//ZZ case ARMneon_VRECPS: return "vrecps"; +//ZZ case ARMneon_VRSQRTS: return "vrecps"; +//ZZ /* ... 
*/ +//ZZ default: vpanic("showARMNeonBinOp"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) { +//ZZ switch (op) { +//ZZ case ARMneon_VAND: +//ZZ case ARMneon_VORR: +//ZZ case ARMneon_VXOR: +//ZZ return ""; +//ZZ case ARMneon_VADD: +//ZZ case ARMneon_VSUB: +//ZZ case ARMneon_VEXT: +//ZZ case ARMneon_VMUL: +//ZZ case ARMneon_VPADD: +//ZZ case ARMneon_VTBL: +//ZZ case ARMneon_VCEQ: +//ZZ return ".i"; +//ZZ case ARMneon_VRHADDU: +//ZZ case ARMneon_VMINU: +//ZZ case ARMneon_VMAXU: +//ZZ case ARMneon_VQADDU: +//ZZ case ARMneon_VQSUBU: +//ZZ case ARMneon_VCGTU: +//ZZ case ARMneon_VCGEU: +//ZZ case ARMneon_VMULLU: +//ZZ case ARMneon_VPMINU: +//ZZ case ARMneon_VPMAXU: +//ZZ return ".u"; +//ZZ case ARMneon_VRHADDS: +//ZZ case ARMneon_VMINS: +//ZZ case ARMneon_VMAXS: +//ZZ case ARMneon_VQADDS: +//ZZ case ARMneon_VQSUBS: +//ZZ case ARMneon_VCGTS: +//ZZ case ARMneon_VCGES: +//ZZ case ARMneon_VQDMULL: +//ZZ case ARMneon_VMULLS: +//ZZ case ARMneon_VPMINS: +//ZZ case ARMneon_VPMAXS: +//ZZ case ARMneon_VQDMULH: +//ZZ case ARMneon_VQRDMULH: +//ZZ return ".s"; +//ZZ case ARMneon_VMULP: +//ZZ case ARMneon_VMULLP: +//ZZ return ".p"; +//ZZ case ARMneon_VADDFP: +//ZZ case ARMneon_VABDFP: +//ZZ case ARMneon_VPADDFP: +//ZZ case ARMneon_VSUBFP: +//ZZ case ARMneon_VMULFP: +//ZZ case ARMneon_VMINF: +//ZZ case ARMneon_VMAXF: +//ZZ case ARMneon_VPMINF: +//ZZ case ARMneon_VPMAXF: +//ZZ case ARMneon_VCGTF: +//ZZ case ARMneon_VCGEF: +//ZZ case ARMneon_VCEQF: +//ZZ case ARMneon_VRECPS: +//ZZ case ARMneon_VRSQRTS: +//ZZ return ".f"; +//ZZ /* ... */ +//ZZ default: vpanic("showARMNeonBinOpDataType"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) { +//ZZ switch (op) { +//ZZ case ARMneon_COPY: return "vmov"; +//ZZ case ARMneon_COPYLS: return "vmov"; +//ZZ case ARMneon_COPYLU: return "vmov"; +//ZZ case ARMneon_COPYN: return "vmov"; +//ZZ case ARMneon_COPYQNSS: return "vqmovn"; +//ZZ case ARMneon_COPYQNUS: return "vqmovun"; +//ZZ case ARMneon_COPYQNUU: return "vqmovn"; +//ZZ case ARMneon_NOT: return "vmvn"; +//ZZ case ARMneon_EQZ: return "vceq"; +//ZZ case ARMneon_CNT: return "vcnt"; +//ZZ case ARMneon_CLS: return "vcls"; +//ZZ case ARMneon_CLZ: return "vclz"; +//ZZ case ARMneon_DUP: return "vdup"; +//ZZ case ARMneon_PADDLS: return "vpaddl"; +//ZZ case ARMneon_PADDLU: return "vpaddl"; +//ZZ case ARMneon_VQSHLNSS: return "vqshl"; +//ZZ case ARMneon_VQSHLNUU: return "vqshl"; +//ZZ case ARMneon_VQSHLNUS: return "vqshlu"; +//ZZ case ARMneon_REV16: return "vrev16"; +//ZZ case ARMneon_REV32: return "vrev32"; +//ZZ case ARMneon_REV64: return "vrev64"; +//ZZ case ARMneon_VCVTFtoU: return "vcvt"; +//ZZ case ARMneon_VCVTFtoS: return "vcvt"; +//ZZ case ARMneon_VCVTUtoF: return "vcvt"; +//ZZ case ARMneon_VCVTStoF: return "vcvt"; +//ZZ case ARMneon_VCVTFtoFixedU: return "vcvt"; +//ZZ case ARMneon_VCVTFtoFixedS: return "vcvt"; +//ZZ case ARMneon_VCVTFixedUtoF: return "vcvt"; +//ZZ case ARMneon_VCVTFixedStoF: return "vcvt"; +//ZZ case ARMneon_VCVTF32toF16: return "vcvt"; +//ZZ case ARMneon_VCVTF16toF32: return "vcvt"; +//ZZ case ARMneon_VRECIP: return "vrecip"; +//ZZ case ARMneon_VRECIPF: return "vrecipf"; +//ZZ case ARMneon_VNEGF: return "vneg"; +//ZZ case ARMneon_ABS: return "vabs"; +//ZZ case ARMneon_VABSFP: return "vabsfp"; +//ZZ case ARMneon_VRSQRTEFP: return "vrsqrtefp"; +//ZZ case ARMneon_VRSQRTE: return "vrsqrte"; +//ZZ /* ... 
*/ +//ZZ default: vpanic("showARMNeonUnOp"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) { +//ZZ switch (op) { +//ZZ case ARMneon_COPY: +//ZZ case ARMneon_NOT: +//ZZ return ""; +//ZZ case ARMneon_COPYN: +//ZZ case ARMneon_EQZ: +//ZZ case ARMneon_CNT: +//ZZ case ARMneon_DUP: +//ZZ case ARMneon_REV16: +//ZZ case ARMneon_REV32: +//ZZ case ARMneon_REV64: +//ZZ return ".i"; +//ZZ case ARMneon_COPYLU: +//ZZ case ARMneon_PADDLU: +//ZZ case ARMneon_COPYQNUU: +//ZZ case ARMneon_VQSHLNUU: +//ZZ case ARMneon_VRECIP: +//ZZ case ARMneon_VRSQRTE: +//ZZ return ".u"; +//ZZ case ARMneon_CLS: +//ZZ case ARMneon_CLZ: +//ZZ case ARMneon_COPYLS: +//ZZ case ARMneon_PADDLS: +//ZZ case ARMneon_COPYQNSS: +//ZZ case ARMneon_COPYQNUS: +//ZZ case ARMneon_VQSHLNSS: +//ZZ case ARMneon_VQSHLNUS: +//ZZ case ARMneon_ABS: +//ZZ return ".s"; +//ZZ case ARMneon_VRECIPF: +//ZZ case ARMneon_VNEGF: +//ZZ case ARMneon_VABSFP: +//ZZ case ARMneon_VRSQRTEFP: +//ZZ return ".f"; +//ZZ case ARMneon_VCVTFtoU: return ".u32.f32"; +//ZZ case ARMneon_VCVTFtoS: return ".s32.f32"; +//ZZ case ARMneon_VCVTUtoF: return ".f32.u32"; +//ZZ case ARMneon_VCVTStoF: return ".f32.s32"; +//ZZ case ARMneon_VCVTF16toF32: return ".f32.f16"; +//ZZ case ARMneon_VCVTF32toF16: return ".f16.f32"; +//ZZ case ARMneon_VCVTFtoFixedU: return ".u32.f32"; +//ZZ case ARMneon_VCVTFtoFixedS: return ".s32.f32"; +//ZZ case ARMneon_VCVTFixedUtoF: return ".f32.u32"; +//ZZ case ARMneon_VCVTFixedStoF: return ".f32.s32"; +//ZZ /* ... */ +//ZZ default: vpanic("showARMNeonUnOpDataType"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) { +//ZZ switch (op) { +//ZZ case ARMneon_SETELEM: return "vmov"; +//ZZ case ARMneon_GETELEMU: return "vmov"; +//ZZ case ARMneon_GETELEMS: return "vmov"; +//ZZ case ARMneon_VDUP: return "vdup"; +//ZZ /* ... */ +//ZZ default: vpanic("showARMNeonUnarySOp"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) { +//ZZ switch (op) { +//ZZ case ARMneon_SETELEM: +//ZZ case ARMneon_VDUP: +//ZZ return ".i"; +//ZZ case ARMneon_GETELEMS: +//ZZ return ".s"; +//ZZ case ARMneon_GETELEMU: +//ZZ return ".u"; +//ZZ /* ... */ +//ZZ default: vpanic("showARMNeonUnarySOp"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) { +//ZZ switch (op) { +//ZZ case ARMneon_VSHL: return "vshl"; +//ZZ case ARMneon_VSAL: return "vshl"; +//ZZ case ARMneon_VQSHL: return "vqshl"; +//ZZ case ARMneon_VQSAL: return "vqshl"; +//ZZ /* ... */ +//ZZ default: vpanic("showARMNeonShiftOp"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) { +//ZZ switch (op) { +//ZZ case ARMneon_VSHL: +//ZZ case ARMneon_VQSHL: +//ZZ return ".u"; +//ZZ case ARMneon_VSAL: +//ZZ case ARMneon_VQSAL: +//ZZ return ".s"; +//ZZ /* ... */ +//ZZ default: vpanic("showARMNeonShiftOpDataType"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) { +//ZZ switch (op) { +//ZZ case ARMneon_TRN: return "vtrn"; +//ZZ case ARMneon_ZIP: return "vzip"; +//ZZ case ARMneon_UZP: return "vuzp"; +//ZZ /* ... */ +//ZZ default: vpanic("showARMNeonDualOp"); +//ZZ } +//ZZ } +//ZZ +//ZZ const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) { +//ZZ switch (op) { +//ZZ case ARMneon_TRN: +//ZZ case ARMneon_ZIP: +//ZZ case ARMneon_UZP: +//ZZ return "i"; +//ZZ /* ... 
*/ +//ZZ default: vpanic("showARMNeonDualOp"); +//ZZ } +//ZZ } +//ZZ +//ZZ static const HChar* showARMNeonDataSize_wrk ( UInt size ) +//ZZ { +//ZZ switch (size) { +//ZZ case 0: return "8"; +//ZZ case 1: return "16"; +//ZZ case 2: return "32"; +//ZZ case 3: return "64"; +//ZZ default: vpanic("showARMNeonDataSize"); +//ZZ } +//ZZ } +//ZZ +//ZZ static const HChar* showARMNeonDataSize ( ARMInstr* i ) +//ZZ { +//ZZ switch (i->tag) { +//ZZ case ARMin_NBinary: +//ZZ if (i->ARMin.NBinary.op == ARMneon_VEXT) +//ZZ return "8"; +//ZZ if (i->ARMin.NBinary.op == ARMneon_VAND || +//ZZ i->ARMin.NBinary.op == ARMneon_VORR || +//ZZ i->ARMin.NBinary.op == ARMneon_VXOR) +//ZZ return ""; +//ZZ return showARMNeonDataSize_wrk(i->ARMin.NBinary.size); +//ZZ case ARMin_NUnary: +//ZZ if (i->ARMin.NUnary.op == ARMneon_COPY || +//ZZ i->ARMin.NUnary.op == ARMneon_NOT || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTF32toF16|| +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTF16toF32|| +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoS || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoU || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTStoF || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTUtoF) +//ZZ return ""; +//ZZ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS || +//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUU || +//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) { +//ZZ UInt size; +//ZZ size = i->ARMin.NUnary.size; +//ZZ if (size & 0x40) +//ZZ return "64"; +//ZZ if (size & 0x20) +//ZZ return "32"; +//ZZ if (size & 0x10) +//ZZ return "16"; +//ZZ if (size & 0x08) +//ZZ return "8"; +//ZZ vpanic("showARMNeonDataSize"); +//ZZ } +//ZZ return showARMNeonDataSize_wrk(i->ARMin.NUnary.size); +//ZZ case ARMin_NUnaryS: +//ZZ if (i->ARMin.NUnaryS.op == ARMneon_VDUP) { +//ZZ int size; +//ZZ size = i->ARMin.NUnaryS.size; +//ZZ if ((size & 1) == 1) +//ZZ return "8"; +//ZZ if ((size & 3) == 2) +//ZZ return "16"; +//ZZ if ((size & 7) == 4) +//ZZ return "32"; +//ZZ vpanic("showARMNeonDataSize"); +//ZZ } +//ZZ return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size); +//ZZ case ARMin_NShift: +//ZZ return showARMNeonDataSize_wrk(i->ARMin.NShift.size); +//ZZ case ARMin_NDual: +//ZZ return showARMNeonDataSize_wrk(i->ARMin.NDual.size); +//ZZ default: +//ZZ vpanic("showARMNeonDataSize"); +//ZZ } +//ZZ } + +ARM64Instr* ARM64Instr_Arith ( HReg dst, + HReg argL, ARM64RIA* argR, Bool isAdd ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Arith; + i->ARM64in.Arith.dst = dst; + i->ARM64in.Arith.argL = argL; + i->ARM64in.Arith.argR = argR; + i->ARM64in.Arith.isAdd = isAdd; + return i; +} +ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Cmp; + i->ARM64in.Cmp.argL = argL; + i->ARM64in.Cmp.argR = argR; + i->ARM64in.Cmp.is64 = is64; + return i; +} +ARM64Instr* ARM64Instr_Logic ( HReg dst, + HReg argL, ARM64RIL* argR, ARM64LogicOp op ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Logic; + i->ARM64in.Logic.dst = dst; + i->ARM64in.Logic.argL = argL; + i->ARM64in.Logic.argR = argR; + i->ARM64in.Logic.op = op; + return i; +} +ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Test; + i->ARM64in.Test.argL = argL; + i->ARM64in.Test.argR = argR; + return i; +} +ARM64Instr* 
ARM64Instr_Shift ( HReg dst, + HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Shift; + i->ARM64in.Shift.dst = dst; + i->ARM64in.Shift.argL = argL; + i->ARM64in.Shift.argR = argR; + i->ARM64in.Shift.op = op; + return i; +} +ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Unary; + i->ARM64in.Unary.dst = dst; + i->ARM64in.Unary.src = src; + i->ARM64in.Unary.op = op; + return i; +} +ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_MovI; + i->ARM64in.MovI.dst = dst; + i->ARM64in.MovI.src = src; + vassert(hregClass(src) == HRcInt64); + vassert(hregClass(dst) == HRcInt64); + return i; +} +ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Imm64; + i->ARM64in.Imm64.dst = dst; + i->ARM64in.Imm64.imm64 = imm64; + return i; +} +ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_LdSt64; + i->ARM64in.LdSt64.isLoad = isLoad; + i->ARM64in.LdSt64.rD = rD; + i->ARM64in.LdSt64.amode = amode; + return i; +} +ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_LdSt32; + i->ARM64in.LdSt32.isLoad = isLoad; + i->ARM64in.LdSt32.rD = rD; + i->ARM64in.LdSt32.amode = amode; + return i; +} +ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_LdSt16; + i->ARM64in.LdSt16.isLoad = isLoad; + i->ARM64in.LdSt16.rD = rD; + i->ARM64in.LdSt16.amode = amode; + return i; +} +ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_LdSt8; + i->ARM64in.LdSt8.isLoad = isLoad; + i->ARM64in.LdSt8.rD = rD; + i->ARM64in.LdSt8.amode = amode; + return i; +} +ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC, + ARM64CondCode cond, Bool toFastEP ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_XDirect; + i->ARM64in.XDirect.dstGA = dstGA; + i->ARM64in.XDirect.amPC = amPC; + i->ARM64in.XDirect.cond = cond; + i->ARM64in.XDirect.toFastEP = toFastEP; + return i; +} +ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC, + ARM64CondCode cond ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_XIndir; + i->ARM64in.XIndir.dstGA = dstGA; + i->ARM64in.XIndir.amPC = amPC; + i->ARM64in.XIndir.cond = cond; + return i; +} +ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC, + ARM64CondCode cond, IRJumpKind jk ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_XAssisted; + i->ARM64in.XAssisted.dstGA = dstGA; + i->ARM64in.XAssisted.amPC = amPC; + i->ARM64in.XAssisted.cond = cond; + i->ARM64in.XAssisted.jk = jk; + return i; +} +ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR, + ARM64CondCode cond ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_CSel; + i->ARM64in.CSel.dst = dst; + i->ARM64in.CSel.argL = argL; + i->ARM64in.CSel.argR = argR; + i->ARM64in.CSel.cond = cond; + return i; +} +ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, HWord target, Int nArgRegs, + RetLoc rloc ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + 
i->tag = ARM64in_Call; + i->ARM64in.Call.cond = cond; + i->ARM64in.Call.target = target; + i->ARM64in.Call.nArgRegs = nArgRegs; + i->ARM64in.Call.rloc = rloc; + vassert(is_sane_RetLoc(rloc)); + return i; +} +extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_AddToSP; + i->ARM64in.AddToSP.simm = simm; + vassert(-4096 < simm && simm < 4096); + vassert(0 == (simm & 0xF)); + return i; +} +extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_FromSP; + i->ARM64in.FromSP.dst = dst; + return i; +} +ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR, + ARM64MulOp op ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_Mul; + i->ARM64in.Mul.dst = dst; + i->ARM64in.Mul.argL = argL; + i->ARM64in.Mul.argR = argR; + i->ARM64in.Mul.op = op; + return i; +} +ARM64Instr* ARM64Instr_LdrEX ( Int szB ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_LdrEX; + i->ARM64in.LdrEX.szB = szB; + vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + return i; +} +ARM64Instr* ARM64Instr_StrEX ( Int szB ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_StrEX; + i->ARM64in.StrEX.szB = szB; + vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + return i; +} +ARM64Instr* ARM64Instr_MFence ( void ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_MFence; + return i; +} +//ZZ ARM64Instr* ARM64Instr_CLREX( void ) { +//ZZ ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); +//ZZ i->tag = ARM64in_CLREX; +//ZZ return i; +//ZZ } +ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VLdStS; + i->ARM64in.VLdStS.isLoad = isLoad; + i->ARM64in.VLdStS.sD = sD; + i->ARM64in.VLdStS.rN = rN; + i->ARM64in.VLdStS.uimm12 = uimm12; + vassert(uimm12 < 16384 && 0 == (uimm12 & 3)); + return i; +} +ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VLdStD; + i->ARM64in.VLdStD.isLoad = isLoad; + i->ARM64in.VLdStD.dD = dD; + i->ARM64in.VLdStD.rN = rN; + i->ARM64in.VLdStD.uimm12 = uimm12; + vassert(uimm12 < 32768 && 0 == (uimm12 & 7)); + return i; +} +ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VLdStQ; + i->ARM64in.VLdStQ.isLoad = isLoad; + i->ARM64in.VLdStQ.rQ = rQ; + i->ARM64in.VLdStQ.rN = rN; + return i; +} +ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VCvtI2F; + i->ARM64in.VCvtI2F.how = how; + i->ARM64in.VCvtI2F.rD = rD; + i->ARM64in.VCvtI2F.rS = rS; + return i; +} +ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS, + UChar armRM ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VCvtF2I; + i->ARM64in.VCvtF2I.how = how; + i->ARM64in.VCvtF2I.rD = rD; + i->ARM64in.VCvtF2I.rS = rS; + i->ARM64in.VCvtF2I.armRM = armRM; + vassert(armRM <= 3); + return i; +} +ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VCvtSD; + i->ARM64in.VCvtSD.sToD = sToD; + i->ARM64in.VCvtSD.dst = dst; + i->ARM64in.VCvtSD.src = src; + return i; +} +ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) { + 
ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VUnaryD; + i->ARM64in.VUnaryD.op = op; + i->ARM64in.VUnaryD.dst = dst; + i->ARM64in.VUnaryD.src = src; + return i; +} +ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VUnaryS; + i->ARM64in.VUnaryS.op = op; + i->ARM64in.VUnaryS.dst = dst; + i->ARM64in.VUnaryS.src = src; + return i; +} +ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, + HReg dst, HReg argL, HReg argR ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VBinD; + i->ARM64in.VBinD.op = op; + i->ARM64in.VBinD.dst = dst; + i->ARM64in.VBinD.argL = argL; + i->ARM64in.VBinD.argR = argR; + return i; +} +ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, + HReg dst, HReg argL, HReg argR ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VBinS; + i->ARM64in.VBinS.op = op; + i->ARM64in.VBinS.dst = dst; + i->ARM64in.VBinS.argL = argL; + i->ARM64in.VBinS.argR = argR; + return i; +} +ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VCmpD; + i->ARM64in.VCmpD.argL = argL; + i->ARM64in.VCmpD.argR = argR; + return i; +} +ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VCmpS; + i->ARM64in.VCmpS.argL = argL; + i->ARM64in.VCmpS.argR = argR; + return i; +} +ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_FPCR; + i->ARM64in.FPCR.toFPCR = toFPCR; + i->ARM64in.FPCR.iReg = iReg; + return i; +} +ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op, + HReg dst, HReg argL, HReg argR ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VBinV; + i->ARM64in.VBinV.op = op; + i->ARM64in.VBinV.dst = dst; + i->ARM64in.VBinV.argL = argL; + i->ARM64in.VBinV.argR = argR; + return i; +} +ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VUnaryV; + i->ARM64in.VUnaryV.op = op; + i->ARM64in.VUnaryV.dst = dst; + i->ARM64in.VUnaryV.arg = arg; + return i; +} +ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VNarrowV; + i->ARM64in.VNarrowV.dszBlg2 = dszBlg2; + i->ARM64in.VNarrowV.dst = dst; + i->ARM64in.VNarrowV.src = src; + vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2); + return i; +} +ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op, + HReg dst, HReg src, UInt amt ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VShiftImmV; + i->ARM64in.VShiftImmV.op = op; + i->ARM64in.VShiftImmV.dst = dst; + i->ARM64in.VShiftImmV.src = src; + i->ARM64in.VShiftImmV.amt = amt; + UInt maxSh = 0; + switch (op) { + case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2: + case ARM64vecsh_SHL64x2: + maxSh = 63; break; + case ARM64vecsh_USHR32x4: case ARM64vecsh_SSHR32x4: + case ARM64vecsh_SHL32x4: + maxSh = 31; break; + case ARM64vecsh_USHR16x8: case ARM64vecsh_SSHR16x8: + case ARM64vecsh_SHL16x8: + maxSh = 15; break; + case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16: + case ARM64vecsh_SHL8x16: + maxSh = 7; break; + default: + vassert(0); + } + vassert(maxSh > 0); + vassert(amt > 0 && amt <= maxSh); + return i; +} +//ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) { +//ZZ ARMInstr* i = 
LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_VAluS; +//ZZ i->ARMin.VAluS.op = op; +//ZZ i->ARMin.VAluS.dst = dst; +//ZZ i->ARMin.VAluS.argL = argL; +//ZZ i->ARMin.VAluS.argR = argR; +//ZZ return i; +//ZZ } +//ZZ ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_VCMovD; +//ZZ i->ARMin.VCMovD.cond = cond; +//ZZ i->ARMin.VCMovD.dst = dst; +//ZZ i->ARMin.VCMovD.src = src; +//ZZ vassert(cond != ARMcc_AL); +//ZZ return i; +//ZZ } +//ZZ ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_VCMovS; +//ZZ i->ARMin.VCMovS.cond = cond; +//ZZ i->ARMin.VCMovS.dst = dst; +//ZZ i->ARMin.VCMovS.src = src; +//ZZ vassert(cond != ARMcc_AL); +//ZZ return i; +//ZZ } +//ZZ ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_VXferD; +//ZZ i->ARMin.VXferD.toD = toD; +//ZZ i->ARMin.VXferD.dD = dD; +//ZZ i->ARMin.VXferD.rHi = rHi; +//ZZ i->ARMin.VXferD.rLo = rLo; +//ZZ return i; +//ZZ } +//ZZ ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_VXferS; +//ZZ i->ARMin.VXferS.toS = toS; +//ZZ i->ARMin.VXferS.fD = fD; +//ZZ i->ARMin.VXferS.rLo = rLo; +//ZZ return i; +//ZZ } +//ZZ ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned, +//ZZ HReg dst, HReg src ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_VCvtID; +//ZZ i->ARMin.VCvtID.iToD = iToD; +//ZZ i->ARMin.VCvtID.syned = syned; +//ZZ i->ARMin.VCvtID.dst = dst; +//ZZ i->ARMin.VCvtID.src = src; +//ZZ return i; +//ZZ } +//ZZ ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NLdStD; +//ZZ i->ARMin.NLdStD.isLoad = isLoad; +//ZZ i->ARMin.NLdStD.dD = dD; +//ZZ i->ARMin.NLdStD.amode = amode; +//ZZ return i; +//ZZ } +//ZZ +//ZZ ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ, +//ZZ UInt size, Bool Q ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NUnary; +//ZZ i->ARMin.NUnary.op = op; +//ZZ i->ARMin.NUnary.src = nQ; +//ZZ i->ARMin.NUnary.dst = dQ; +//ZZ i->ARMin.NUnary.size = size; +//ZZ i->ARMin.NUnary.Q = Q; +//ZZ return i; +//ZZ } +//ZZ +//ZZ ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src, +//ZZ UInt size, Bool Q ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NUnaryS; +//ZZ i->ARMin.NUnaryS.op = op; +//ZZ i->ARMin.NUnaryS.src = src; +//ZZ i->ARMin.NUnaryS.dst = dst; +//ZZ i->ARMin.NUnaryS.size = size; +//ZZ i->ARMin.NUnaryS.Q = Q; +//ZZ return i; +//ZZ } +//ZZ +//ZZ ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ, +//ZZ UInt size, Bool Q ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NDual; +//ZZ i->ARMin.NDual.op = op; +//ZZ i->ARMin.NDual.arg1 = nQ; +//ZZ i->ARMin.NDual.arg2 = mQ; +//ZZ i->ARMin.NDual.size = size; +//ZZ i->ARMin.NDual.Q = Q; +//ZZ return i; +//ZZ } +//ZZ +//ZZ ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op, +//ZZ HReg dst, HReg argL, HReg argR, +//ZZ UInt size, Bool Q ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NBinary; +//ZZ i->ARMin.NBinary.op = op; +//ZZ i->ARMin.NBinary.argL = argL; +//ZZ i->ARMin.NBinary.argR = argR; +//ZZ i->ARMin.NBinary.dst = dst; +//ZZ i->ARMin.NBinary.size = size; +//ZZ 
i->ARMin.NBinary.Q = Q; +//ZZ return i; +//ZZ } + +ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VImmQ; + i->ARM64in.VImmQ.rQ = rQ; + i->ARM64in.VImmQ.imm = imm; + return i; +} +ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VDfromX; + i->ARM64in.VDfromX.rD = rD; + i->ARM64in.VDfromX.rX = rX; + return i; +} +ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VQfromXX; + i->ARM64in.VQfromXX.rQ = rQ; + i->ARM64in.VQfromXX.rXhi = rXhi; + i->ARM64in.VQfromXX.rXlo = rXlo; + return i; +} +ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VXfromQ; + i->ARM64in.VXfromQ.rX = rX; + i->ARM64in.VXfromQ.rQ = rQ; + i->ARM64in.VXfromQ.laneNo = laneNo; + vassert(laneNo <= 1); + return i; +} +ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VMov; + i->ARM64in.VMov.szB = szB; + i->ARM64in.VMov.dst = dst; + i->ARM64in.VMov.src = src; + switch (szB) { + case 16: + vassert(hregClass(src) == HRcVec128); + vassert(hregClass(dst) == HRcVec128); + break; + case 8: + vassert(hregClass(src) == HRcFlt64); + vassert(hregClass(dst) == HRcFlt64); + break; + default: + vpanic("ARM64Instr_VMov"); + } + return i; +} + +//ZZ ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NCMovQ; +//ZZ i->ARMin.NCMovQ.cond = cond; +//ZZ i->ARMin.NCMovQ.dst = dst; +//ZZ i->ARMin.NCMovQ.src = src; +//ZZ vassert(cond != ARMcc_AL); +//ZZ return i; +//ZZ } +//ZZ +//ZZ ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op, +//ZZ HReg dst, HReg argL, HReg argR, +//ZZ UInt size, Bool Q ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NShift; +//ZZ i->ARMin.NShift.op = op; +//ZZ i->ARMin.NShift.argL = argL; +//ZZ i->ARMin.NShift.argR = argR; +//ZZ i->ARMin.NShift.dst = dst; +//ZZ i->ARMin.NShift.size = size; +//ZZ i->ARMin.NShift.Q = Q; +//ZZ return i; +//ZZ } +//ZZ +//ZZ ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt ) +//ZZ { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_NShl64; +//ZZ i->ARMin.NShl64.dst = dst; +//ZZ i->ARMin.NShl64.src = src; +//ZZ i->ARMin.NShl64.amt = amt; +//ZZ vassert(amt >= 1 && amt <= 63); +//ZZ return i; +//ZZ } +//ZZ +//ZZ /* Helper copy-pasted from isel.c */ +//ZZ static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u ) +//ZZ { +//ZZ UInt i; +//ZZ for (i = 0; i < 16; i++) { +//ZZ if (0 == (u & 0xFFFFFF00)) { +//ZZ *u8 = u; +//ZZ *u4 = i; +//ZZ return True; +//ZZ } +//ZZ u = ROR32(u, 30); +//ZZ } +//ZZ vassert(i == 16); +//ZZ return False; +//ZZ } +//ZZ +//ZZ ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) { +//ZZ UInt u8, u4; +//ZZ ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ /* Try to generate single ADD if possible */ +//ZZ if (fitsIn8x4(&u8, &u4, imm32)) { +//ZZ i->tag = ARMin_Alu; +//ZZ i->ARMin.Alu.op = ARMalu_ADD; +//ZZ i->ARMin.Alu.dst = rD; +//ZZ i->ARMin.Alu.argL = rN; +//ZZ i->ARMin.Alu.argR = ARMRI84_I84(u8, u4); +//ZZ } else { +//ZZ i->tag = ARMin_Add32; +//ZZ i->ARMin.Add32.rD = rD; +//ZZ i->ARMin.Add32.rN = rN; +//ZZ i->ARMin.Add32.imm32 = imm32; +//ZZ } +//ZZ return i; +//ZZ } + +ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter, + 
ARM64AMode* amFailAddr ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_EvCheck; + i->ARM64in.EvCheck.amCounter = amCounter; + i->ARM64in.EvCheck.amFailAddr = amFailAddr; + return i; +} + +//ZZ ARMInstr* ARMInstr_ProfInc ( void ) { +//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); +//ZZ i->tag = ARMin_ProfInc; +//ZZ return i; +//ZZ } + +/* ... */ + +void ppARM64Instr ( ARM64Instr* i ) { + switch (i->tag) { + case ARM64in_Arith: + vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub"); + ppHRegARM64(i->ARM64in.Arith.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.Arith.argL); + vex_printf(", "); + ppARM64RIA(i->ARM64in.Arith.argR); + return; + case ARM64in_Cmp: + vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" ); + ppHRegARM64(i->ARM64in.Cmp.argL); + vex_printf(", "); + ppARM64RIA(i->ARM64in.Cmp.argR); + return; + case ARM64in_Logic: + vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op)); + ppHRegARM64(i->ARM64in.Logic.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.Logic.argL); + vex_printf(", "); + ppARM64RIL(i->ARM64in.Logic.argR); + return; + case ARM64in_Test: + vex_printf("tst "); + ppHRegARM64(i->ARM64in.Test.argL); + vex_printf(", "); + ppARM64RIL(i->ARM64in.Test.argR); + return; + case ARM64in_Shift: + vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op)); + ppHRegARM64(i->ARM64in.Shift.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.Shift.argL); + vex_printf(", "); + ppARM64RI6(i->ARM64in.Shift.argR); + return; + case ARM64in_Unary: + vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op)); + ppHRegARM64(i->ARM64in.Unary.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.Unary.src); + return; + case ARM64in_MovI: + vex_printf("mov "); + ppHRegARM64(i->ARM64in.MovI.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.MovI.src); + return; + case ARM64in_Imm64: + vex_printf("imm64 "); + ppHRegARM64(i->ARM64in.Imm64.dst); + vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64); + return; + case ARM64in_LdSt64: + if (i->ARM64in.LdSt64.isLoad) { + vex_printf("ldr "); + ppHRegARM64(i->ARM64in.LdSt64.rD); + vex_printf(", "); + ppARM64AMode(i->ARM64in.LdSt64.amode); + } else { + vex_printf("str "); + ppARM64AMode(i->ARM64in.LdSt64.amode); + vex_printf(", "); + ppHRegARM64(i->ARM64in.LdSt64.rD); + } + return; + case ARM64in_LdSt32: + if (i->ARM64in.LdSt32.isLoad) { + vex_printf("ldruw "); + ppHRegARM64(i->ARM64in.LdSt32.rD); + vex_printf(", "); + ppARM64AMode(i->ARM64in.LdSt32.amode); + } else { + vex_printf("strw "); + ppARM64AMode(i->ARM64in.LdSt32.amode); + vex_printf(", "); + ppHRegARM64(i->ARM64in.LdSt32.rD); + } + return; + case ARM64in_LdSt16: + if (i->ARM64in.LdSt16.isLoad) { + vex_printf("ldruh "); + ppHRegARM64(i->ARM64in.LdSt16.rD); + vex_printf(", "); + ppARM64AMode(i->ARM64in.LdSt16.amode); + } else { + vex_printf("strh "); + ppARM64AMode(i->ARM64in.LdSt16.amode); + vex_printf(", "); + ppHRegARM64(i->ARM64in.LdSt16.rD); + } + return; + case ARM64in_LdSt8: + if (i->ARM64in.LdSt8.isLoad) { + vex_printf("ldrub "); + ppHRegARM64(i->ARM64in.LdSt8.rD); + vex_printf(", "); + ppARM64AMode(i->ARM64in.LdSt8.amode); + } else { + vex_printf("strb "); + ppARM64AMode(i->ARM64in.LdSt8.amode); + vex_printf(", "); + ppHRegARM64(i->ARM64in.LdSt8.rD); + } + return; + case ARM64in_XDirect: + vex_printf("(xDirect) "); + vex_printf("if (%%pstate.%s) { ", + showARM64CondCode(i->ARM64in.XDirect.cond)); + vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA); + vex_printf("str x9,"); + ppARM64AMode(i->ARM64in.XDirect.amPC); + vex_printf("; 
imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ", + i->ARM64in.XDirect.toFastEP ? "fast" : "slow"); + vex_printf("blr x9 }"); + return; + case ARM64in_XIndir: + vex_printf("(xIndir) "); + vex_printf("if (%%pstate.%s) { ", + showARM64CondCode(i->ARM64in.XIndir.cond)); + vex_printf("str "); + ppHRegARM64(i->ARM64in.XIndir.dstGA); + vex_printf(","); + ppARM64AMode(i->ARM64in.XIndir.amPC); + vex_printf("; imm64 x9,$disp_cp_xindir; "); + vex_printf("br x9 }"); + return; + case ARM64in_XAssisted: + vex_printf("(xAssisted) "); + vex_printf("if (%%pstate.%s) { ", + showARM64CondCode(i->ARM64in.XAssisted.cond)); + vex_printf("str "); + ppHRegARM64(i->ARM64in.XAssisted.dstGA); + vex_printf(","); + ppARM64AMode(i->ARM64in.XAssisted.amPC); + vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ", + (Int)i->ARM64in.XAssisted.jk); + vex_printf("imm64 x9,$disp_cp_xassisted; "); + vex_printf("br x9 }"); + return; + case ARM64in_CSel: + vex_printf("csel "); + ppHRegARM64(i->ARM64in.CSel.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.CSel.argL); + vex_printf(", "); + ppHRegARM64(i->ARM64in.CSel.argR); + vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond)); + return; + case ARM64in_Call: + vex_printf("call%s ", + i->ARM64in.Call.cond==ARM64cc_AL + ? " " : showARM64CondCode(i->ARM64in.Call.cond)); + vex_printf("0x%lx [nArgRegs=%d, ", + i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs); + ppRetLoc(i->ARM64in.Call.rloc); + vex_printf("]"); + return; + case ARM64in_AddToSP: { + Int simm = i->ARM64in.AddToSP.simm; + vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add", + simm < 0 ? -simm : simm); + return; + } + case ARM64in_FromSP: + vex_printf("mov "); + ppHRegARM64(i->ARM64in.FromSP.dst); + vex_printf(", xsp"); + return; + case ARM64in_Mul: + vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op)); + ppHRegARM64(i->ARM64in.Mul.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.Mul.argL); + vex_printf(", "); + ppHRegARM64(i->ARM64in.Mul.argR); + return; + + case ARM64in_LdrEX: { + const HChar* sz = " "; + switch (i->ARM64in.LdrEX.szB) { + case 1: sz = "b"; break; + case 2: sz = "h"; break; + case 4: case 8: break; + default: vassert(0); + } + vex_printf("ldxr%s %c2, [x4]", + sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w'); + return; + } + case ARM64in_StrEX: { + const HChar* sz = " "; + switch (i->ARM64in.StrEX.szB) { + case 1: sz = "b"; break; + case 2: sz = "h"; break; + case 4: case 8: break; + default: vassert(0); + } + vex_printf("stxr%s w0, %c2, [x4]", + sz, i->ARM64in.StrEX.szB == 8 ? 
'x' : 'w'); + return; + } + case ARM64in_MFence: + vex_printf("(mfence) dsb sy; dmb sy; isb"); + return; +//ZZ case ARM64in_CLREX: +//ZZ vex_printf("clrex"); +//ZZ return; + case ARM64in_VLdStS: + if (i->ARM64in.VLdStS.isLoad) { + vex_printf("ldr "); + ppHRegARM64asSreg(i->ARM64in.VLdStS.sD); + vex_printf(", %u(", i->ARM64in.VLdStS.uimm12); + ppHRegARM64(i->ARM64in.VLdStS.rN); + vex_printf(")"); + } else { + vex_printf("str "); + vex_printf("%u(", i->ARM64in.VLdStS.uimm12); + ppHRegARM64(i->ARM64in.VLdStS.rN); + vex_printf("), "); + ppHRegARM64asSreg(i->ARM64in.VLdStS.sD); + } + return; + case ARM64in_VLdStD: + if (i->ARM64in.VLdStD.isLoad) { + vex_printf("ldr "); + ppHRegARM64(i->ARM64in.VLdStD.dD); + vex_printf(", %u(", i->ARM64in.VLdStD.uimm12); + ppHRegARM64(i->ARM64in.VLdStD.rN); + vex_printf(")"); + } else { + vex_printf("str "); + vex_printf("%u(", i->ARM64in.VLdStD.uimm12); + ppHRegARM64(i->ARM64in.VLdStD.rN); + vex_printf("), "); + ppHRegARM64(i->ARM64in.VLdStD.dD); + } + return; + case ARM64in_VLdStQ: + if (i->ARM64in.VLdStQ.isLoad) + vex_printf("ld1.2d {"); + else + vex_printf("st1.2d {"); + ppHRegARM64(i->ARM64in.VLdStQ.rQ); + vex_printf("}, ["); + ppHRegARM64(i->ARM64in.VLdStQ.rN); + vex_printf("]"); + return; + case ARM64in_VCvtI2F: { + HChar syn = '?'; + UInt fszB = 0; + UInt iszB = 0; + characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how); + vex_printf("%ccvtf ", syn); + ppHRegARM64(i->ARM64in.VCvtI2F.rD); + vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D'); + ppHRegARM64(i->ARM64in.VCvtI2F.rS); + vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X'); + return; + } + case ARM64in_VCvtF2I: { + HChar syn = '?'; + UInt fszB = 0; + UInt iszB = 0; + HChar rmo = '?'; + characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how); + UChar armRM = i->ARM64in.VCvtF2I.armRM; + if (armRM < 4) rmo = "npmz"[armRM]; + vex_printf("fcvt%c%c ", rmo, syn); + ppHRegARM64(i->ARM64in.VCvtF2I.rD); + vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X'); + ppHRegARM64(i->ARM64in.VCvtF2I.rS); + vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D'); + return; + } + case ARM64in_VCvtSD: + vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? 
"s2d" : "d2s"); + if (i->ARM64in.VCvtSD.sToD) { + ppHRegARM64(i->ARM64in.VCvtSD.dst); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VCvtSD.src); + } else { + ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VCvtSD.src); + } + return; + case ARM64in_VUnaryD: + vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op)); + ppHRegARM64(i->ARM64in.VUnaryD.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VUnaryD.src); + return; + case ARM64in_VUnaryS: + vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op)); + ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VUnaryS.src); + return; + case ARM64in_VBinD: + vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op)); + ppHRegARM64(i->ARM64in.VBinD.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VBinD.argL); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VBinD.argR); + return; + case ARM64in_VBinS: + vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op)); + ppHRegARM64asSreg(i->ARM64in.VBinS.dst); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VBinS.argL); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VBinS.argR); + return; + case ARM64in_VCmpD: + vex_printf("fcmp "); + ppHRegARM64(i->ARM64in.VCmpD.argL); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VCmpD.argR); + return; + case ARM64in_VCmpS: + vex_printf("fcmp "); + ppHRegARM64asSreg(i->ARM64in.VCmpS.argL); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VCmpS.argR); + return; + case ARM64in_FPCR: + if (i->ARM64in.FPCR.toFPCR) { + vex_printf("msr fpcr, "); + ppHRegARM64(i->ARM64in.FPCR.iReg); + } else { + vex_printf("mrs "); + ppHRegARM64(i->ARM64in.FPCR.iReg); + vex_printf(", fpcr"); + } + return; + case ARM64in_VBinV: { + const HChar* nm = "??"; + const HChar* ar = "??"; + showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op); + vex_printf("%s ", nm); + ppHRegARM64(i->ARM64in.VBinV.dst); + vex_printf(".%s, ", ar); + ppHRegARM64(i->ARM64in.VBinV.argL); + vex_printf(".%s, ", ar); + ppHRegARM64(i->ARM64in.VBinV.argR); + vex_printf(".%s", ar); + return; + } + case ARM64in_VUnaryV: { + const HChar* nm = "??"; + const HChar* ar = "??"; + showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op); + vex_printf("%s ", nm); + ppHRegARM64(i->ARM64in.VUnaryV.dst); + vex_printf(".%s, ", ar); + ppHRegARM64(i->ARM64in.VUnaryV.arg); + vex_printf(".%s", ar); + return; + } + case ARM64in_VNarrowV: { + UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2; + const HChar* darr[3] = { "8b", "4h", "2s" }; + const HChar* sarr[3] = { "8h", "4s", "2d" }; + vex_printf("xtn "); + ppHRegARM64(i->ARM64in.VNarrowV.dst); + vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??"); + ppHRegARM64(i->ARM64in.VNarrowV.src); + vex_printf(".%s", dszBlg2 < 3 ? 
sarr[dszBlg2] : "??"); + return; + } + case ARM64in_VShiftImmV: { + const HChar* nm = "??"; + const HChar* ar = "??"; + showARM64VecShiftOp(&nm, &ar, i->ARM64in.VShiftImmV.op); + vex_printf("%s ", nm); + ppHRegARM64(i->ARM64in.VShiftImmV.dst); + vex_printf(".%s, ", ar); + ppHRegARM64(i->ARM64in.VShiftImmV.src); + vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt); + return; + } +//ZZ case ARMin_VAluS: +//ZZ vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op)); +//ZZ ppHRegARM(i->ARMin.VAluS.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VAluS.argL); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VAluS.argR); +//ZZ return; +//ZZ case ARMin_VCMovD: +//ZZ vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond)); +//ZZ ppHRegARM(i->ARMin.VCMovD.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VCMovD.src); +//ZZ return; +//ZZ case ARMin_VCMovS: +//ZZ vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond)); +//ZZ ppHRegARM(i->ARMin.VCMovS.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VCMovS.src); +//ZZ return; +//ZZ case ARMin_VXferD: +//ZZ vex_printf("vmov "); +//ZZ if (i->ARMin.VXferD.toD) { +//ZZ ppHRegARM(i->ARMin.VXferD.dD); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VXferD.rLo); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VXferD.rHi); +//ZZ } else { +//ZZ ppHRegARM(i->ARMin.VXferD.rLo); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VXferD.rHi); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VXferD.dD); +//ZZ } +//ZZ return; +//ZZ case ARMin_VXferS: +//ZZ vex_printf("vmov "); +//ZZ if (i->ARMin.VXferS.toS) { +//ZZ ppHRegARM(i->ARMin.VXferS.fD); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VXferS.rLo); +//ZZ } else { +//ZZ ppHRegARM(i->ARMin.VXferS.rLo); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VXferS.fD); +//ZZ } +//ZZ return; +//ZZ case ARMin_VCvtID: { +//ZZ const HChar* nm = "?"; +//ZZ if (i->ARMin.VCvtID.iToD) { +//ZZ nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod"; +//ZZ } else { +//ZZ nm = i->ARMin.VCvtID.syned ? 
"ftosid" : "ftouid"; +//ZZ } +//ZZ vex_printf("%s ", nm); +//ZZ ppHRegARM(i->ARMin.VCvtID.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.VCvtID.src); +//ZZ return; +//ZZ } +//ZZ case ARMin_NLdStD: +//ZZ if (i->ARMin.NLdStD.isLoad) +//ZZ vex_printf("vld1.32 {"); +//ZZ else +//ZZ vex_printf("vst1.32 {"); +//ZZ ppHRegARM(i->ARMin.NLdStD.dD); +//ZZ vex_printf("} "); +//ZZ ppARMAModeN(i->ARMin.NLdStD.amode); +//ZZ return; +//ZZ case ARMin_NUnary: +//ZZ vex_printf("%s%s%s ", +//ZZ showARMNeonUnOp(i->ARMin.NUnary.op), +//ZZ showARMNeonUnOpDataType(i->ARMin.NUnary.op), +//ZZ showARMNeonDataSize(i)); +//ZZ ppHRegARM(i->ARMin.NUnary.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NUnary.src); +//ZZ if (i->ARMin.NUnary.op == ARMneon_EQZ) +//ZZ vex_printf(", #0"); +//ZZ if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF || +//ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) { +//ZZ vex_printf(", #%d", i->ARMin.NUnary.size); +//ZZ } +//ZZ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS || +//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUU || +//ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) { +//ZZ UInt size; +//ZZ size = i->ARMin.NUnary.size; +//ZZ if (size & 0x40) { +//ZZ vex_printf(", #%d", size - 64); +//ZZ } else if (size & 0x20) { +//ZZ vex_printf(", #%d", size - 32); +//ZZ } else if (size & 0x10) { +//ZZ vex_printf(", #%d", size - 16); +//ZZ } else if (size & 0x08) { +//ZZ vex_printf(", #%d", size - 8); +//ZZ } +//ZZ } +//ZZ return; +//ZZ case ARMin_NUnaryS: +//ZZ vex_printf("%s%s%s ", +//ZZ showARMNeonUnOpS(i->ARMin.NUnaryS.op), +//ZZ showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op), +//ZZ showARMNeonDataSize(i)); +//ZZ ppARMNRS(i->ARMin.NUnaryS.dst); +//ZZ vex_printf(", "); +//ZZ ppARMNRS(i->ARMin.NUnaryS.src); +//ZZ return; +//ZZ case ARMin_NShift: +//ZZ vex_printf("%s%s%s ", +//ZZ showARMNeonShiftOp(i->ARMin.NShift.op), +//ZZ showARMNeonShiftOpDataType(i->ARMin.NShift.op), +//ZZ showARMNeonDataSize(i)); +//ZZ ppHRegARM(i->ARMin.NShift.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NShift.argL); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NShift.argR); +//ZZ return; +//ZZ case ARMin_NShl64: +//ZZ vex_printf("vshl.i64 "); +//ZZ ppHRegARM(i->ARMin.NShl64.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NShl64.src); +//ZZ vex_printf(", #%u", i->ARMin.NShl64.amt); +//ZZ return; +//ZZ case ARMin_NDual: +//ZZ vex_printf("%s%s%s ", +//ZZ showARMNeonDualOp(i->ARMin.NDual.op), +//ZZ showARMNeonDualOpDataType(i->ARMin.NDual.op), +//ZZ showARMNeonDataSize(i)); +//ZZ ppHRegARM(i->ARMin.NDual.arg1); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NDual.arg2); +//ZZ return; +//ZZ case ARMin_NBinary: +//ZZ vex_printf("%s%s%s", +//ZZ showARMNeonBinOp(i->ARMin.NBinary.op), +//ZZ showARMNeonBinOpDataType(i->ARMin.NBinary.op), +//ZZ showARMNeonDataSize(i)); +//ZZ vex_printf(" "); +//ZZ ppHRegARM(i->ARMin.NBinary.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NBinary.argL); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NBinary.argR); +//ZZ return; + case ARM64in_VImmQ: + vex_printf("qimm "); + ppHRegARM64(i->ARM64in.VImmQ.rQ); + vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm); + return; + case ARM64in_VDfromX: + vex_printf("fmov "); + ppHRegARM64(i->ARM64in.VDfromX.rD); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VDfromX.rX); + return; + case ARM64in_VQfromXX: + vex_printf("qFromXX "); + ppHRegARM64(i->ARM64in.VQfromXX.rQ); + vex_printf(", "); + 
ppHRegARM64(i->ARM64in.VQfromXX.rXhi); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VQfromXX.rXlo); + return; + case ARM64in_VXfromQ: + vex_printf("mov "); + ppHRegARM64(i->ARM64in.VXfromQ.rX); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VXfromQ.rQ); + vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo); + return; + case ARM64in_VMov: { + UChar aux = '?'; + switch (i->ARM64in.VMov.szB) { + case 16: aux = 'q'; break; + case 8: aux = 'd'; break; + case 4: aux = 's'; break; + default: break; + } + vex_printf("mov(%c) ", aux); + ppHRegARM64(i->ARM64in.VMov.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VMov.src); + return; + } +//ZZ case ARMin_NCMovQ: +//ZZ vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond)); +//ZZ ppHRegARM(i->ARMin.NCMovQ.dst); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.NCMovQ.src); +//ZZ return; +//ZZ case ARMin_Add32: +//ZZ vex_printf("add32 "); +//ZZ ppHRegARM(i->ARMin.Add32.rD); +//ZZ vex_printf(", "); +//ZZ ppHRegARM(i->ARMin.Add32.rN); +//ZZ vex_printf(", "); +//ZZ vex_printf("%d", i->ARMin.Add32.imm32); +//ZZ return; + case ARM64in_EvCheck: + vex_printf("(evCheck) ldr w9,"); + ppARM64AMode(i->ARM64in.EvCheck.amCounter); + vex_printf("; subs w9,w9,$1; str w9,"); + ppARM64AMode(i->ARM64in.EvCheck.amCounter); + vex_printf("; bpl nofail; ldr x9,"); + ppARM64AMode(i->ARM64in.EvCheck.amFailAddr); + vex_printf("; br x9; nofail:"); + return; +//ZZ case ARMin_ProfInc: +//ZZ vex_printf("(profInc) movw r12,LO16($NotKnownYet); " +//ZZ "movw r12,HI16($NotKnownYet); " +//ZZ "ldr r11,[r12]; " +//ZZ "adds r11,r11,$1; " +//ZZ "str r11,[r12]; " +//ZZ "ldr r11,[r12+4]; " +//ZZ "adc r11,r11,$0; " +//ZZ "str r11,[r12+4]"); +//ZZ return; + default: + vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag); + vpanic("ppARM64Instr(1)"); + return; + } +} + + +/* --------- Helpers for register allocation. 
--------- */ + +void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 ) +{ + vassert(mode64 == True); + initHRegUsage(u); + switch (i->tag) { + case ARM64in_Arith: + addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst); + addHRegUse(u, HRmRead, i->ARM64in.Arith.argL); + addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR); + return; + case ARM64in_Cmp: + addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL); + addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR); + return; + case ARM64in_Logic: + addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst); + addHRegUse(u, HRmRead, i->ARM64in.Logic.argL); + addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR); + return; + case ARM64in_Test: + addHRegUse(u, HRmRead, i->ARM64in.Test.argL); + addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR); + return; + case ARM64in_Shift: + addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst); + addHRegUse(u, HRmRead, i->ARM64in.Shift.argL); + addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR); + return; + case ARM64in_Unary: + addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst); + addHRegUse(u, HRmRead, i->ARM64in.Unary.src); + return; + case ARM64in_MovI: + addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst); + addHRegUse(u, HRmRead, i->ARM64in.MovI.src); + return; + case ARM64in_Imm64: + addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst); + return; + case ARM64in_LdSt64: + addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode); + if (i->ARM64in.LdSt64.isLoad) { + addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD); + } else { + addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD); + } + return; + case ARM64in_LdSt32: + addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode); + if (i->ARM64in.LdSt32.isLoad) { + addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD); + } else { + addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD); + } + return; + case ARM64in_LdSt16: + addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode); + if (i->ARM64in.LdSt16.isLoad) { + addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD); + } else { + addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD); + } + return; + case ARM64in_LdSt8: + addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode); + if (i->ARM64in.LdSt8.isLoad) { + addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD); + } else { + addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD); + } + return; + /* XDirect/XIndir/XAssisted are also a bit subtle. They + conditionally exit the block. Hence we only need to list (1) + the registers that they read, and (2) the registers that they + write in the case where the block is not exited. (2) is + empty, hence only (1) is relevant here. */ + case ARM64in_XDirect: + addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC); + return; + case ARM64in_XIndir: + addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA); + addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC); + return; + case ARM64in_XAssisted: + addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA); + addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC); + return; + case ARM64in_CSel: + addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst); + addHRegUse(u, HRmRead, i->ARM64in.CSel.argL); + addHRegUse(u, HRmRead, i->ARM64in.CSel.argR); + return; + case ARM64in_Call: + /* logic and comments copied/modified from x86 back end */ + /* This is a bit subtle. */ + /* First off, claim it trashes all the caller-saved regs + which fall within the register allocator's jurisdiction. + These I believe to be x0 to x7. Also need to be + careful about vector regs. 
*/ + addHRegUse(u, HRmWrite, hregARM64_X0()); + addHRegUse(u, HRmWrite, hregARM64_X1()); + addHRegUse(u, HRmWrite, hregARM64_X2()); + addHRegUse(u, HRmWrite, hregARM64_X3()); + addHRegUse(u, HRmWrite, hregARM64_X4()); + addHRegUse(u, HRmWrite, hregARM64_X5()); + addHRegUse(u, HRmWrite, hregARM64_X6()); + addHRegUse(u, HRmWrite, hregARM64_X7()); + addHRegUse(u, HRmWrite, hregARM64_Q16()); + addHRegUse(u, HRmWrite, hregARM64_Q17()); + addHRegUse(u, HRmWrite, hregARM64_Q18()); + /* Now we have to state any parameter-carrying registers + which might be read. This depends on nArgRegs. */ + switch (i->ARM64in.Call.nArgRegs) { + case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/ + case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/ + case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/ + case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/ + case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/ + case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/ + case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/ + case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break; + case 0: break; + default: vpanic("getRegUsage_ARM64:Call:regparms"); + } + /* Finally, there is the issue that the insn trashes a + register because the literal target address has to be + loaded into a register. However, we reserve x9 for that + purpose so there's no further complexity here. Stating x9 + as trashed is pointless since it's not under the control + of the allocator, but what the hell. */ + addHRegUse(u, HRmWrite, hregARM64_X9()); + return; + case ARM64in_AddToSP: + /* Only changes SP, but regalloc doesn't control that, hence + we don't care. */ + return; + case ARM64in_FromSP: + addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst); + return; + case ARM64in_Mul: + addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst); + addHRegUse(u, HRmRead, i->ARM64in.Mul.argL); + addHRegUse(u, HRmRead, i->ARM64in.Mul.argR); + return; + case ARM64in_LdrEX: + addHRegUse(u, HRmRead, hregARM64_X4()); + addHRegUse(u, HRmWrite, hregARM64_X2()); + return; + case ARM64in_StrEX: + addHRegUse(u, HRmRead, hregARM64_X4()); + addHRegUse(u, HRmWrite, hregARM64_X0()); + addHRegUse(u, HRmRead, hregARM64_X2()); + return; + case ARM64in_MFence: + return; +//ZZ case ARMin_CLREX: +//ZZ return; + case ARM64in_VLdStS: + addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN); + if (i->ARM64in.VLdStS.isLoad) { + addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD); + } else { + addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD); + } + return; + case ARM64in_VLdStD: + addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN); + if (i->ARM64in.VLdStD.isLoad) { + addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD); + } else { + addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD); + } + return; + case ARM64in_VLdStQ: + addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN); + if (i->ARM64in.VLdStQ.isLoad) + addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ); + else + addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ); + return; + case ARM64in_VCvtI2F: + addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS); + addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD); + return; + case ARM64in_VCvtF2I: + addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS); + addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD); + return; + case ARM64in_VCvtSD: + addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst); + addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src); + return; + case ARM64in_VUnaryD: + addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst); + addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src); + return; + case ARM64in_VUnaryS: + addHRegUse(u, 
HRmWrite, i->ARM64in.VUnaryS.dst); + addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src); + return; + case ARM64in_VBinD: + addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst); + addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL); + addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR); + return; + case ARM64in_VBinS: + addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst); + addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL); + addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR); + return; + case ARM64in_VCmpD: + addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL); + addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR); + return; + case ARM64in_VCmpS: + addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL); + addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR); + return; + case ARM64in_FPCR: + if (i->ARM64in.FPCR.toFPCR) + addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg); + else + addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg); + return; + case ARM64in_VBinV: + addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst); + addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL); + addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR); + return; + case ARM64in_VUnaryV: + addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst); + addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg); + return; + case ARM64in_VNarrowV: + addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst); + addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src); + return; + case ARM64in_VShiftImmV: + addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst); + addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src); + return; +//ZZ case ARMin_VAluS: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argL); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argR); +//ZZ return; +//ZZ case ARMin_VUnaryS: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src); +//ZZ return; +//ZZ case ARMin_VCMovD: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovD.src); +//ZZ return; +//ZZ case ARMin_VCMovS: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovS.src); +//ZZ return; +//ZZ case ARMin_VXferD: +//ZZ if (i->ARMin.VXferD.toD) { +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo); +//ZZ } else { +//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.dD); +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi); +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo); +//ZZ } +//ZZ return; +//ZZ case ARMin_VXferS: +//ZZ if (i->ARMin.VXferS.toS) { +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo); +//ZZ } else { +//ZZ addHRegUse(u, HRmRead, i->ARMin.VXferS.fD); +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo); +//ZZ } +//ZZ return; +//ZZ case ARMin_VCvtID: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.VCvtID.src); +//ZZ return; +//ZZ case ARMin_NLdStD: +//ZZ if (i->ARMin.NLdStD.isLoad) +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD); +//ZZ else +//ZZ addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD); +//ZZ addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode); +//ZZ return; +//ZZ case ARMin_NUnary: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NUnary.src); +//ZZ return; +//ZZ case ARMin_NUnaryS: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg); +//ZZ addHRegUse(u, HRmRead, 
i->ARMin.NUnaryS.src->reg); +//ZZ return; +//ZZ case ARMin_NShift: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NShift.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NShift.argL); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NShift.argR); +//ZZ return; +//ZZ case ARMin_NShl64: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NShl64.src); +//ZZ return; +//ZZ case ARMin_NDual: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1); +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NDual.arg1); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NDual.arg2); +//ZZ return; + case ARM64in_VImmQ: + addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ); + return; + case ARM64in_VDfromX: + addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD); + addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX); + return; + case ARM64in_VQfromXX: + addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ); + addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi); + addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo); + return; + case ARM64in_VXfromQ: + addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX); + addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ); + return; + case ARM64in_VMov: + addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst); + addHRegUse(u, HRmRead, i->ARM64in.VMov.src); + return; +//ZZ case ARMin_NBinary: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst); +//ZZ /* TODO: sometimes dst is also being read! */ +//ZZ // XXX fix this +//ZZ addHRegUse(u, HRmRead, i->ARMin.NBinary.argL); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NBinary.argR); +//ZZ return; +//ZZ case ARMin_NCMovQ: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst); +//ZZ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src); +//ZZ return; +//ZZ case ARMin_Add32: +//ZZ addHRegUse(u, HRmWrite, i->ARMin.Add32.rD); +//ZZ addHRegUse(u, HRmRead, i->ARMin.Add32.rN); +//ZZ return; + case ARM64in_EvCheck: + /* We expect both amodes only to mention x21, so this is in + fact pointless, since x21 isn't allocatable, but + anyway.. 
*/ + addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter); + addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr); + addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */ + return; +//ZZ case ARMin_ProfInc: +//ZZ addHRegUse(u, HRmWrite, hregARM_R12()); +//ZZ addHRegUse(u, HRmWrite, hregARM_R11()); +//ZZ return; + default: + ppARM64Instr(i); + vpanic("getRegUsage_ARM64Instr"); + } +} + + +void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) +{ + vassert(mode64 == True); + switch (i->tag) { + case ARM64in_Arith: + i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst); + i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL); + mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR); + return; + case ARM64in_Cmp: + i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL); + mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR); + return; + case ARM64in_Logic: + i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst); + i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL); + mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR); + return; + case ARM64in_Test: + i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL); + mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR); + return; + case ARM64in_Shift: + i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst); + i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL); + mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR); + return; + case ARM64in_Unary: + i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst); + i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src); + return; + case ARM64in_MovI: + i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst); + i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src); + return; + case ARM64in_Imm64: + i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst); + return; + case ARM64in_LdSt64: + i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD); + mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode); + return; + case ARM64in_LdSt32: + i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD); + mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode); + return; + case ARM64in_LdSt16: + i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD); + mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode); + return; + case ARM64in_LdSt8: + i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD); + mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode); + return; + case ARM64in_XDirect: + mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC); + return; + case ARM64in_XIndir: + i->ARM64in.XIndir.dstGA + = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA); + mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC); + return; + case ARM64in_XAssisted: + i->ARM64in.XAssisted.dstGA + = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA); + mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC); + return; + case ARM64in_CSel: + i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst); + i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL); + i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR); + return; + case ARM64in_Call: + return; + case ARM64in_AddToSP: + return; + case ARM64in_FromSP: + i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst); + return; + case ARM64in_Mul: + i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst); + i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL); + i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR); + break; + case ARM64in_LdrEX: + return; + case 
ARM64in_StrEX: + return; + case ARM64in_MFence: + return; +//ZZ case ARMin_CLREX: +//ZZ return; + case ARM64in_VLdStS: + i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD); + i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN); + return; + case ARM64in_VLdStD: + i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD); + i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN); + return; + case ARM64in_VLdStQ: + i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ); + i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN); + return; + case ARM64in_VCvtI2F: + i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS); + i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD); + return; + case ARM64in_VCvtF2I: + i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS); + i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD); + return; + case ARM64in_VCvtSD: + i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst); + i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src); + return; + case ARM64in_VUnaryD: + i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst); + i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src); + return; + case ARM64in_VUnaryS: + i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst); + i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src); + return; + case ARM64in_VBinD: + i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst); + i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL); + i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR); + return; + case ARM64in_VBinS: + i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst); + i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL); + i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR); + return; + case ARM64in_VCmpD: + i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL); + i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR); + return; + case ARM64in_VCmpS: + i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL); + i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR); + return; + case ARM64in_FPCR: + i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg); + return; + case ARM64in_VBinV: + i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst); + i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL); + i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR); + return; + case ARM64in_VUnaryV: + i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst); + i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg); + return; + case ARM64in_VNarrowV: + i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst); + i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src); + return; + case ARM64in_VShiftImmV: + i->ARM64in.VShiftImmV.dst + = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst); + i->ARM64in.VShiftImmV.src + = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src); + return; +//ZZ case ARMin_VAluS: +//ZZ i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst); +//ZZ i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL); +//ZZ i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR); +//ZZ return; +//ZZ case ARMin_VCMovD: +//ZZ i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst); +//ZZ i->ARMin.VCMovD.src = lookupHRegRemap(m, 
i->ARMin.VCMovD.src); +//ZZ return; +//ZZ case ARMin_VCMovS: +//ZZ i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst); +//ZZ i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src); +//ZZ return; +//ZZ case ARMin_VXferD: +//ZZ i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD); +//ZZ i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi); +//ZZ i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo); +//ZZ return; +//ZZ case ARMin_VXferS: +//ZZ i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD); +//ZZ i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo); +//ZZ return; +//ZZ case ARMin_VCvtID: +//ZZ i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst); +//ZZ i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src); +//ZZ return; +//ZZ case ARMin_NLdStD: +//ZZ i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD); +//ZZ mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode); +//ZZ return; +//ZZ case ARMin_NUnary: +//ZZ i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src); +//ZZ i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst); +//ZZ return; +//ZZ case ARMin_NUnaryS: +//ZZ i->ARMin.NUnaryS.src->reg +//ZZ = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg); +//ZZ i->ARMin.NUnaryS.dst->reg +//ZZ = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg); +//ZZ return; +//ZZ case ARMin_NShift: +//ZZ i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst); +//ZZ i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL); +//ZZ i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR); +//ZZ return; +//ZZ case ARMin_NShl64: +//ZZ i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst); +//ZZ i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src); +//ZZ return; +//ZZ case ARMin_NDual: +//ZZ i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1); +//ZZ i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2); +//ZZ return; + case ARM64in_VImmQ: + i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ); + return; + case ARM64in_VDfromX: + i->ARM64in.VDfromX.rD + = lookupHRegRemap(m, i->ARM64in.VDfromX.rD); + i->ARM64in.VDfromX.rX + = lookupHRegRemap(m, i->ARM64in.VDfromX.rX); + return; + case ARM64in_VQfromXX: + i->ARM64in.VQfromXX.rQ + = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ); + i->ARM64in.VQfromXX.rXhi + = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi); + i->ARM64in.VQfromXX.rXlo + = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo); + return; + case ARM64in_VXfromQ: + i->ARM64in.VXfromQ.rX + = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX); + i->ARM64in.VXfromQ.rQ + = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ); + return; + case ARM64in_VMov: + i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst); + i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src); + return; + +//ZZ case ARMin_NBinary: +//ZZ i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL); +//ZZ i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR); +//ZZ i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst); +//ZZ return; +//ZZ case ARMin_NCMovQ: +//ZZ i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst); +//ZZ i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src); +//ZZ return; +//ZZ case ARMin_Add32: +//ZZ i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD); +//ZZ i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN); +//ZZ return; + case ARM64in_EvCheck: + /* We expect both amodes only to mention x21, so this is in + fact pointless, since x21 
isn't allocatable, but + anyway.. */ + mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter); + mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr); + return; +//ZZ case ARMin_ProfInc: +//ZZ /* hardwires r11 and r12 -- nothing to modify. */ +//ZZ return; + default: + ppARM64Instr(i); + vpanic("mapRegs_ARM64Instr"); + } +} + +/* Figure out if i represents a reg-reg move, and if so assign the + source and destination to *src and *dst. If in doubt say No. Used + by the register allocator to do move coalescing. +*/ +Bool isMove_ARM64Instr ( ARM64Instr* i, HReg* src, HReg* dst ) +{ + switch (i->tag) { + case ARM64in_MovI: + *src = i->ARM64in.MovI.src; + *dst = i->ARM64in.MovI.dst; + return True; + case ARM64in_VMov: + *src = i->ARM64in.VMov.src; + *dst = i->ARM64in.VMov.dst; + return True; + default: + break; + } + + return False; +} + + +/* Generate arm spill/reload instructions under the direction of the + register allocator. Note it's critical these don't write the + condition codes. */ + +void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, + HReg rreg, Int offsetB, Bool mode64 ) +{ + HRegClass rclass; + vassert(offsetB >= 0); + vassert(!hregIsVirtual(rreg)); + vassert(mode64 == True); + *i1 = *i2 = NULL; + rclass = hregClass(rreg); + switch (rclass) { + case HRcInt64: + vassert(0 == (offsetB & 7)); + offsetB >>= 3; + vassert(offsetB < 4096); + *i1 = ARM64Instr_LdSt64( + False/*!isLoad*/, + rreg, + ARM64AMode_RI12(hregARM64_X21(), offsetB, 8) + ); + return; + case HRcFlt64: + vassert(0 == (offsetB & 7)); + vassert(offsetB >= 0 && offsetB < 32768); + *i1 = ARM64Instr_VLdStD(False/*!isLoad*/, + rreg, hregARM64_X21(), offsetB); + return; + case HRcVec128: { + HReg x21 = hregARM64_X21(); // baseblock + HReg x9 = hregARM64_X9(); // spill temporary + vassert(0 == (offsetB & 15)); // check sane alignment + vassert(offsetB < 4096); + *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True); + *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9); + return; + } + default: + ppHRegClass(rclass); + vpanic("genSpill_ARM: unimplemented regclass"); + } +} + +void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, + HReg rreg, Int offsetB, Bool mode64 ) +{ + HRegClass rclass; + vassert(offsetB >= 0); + vassert(!hregIsVirtual(rreg)); + vassert(mode64 == True); + *i1 = *i2 = NULL; + rclass = hregClass(rreg); + switch (rclass) { + case HRcInt64: + vassert(0 == (offsetB & 7)); + offsetB >>= 3; + vassert(offsetB < 4096); + *i1 = ARM64Instr_LdSt64( + True/*isLoad*/, + rreg, + ARM64AMode_RI12(hregARM64_X21(), offsetB, 8) + ); + return; + case HRcFlt64: + vassert(0 == (offsetB & 7)); + vassert(offsetB >= 0 && offsetB < 32768); + *i1 = ARM64Instr_VLdStD(True/*isLoad*/, + rreg, hregARM64_X21(), offsetB); + return; + case HRcVec128: { + HReg x21 = hregARM64_X21(); // baseblock + HReg x9 = hregARM64_X9(); // spill temporary + vassert(0 == (offsetB & 15)); // check sane alignment + vassert(offsetB < 4096); + *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True); + *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9); + return; + } + default: + ppHRegClass(rclass); + vpanic("genReload_ARM: unimplemented regclass"); + } +} + + +//ZZ /* Emit an instruction into buf and return the number of bytes used. +//ZZ Note that buf is not the insn's final place, and therefore it is +//ZZ imperative to emit position-independent code. 
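   (Summary of the spill scheme above: Int64 and Flt64 registers spill and
   reload with a single scaled str/ldr at [x21 + offsetB], whereas a Vec128
   register needs a two-insn sequence, since the VLdStQ form used here only
   takes a plain register address:

      add x9, x21, #offsetB
      st1 {vQ.2d}, [x9]        // or ld1 {vQ.2d}, [x9] for a reload

   x9 is usable as the scratch register because it is never given out by
   the allocator.)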
*/ + +static inline UChar iregNo ( HReg r ) +{ + UInt n; + vassert(hregClass(r) == HRcInt64); + vassert(!hregIsVirtual(r)); + n = hregNumber(r); + vassert(n <= 30); + return toUChar(n); +} + +static inline UChar dregNo ( HReg r ) +{ + UInt n; + vassert(hregClass(r) == HRcFlt64); + vassert(!hregIsVirtual(r)); + n = hregNumber(r); + vassert(n <= 31); + return toUChar(n); +} + +static inline UChar qregNo ( HReg r ) +{ + UInt n; + vassert(hregClass(r) == HRcVec128); + vassert(!hregIsVirtual(r)); + n = hregNumber(r); + vassert(n <= 31); + return toUChar(n); +} + +#define BITS4(zzb3,zzb2,zzb1,zzb0) \ + (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0)) + +#define X00 BITS4(0,0, 0,0) +#define X01 BITS4(0,0, 0,1) +#define X10 BITS4(0,0, 1,0) +#define X11 BITS4(0,0, 1,1) + +#define X000 BITS4(0, 0,0,0) +#define X001 BITS4(0, 0,0,1) +#define X010 BITS4(0, 0,1,0) +#define X011 BITS4(0, 0,1,1) +#define X100 BITS4(0, 1,0,0) +#define X101 BITS4(0, 1,0,1) +#define X110 BITS4(0, 1,1,0) +#define X111 BITS4(0, 1,1,1) + +#define X0000 BITS4(0,0,0,0) +#define X0001 BITS4(0,0,0,1) +#define X0010 BITS4(0,0,1,0) +#define X0011 BITS4(0,0,1,1) + +#define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \ + ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0)) + +#define X00000 BITS8(0,0,0, 0,0,0,0,0) +#define X00001 BITS8(0,0,0, 0,0,0,0,1) +#define X00111 BITS8(0,0,0, 0,0,1,1,1) +#define X01000 BITS8(0,0,0, 0,1,0,0,0) +#define X10000 BITS8(0,0,0, 1,0,0,0,0) +#define X11000 BITS8(0,0,0, 1,1,0,0,0) +#define X11110 BITS8(0,0,0, 1,1,1,1,0) +#define X11111 BITS8(0,0,0, 1,1,1,1,1) + +#define X000000 BITS8(0,0, 0,0,0,0,0,0) +#define X000001 BITS8(0,0, 0,0,0,0,0,1) +#define X000100 BITS8(0,0, 0,0,0,1,0,0) +#define X000111 BITS8(0,0, 0,0,0,1,1,1) +#define X001000 BITS8(0,0, 0,0,1,0,0,0) +#define X001001 BITS8(0,0, 0,0,1,0,0,1) +#define X001010 BITS8(0,0, 0,0,1,0,1,0) +#define X001101 BITS8(0,0, 0,0,1,1,0,1) +#define X001111 BITS8(0,0, 0,0,1,1,1,1) +#define X010000 BITS8(0,0, 0,1,0,0,0,0) +#define X010001 BITS8(0,0, 0,1,0,0,0,1) +#define X010101 BITS8(0,0, 0,1,0,1,0,1) +#define X010110 BITS8(0,0, 0,1,0,1,1,0) +#define X011001 BITS8(0,0, 0,1,1,0,0,1) +#define X011010 BITS8(0,0, 0,1,1,0,1,0) +#define X011011 BITS8(0,0, 0,1,1,0,1,1) +#define X011110 BITS8(0,0, 0,1,1,1,1,0) +#define X011111 BITS8(0,0, 0,1,1,1,1,1) +#define X100001 BITS8(0,0, 1,0,0,0,0,1) +#define X100011 BITS8(0,0, 1,0,0,0,1,1) +#define X100100 BITS8(0,0, 1,0,0,1,0,0) +#define X100101 BITS8(0,0, 1,0,0,1,0,1) +#define X100110 BITS8(0,0, 1,0,0,1,1,0) +#define X100111 BITS8(0,0, 1,0,0,1,1,1) +#define X110000 BITS8(0,0, 1,1,0,0,0,0) +#define X110001 BITS8(0,0, 1,1,0,0,0,1) +#define X110101 BITS8(0,0, 1,1,0,1,0,1) +#define X110111 BITS8(0,0, 1,1,0,1,1,1) +#define X111000 BITS8(0,0, 1,1,1,0,0,0) +#define X111001 BITS8(0,0, 1,1,1,0,0,1) +#define X111101 BITS8(0,0, 1,1,1,1,0,1) +#define X111110 BITS8(0,0, 1,1,1,1,1,0) +#define X111111 BITS8(0,0, 1,1,1,1,1,1) + +#define X0001000 BITS8(0, 0,0,0,1,0,0,0) +#define X0010000 BITS8(0, 0,0,1,0,0,0,0) +#define X0100000 BITS8(0, 0,1,0,0,0,0,0) +#define X1000000 BITS8(0, 1,0,0,0,0,0,0) + +#define X00100000 BITS8(0,0,1,0,0,0,0,0) +#define X00100001 BITS8(0,0,1,0,0,0,0,1) +#define X00100010 BITS8(0,0,1,0,0,0,1,0) +#define X00100011 BITS8(0,0,1,0,0,0,1,1) +#define X01010000 BITS8(0,1,0,1,0,0,0,0) +#define X01010001 BITS8(0,1,0,1,0,0,0,1) +#define X01010100 BITS8(0,1,0,1,0,1,0,0) +#define X01011000 BITS8(0,1,0,1,1,0,0,0) +#define X01100000 BITS8(0,1,1,0,0,0,0,0) +#define X01100001 BITS8(0,1,1,0,0,0,0,1) +#define 
X01100010 BITS8(0,1,1,0,0,0,1,0) +#define X01100011 BITS8(0,1,1,0,0,0,1,1) +#define X01110000 BITS8(0,1,1,1,0,0,0,0) +#define X01110001 BITS8(0,1,1,1,0,0,0,1) +#define X01110011 BITS8(0,1,1,1,0,0,1,1) +#define X01110101 BITS8(0,1,1,1,0,1,0,1) +#define X01110111 BITS8(0,1,1,1,0,1,1,1) +#define X11000001 BITS8(1,1,0,0,0,0,0,1) +#define X11000011 BITS8(1,1,0,0,0,0,1,1) +#define X11010100 BITS8(1,1,0,1,0,1,0,0) +#define X11010110 BITS8(1,1,0,1,0,1,1,0) +#define X11011000 BITS8(1,1,0,1,1,0,0,0) +#define X11011010 BITS8(1,1,0,1,1,0,1,0) +#define X11011110 BITS8(1,1,0,1,1,1,1,0) +#define X11110001 BITS8(1,1,1,1,0,0,0,1) +#define X11110011 BITS8(1,1,1,1,0,0,1,1) + + +/* --- 4 fields --- */ + +static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) { + vassert(8+19+1+4 == 32); + vassert(f1 < (1<<8)); + vassert(f2 < (1<<19)); + vassert(f3 < (1<<1)); + vassert(f4 < (1<<4)); + UInt w = 0; + w = (w << 8) | f1; + w = (w << 19) | f2; + w = (w << 1) | f3; + w = (w << 4) | f4; + return w; +} + +/* --- 5 fields --- */ + +static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2, + UInt f3, UInt f4, UInt f5 ) { + vassert(3+6+2+16+5 == 32); + vassert(f1 < (1<<3)); + vassert(f2 < (1<<6)); + vassert(f3 < (1<<2)); + vassert(f4 < (1<<16)); + vassert(f5 < (1<<5)); + UInt w = 0; + w = (w << 3) | f1; + w = (w << 6) | f2; + w = (w << 2) | f3; + w = (w << 16) | f4; + w = (w << 5) | f5; + return w; +} + +/* --- 6 fields --- */ + +static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3, + UInt f4, UInt f5, UInt f6 ) { + vassert(2+6+2+12+5+5 == 32); + vassert(f1 < (1<<2)); + vassert(f2 < (1<<6)); + vassert(f3 < (1<<2)); + vassert(f4 < (1<<12)); + vassert(f5 < (1<<5)); + vassert(f6 < (1<<5)); + UInt w = 0; + w = (w << 2) | f1; + w = (w << 6) | f2; + w = (w << 2) | f3; + w = (w << 12) | f4; + w = (w << 5) | f5; + w = (w << 5) | f6; + return w; +} + +static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3, + UInt f4, UInt f5, UInt f6 ) { + vassert(3+8+5+6+5+5 == 32); + vassert(f1 < (1<<3)); + vassert(f2 < (1<<8)); + vassert(f3 < (1<<5)); + vassert(f4 < (1<<6)); + vassert(f5 < (1<<5)); + vassert(f6 < (1<<5)); + UInt w = 0; + w = (w << 3) | f1; + w = (w << 8) | f2; + w = (w << 5) | f3; + w = (w << 6) | f4; + w = (w << 5) | f5; + w = (w << 5) | f6; + return w; +} + +static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3, + UInt f4, UInt f5, UInt f6 ) { + vassert(3+8+5+6+5+5 == 32); + vassert(f1 < (1<<3)); + vassert(f2 < (1<<5)); + vassert(f3 < (1<<8)); + vassert(f4 < (1<<6)); + vassert(f5 < (1<<5)); + vassert(f6 < (1<<5)); + UInt w = 0; + w = (w << 3) | f1; + w = (w << 5) | f2; + w = (w << 8) | f3; + w = (w << 6) | f4; + w = (w << 5) | f5; + w = (w << 5) | f6; + return w; +} + +static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3, + UInt f4, UInt f5, UInt f6 ) { + vassert(3+6+7+6+5+5 == 32); + vassert(f1 < (1<<3)); + vassert(f2 < (1<<6)); + vassert(f3 < (1<<7)); + vassert(f4 < (1<<6)); + vassert(f5 < (1<<5)); + vassert(f6 < (1<<5)); + UInt w = 0; + w = (w << 3) | f1; + w = (w << 6) | f2; + w = (w << 7) | f3; + w = (w << 6) | f4; + w = (w << 5) | f5; + w = (w << 5) | f6; + return w; +} + +/* --- 7 fields --- */ + +static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3, + UInt f4, UInt f5, UInt f6, UInt f7 ) { + vassert(2+6+3+9+2+5+5 == 32); + vassert(f1 < (1<<2)); + vassert(f2 < (1<<6)); + vassert(f3 < (1<<3)); + vassert(f4 < (1<<9)); + vassert(f5 < (1<<2)); + vassert(f6 < (1<<5)); + vassert(f7 < (1<<5)); + UInt w = 0; + w = (w << 2) | f1; + w = (w << 6) | f2; + w = (w << 3) | f3; + w = (w 
<< 9) | f4; + w = (w << 2) | f5; + w = (w << 5) | f6; + w = (w << 5) | f7; + return w; +} + +static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3, + UInt f4, UInt f5, UInt f6, UInt f7 ) { + vassert(3+6+1+6+6+5+5 == 32); + vassert(f1 < (1<<3)); + vassert(f2 < (1<<6)); + vassert(f3 < (1<<1)); + vassert(f4 < (1<<6)); + vassert(f5 < (1<<6)); + vassert(f6 < (1<<5)); + vassert(f7 < (1<<5)); + UInt w = 0; + w = (w << 3) | f1; + w = (w << 6) | f2; + w = (w << 1) | f3; + w = (w << 6) | f4; + w = (w << 6) | f5; + w = (w << 5) | f6; + w = (w << 5) | f7; + return w; +} + + +//ZZ #define X0000 BITS4(0,0,0,0) +//ZZ #define X0001 BITS4(0,0,0,1) +//ZZ #define X0010 BITS4(0,0,1,0) +//ZZ #define X0011 BITS4(0,0,1,1) +//ZZ #define X0100 BITS4(0,1,0,0) +//ZZ #define X0101 BITS4(0,1,0,1) +//ZZ #define X0110 BITS4(0,1,1,0) +//ZZ #define X0111 BITS4(0,1,1,1) +//ZZ #define X1000 BITS4(1,0,0,0) +//ZZ #define X1001 BITS4(1,0,0,1) +//ZZ #define X1010 BITS4(1,0,1,0) +//ZZ #define X1011 BITS4(1,0,1,1) +//ZZ #define X1100 BITS4(1,1,0,0) +//ZZ #define X1101 BITS4(1,1,0,1) +//ZZ #define X1110 BITS4(1,1,1,0) +//ZZ #define X1111 BITS4(1,1,1,1) +/* +#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \ + ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \ + (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \ + (((zzx3) & 0xF) << 12)) + +#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \ + ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \ + (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \ + (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8)) + +#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \ + ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \ + (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \ + (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0)) + +#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \ + ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \ + (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \ + (((zzx0) & 0xF) << 0)) + +#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \ + ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \ + (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \ + (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \ + (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0)) + +#define XX______(zzx7,zzx6) \ + ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24)) +*/ +//ZZ /* Generate a skeletal insn that involves an a RI84 shifter operand. +//ZZ Returns a word which is all zeroes apart from bits 25 and 11..0, +//ZZ since it is those that encode the shifter operand (at least to the +//ZZ extent that we care about it.) */ +//ZZ static UInt skeletal_RI84 ( ARMRI84* ri ) +//ZZ { +//ZZ UInt instr; +//ZZ if (ri->tag == ARMri84_I84) { +//ZZ vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F)); +//ZZ vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF)); +//ZZ instr = 1 << 25; +//ZZ instr |= (ri->ARMri84.I84.imm4 << 8); +//ZZ instr |= ri->ARMri84.I84.imm8; +//ZZ } else { +//ZZ instr = 0 << 25; +//ZZ instr |= iregNo(ri->ARMri84.R.reg); +//ZZ } +//ZZ return instr; +//ZZ } +//ZZ +//ZZ /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits +//ZZ 11..7. */ +//ZZ static UInt skeletal_RI5 ( ARMRI5* ri ) +//ZZ { +//ZZ UInt instr; +//ZZ if (ri->tag == ARMri5_I5) { +//ZZ UInt imm5 = ri->ARMri5.I5.imm5; +//ZZ vassert(imm5 >= 1 && imm5 <= 31); +//ZZ instr = 0 << 4; +//ZZ instr |= imm5 << 7; +//ZZ } else { +//ZZ instr = 1 << 4; +//ZZ instr |= iregNo(ri->ARMri5.R.reg) << 8; +//ZZ } +//ZZ return instr; +//ZZ } + + +/* Get an immediate into a register, using only that register. 
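   As a worked example of the MOVZ/MOVK scheme below: imm64 = 0x0000444400002222
   has nonzero halfwords only at positions 0 and 2, so the emitted sequence is

      movz xD, #0x2222, lsl #0
      movk xD, #0x4444, lsl #32

   -- two instructions in total; halfwords that are zero are simply skipped,
   and imm64 == 0 is special-cased to a single "movz xD, #0".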
*/ +static UInt* imm64_to_iregNo ( UInt* p, Int xD, ULong imm64 ) +{ + if (imm64 == 0) { + // This has to be special-cased, since the logic below + // will leave the register unchanged in this case. + // MOVZ xD, #0, LSL #0 + *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD); + return p; + } + + // There must be at least one non-zero halfword. Find the + // lowest nonzero such, and use MOVZ to install it and zero + // out the rest of the register. + UShort h[4]; + h[3] = (UShort)((imm64 >> 48) & 0xFFFF); + h[2] = (UShort)((imm64 >> 32) & 0xFFFF); + h[1] = (UShort)((imm64 >> 16) & 0xFFFF); + h[0] = (UShort)((imm64 >> 0) & 0xFFFF); + + UInt i; + for (i = 0; i < 4; i++) { + if (h[i] != 0) + break; + } + vassert(i < 4); + + // MOVZ xD, h[i], LSL (16*i) + *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD); + + // Work on upwards through h[i], using MOVK to stuff in any + // remaining nonzero elements. + i++; + for (; i < 4; i++) { + if (h[i] == 0) + continue; + // MOVK xD, h[i], LSL (16*i) + *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD); + } + + return p; +} + +/* Get an immediate into a register, using only that register, and + generating exactly 4 instructions, regardless of the value of the + immediate. This is used when generating sections of code that need + to be patched later, so as to guarantee a specific size. */ +static UInt* imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 ) +{ + UShort h[4]; + h[3] = (UShort)((imm64 >> 48) & 0xFFFF); + h[2] = (UShort)((imm64 >> 32) & 0xFFFF); + h[1] = (UShort)((imm64 >> 16) & 0xFFFF); + h[0] = (UShort)((imm64 >> 0) & 0xFFFF); + // Work on upwards through h[i], using MOVK to stuff in the + // remaining elements. + UInt i; + for (i = 0; i < 4; i++) { + if (i == 0) { + // MOVZ xD, h[0], LSL (16*0) + *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD); + } else { + // MOVK xD, h[i], LSL (16*i) + *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD); + } + } + return p; +} + +/* Check whether p points at a 4-insn sequence cooked up by + imm64_to_iregNo_EXACTLY4(). */ +static Bool is_imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 ) +{ + UShort h[4]; + h[3] = (UShort)((imm64 >> 48) & 0xFFFF); + h[2] = (UShort)((imm64 >> 32) & 0xFFFF); + h[1] = (UShort)((imm64 >> 16) & 0xFFFF); + h[0] = (UShort)((imm64 >> 0) & 0xFFFF); + // Work on upwards through h[i], using MOVK to stuff in the + // remaining elements. + UInt i; + for (i = 0; i < 4; i++) { + UInt expected; + if (i == 0) { + // MOVZ xD, h[0], LSL (16*0) + expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD); + } else { + // MOVK xD, h[i], LSL (16*i) + expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD); + } + if (p[i] != expected) + return False; + } + return True; +} + + +/* Generate a 8 bit store or 8-to-64 unsigned widening load from/to + rD, using the given amode for the address. */ +static UInt* do_load_or_store8 ( UInt* p, + Bool isLoad, UInt wD, ARM64AMode* am ) +{ + vassert(wD <= 30); + if (am->tag == ARM64am_RI9) { + /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d + LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d + */ + Int simm9 = am->ARM64am.RI9.simm9; + vassert(-256 <= simm9 && simm9 <= 255); + UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? 
X010 : X000, + simm9 & 0x1FF, X00, + iregNo(am->ARM64am.RI9.reg), wD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RI12) { + /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d + LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d + */ + UInt uimm12 = am->ARM64am.RI12.uimm12; + UInt scale = am->ARM64am.RI12.szB; + vassert(scale == 1); /* failure of this is serious. Do not ignore. */ + UInt xN = iregNo(am->ARM64am.RI12.reg); + vassert(xN <= 30); + UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00, + uimm12, xN, wD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RR) { + /* STRB Xd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d + LDRB Xd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d + */ + UInt xN = iregNo(am->ARM64am.RR.base); + UInt xM = iregNo(am->ARM64am.RR.index); + vassert(xN <= 30); + UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001, + xM, X011010, xN, wD); + *p++ = instr; + return p; + } + vpanic("do_load_or_store8"); + vassert(0); +} + + +/* Generate a 16 bit store or 16-to-64 unsigned widening load from/to + rD, using the given amode for the address. */ +static UInt* do_load_or_store16 ( UInt* p, + Bool isLoad, UInt wD, ARM64AMode* am ) +{ + vassert(wD <= 30); + if (am->tag == ARM64am_RI9) { + /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d + LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d + */ + Int simm9 = am->ARM64am.RI9.simm9; + vassert(-256 <= simm9 && simm9 <= 255); + UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000, + simm9 & 0x1FF, X00, + iregNo(am->ARM64am.RI9.reg), wD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RI12) { + /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d + LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d + */ + UInt uimm12 = am->ARM64am.RI12.uimm12; + UInt scale = am->ARM64am.RI12.szB; + vassert(scale == 2); /* failure of this is serious. Do not ignore. */ + UInt xN = iregNo(am->ARM64am.RI12.reg); + vassert(xN <= 30); + UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00, + uimm12, xN, wD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RR) { + /* STRH Xd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d + LDRH Xd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d + */ + UInt xN = iregNo(am->ARM64am.RR.base); + UInt xM = iregNo(am->ARM64am.RR.index); + vassert(xN <= 30); + UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001, + xM, X011010, xN, wD); + *p++ = instr; + return p; + } + vpanic("do_load_or_store16"); + vassert(0); +} + + +/* Generate a 32 bit store or 32-to-64 unsigned widening load from/to + rD, using the given amode for the address. */ +static UInt* do_load_or_store32 ( UInt* p, + Bool isLoad, UInt wD, ARM64AMode* am ) +{ + vassert(wD <= 30); + if (am->tag == ARM64am_RI9) { + /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d + LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d + */ + Int simm9 = am->ARM64am.RI9.simm9; + vassert(-256 <= simm9 && simm9 <= 255); + UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000, + simm9 & 0x1FF, X00, + iregNo(am->ARM64am.RI9.reg), wD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RI12) { + /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d + LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d + */ + UInt uimm12 = am->ARM64am.RI12.uimm12; + UInt scale = am->ARM64am.RI12.szB; + vassert(scale == 4); /* failure of this is serious. Do not ignore. 
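      (The RI12 form encodes an unsigned 12-bit offset implicitly scaled by
      the access size, so the amode's szB must match the width of the access:
      1, 2, 4 and 8 for the 8/16/32/64-bit variants respectively.)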
*/ + UInt xN = iregNo(am->ARM64am.RI12.reg); + vassert(xN <= 30); + UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00, + uimm12, xN, wD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RR) { + /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d + LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d + */ + UInt xN = iregNo(am->ARM64am.RR.base); + UInt xM = iregNo(am->ARM64am.RR.index); + vassert(xN <= 30); + UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001, + xM, X011010, xN, wD); + *p++ = instr; + return p; + } + vpanic("do_load_or_store32"); + vassert(0); +} + + +/* Generate a 64 bit load or store to/from xD, using the given amode + for the address. */ +static UInt* do_load_or_store64 ( UInt* p, + Bool isLoad, UInt xD, ARM64AMode* am ) +{ + /* In all these cases, Rn can't be 31 since that means SP. */ + vassert(xD <= 30); + if (am->tag == ARM64am_RI9) { + /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d + LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d + */ + Int simm9 = am->ARM64am.RI9.simm9; + vassert(-256 <= simm9 && simm9 <= 255); + UInt xN = iregNo(am->ARM64am.RI9.reg); + vassert(xN <= 30); + UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000, + simm9 & 0x1FF, X00, xN, xD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RI12) { + /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d + LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d + */ + UInt uimm12 = am->ARM64am.RI12.uimm12; + UInt scale = am->ARM64am.RI12.szB; + vassert(scale == 8); /* failure of this is serious. Do not ignore. */ + UInt xN = iregNo(am->ARM64am.RI12.reg); + vassert(xN <= 30); + UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00, + uimm12, xN, xD); + *p++ = instr; + return p; + } + if (am->tag == ARM64am_RR) { + /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d + LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d + */ + UInt xN = iregNo(am->ARM64am.RR.base); + UInt xM = iregNo(am->ARM64am.RR.index); + vassert(xN <= 30); + UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001, + xM, X011010, xN, xD); + *p++ = instr; + return p; + } + vpanic("do_load_or_store64"); + vassert(0); +} + + +/* Emit an instruction into buf and return the number of bytes used. + Note that buf is not the insn's final place, and therefore it is + imperative to emit position-independent code. If the emitted + instruction was a profiler inc, set *is_profInc to True, else + leave it unchanged. */ + +Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, ARM64Instr* i, + Bool mode64, + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ) +{ + UInt* p = (UInt*)buf; + vassert(nbuf >= 32); + vassert(mode64 == True); + vassert(0 == (((HWord)buf) & 3)); + + switch (i->tag) { + case ARM64in_Arith: { + UInt rD = iregNo(i->ARM64in.Arith.dst); + UInt rN = iregNo(i->ARM64in.Arith.argL); + ARM64RIA* argR = i->ARM64in.Arith.argR; + switch (argR->tag) { + case ARM64riA_I12: + *p++ = X_2_6_2_12_5_5( + i->ARM64in.Arith.isAdd ? X10 : X11, + X010001, + argR->ARM64riA.I12.shift == 12 ? X01 : X00, + argR->ARM64riA.I12.imm12, rN, rD + ); + break; + case ARM64riA_R: { + UInt rM = iregNo(i->ARM64in.Arith.argR->ARM64riA.R.reg); + *p++ = X_3_8_5_6_5_5( + i->ARM64in.Arith.isAdd ? 
X100 : X110, + X01011000, rM, X000000, rN, rD + ); + break; + } + default: + goto bad; + } + goto done; + } + case ARM64in_Cmp: { + UInt rD = 31; /* XZR, we are going to dump the result */ + UInt rN = iregNo(i->ARM64in.Cmp.argL); + ARM64RIA* argR = i->ARM64in.Cmp.argR; + Bool is64 = i->ARM64in.Cmp.is64; + switch (argR->tag) { + case ARM64riA_I12: + /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */ + /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */ + *p++ = X_2_6_2_12_5_5( + is64 ? X11 : X01, X110001, + argR->ARM64riA.I12.shift == 12 ? X01 : X00, + argR->ARM64riA.I12.imm12, rN, rD); + break; + case ARM64riA_R: { + /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */ + /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */ + UInt rM = iregNo(i->ARM64in.Cmp.argR->ARM64riA.R.reg); + *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011, + X01011000, rM, X000000, rN, rD); + break; + } + default: + goto bad; + } + goto done; + } + case ARM64in_Logic: { + UInt rD = iregNo(i->ARM64in.Logic.dst); + UInt rN = iregNo(i->ARM64in.Logic.argL); + ARM64RIL* argR = i->ARM64in.Logic.argR; + UInt opc = 0; /* invalid */ + vassert(rD < 31); + vassert(rN < 31); + switch (i->ARM64in.Logic.op) { + case ARM64lo_OR: opc = X101; break; + case ARM64lo_AND: opc = X100; break; + case ARM64lo_XOR: opc = X110; break; + default: break; + } + vassert(opc != 0); + switch (argR->tag) { + case ARM64riL_I13: { + /* 1 01 100100 N immR immS Rn Rd = ORR , Xn, #imm */ + /* 1 00 100100 N immR immS Rn Rd = AND , Xn, #imm */ + /* 1 10 100100 N immR immS Rn Rd = EOR , Xn, #imm */ + *p++ = X_3_6_1_6_6_5_5( + opc, X100100, argR->ARM64riL.I13.bitN, + argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS, + rN, rD + ); + break; + } + case ARM64riL_R: { + /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */ + /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */ + /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */ + UInt rM = iregNo(argR->ARM64riL.R.reg); + vassert(rM < 31); + *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD); + break; + } + default: + goto bad; + } + goto done; + } + case ARM64in_Test: { + UInt rD = 31; /* XZR, we are going to dump the result */ + UInt rN = iregNo(i->ARM64in.Test.argL); + ARM64RIL* argR = i->ARM64in.Test.argR; + switch (argR->tag) { + case ARM64riL_I13: { + /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */ + *p++ = X_3_6_1_6_6_5_5( + X111, X100100, argR->ARM64riL.I13.bitN, + argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS, + rN, rD + ); + break; + } + default: + goto bad; + } + goto done; + } + case ARM64in_Shift: { + UInt rD = iregNo(i->ARM64in.Shift.dst); + UInt rN = iregNo(i->ARM64in.Shift.argL); + ARM64RI6* argR = i->ARM64in.Shift.argR; + vassert(rD < 31); + vassert(rN < 31); + switch (argR->tag) { + case ARM64ri6_I6: { + /* 110 1001101 (63-sh) (64-sh) nn dd LSL Xd, Xn, sh */ + /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */ + /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */ + UInt sh = argR->ARM64ri6.I6.imm6; + vassert(sh > 0 && sh < 64); + switch (i->ARM64in.Shift.op) { + case ARM64sh_SHL: + *p++ = X_3_6_1_6_6_5_5(X110, X100110, + 1, 64-sh, 63-sh, rN, rD); + break; + case ARM64sh_SHR: + *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD); + break; + case ARM64sh_SAR: + *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD); + break; + default: + vassert(0); + } + break; + } + case ARM64ri6_R: { + /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */ + /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */ + /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */ + UInt rM = iregNo(argR->ARM64ri6.R.reg); + 
vassert(rM < 31); + UInt subOpc = 0; + switch (i->ARM64in.Shift.op) { + case ARM64sh_SHL: subOpc = X001000; break; + case ARM64sh_SHR: subOpc = X001001; break; + case ARM64sh_SAR: subOpc = X001010; break; + default: vassert(0); + } + *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD); + break; + } + default: + vassert(0); + } + goto done; + } + case ARM64in_Unary: { + UInt rDst = iregNo(i->ARM64in.Unary.dst); + UInt rSrc = iregNo(i->ARM64in.Unary.src); + switch (i->ARM64in.Unary.op) { + case ARM64un_CLZ: + /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */ + /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */ + *p++ = X_3_8_5_6_5_5(X110, + X11010110, X00000, X000100, rSrc, rDst); + goto done; + case ARM64un_NEG: + /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */ + /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */ + *p++ = X_3_8_5_6_5_5(X110, + X01011000, rSrc, X000000, X11111, rDst); + goto done; + case ARM64un_NOT: { + /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */ + *p++ = X_3_8_5_6_5_5(X101, + X01010001, rSrc, X000000, X11111, rDst); + goto done; + } + default: + break; + } + goto bad; + } + case ARM64in_MovI: { + /* We generate the "preferred form", ORR Xd, XZR, Xm + 101 01010 00 0 m 000000 11111 d + */ + UInt instr = 0xAA0003E0; + UInt d = iregNo(i->ARM64in.MovI.dst); + UInt m = iregNo(i->ARM64in.MovI.src); + *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0); + goto done; + } + case ARM64in_Imm64: { + p = imm64_to_iregNo( p, iregNo(i->ARM64in.Imm64.dst), + i->ARM64in.Imm64.imm64 ); + goto done; + } + case ARM64in_LdSt64: { + p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad, + iregNo(i->ARM64in.LdSt64.rD), + i->ARM64in.LdSt64.amode ); + goto done; + } + case ARM64in_LdSt32: { + p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad, + iregNo(i->ARM64in.LdSt32.rD), + i->ARM64in.LdSt32.amode ); + goto done; + } + case ARM64in_LdSt16: { + p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad, + iregNo(i->ARM64in.LdSt16.rD), + i->ARM64in.LdSt16.amode ); + goto done; + } + case ARM64in_LdSt8: { + p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad, + iregNo(i->ARM64in.LdSt8.rD), + i->ARM64in.LdSt8.amode ); + goto done; + } +//ZZ case ARMin_LdSt32: +//ZZ case ARMin_LdSt8U: { +//ZZ UInt bL, bB; +//ZZ HReg rD; +//ZZ ARMAMode1* am; +//ZZ ARMCondCode cc; +//ZZ if (i->tag == ARMin_LdSt32) { +//ZZ bB = 0; +//ZZ bL = i->ARMin.LdSt32.isLoad ? 1 : 0; +//ZZ am = i->ARMin.LdSt32.amode; +//ZZ rD = i->ARMin.LdSt32.rD; +//ZZ cc = i->ARMin.LdSt32.cc; +//ZZ } else { +//ZZ bB = 1; +//ZZ bL = i->ARMin.LdSt8U.isLoad ? 1 : 0; +//ZZ am = i->ARMin.LdSt8U.amode; +//ZZ rD = i->ARMin.LdSt8U.rD; +//ZZ cc = i->ARMin.LdSt8U.cc; +//ZZ } +//ZZ vassert(cc != ARMcc_NV); +//ZZ if (am->tag == ARMam1_RI) { +//ZZ Int simm12; +//ZZ UInt instr, bP; +//ZZ if (am->ARMam1.RI.simm13 < 0) { +//ZZ bP = 0; +//ZZ simm12 = -am->ARMam1.RI.simm13; +//ZZ } else { +//ZZ bP = 1; +//ZZ simm12 = am->ARMam1.RI.simm13; +//ZZ } +//ZZ vassert(simm12 >= 0 && simm12 <= 4095); +//ZZ instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL), +//ZZ iregNo(am->ARMam1.RI.reg), +//ZZ iregNo(rD)); +//ZZ instr |= simm12; +//ZZ *p++ = instr; +//ZZ goto done; +//ZZ } else { +//ZZ // RR case +//ZZ goto bad; +//ZZ } +//ZZ } +//ZZ case ARMin_LdSt16: { +//ZZ HReg rD = i->ARMin.LdSt16.rD; +//ZZ UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0; +//ZZ UInt bL = i->ARMin.LdSt16.isLoad ? 
1 : 0; +//ZZ ARMAMode2* am = i->ARMin.LdSt16.amode; +//ZZ ARMCondCode cc = i->ARMin.LdSt16.cc; +//ZZ vassert(cc != ARMcc_NV); +//ZZ if (am->tag == ARMam2_RI) { +//ZZ HReg rN = am->ARMam2.RI.reg; +//ZZ Int simm8; +//ZZ UInt bP, imm8hi, imm8lo, instr; +//ZZ if (am->ARMam2.RI.simm9 < 0) { +//ZZ bP = 0; +//ZZ simm8 = -am->ARMam2.RI.simm9; +//ZZ } else { +//ZZ bP = 1; +//ZZ simm8 = am->ARMam2.RI.simm9; +//ZZ } +//ZZ vassert(simm8 >= 0 && simm8 <= 255); +//ZZ imm8hi = (simm8 >> 4) & 0xF; +//ZZ imm8lo = simm8 & 0xF; +//ZZ vassert(!(bL == 0 && bS == 1)); // "! signed store" +//ZZ /**/ if (bL == 0 && bS == 0) { +//ZZ // strh +//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregNo(rN), +//ZZ iregNo(rD), imm8hi, X1011, imm8lo); +//ZZ *p++ = instr; +//ZZ goto done; +//ZZ } +//ZZ else if (bL == 1 && bS == 0) { +//ZZ // ldrh +//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN), +//ZZ iregNo(rD), imm8hi, X1011, imm8lo); +//ZZ *p++ = instr; +//ZZ goto done; +//ZZ } +//ZZ else if (bL == 1 && bS == 1) { +//ZZ // ldrsh +//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN), +//ZZ iregNo(rD), imm8hi, X1111, imm8lo); +//ZZ *p++ = instr; +//ZZ goto done; +//ZZ } +//ZZ else vassert(0); // ill-constructed insn +//ZZ } else { +//ZZ // RR case +//ZZ goto bad; +//ZZ } +//ZZ } +//ZZ case ARMin_Ld8S: { +//ZZ HReg rD = i->ARMin.Ld8S.rD; +//ZZ ARMAMode2* am = i->ARMin.Ld8S.amode; +//ZZ ARMCondCode cc = i->ARMin.Ld8S.cc; +//ZZ vassert(cc != ARMcc_NV); +//ZZ if (am->tag == ARMam2_RI) { +//ZZ HReg rN = am->ARMam2.RI.reg; +//ZZ Int simm8; +//ZZ UInt bP, imm8hi, imm8lo, instr; +//ZZ if (am->ARMam2.RI.simm9 < 0) { +//ZZ bP = 0; +//ZZ simm8 = -am->ARMam2.RI.simm9; +//ZZ } else { +//ZZ bP = 1; +//ZZ simm8 = am->ARMam2.RI.simm9; +//ZZ } +//ZZ vassert(simm8 >= 0 && simm8 <= 255); +//ZZ imm8hi = (simm8 >> 4) & 0xF; +//ZZ imm8lo = simm8 & 0xF; +//ZZ // ldrsb +//ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN), +//ZZ iregNo(rD), imm8hi, X1101, imm8lo); +//ZZ *p++ = instr; +//ZZ goto done; +//ZZ } else { +//ZZ // RR case +//ZZ goto bad; +//ZZ } +//ZZ } + + case ARM64in_XDirect: { + /* NB: what goes on here has to be very closely coordinated + with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */ + /* We're generating chain-me requests here, so we need to be + sure this is actually allowed -- no-redir translations + can't use chain-me's. Hence: */ + vassert(disp_cp_chain_me_to_slowEP != NULL); + vassert(disp_cp_chain_me_to_fastEP != NULL); + + /* Use ptmp for backpatching conditional jumps. */ + UInt* ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. Or at least, leave a space for + it that we will shortly fill in. */ + if (i->ARM64in.XDirect.cond != ARM64cc_AL) { + vassert(i->ARM64in.XDirect.cond != ARM64cc_NV); + ptmp = p; + *p++ = 0; + } + + /* Update the guest PC. */ + /* imm64 x9, dstGA */ + /* str x9, amPC */ + p = imm64_to_iregNo(p, /*x*/9, i->ARM64in.XDirect.dstGA); + p = do_load_or_store64(p, False/*!isLoad*/, + /*x*/9, i->ARM64in.XDirect.amPC); + + /* --- FIRST PATCHABLE BYTE follows --- */ + /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're + calling to) backs up the return address, so as to find the + address of the first patchable byte. So: don't change the + number of instructions (5) below. 
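      (Those 5 instructions are the fixed 4-insn sequence produced by
      imm64_to_iregNo_EXACTLY4 plus the BLR, i.e. a 20-byte patch area which
      chainXDirect_ARM64/unchainXDirect_ARM64 can later rewrite in place.)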
*/ + /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */ + /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:15], lsl 16 */ + /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */ + /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */ + /* blr x9 */ + void* disp_cp_chain_me + = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP + : disp_cp_chain_me_to_slowEP; + p = imm64_to_iregNo_EXACTLY4(p, /*x*/9, + Ptr_to_ULong(disp_cp_chain_me)); + *p++ = 0xD63F0120; + /* --- END of PATCHABLE BYTES --- */ + + /* Fix up the conditional jump, if there was one. */ + if (i->ARM64in.XDirect.cond != ARM64cc_AL) { + Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */ + vassert(delta > 0 && delta < 40); + vassert((delta & 3) == 0); + UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond; + vassert(notCond <= 13); /* Neither AL nor NV */ + vassert(ptmp != NULL); + delta = delta >> 2; + *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond); + } + goto done; + } + + case ARM64in_XIndir: { + // XIndir is more or less the same as XAssisted, except + // we don't have a trc value to hand back, so there's no + // write to r21 + /* Use ptmp for backpatching conditional jumps. */ + //UInt* ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. Or at least, leave a space for + it that we will shortly fill in. */ + if (i->ARM64in.XIndir.cond != ARM64cc_AL) { + vassert(0); //ATC +//ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV); +//ZZ ptmp = p; +//ZZ *p++ = 0; + } + + /* Update the guest PC. */ + /* str r-dstGA, amPC */ + p = do_load_or_store64(p, False/*!isLoad*/, + iregNo(i->ARM64in.XIndir.dstGA), + i->ARM64in.XIndir.amPC); + + /* imm64 x9, VG_(disp_cp_xindir) */ + /* br x9 */ + p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xindir)); + *p++ = 0xD61F0120; /* br x9 */ + + /* Fix up the conditional jump, if there was one. */ + if (i->ARM64in.XIndir.cond != ARM64cc_AL) { + vassert(0); //ATC +//ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */ +//ZZ vassert(delta > 0 && delta < 40); +//ZZ vassert((delta & 3) == 0); +//ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond; +//ZZ vassert(notCond <= 13); /* Neither AL nor NV */ +//ZZ delta = (delta >> 2) - 2; +//ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF); + } + goto done; + } + + case ARM64in_XAssisted: { + /* Use ptmp for backpatching conditional jumps. */ + UInt* ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. Or at least, leave a space for + it that we will shortly fill in. I think this can only + ever happen when VEX is driven by the switchbacker. */ + if (i->ARM64in.XAssisted.cond != ARM64cc_AL) { + vassert(i->ARM64in.XDirect.cond != ARM64cc_NV); + ptmp = p; + *p++ = 0; + } + + /* Update the guest PC. 
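      (For XAssisted the overall sequence is: store dstGA to the guest PC
      slot, load the VEX_TRC_JMP_* value into x21 so the dispatcher can see
      why the block exited, then jump to disp_cp_xassisted via x9.)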
*/ + /* str r-dstGA, amPC */ + p = do_load_or_store64(p, False/*!isLoad*/, + iregNo(i->ARM64in.XAssisted.dstGA), + i->ARM64in.XAssisted.amPC); + + /* movw r21, $magic_number */ + UInt trcval = 0; + switch (i->ARM64in.XAssisted.jk) { + case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; + case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; + //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; + //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; + //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; + //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; + case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; + case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; + case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break; + case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; + //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; + case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; + /* We don't expect to see the following being assisted. */ + //case Ijk_Ret: + //case Ijk_Call: + /* fallthrough */ + default: + ppIRJumpKind(i->ARM64in.XAssisted.jk); + vpanic("emit_ARM64Instr.ARM64in_XAssisted: " + "unexpected jump kind"); + } + vassert(trcval != 0); + p = imm64_to_iregNo(p, /*x*/21, (ULong)trcval); + + /* imm64 x9, VG_(disp_cp_xassisted) */ + /* br x9 */ + p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xassisted)); + *p++ = 0xD61F0120; /* br x9 */ + + /* Fix up the conditional jump, if there was one. */ + if (i->ARM64in.XAssisted.cond != ARM64cc_AL) { + Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */ + vassert(delta > 0 && delta < 40); + vassert((delta & 3) == 0); + UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond; + vassert(notCond <= 13); /* Neither AL nor NV */ + vassert(ptmp != NULL); + delta = delta >> 2; + *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond); + } + goto done; + } + + case ARM64in_CSel: { + /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */ + UInt dd = iregNo(i->ARM64in.CSel.dst); + UInt nn = iregNo(i->ARM64in.CSel.argL); + UInt mm = iregNo(i->ARM64in.CSel.argR); + UInt cond = (UInt)i->ARM64in.CSel.cond; + vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16); + *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd); + goto done; + } + + case ARM64in_Call: { + /* We'll use x9 as a scratch register to put the target + address in. */ + if (i->ARM64in.Call.cond != ARM64cc_AL + && i->ARM64in.Call.rloc.pri != RLPri_None) { + /* The call might not happen (it isn't unconditional) and + it returns a result. In this case we will need to + generate a control flow diamond to put 0x555..555 in + the return register(s) in the case where the call + doesn't happen. If this ever becomes necessary, maybe + copy code from the 32-bit ARM equivalent. Until that + day, just give up. */ + goto bad; + } + + UInt* ptmp = NULL; + if (i->ARM64in.Call.cond != ARM64cc_AL) { + /* Create a hole to put a conditional branch in. We'll + patch it once we know the branch length. */ + ptmp = p; + *p++ = 0; + } + + // x9 = &target + p = imm64_to_iregNo( (UInt*)p, + /*x*/9, (ULong)i->ARM64in.Call.target ); + // blr x9 + *p++ = 0xD63F0120; + + // Patch the hole if necessary + if (i->ARM64in.Call.cond != ARM64cc_AL) { + ULong dist = (ULong)(p - ptmp); + /* imm64_to_iregNo produces between 1 and 4 insns, and + then there's the BLR itself. 
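      (dist is measured in 32-bit insn words starting at the hole, and the
      condition is inverted -- 1 ^ cond -- so that the imm64+BLR sequence is
      skipped when the guard is false.)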
Hence: */ + vassert(dist >= 2 && dist <= 5); + vassert(ptmp != NULL); + // 01010100 simm19 0 cond = B.cond (here + simm19 << 2) + *ptmp = X_8_19_1_4(X01010100, dist, 0, + 1 ^ (UInt)i->ARM64in.Call.cond); + } else { + vassert(ptmp == NULL); + } + + goto done; + } + + case ARM64in_AddToSP: { + /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12 + 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12 + */ + Int simm12 = i->ARM64in.AddToSP.simm; + vassert(-4096 < simm12 && simm12 < 4096); + vassert(0 == (simm12 & 0xF)); + if (simm12 >= 0) { + *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111); + } else { + *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111); + } + goto done; + } + + case ARM64in_FromSP: { + /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */ + UInt dd = iregNo(i->ARM64in.FromSP.dst); + vassert(dd < 31); + *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd); + goto done; + } + + case ARM64in_Mul: { + /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm + 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm + 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm + */ + UInt dd = iregNo(i->ARM64in.Mul.dst); + UInt nn = iregNo(i->ARM64in.Mul.argL); + UInt mm = iregNo(i->ARM64in.Mul.argR); + vassert(dd < 31 && nn < 31 && mm < 31); + switch (i->ARM64in.Mul.op) { + case ARM64mul_ZX: + *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd); + goto done; + case ARM64mul_SX: + *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd); + goto done; + case ARM64mul_PLAIN: + *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd); + goto done; + default: + vassert(0); + } + goto bad; + } + case ARM64in_LdrEX: { + /* 085F7C82 ldxrb w2, [x4] + 485F7C82 ldxrh w2, [x4] + 885F7C82 ldxr w2, [x4] + C85F7C82 ldxr x2, [x4] + */ + switch (i->ARM64in.LdrEX.szB) { + case 1: *p++ = 0x085F7C82; goto done; + case 2: *p++ = 0x485F7C82; goto done; + case 4: *p++ = 0x885F7C82; goto done; + case 8: *p++ = 0xC85F7C82; goto done; + default: break; + } + goto bad; + } + case ARM64in_StrEX: { + /* 08007C82 stxrb w0, w2, [x4] + 48007C82 stxrh w0, w2, [x4] + 88007C82 stxr w0, w2, [x4] + C8007C82 stxr w0, x2, [x4] + */ + switch (i->ARM64in.StrEX.szB) { + case 1: *p++ = 0x08007C82; goto done; + case 2: *p++ = 0x48007C82; goto done; + case 4: *p++ = 0x88007C82; goto done; + case 8: *p++ = 0xC8007C82; goto done; + default: break; + } + goto bad; + } + case ARM64in_MFence: { + *p++ = 0xD5033F9F; /* DSB sy */ + *p++ = 0xD5033FBF; /* DMB sy */ + *p++ = 0xD5033FDF; /* ISB */ + goto done; + } + //case ARM64in_CLREX: { + // //ATC, but believed to be correct + // goto bad; + // *p++ = 0xD5033F5F; /* clrex */ + // goto done; + //} + case ARM64in_VLdStS: { + /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4] + 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4] + */ + UInt sD = dregNo(i->ARM64in.VLdStS.sD); + UInt rN = iregNo(i->ARM64in.VLdStS.rN); + UInt uimm12 = i->ARM64in.VLdStS.uimm12; + Bool isLD = i->ARM64in.VLdStS.isLoad; + vassert(uimm12 < 16384 && 0 == (uimm12 & 3)); + uimm12 >>= 2; + vassert(uimm12 < (1<<12)); + vassert(sD < 32); + vassert(rN < 31); + *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? 
X01 : X00, + uimm12, rN, sD); + goto done; + } + case ARM64in_VLdStD: { + /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8] + 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8] + */ + UInt dD = dregNo(i->ARM64in.VLdStD.dD); + UInt rN = iregNo(i->ARM64in.VLdStD.rN); + UInt uimm12 = i->ARM64in.VLdStD.uimm12; + Bool isLD = i->ARM64in.VLdStD.isLoad; + vassert(uimm12 < 32768 && 0 == (uimm12 & 7)); + uimm12 >>= 3; + vassert(uimm12 < (1<<12)); + vassert(dD < 32); + vassert(rN < 31); + *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00, + uimm12, rN, dD); + goto done; + } + case ARM64in_VLdStQ: { + /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [] + 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [] + */ + UInt rQ = qregNo(i->ARM64in.VLdStQ.rQ); + UInt rN = iregNo(i->ARM64in.VLdStQ.rN); + vassert(rQ < 32); + vassert(rN < 31); + if (i->ARM64in.VLdStQ.isLoad) { + *p++ = 0x4C407C00 | (rN << 5) | rQ; + } else { + *p++ = 0x4C007C00 | (rN << 5) | rQ; + } + goto done; + } + case ARM64in_VCvtI2F: { + /* 31 28 23 21 20 18 15 9 4 + 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn + 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn + 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn + 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn + 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn + 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn + 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn + 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn + */ + UInt rN = iregNo(i->ARM64in.VCvtI2F.rS); + UInt rD = dregNo(i->ARM64in.VCvtI2F.rD); + ARM64CvtOp how = i->ARM64in.VCvtI2F.how; + /* Just handle cases as they show up. */ + switch (how) { + case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD); + break; + case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD); + break; + case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD); + break; + case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD); + break; + case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD); + break; + case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD); + break; + case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD); + break; + case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD); + break; + default: + goto bad; //ATC + } + goto done; + } + case ARM64in_VCvtF2I: { + /* 30 23 20 18 15 9 4 + sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to + sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest) + ---------------- 01 -------------- FCVTP-------- (round to +inf) + ---------------- 10 -------------- FCVTM-------- (round to -inf) + ---------------- 11 -------------- FCVTZ-------- (round to zero) + + Rd is Xd when sf==1, Wd when sf==0 + Fn is Dn when x==1, Sn when x==0 + 20:19 carry the rounding mode, using the same encoding as FPCR + */ + UInt rD = iregNo(i->ARM64in.VCvtF2I.rD); + UInt rN = dregNo(i->ARM64in.VCvtF2I.rS); + ARM64CvtOp how = i->ARM64in.VCvtF2I.how; + UChar armRM = i->ARM64in.VCvtF2I.armRM; + /* Just handle cases as they show up. 
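+         A note on the encodings below: the 8-bit field handed to
+         X_3_5_8_6_5_5 lands at insn bits 23..16, so OR-ing in
+         (armRM << 3) drops the 2-bit rounding mode into bits 20:19,
+         matching the table above (00 nearest, 01 +inf, 10 -inf,
+         11 zero, i.e. the FCVTN / FCVTP / FCVTM / FCVTZ variants).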
*/ + switch (how) { + case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3), + X000000, rN, rD); + break; + case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3), + X000000, rN, rD); + break; + case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3), + X000000, rN, rD); + break; + case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3), + X000000, rN, rD); + break; + case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3), + X000000, rN, rD); + break; + case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */ + *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3), + X000000, rN, rD); + break; + case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3), + X000000, rN, rD); + break; + case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */ + *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3), + X000000, rN, rD); + break; + default: + goto bad; //ATC + } + goto done; + } + case ARM64in_VCvtSD: { + /* 31 23 21 16 14 9 4 + 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D) + ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S) + Rounding, when dst is smaller than src, is per the FPCR. + */ + UInt dd = dregNo(i->ARM64in.VCvtSD.dst); + UInt nn = dregNo(i->ARM64in.VCvtSD.src); + if (i->ARM64in.VCvtSD.sToD) { + *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd); + } else { + *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd); + } + goto done; + } + case ARM64in_VUnaryD: { + /* 31 23 21 16 14 9 4 + 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled) + ------------------- 0,1 --------- FABS ------ + ------------------- 1,0 --------- FNEG ------ + ------------------- 1,1 --------- FQSRT ----- + */ + UInt dD = dregNo(i->ARM64in.VUnaryD.dst); + UInt dN = dregNo(i->ARM64in.VUnaryD.src); + UInt b16 = 2; /* impossible */ + UInt b15 = 2; /* impossible */ + switch (i->ARM64in.VUnaryD.op) { + case ARM64fpu_NEG: b16 = 1; b15 = 0; break; + case ARM64fpu_SQRT: b16 = 1; b15 = 1; break; + case ARM64fpu_ABS: b16 = 0; b15 = 1; break; + default: break; + } + if (b16 < 2 && b15 < 2) { + *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16, + (b15 << 5) | X10000, dN, dD); + goto done; + } + /* + 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR) + */ + if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) { + *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD); + goto done; + } + goto bad; + } + case ARM64in_VUnaryS: { + /* 31 23 21 16 14 9 4 + 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled) + ------------------- 0,1 --------- FABS ------ + ------------------- 1,0 --------- FNEG ------ + ------------------- 1,1 --------- FQSRT ----- + */ + UInt sD = dregNo(i->ARM64in.VUnaryS.dst); + UInt sN = dregNo(i->ARM64in.VUnaryS.src); + UInt b16 = 2; /* impossible */ + UInt b15 = 2; /* impossible */ + switch (i->ARM64in.VUnaryS.op) { + case ARM64fpu_NEG: b16 = 1; b15 = 0; break; + case ARM64fpu_SQRT: b16 = 1; b15 = 1; break; + case ARM64fpu_ABS: b16 = 0; b15 = 1; break; + default: break; + } + if (b16 < 2 && b15 < 2) { + *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16, + (b15 << 5) | X10000, sN, sD); + goto done; + } + /* + 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sm (round per FPCR) + */ + if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) { + *p++ = X_3_8_5_6_5_5(X000, 
X11110001, X00111, X110000, sN, sD); + goto done; + } + goto bad; + } + case ARM64in_VBinD: { + /* 31 23 20 15 11 9 4 + ---------------- 0000 ------ FMUL -------- + 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm + ---------------- 0010 ------ FADD -------- + ---------------- 0011 ------ FSUB -------- + */ + UInt dD = dregNo(i->ARM64in.VBinD.dst); + UInt dN = dregNo(i->ARM64in.VBinD.argL); + UInt dM = dregNo(i->ARM64in.VBinD.argR); + UInt b1512 = 16; /* impossible */ + switch (i->ARM64in.VBinD.op) { + case ARM64fpb_DIV: b1512 = X0001; break; + case ARM64fpb_MUL: b1512 = X0000; break; + case ARM64fpb_SUB: b1512 = X0011; break; + case ARM64fpb_ADD: b1512 = X0010; break; + default: goto bad; + } + vassert(b1512 < 16); + *p++ + = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD); + goto done; + } + case ARM64in_VBinS: { + /* 31 23 20 15 11 9 4 + ---------------- 0000 ------ FMUL -------- + 000 11110 001 m 0001 10 n d FDIV Dd,Dn,Dm + ---------------- 0010 ------ FADD -------- + ---------------- 0011 ------ FSUB -------- + */ + UInt sD = dregNo(i->ARM64in.VBinS.dst); + UInt sN = dregNo(i->ARM64in.VBinS.argL); + UInt sM = dregNo(i->ARM64in.VBinS.argR); + UInt b1512 = 16; /* impossible */ + switch (i->ARM64in.VBinS.op) { + case ARM64fpb_DIV: b1512 = X0001; break; + case ARM64fpb_MUL: b1512 = X0000; break; + case ARM64fpb_SUB: b1512 = X0011; break; + case ARM64fpb_ADD: b1512 = X0010; break; + default: goto bad; + } + vassert(b1512 < 16); + *p++ + = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD); + goto done; + } + case ARM64in_VCmpD: { + /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */ + UInt dN = dregNo(i->ARM64in.VCmpD.argL); + UInt dM = dregNo(i->ARM64in.VCmpD.argR); + *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000); + goto done; + } + case ARM64in_VCmpS: { + /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */ + UInt sN = dregNo(i->ARM64in.VCmpS.argL); + UInt sM = dregNo(i->ARM64in.VCmpS.argR); + *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000); + goto done; + } + case ARM64in_FPCR: { + Bool toFPCR = i->ARM64in.FPCR.toFPCR; + UInt iReg = iregNo(i->ARM64in.FPCR.iReg); + if (toFPCR) { + /* 0xD51B44 000 Rt MSR fpcr, rT */ + *p++ = 0xD51B4400 | (iReg & 0x1F); + goto done; + } + goto bad; // FPCR -> iReg case currently ATC + } + case ARM64in_VBinV: { + /* 31 23 20 15 9 4 + 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d + 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s + 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h + 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b + + 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d + 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s + 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h + 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b + + 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s + 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h + 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b + + 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d + 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s + 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d + 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s + + 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d + 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s + 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d + 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s + + 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s + 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h + 011 01110 00 1 m 011001 n d UMAX 
Vd.16b, Vn.16b, Vm.16b + + 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s + 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h + 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b + + 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s + 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h + 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b + + 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s + 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h + 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b + + 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm + 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm + 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm + + 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d + 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s + 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h + 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b + + 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d + 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s + 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h + 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b + + 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d + 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s + 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h + 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b + + 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d + 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s + + 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d + 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s + + 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d + 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s + + 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b + + */ + UInt vD = qregNo(i->ARM64in.VBinV.dst); + UInt vN = qregNo(i->ARM64in.VBinV.argL); + UInt vM = qregNo(i->ARM64in.VBinV.argR); + switch (i->ARM64in.VBinV.op) { + case ARM64vecb_ADD64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD); + break; + case ARM64vecb_ADD32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD); + break; + case ARM64vecb_ADD16x8: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD); + break; + case ARM64vecb_ADD8x16: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD); + break; + case ARM64vecb_SUB64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD); + break; + case ARM64vecb_SUB32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD); + break; + case ARM64vecb_SUB16x8: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD); + break; + case ARM64vecb_SUB8x16: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD); + break; + case ARM64vecb_MUL32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD); + break; + case ARM64vecb_MUL16x8: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD); + break; + case ARM64vecb_MUL8x16: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD); + break; + case ARM64vecb_FADD64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD); + break; + case ARM64vecb_FADD32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD); + break; + case ARM64vecb_FSUB64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD); + break; + case ARM64vecb_FSUB32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD); + break; + case ARM64vecb_FMUL64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD); + break; + case ARM64vecb_FMUL32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, 
vN, vD); + break; + case ARM64vecb_FDIV64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD); + break; + case ARM64vecb_FDIV32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD); + break; + + case ARM64vecb_UMAX32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD); + break; + case ARM64vecb_UMAX16x8: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD); + break; + case ARM64vecb_UMAX8x16: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD); + break; + + case ARM64vecb_UMIN32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD); + break; + case ARM64vecb_UMIN16x8: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD); + break; + case ARM64vecb_UMIN8x16: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD); + break; + + case ARM64vecb_SMAX32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD); + break; + case ARM64vecb_SMAX16x8: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD); + break; + case ARM64vecb_SMAX8x16: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD); + break; + + case ARM64vecb_SMIN32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD); + break; + case ARM64vecb_SMIN16x8: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD); + break; + case ARM64vecb_SMIN8x16: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD); + break; + + case ARM64vecb_AND: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD); + break; + case ARM64vecb_ORR: + *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD); + break; + case ARM64vecb_XOR: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD); + break; + + case ARM64vecb_CMEQ64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD); + break; + case ARM64vecb_CMEQ32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD); + break; + case ARM64vecb_CMEQ16x8: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD); + break; + case ARM64vecb_CMEQ8x16: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD); + break; + + case ARM64vecb_CMHI64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD); + break; + case ARM64vecb_CMHI32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD); + break; + case ARM64vecb_CMHI16x8: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD); + break; + case ARM64vecb_CMHI8x16: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD); + break; + + case ARM64vecb_CMGT64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD); + break; + case ARM64vecb_CMGT32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD); + break; + case ARM64vecb_CMGT16x8: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD); + break; + case ARM64vecb_CMGT8x16: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD); + break; + + case ARM64vecb_FCMEQ64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD); + break; + case ARM64vecb_FCMEQ32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD); + break; + + case ARM64vecb_FCMGE64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD); + break; + case ARM64vecb_FCMGE32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD); + break; + + case ARM64vecb_FCMGT64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD); + break; + case ARM64vecb_FCMGT32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD); + break; + + case ARM64vecb_TBL1: + *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD); + 
break; + + default: + goto bad; + } + goto done; + } + case ARM64in_VUnaryV: { + /* 31 23 20 15 9 4 + 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d + 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s + 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d + 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s + 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b + */ + UInt vD = qregNo(i->ARM64in.VUnaryV.dst); + UInt vN = qregNo(i->ARM64in.VUnaryV.arg); + switch (i->ARM64in.VUnaryV.op) { + case ARM64vecu_FABS64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD); + break; + case ARM64vecu_FABS32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD); + break; + case ARM64vecu_FNEG64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD); + break; + case ARM64vecu_FNEG32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD); + break; + case ARM64vecu_NOT: + *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD); + break; + default: + goto bad; + } + goto done; + } + case ARM64in_VNarrowV: { + /* 31 23 21 15 9 4 + 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h + 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s + 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d + */ + UInt vD = qregNo(i->ARM64in.VNarrowV.dst); + UInt vN = qregNo(i->ARM64in.VNarrowV.src); + UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2; + vassert(dszBlg2 >= 0 && dszBlg2 <= 2); + *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1), + X00001, X001010, vN, vD); + goto done; + } + case ARM64in_VShiftImmV: { + /* + 0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh + 0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh + where immh:immb + = case T of + 2d | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx + 4s | sh in 1..31 -> let xxxxx = 32-sh in 01xx:xxx + 8h | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx + 16b | sh in 1..7 -> let xxx = 8-sh in 0001:xxx + + 0q0 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh + where immh:immb + = case T of + 2d | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx + 4s | sh in 1..31 -> let xxxxx = sh in 01xx:xxx + 8h | sh in 1..15 -> let xxxx = sh in 001x:xxx + 16b | sh in 1..7 -> let xxx = sh in 0001:xxx + */ + UInt vD = qregNo(i->ARM64in.VShiftImmV.dst); + UInt vN = qregNo(i->ARM64in.VShiftImmV.src); + UInt sh = i->ARM64in.VShiftImmV.amt; + ARM64VecShiftOp op = i->ARM64in.VShiftImmV.op; + Bool syned = False; + switch (op) { + /* 64x2 cases */ + case ARM64vecsh_SSHR64x2: syned = True; + case ARM64vecsh_USHR64x2: /* fallthrough */ + if (sh >= 1 && sh <= 63) { + UInt xxxxxx = 64-sh; + *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110, + X1000000 | xxxxxx, X000001, vN, vD); + goto done; + } + break; + case ARM64vecsh_SHL64x2: + if (sh >= 1 && sh <= 63) { + UInt xxxxxx = sh; + *p++ = X_3_6_7_6_5_5(X010, X011110, + X1000000 | xxxxxx, X010101, vN, vD); + goto done; + } + break; + /* 32x4 cases */ + case ARM64vecsh_SSHR32x4: syned = True; + case ARM64vecsh_USHR32x4: /* fallthrough */ + if (sh >= 1 && sh <= 31) { + UInt xxxxx = 32-sh; + *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110, + X0100000 | xxxxx, X000001, vN, vD); + goto done; + } + break; + case ARM64vecsh_SHL32x4: + if (sh >= 1 && sh <= 31) { + UInt xxxxx = sh; + *p++ = X_3_6_7_6_5_5(X010, X011110, + X0100000 | xxxxx, X010101, vN, vD); + goto done; + } + break; + /* 16x8 cases */ + case ARM64vecsh_SSHR16x8: syned = True; + case ARM64vecsh_USHR16x8: /* fallthrough */ + if (sh >= 1 && sh <= 15) { + UInt xxxx = 16-sh; + *p++ = X_3_6_7_6_5_5(syned ? 
X010 : X011, X011110, + X0010000 | xxxx, X000001, vN, vD); + goto done; + } + break; + case ARM64vecsh_SHL16x8: + if (sh >= 1 && sh <= 15) { + UInt xxxx = sh; + *p++ = X_3_6_7_6_5_5(X010, X011110, + X0010000 | xxxx, X010101, vN, vD); + goto done; + } + break; + + + /* 8x16 cases */ + case ARM64vecsh_SSHR8x16: syned = True; + case ARM64vecsh_USHR8x16: /* fallthrough */ + if (sh >= 1 && sh <= 7) { + UInt xxx = 8-sh; + *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110, + X0001000 | xxx, X000001, vN, vD); + goto done; + } + break; + case ARM64vecsh_SHL8x16: + if (sh >= 1 && sh <= 7) { + UInt xxx = sh; + *p++ = X_3_6_7_6_5_5(X010, X011110, + X0001000 | xxx, X010101, vN, vD); + goto done; + } + break; + + default: + break; + } + goto bad; + } +//ZZ case ARMin_VAluS: { +//ZZ UInt dN = fregNo(i->ARMin.VAluS.argL); +//ZZ UInt dD = fregNo(i->ARMin.VAluS.dst); +//ZZ UInt dM = fregNo(i->ARMin.VAluS.argR); +//ZZ UInt bN = dN & 1; +//ZZ UInt bD = dD & 1; +//ZZ UInt bM = dM & 1; +//ZZ UInt pqrs = X1111; /* undefined */ +//ZZ switch (i->ARMin.VAluS.op) { +//ZZ case ARMvfp_ADD: pqrs = X0110; break; +//ZZ case ARMvfp_SUB: pqrs = X0111; break; +//ZZ case ARMvfp_MUL: pqrs = X0100; break; +//ZZ case ARMvfp_DIV: pqrs = X1000; break; +//ZZ default: goto bad; +//ZZ } +//ZZ vassert(pqrs != X1111); +//ZZ UInt bP = (pqrs >> 3) & 1; +//ZZ UInt bQ = (pqrs >> 2) & 1; +//ZZ UInt bR = (pqrs >> 1) & 1; +//ZZ UInt bS = (pqrs >> 0) & 1; +//ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR), +//ZZ (dN >> 1), (dD >> 1), +//ZZ X1010, BITS4(bN,bS,bM,0), (dM >> 1)); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_VUnaryS: { +//ZZ UInt fD = fregNo(i->ARMin.VUnaryS.dst); +//ZZ UInt fM = fregNo(i->ARMin.VUnaryS.src); +//ZZ UInt insn = 0; +//ZZ switch (i->ARMin.VUnaryS.op) { +//ZZ case ARMvfpu_COPY: +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000, +//ZZ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0), +//ZZ (fM >> 1)); +//ZZ break; +//ZZ case ARMvfpu_ABS: +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000, +//ZZ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0), +//ZZ (fM >> 1)); +//ZZ break; +//ZZ case ARMvfpu_NEG: +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001, +//ZZ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0), +//ZZ (fM >> 1)); +//ZZ break; +//ZZ case ARMvfpu_SQRT: +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001, +//ZZ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0), +//ZZ (fM >> 1)); +//ZZ break; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_VCMovD: { +//ZZ UInt cc = (UInt)i->ARMin.VCMovD.cond; +//ZZ UInt dD = dregNo(i->ARMin.VCMovD.dst); +//ZZ UInt dM = dregNo(i->ARMin.VCMovD.src); +//ZZ vassert(cc < 16 && cc != ARMcc_AL); +//ZZ UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_VCMovS: { +//ZZ UInt cc = (UInt)i->ARMin.VCMovS.cond; +//ZZ UInt fD = fregNo(i->ARMin.VCMovS.dst); +//ZZ UInt fM = fregNo(i->ARMin.VCMovS.src); +//ZZ vassert(cc < 16 && cc != ARMcc_AL); +//ZZ UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1), +//ZZ X0000,(fD >> 1),X1010, +//ZZ BITS4(0,1,(fM & 1),0), (fM >> 1)); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_VXferD: { +//ZZ UInt dD = dregNo(i->ARMin.VXferD.dD); +//ZZ UInt rHi = iregNo(i->ARMin.VXferD.rHi); +//ZZ UInt rLo = iregNo(i->ARMin.VXferD.rLo); +//ZZ /* vmov dD, rLo, rHi is +//ZZ E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0] +//ZZ vmov rLo, rHi, dD is +//ZZ E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0] +//ZZ */ +//ZZ UInt insn 
+//ZZ = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5, +//ZZ rHi, rLo, 0xB, +//ZZ BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF)); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_VXferS: { +//ZZ UInt fD = fregNo(i->ARMin.VXferS.fD); +//ZZ UInt rLo = iregNo(i->ARMin.VXferS.rLo); +//ZZ /* vmov fD, rLo is +//ZZ E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0 +//ZZ vmov rLo, fD is +//ZZ E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0 +//ZZ */ +//ZZ UInt insn +//ZZ = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1, +//ZZ (fD >> 1) & 0xF, rLo, 0xA, +//ZZ BITS4((fD & 1),0,0,1), 0); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_VCvtID: { +//ZZ Bool iToD = i->ARMin.VCvtID.iToD; +//ZZ Bool syned = i->ARMin.VCvtID.syned; +//ZZ if (iToD && syned) { +//ZZ // FSITOD: I32S-in-freg to F64-in-dreg +//ZZ UInt regF = fregNo(i->ARMin.VCvtID.src); +//ZZ UInt regD = dregNo(i->ARMin.VCvtID.dst); +//ZZ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD, +//ZZ X1011, BITS4(1,1,(regF & 1),0), +//ZZ (regF >> 1) & 0xF); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ if (iToD && (!syned)) { +//ZZ // FUITOD: I32U-in-freg to F64-in-dreg +//ZZ UInt regF = fregNo(i->ARMin.VCvtID.src); +//ZZ UInt regD = dregNo(i->ARMin.VCvtID.dst); +//ZZ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD, +//ZZ X1011, BITS4(0,1,(regF & 1),0), +//ZZ (regF >> 1) & 0xF); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ if ((!iToD) && syned) { +//ZZ // FTOSID: F64-in-dreg to I32S-in-freg +//ZZ UInt regD = dregNo(i->ARMin.VCvtID.src); +//ZZ UInt regF = fregNo(i->ARMin.VCvtID.dst); +//ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1), +//ZZ X1101, (regF >> 1) & 0xF, +//ZZ X1011, X0100, regD); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ if ((!iToD) && (!syned)) { +//ZZ // FTOUID: F64-in-dreg to I32U-in-freg +//ZZ UInt regD = dregNo(i->ARMin.VCvtID.src); +//ZZ UInt regF = fregNo(i->ARMin.VCvtID.dst); +//ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1), +//ZZ X1100, (regF >> 1) & 0xF, +//ZZ X1011, X0100, regD); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ /*UNREACHED*/ +//ZZ vassert(0); +//ZZ } +//ZZ case ARMin_NLdStD: { +//ZZ UInt regD = dregNo(i->ARMin.NLdStD.dD); +//ZZ UInt regN, regM; +//ZZ UInt D = regD >> 4; +//ZZ UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0; +//ZZ UInt insn; +//ZZ vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64); +//ZZ regD &= 0xF; +//ZZ if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) { +//ZZ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN); +//ZZ regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM); +//ZZ } else { +//ZZ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN); +//ZZ regM = 15; +//ZZ } +//ZZ insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0), +//ZZ regN, regD, X0111, X1000, regM); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_NUnaryS: { +//ZZ UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0; +//ZZ UInt regD, D; +//ZZ UInt regM, M; +//ZZ UInt size = i->ARMin.NUnaryS.size; +//ZZ UInt insn; +//ZZ UInt opc, opc1, opc2; +//ZZ switch (i->ARMin.NUnaryS.op) { +//ZZ case ARMneon_VDUP: +//ZZ if (i->ARMin.NUnaryS.size >= 16) +//ZZ goto bad; +//ZZ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg) +//ZZ goto bad; +//ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar) +//ZZ goto bad; +//ZZ regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) +//ZZ : dregNo(i->ARMin.NUnaryS.dst->reg); +//ZZ regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128) +//ZZ ? 
(qregNo(i->ARMin.NUnaryS.src->reg) << 1) +//ZZ : dregNo(i->ARMin.NUnaryS.src->reg); +//ZZ D = regD >> 4; +//ZZ M = regM >> 4; +//ZZ regD &= 0xf; +//ZZ regM &= 0xf; +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), +//ZZ (i->ARMin.NUnaryS.size & 0xf), regD, +//ZZ X1100, BITS4(0,Q,M,0), regM); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ case ARMneon_SETELEM: +//ZZ regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) : +//ZZ dregNo(i->ARMin.NUnaryS.dst->reg); +//ZZ regM = iregNo(i->ARMin.NUnaryS.src->reg); +//ZZ M = regM >> 4; +//ZZ D = regD >> 4; +//ZZ regM &= 0xF; +//ZZ regD &= 0xF; +//ZZ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar) +//ZZ goto bad; +//ZZ switch (size) { +//ZZ case 0: +//ZZ if (i->ARMin.NUnaryS.dst->index > 7) +//ZZ goto bad; +//ZZ opc = X1000 | i->ARMin.NUnaryS.dst->index; +//ZZ break; +//ZZ case 1: +//ZZ if (i->ARMin.NUnaryS.dst->index > 3) +//ZZ goto bad; +//ZZ opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1); +//ZZ break; +//ZZ case 2: +//ZZ if (i->ARMin.NUnaryS.dst->index > 1) +//ZZ goto bad; +//ZZ opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2); +//ZZ break; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ opc1 = (opc >> 2) & 3; +//ZZ opc2 = opc & 3; +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0), +//ZZ regD, regM, X1011, +//ZZ BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ case ARMneon_GETELEMU: +//ZZ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) : +//ZZ dregNo(i->ARMin.NUnaryS.src->reg); +//ZZ regD = iregNo(i->ARMin.NUnaryS.dst->reg); +//ZZ M = regM >> 4; +//ZZ D = regD >> 4; +//ZZ regM &= 0xF; +//ZZ regD &= 0xF; +//ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar) +//ZZ goto bad; +//ZZ switch (size) { +//ZZ case 0: +//ZZ if (Q && i->ARMin.NUnaryS.src->index > 7) { +//ZZ regM++; +//ZZ i->ARMin.NUnaryS.src->index -= 8; +//ZZ } +//ZZ if (i->ARMin.NUnaryS.src->index > 7) +//ZZ goto bad; +//ZZ opc = X1000 | i->ARMin.NUnaryS.src->index; +//ZZ break; +//ZZ case 1: +//ZZ if (Q && i->ARMin.NUnaryS.src->index > 3) { +//ZZ regM++; +//ZZ i->ARMin.NUnaryS.src->index -= 4; +//ZZ } +//ZZ if (i->ARMin.NUnaryS.src->index > 3) +//ZZ goto bad; +//ZZ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1); +//ZZ break; +//ZZ case 2: +//ZZ goto bad; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ opc1 = (opc >> 2) & 3; +//ZZ opc2 = opc & 3; +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1), +//ZZ regM, regD, X1011, +//ZZ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ case ARMneon_GETELEMS: +//ZZ regM = Q ? 
(qregNo(i->ARMin.NUnaryS.src->reg) << 1) : +//ZZ dregNo(i->ARMin.NUnaryS.src->reg); +//ZZ regD = iregNo(i->ARMin.NUnaryS.dst->reg); +//ZZ M = regM >> 4; +//ZZ D = regD >> 4; +//ZZ regM &= 0xF; +//ZZ regD &= 0xF; +//ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar) +//ZZ goto bad; +//ZZ switch (size) { +//ZZ case 0: +//ZZ if (Q && i->ARMin.NUnaryS.src->index > 7) { +//ZZ regM++; +//ZZ i->ARMin.NUnaryS.src->index -= 8; +//ZZ } +//ZZ if (i->ARMin.NUnaryS.src->index > 7) +//ZZ goto bad; +//ZZ opc = X1000 | i->ARMin.NUnaryS.src->index; +//ZZ break; +//ZZ case 1: +//ZZ if (Q && i->ARMin.NUnaryS.src->index > 3) { +//ZZ regM++; +//ZZ i->ARMin.NUnaryS.src->index -= 4; +//ZZ } +//ZZ if (i->ARMin.NUnaryS.src->index > 3) +//ZZ goto bad; +//ZZ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1); +//ZZ break; +//ZZ case 2: +//ZZ if (Q && i->ARMin.NUnaryS.src->index > 1) { +//ZZ regM++; +//ZZ i->ARMin.NUnaryS.src->index -= 2; +//ZZ } +//ZZ if (i->ARMin.NUnaryS.src->index > 1) +//ZZ goto bad; +//ZZ opc = X0000 | (i->ARMin.NUnaryS.src->index << 2); +//ZZ break; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ opc1 = (opc >> 2) & 3; +//ZZ opc2 = opc & 3; +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1), +//ZZ regM, regD, X1011, +//ZZ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ } +//ZZ case ARMin_NUnary: { +//ZZ UInt Q = i->ARMin.NUnary.Q ? 1 : 0; +//ZZ UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NUnary.dst) << 1) +//ZZ : dregNo(i->ARMin.NUnary.dst); +//ZZ UInt regM, M; +//ZZ UInt D = regD >> 4; +//ZZ UInt sz1 = i->ARMin.NUnary.size >> 1; +//ZZ UInt sz2 = i->ARMin.NUnary.size & 1; +//ZZ UInt sz = i->ARMin.NUnary.size; +//ZZ UInt insn; +//ZZ UInt F = 0; /* TODO: floating point EQZ ??? */ +//ZZ if (i->ARMin.NUnary.op != ARMneon_DUP) { +//ZZ regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NUnary.src) << 1) +//ZZ : dregNo(i->ARMin.NUnary.src); +//ZZ M = regM >> 4; +//ZZ } else { +//ZZ regM = iregNo(i->ARMin.NUnary.src); +//ZZ M = regM >> 4; +//ZZ } +//ZZ regD &= 0xF; +//ZZ regM &= 0xF; +//ZZ switch (i->ARMin.NUnary.op) { +//ZZ case ARMneon_COPY: /* VMOV reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001, +//ZZ BITS4(M,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_COPYN: /* VMOVN regD, regQ */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0), +//ZZ regD, X0010, BITS4(0,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0), +//ZZ regD, X0010, BITS4(1,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0), +//ZZ regD, X0010, BITS4(0,1,M,0), regM); +//ZZ break; +//ZZ case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0), +//ZZ regD, X0010, BITS4(1,1,M,0), regM); +//ZZ break; +//ZZ case ARMneon_COPYLS: /* VMOVL regQ, regD */ +//ZZ if (sz >= 3) +//ZZ goto bad; +//ZZ insn = XXXXXXXX(0xF, X0010, +//ZZ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0), +//ZZ BITS4((sz == 0) ? 1 : 0,0,0,0), +//ZZ regD, X1010, BITS4(0,0,M,1), regM); +//ZZ break; +//ZZ case ARMneon_COPYLU: /* VMOVL regQ, regD */ +//ZZ if (sz >= 3) +//ZZ goto bad; +//ZZ insn = XXXXXXXX(0xF, X0011, +//ZZ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0), +//ZZ BITS4((sz == 0) ? 
1 : 0,0,0,0), +//ZZ regD, X1010, BITS4(0,0,M,1), regM); +//ZZ break; +//ZZ case ARMneon_NOT: /* VMVN reg, reg*/ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101, +//ZZ BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_EQZ: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1), +//ZZ regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_CNT: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101, +//ZZ BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_CLZ: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0), +//ZZ regD, X0100, BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_CLS: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0), +//ZZ regD, X0100, BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_ABS: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1), +//ZZ regD, X0011, BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_DUP: +//ZZ sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0; +//ZZ sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0; +//ZZ vassert(sz1 + sz2 < 2); +//ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM, +//ZZ X1011, BITS4(D,0,sz2,1), X0000); +//ZZ break; +//ZZ case ARMneon_REV16: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0), +//ZZ regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_REV32: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0), +//ZZ regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_REV64: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0), +//ZZ regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_PADDLU: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0), +//ZZ regD, X0010, BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_PADDLS: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0), +//ZZ regD, X0010, BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VQSHLNUU: +//ZZ insn = XXXXXXXX(0xF, X0011, +//ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3), +//ZZ sz & 0xf, regD, X0111, +//ZZ BITS4(sz >> 6,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VQSHLNSS: +//ZZ insn = XXXXXXXX(0xF, X0010, +//ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3), +//ZZ sz & 0xf, regD, X0111, +//ZZ BITS4(sz >> 6,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VQSHLNUS: +//ZZ insn = XXXXXXXX(0xF, X0011, +//ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3), +//ZZ sz & 0xf, regD, X0110, +//ZZ BITS4(sz >> 6,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCVTFtoS: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111, +//ZZ BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCVTFtoU: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111, +//ZZ BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCVTStoF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110, +//ZZ BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCVTUtoF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110, +//ZZ BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCVTFtoFixedU: +//ZZ sz1 = (sz >> 5) & 1; +//ZZ sz2 = (sz >> 4) & 1; +//ZZ sz &= 0xf; +//ZZ insn = XXXXXXXX(0xF, X0011, +//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1111, +//ZZ BITS4(0,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCVTFtoFixedS: +//ZZ sz1 = (sz >> 5) & 1; +//ZZ sz2 = (sz >> 4) & 1; +//ZZ sz &= 0xf; +//ZZ insn = XXXXXXXX(0xF, X0010, +//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1111, +//ZZ 
BITS4(0,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCVTFixedUtoF: +//ZZ sz1 = (sz >> 5) & 1; +//ZZ sz2 = (sz >> 4) & 1; +//ZZ sz &= 0xf; +//ZZ insn = XXXXXXXX(0xF, X0011, +//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1110, +//ZZ BITS4(0,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCVTFixedStoF: +//ZZ sz1 = (sz >> 5) & 1; +//ZZ sz2 = (sz >> 4) & 1; +//ZZ sz &= 0xf; +//ZZ insn = XXXXXXXX(0xF, X0010, +//ZZ BITS4(1,D,sz1,sz2), sz, regD, X1110, +//ZZ BITS4(0,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCVTF32toF16: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110, +//ZZ BITS4(0,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCVTF16toF32: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111, +//ZZ BITS4(0,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRECIP: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100, +//ZZ BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRECIPF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101, +//ZZ BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VABSFP: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111, +//ZZ BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRSQRTEFP: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101, +//ZZ BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRSQRTE: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100, +//ZZ BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VNEGF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111, +//ZZ BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_NDual: { +//ZZ UInt Q = i->ARMin.NDual.Q ? 1 : 0; +//ZZ UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NDual.arg1) << 1) +//ZZ : dregNo(i->ARMin.NDual.arg1); +//ZZ UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NDual.arg2) << 1) +//ZZ : dregNo(i->ARMin.NDual.arg2); +//ZZ UInt D = regD >> 4; +//ZZ UInt M = regM >> 4; +//ZZ UInt sz1 = i->ARMin.NDual.size >> 1; +//ZZ UInt sz2 = i->ARMin.NDual.size & 1; +//ZZ UInt insn; +//ZZ regD &= 0xF; +//ZZ regM &= 0xF; +//ZZ switch (i->ARMin.NDual.op) { +//ZZ case ARMneon_TRN: /* VTRN reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0), +//ZZ regD, X0000, BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_ZIP: /* VZIP reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0), +//ZZ regD, X0001, BITS4(1,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_UZP: /* VUZP reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0), +//ZZ regD, X0001, BITS4(0,Q,M,0), regM); +//ZZ break; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_NBinary: { +//ZZ UInt Q = i->ARMin.NBinary.Q ? 1 : 0; +//ZZ UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NBinary.dst) << 1) +//ZZ : dregNo(i->ARMin.NBinary.dst); +//ZZ UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NBinary.argL) << 1) +//ZZ : dregNo(i->ARMin.NBinary.argL); +//ZZ UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128) +//ZZ ? 
(qregNo(i->ARMin.NBinary.argR) << 1) +//ZZ : dregNo(i->ARMin.NBinary.argR); +//ZZ UInt sz1 = i->ARMin.NBinary.size >> 1; +//ZZ UInt sz2 = i->ARMin.NBinary.size & 1; +//ZZ UInt D = regD >> 4; +//ZZ UInt N = regN >> 4; +//ZZ UInt M = regM >> 4; +//ZZ UInt insn; +//ZZ regD &= 0xF; +//ZZ regM &= 0xF; +//ZZ regN &= 0xF; +//ZZ switch (i->ARMin.NBinary.op) { +//ZZ case ARMneon_VAND: /* VAND reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001, +//ZZ BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VORR: /* VORR reg, reg, reg*/ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001, +//ZZ BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VXOR: /* VEOR reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001, +//ZZ BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VADD: /* VADD reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1000, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VSUB: /* VSUB reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1000, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0110, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0110, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0110, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0110, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0001, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0001, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0000, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0000, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0010, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0010, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0011, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0011, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0011, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCGES: /* VCGE signed reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ 
X0011, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCEQ: /* VCEQ reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1000, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/ +//ZZ if (i->ARMin.NBinary.size >= 16) +//ZZ goto bad; +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD, +//ZZ i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0), +//ZZ regM); +//ZZ break; +//ZZ case ARMneon_VMUL: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1001, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VMULLU: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD, +//ZZ X1100, BITS4(N,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VMULLS: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD, +//ZZ X1100, BITS4(N,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VMULP: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1001, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VMULFP: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, +//ZZ X1101, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VMULLP: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD, +//ZZ X1110, BITS4(N,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VQDMULH: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1011, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VQRDMULH: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1011, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VQDMULL: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD, +//ZZ X1101, BITS4(N,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VTBL: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD, +//ZZ X1000, BITS4(N,0,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VPADD: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1011, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VPADDFP: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, +//ZZ X1101, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VPMINU: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1010, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VPMINS: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1010, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VPMAXU: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1010, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VPMAXS: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X1010, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VADDFP: /* VADD reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, +//ZZ X1101, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VSUBFP: /* VADD reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, +//ZZ X1101, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VABDFP: /* VABD reg, reg, reg */ +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, +//ZZ X1101, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VMINF: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, +//ZZ X1111, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VMAXF: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, +//ZZ X1111, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VPMINF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), 
regN, regD, +//ZZ X1111, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VPMAXF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, +//ZZ X1111, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRECPS: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111, +//ZZ BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VCGTF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110, +//ZZ BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCGEF: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110, +//ZZ BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VCEQF: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110, +//ZZ BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VRSQRTS: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111, +//ZZ BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_NShift: { +//ZZ UInt Q = i->ARMin.NShift.Q ? 1 : 0; +//ZZ UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NShift.dst) << 1) +//ZZ : dregNo(i->ARMin.NShift.dst); +//ZZ UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NShift.argL) << 1) +//ZZ : dregNo(i->ARMin.NShift.argL); +//ZZ UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128) +//ZZ ? (qregNo(i->ARMin.NShift.argR) << 1) +//ZZ : dregNo(i->ARMin.NShift.argR); +//ZZ UInt sz1 = i->ARMin.NShift.size >> 1; +//ZZ UInt sz2 = i->ARMin.NShift.size & 1; +//ZZ UInt D = regD >> 4; +//ZZ UInt N = regN >> 4; +//ZZ UInt M = regM >> 4; +//ZZ UInt insn; +//ZZ regD &= 0xF; +//ZZ regM &= 0xF; +//ZZ regN &= 0xF; +//ZZ switch (i->ARMin.NShift.op) { +//ZZ case ARMneon_VSHL: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0100, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VSAL: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0100, BITS4(N,Q,M,0), regM); +//ZZ break; +//ZZ case ARMneon_VQSHL: +//ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0100, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ case ARMneon_VQSAL: +//ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD, +//ZZ X0100, BITS4(N,Q,M,1), regM); +//ZZ break; +//ZZ default: +//ZZ goto bad; +//ZZ } +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_NShl64: { +//ZZ HReg regDreg = i->ARMin.NShl64.dst; +//ZZ HReg regMreg = i->ARMin.NShl64.src; +//ZZ UInt amt = i->ARMin.NShl64.amt; +//ZZ vassert(amt >= 1 && amt <= 63); +//ZZ vassert(hregClass(regDreg) == HRcFlt64); +//ZZ vassert(hregClass(regMreg) == HRcFlt64); +//ZZ UInt regD = dregNo(regDreg); +//ZZ UInt regM = dregNo(regMreg); +//ZZ UInt D = (regD >> 4) & 1; +//ZZ UInt Vd = regD & 0xF; +//ZZ UInt L = 1; +//ZZ UInt Q = 0; /* always 64-bit */ +//ZZ UInt M = (regM >> 4) & 1; +//ZZ UInt Vm = regM & 0xF; +//ZZ UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1), +//ZZ amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } + case ARM64in_VImmQ: { + UInt rQ = qregNo(i->ARM64in.VImmQ.rQ); + UShort imm = i->ARM64in.VImmQ.imm; + if (imm == 0x0000) { + /* movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ */ + vassert(rQ < 32); + *p++ = 0x4F000400 | rQ; + goto done; + } + if (imm == 0x0001) { + /* movi rD, #0xFF == 0x2F 0x00 0xE4 001 rD */ + vassert(rQ < 32); + *p++ = 0x2F00E420 | rQ; + goto done; + } + if (imm == 0x0003) { + /* movi rD, #0xFFFF == 0x2F 0x00 0xE4 011 rD */ + vassert(rQ < 
32); + *p++ = 0x2F00E460 | rQ; + goto done; + } + if (imm == 0x000F) { + /* movi rD, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rD */ + vassert(rQ < 32); + *p++ = 0x2F00E5E0 | rQ; + goto done; + } + if (imm == 0x00FF) { + /* movi rD, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rD */ + vassert(rQ < 32); + *p++ = 0x2F07E7E0 | rQ; + goto done; + } + goto bad; /* no other handled cases right now */ + } + + case ARM64in_VDfromX: { + /* INS Vd.D[0], rX + 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn + This isn't wonderful, in the sense that the upper half of + the vector register stays unchanged and thus the insn is + data dependent on its output register. */ + UInt dd = dregNo(i->ARM64in.VDfromX.rD); + UInt xx = iregNo(i->ARM64in.VDfromX.rX); + vassert(xx < 31); + *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd); + goto done; + } + + case ARM64in_VQfromXX: { + /* What we really generate is a two insn sequence: + INS Vd.D[0], Xlo; INS Vd.D[1], Xhi + 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn + 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn + */ + UInt qq = qregNo(i->ARM64in.VQfromXX.rQ); + UInt xhi = iregNo(i->ARM64in.VQfromXX.rXhi); + UInt xlo = iregNo(i->ARM64in.VQfromXX.rXlo); + vassert(xhi < 31 && xlo < 31); + *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq); + *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq); + goto done; + } + + case ARM64in_VXfromQ: { + /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0] + 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1] + */ + UInt dd = iregNo(i->ARM64in.VXfromQ.rX); + UInt nn = qregNo(i->ARM64in.VXfromQ.rQ); + UInt laneNo = i->ARM64in.VXfromQ.laneNo; + vassert(dd < 31); + vassert(laneNo < 2); + *p++ = X_3_8_5_6_5_5(X010, X01110000, + laneNo == 1 ? X11000 : X01000, X001111, nn, dd); + goto done; + } + + case ARM64in_VMov: { + /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn + 000 11110 01 10000 00 10000 n d FMOV Dd, Dn + 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b + */ + HReg rD = i->ARM64in.VMov.dst; + HReg rN = i->ARM64in.VMov.src; + switch (i->ARM64in.VMov.szB) { + case 8: { + UInt dd = dregNo(rD); + UInt nn = dregNo(rN); + *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd); + goto done; + } + default: + break; + } + goto bad; + } +//ZZ case ARMin_NeonImm: { +//ZZ UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0; +//ZZ UInt regD = Q ? 
(qregNo(i->ARMin.NeonImm.dst) << 1) : +//ZZ dregNo(i->ARMin.NeonImm.dst); +//ZZ UInt D = regD >> 4; +//ZZ UInt imm = i->ARMin.NeonImm.imm->imm8; +//ZZ UInt tp = i->ARMin.NeonImm.imm->type; +//ZZ UInt j = imm >> 7; +//ZZ UInt imm3 = (imm >> 4) & 0x7; +//ZZ UInt imm4 = imm & 0xF; +//ZZ UInt cmode, op; +//ZZ UInt insn; +//ZZ regD &= 0xF; +//ZZ if (tp == 9) +//ZZ op = 1; +//ZZ else +//ZZ op = 0; +//ZZ switch (tp) { +//ZZ case 0: +//ZZ case 1: +//ZZ case 2: +//ZZ case 3: +//ZZ case 4: +//ZZ case 5: +//ZZ cmode = tp << 1; +//ZZ break; +//ZZ case 9: +//ZZ case 6: +//ZZ cmode = 14; +//ZZ break; +//ZZ case 7: +//ZZ cmode = 12; +//ZZ break; +//ZZ case 8: +//ZZ cmode = 13; +//ZZ break; +//ZZ case 10: +//ZZ cmode = 15; +//ZZ break; +//ZZ default: +//ZZ vpanic("ARMin_NeonImm"); +//ZZ +//ZZ } +//ZZ insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD, +//ZZ cmode, BITS4(0,Q,op,1), imm4); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_NCMovQ: { +//ZZ UInt cc = (UInt)i->ARMin.NCMovQ.cond; +//ZZ UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1; +//ZZ UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1; +//ZZ UInt vM = qM & 0xF; +//ZZ UInt vD = qD & 0xF; +//ZZ UInt M = (qM >> 4) & 1; +//ZZ UInt D = (qD >> 4) & 1; +//ZZ vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV); +//ZZ /* b!cc here+8: !cc A00 0000 */ +//ZZ UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0); +//ZZ *p++ = insn; +//ZZ /* vmov qD, qM */ +//ZZ insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0), +//ZZ vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } +//ZZ case ARMin_Add32: { +//ZZ UInt regD = iregNo(i->ARMin.Add32.rD); +//ZZ UInt regN = iregNo(i->ARMin.Add32.rN); +//ZZ UInt imm32 = i->ARMin.Add32.imm32; +//ZZ vassert(regD != regN); +//ZZ /* MOV regD, imm32 */ +//ZZ p = imm32_to_iregNo((UInt *)p, regD, imm32); +//ZZ /* ADD regD, regN, regD */ +//ZZ UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD); +//ZZ *p++ = insn; +//ZZ goto done; +//ZZ } + + case ARM64in_EvCheck: { + /* The sequence is fixed (canned) except for the two amodes + supplied by the insn. These don't change the length, though. + We generate: + ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER) + subs w9, w9, #1 + str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER) + bpl nofail + ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR) + br x9 + nofail: + */ + UInt* p0 = p; + p = do_load_or_store32(p, True/*isLoad*/, /*w*/9, + i->ARM64in.EvCheck.amCounter); + *p++ = 0x71000529; /* subs w9, w9, #1 */ + p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9, + i->ARM64in.EvCheck.amCounter); + *p++ = 0x54000065; /* bpl nofail */ + p = do_load_or_store64(p, True/*isLoad*/, /*x*/9, + i->ARM64in.EvCheck.amFailAddr); + *p++ = 0xD61F0120; /* br x9 */ + /* nofail: */ + + /* Crosscheck */ + vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0); + goto done; + } + +//ZZ case ARMin_ProfInc: { +//ZZ /* We generate: +//ZZ (ctrP is unknown now, so use 0x65556555 in the +//ZZ expectation that a later call to LibVEX_patchProfCtr +//ZZ will be used to fill in the immediate fields once the +//ZZ right value is known.) 
+//ZZ movw r12, lo16(0x65556555) +//ZZ movt r12, lo16(0x65556555) +//ZZ ldr r11, [r12] +//ZZ adds r11, r11, #1 +//ZZ str r11, [r12] +//ZZ ldr r11, [r12+4] +//ZZ adc r11, r11, #0 +//ZZ str r11, [r12+4] +//ZZ */ +//ZZ p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555); +//ZZ *p++ = 0xE59CB000; +//ZZ *p++ = 0xE29BB001; +//ZZ *p++ = 0xE58CB000; +//ZZ *p++ = 0xE59CB004; +//ZZ *p++ = 0xE2ABB000; +//ZZ *p++ = 0xE58CB004; +//ZZ /* Tell the caller .. */ +//ZZ vassert(!(*is_profInc)); +//ZZ *is_profInc = True; +//ZZ goto done; +//ZZ } + + /* ... */ + default: + goto bad; + } + + bad: + ppARM64Instr(i); + vpanic("emit_ARM64Instr"); + /*NOTREACHED*/ + + done: + vassert(((UChar*)p) - &buf[0] <= 36); + return ((UChar*)p) - &buf[0]; +} + + +/* How big is an event check? See case for ARM64in_EvCheck in + emit_ARM64Instr just above. That crosschecks what this returns, so + we can tell if we're inconsistent. */ +Int evCheckSzB_ARM64 ( void ) +{ + return 24; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange chainXDirect_ARM64 ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ) +{ + /* What we're expecting to see is: + movw x9, disp_cp_chain_me_to_EXPECTED[15:0] + movk x9, disp_cp_chain_me_to_EXPECTED[31:15], lsl 16 + movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32 + movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48 + blr x9 + viz + <16 bytes generated by imm64_to_iregNo_EXACTLY4> + D6 3F 01 20 + */ + UInt* p = (UInt*)place_to_chain; + vassert(0 == (3 & (HWord)p)); + vassert(is_imm64_to_iregNo_EXACTLY4( + p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me_EXPECTED))); + vassert(p[4] == 0xD63F0120); + + /* And what we want to change it to is: + movw x9, place_to_jump_to[15:0] + movk x9, place_to_jump_to[31:15], lsl 16 + movk x9, place_to_jump_to[47:32], lsl 32 + movk x9, place_to_jump_to[63:48], lsl 48 + br x9 + viz + <16 bytes generated by imm64_to_iregNo_EXACTLY4> + D6 1F 01 20 + + The replacement has the same length as the original. + */ + (void)imm64_to_iregNo_EXACTLY4( + p, /*x*/9, Ptr_to_ULong(place_to_jump_to)); + p[4] = 0xD61F0120; + + VexInvalRange vir = {(HWord)p, 20}; + return vir; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. 
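For reference, the fixed-length immediate sequence that both the chain and unchain cases rely on can be sketched as below. This is an illustrative reconstruction only: the name sketch_imm64_to_x9_EXACTLY4 is hypothetical and the real imm64_to_iregNo_EXACTLY4 in this patch may differ in detail. 0xD2800000 and 0xF2800000 are the standard A64 MOVZ/MOVK encoding bases, with the hw (shift selector) field at bit 21, imm16 at bit 5 and Rd at bit 0.

   static UInt* sketch_imm64_to_x9_EXACTLY4 ( UInt* p, ULong imm64 )
   {
      // MOVZ x9, #imm64[15:0], lsl 0   (zeroes the other 48 bits of x9)
      *p++ = 0xD2800000 | ((UInt)((imm64 >>  0) & 0xFFFF) << 5) | 9;
      // MOVK x9, #imm64[31:16], lsl 16
      *p++ = 0xF2800000 | (1u << 21) | ((UInt)((imm64 >> 16) & 0xFFFF) << 5) | 9;
      // MOVK x9, #imm64[47:32], lsl 32
      *p++ = 0xF2800000 | (2u << 21) | ((UInt)((imm64 >> 32) & 0xFFFF) << 5) | 9;
      // MOVK x9, #imm64[63:48], lsl 48
      *p++ = 0xF2800000 | (3u << 21) | ((UInt)((imm64 >> 48) & 0xFFFF) << 5) | 9;
      // Always exactly 4 insns == 16 bytes, so the chain/unchain patch site
      // has a fixed layout and a fixed 20-byte invalidation range.
      return p;
   }
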
*/ +VexInvalRange unchainXDirect_ARM64 ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ) +{ + /* What we're expecting to see is: + movw x9, place_to_jump_to_EXPECTED[15:0] + movk x9, place_to_jump_to_EXPECTED[31:15], lsl 16 + movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32 + movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48 + br x9 + viz + <16 bytes generated by imm64_to_iregNo_EXACTLY4> + D6 1F 01 20 + */ + UInt* p = (UInt*)place_to_unchain; + vassert(0 == (3 & (HWord)p)); + vassert(is_imm64_to_iregNo_EXACTLY4( + p, /*x*/9, Ptr_to_ULong(place_to_jump_to_EXPECTED))); + vassert(p[4] == 0xD61F0120); + + /* And what we want to change it to is: + movw x9, disp_cp_chain_me_to[15:0] + movk x9, disp_cp_chain_me_to[31:15], lsl 16 + movk x9, disp_cp_chain_me_to[47:32], lsl 32 + movk x9, disp_cp_chain_me_to[63:48], lsl 48 + blr x9 + viz + <16 bytes generated by imm64_to_iregNo_EXACTLY4> + D6 3F 01 20 + */ + (void)imm64_to_iregNo_EXACTLY4( + p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me)); + p[4] = 0xD63F0120; + + VexInvalRange vir = {(HWord)p, 20}; + return vir; +} + + +//ZZ /* Patch the counter address into a profile inc point, as previously +//ZZ created by the ARMin_ProfInc case for emit_ARMInstr. */ +//ZZ VexInvalRange patchProfInc_ARM ( void* place_to_patch, +//ZZ ULong* location_of_counter ) +//ZZ { +//ZZ vassert(sizeof(ULong*) == 4); +//ZZ UInt* p = (UInt*)place_to_patch; +//ZZ vassert(0 == (3 & (HWord)p)); +//ZZ vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555)); +//ZZ vassert(p[2] == 0xE59CB000); +//ZZ vassert(p[3] == 0xE29BB001); +//ZZ vassert(p[4] == 0xE58CB000); +//ZZ vassert(p[5] == 0xE59CB004); +//ZZ vassert(p[6] == 0xE2ABB000); +//ZZ vassert(p[7] == 0xE58CB004); +//ZZ imm32_to_iregNo_EXACTLY2(p, /*r*/12, +//ZZ (UInt)Ptr_to_ULong(location_of_counter)); +//ZZ VexInvalRange vir = {(HWord)p, 8}; +//ZZ return vir; +//ZZ } +//ZZ +//ZZ +//ZZ #undef BITS4 +//ZZ #undef X0000 +//ZZ #undef X0001 +//ZZ #undef X0010 +//ZZ #undef X0011 +//ZZ #undef X0100 +//ZZ #undef X0101 +//ZZ #undef X0110 +//ZZ #undef X0111 +//ZZ #undef X1000 +//ZZ #undef X1001 +//ZZ #undef X1010 +//ZZ #undef X1011 +//ZZ #undef X1100 +//ZZ #undef X1101 +//ZZ #undef X1110 +//ZZ #undef X1111 +//ZZ #undef XXXXX___ +//ZZ #undef XXXXXX__ +//ZZ #undef XXX___XX +//ZZ #undef XXXXX__X +//ZZ #undef XXXXXXXX +//ZZ #undef XX______ + +/*---------------------------------------------------------------*/ +/*--- end host_arm64_defs.c ---*/ +/*---------------------------------------------------------------*/ Index: priv/host_arm64_defs.h =================================================================== --- priv/host_arm64_defs.h (.../tags/VEX_3_9_0) (revision 0) +++ priv/host_arm64_defs.h (.../trunk) (revision 2863) @@ -0,0 +1,1148 @@ + +/*---------------------------------------------------------------*/ +/*--- begin host_arm64_defs.h ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2013-2013 OpenWorks + info@open-works.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __VEX_HOST_ARM64_DEFS_H +#define __VEX_HOST_ARM64_DEFS_H + +#include "libvex_basictypes.h" +#include "libvex.h" // VexArch +#include "host_generic_regs.h" // HReg + +//ZZ extern UInt arm_hwcaps; + + +/* --------- Registers. --------- */ + +//ZZ /* The usual HReg abstraction. +//ZZ There are 16 general purpose regs. +//ZZ */ + +extern void ppHRegARM64 ( HReg ); + +extern HReg hregARM64_X0 ( void ); +extern HReg hregARM64_X1 ( void ); +extern HReg hregARM64_X2 ( void ); +extern HReg hregARM64_X3 ( void ); +extern HReg hregARM64_X4 ( void ); +extern HReg hregARM64_X5 ( void ); +extern HReg hregARM64_X6 ( void ); +extern HReg hregARM64_X7 ( void ); +//ZZ extern HReg hregARM_R8 ( void ); +extern HReg hregARM64_X9 ( void ); +extern HReg hregARM64_X10 ( void ); +extern HReg hregARM64_X11 ( void ); +extern HReg hregARM64_X12 ( void ); +extern HReg hregARM64_X13 ( void ); +extern HReg hregARM64_X14 ( void ); +extern HReg hregARM64_X15 ( void ); +extern HReg hregARM64_X21 ( void ); +extern HReg hregARM64_X22 ( void ); +extern HReg hregARM64_X23 ( void ); +extern HReg hregARM64_X24 ( void ); +extern HReg hregARM64_X25 ( void ); +extern HReg hregARM64_X26 ( void ); +extern HReg hregARM64_X27 ( void ); +extern HReg hregARM64_X28 ( void ); +extern HReg hregARM64_D8 ( void ); +extern HReg hregARM64_D9 ( void ); +extern HReg hregARM64_D10 ( void ); +extern HReg hregARM64_D11 ( void ); +extern HReg hregARM64_D12 ( void ); +extern HReg hregARM64_D13 ( void ); +extern HReg hregARM64_Q16 ( void ); +extern HReg hregARM64_Q17 ( void ); +extern HReg hregARM64_Q18 ( void ); + +/* Number of registers used for arg passing in function calls */ +#define ARM64_N_ARGREGS 8 /* x0 .. x7 */ + + +/* --------- Condition codes. --------- */ + +typedef + enum { + ARM64cc_EQ = 0, /* equal : Z=1 */ + ARM64cc_NE = 1, /* not equal : Z=0 */ + + ARM64cc_CS = 2, /* >=u (higher or same) : C=1 */ + ARM64cc_CC = 3, /* <u (lower) : C=0 */ + ARM64cc_MI = 4, /* minus (negative) : N=1 */ + ARM64cc_PL = 5, /* plus (zero or +ve) : N=0 */ + ARM64cc_VS = 6, /* overflow : V=1 */ + ARM64cc_VC = 7, /* no overflow : V=0 */ + ARM64cc_HI = 8, /* >u (higher) : C=1 && Z=0 */ + ARM64cc_LS = 9, /* <=u (lower or same) : !(C=1 && Z=0) */ + + ARM64cc_GE = 10, /* >=s (signed greater or equal) : N=V */ + ARM64cc_LT = 11, /* <s (signed less than) : N!=V */ + ARM64cc_GT = 12, /* >s (signed greater) : Z=0 && N=V */ + ARM64cc_LE = 13, /* <=s (signed less or equal) : !(Z=0 && N=V) */ + + ARM64cc_AL = 14, /* always (unconditional) */ + ARM64cc_NV = 15 /* in 64-bit mode also means "always" */ + } + ARM64CondCode; + + +/* --------- Memory address expressions (amodes). --------- */ + +typedef + enum { + ARM64am_RI9=10, /* reg + simm9 */ + ARM64am_RI12, /* reg + uimm12 * szB (iow, scaled by access size) */ + ARM64am_RR /* reg1 + reg2 */ + } + ARM64AModeTag; + +typedef + struct { + ARM64AModeTag tag; + union { + struct { + HReg reg; + Int simm9; /* -256 .. +255 */ + } RI9; + struct { + HReg reg; + UInt uimm12; /* 0 .. 4095 */ + UChar szB; /* 1, 2, 4, 8 (16 ?)
*/ + } RI12; + struct { + HReg base; + HReg index; + } RR; + } ARM64am; + } + ARM64AMode; + +extern ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ); +extern ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ); +extern ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ); + + +/* --------- Reg or uimm12 or (uimm12 << 12) operands --------- */ + +typedef + enum { + ARM64riA_I12=20, /* uimm12 << 0 or 12 only */ + ARM64riA_R /* reg */ + } + ARM64RIATag; + +typedef + struct { + ARM64RIATag tag; + union { + struct { + UShort imm12; /* 0 .. 4095 */ + UChar shift; /* 0 or 12 only */ + } I12; + struct { + HReg reg; + } R; + } ARM64riA; + } + ARM64RIA; + +extern ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ); +extern ARM64RIA* ARM64RIA_R ( HReg ); + + +/* --------- Reg or "bitfield" (logic immediate) operands --------- */ + +typedef + enum { + ARM64riL_I13=6, /* wierd-o bitfield immediate, 13 bits in total */ + ARM64riL_R /* reg */ + } + ARM64RILTag; + +typedef + struct { + ARM64RILTag tag; + union { + struct { + UChar bitN; /* 0 .. 1 */ + UChar immR; /* 0 .. 63 */ + UChar immS; /* 0 .. 63 */ + } I13; + struct { + HReg reg; + } R; + } ARM64riL; + } + ARM64RIL; + +extern ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ); +extern ARM64RIL* ARM64RIL_R ( HReg ); + + +/* --------------- Reg or uimm6 operands --------------- */ + +typedef + enum { + ARM64ri6_I6=30, /* uimm6, 1 .. 63 only */ + ARM64ri6_R /* reg */ + } + ARM64RI6Tag; + +typedef + struct { + ARM64RI6Tag tag; + union { + struct { + UInt imm6; /* 1 .. 63 */ + } I6; + struct { + HReg reg; + } R; + } ARM64ri6; + } + ARM64RI6; + +extern ARM64RI6* ARM64RI6_I6 ( UInt imm6 ); +extern ARM64RI6* ARM64RI6_R ( HReg ); + + +/* --------------------- Instructions --------------------- */ + +typedef + enum { + ARM64lo_AND=40, + ARM64lo_OR, + ARM64lo_XOR + } + ARM64LogicOp; + +typedef + enum { + ARM64sh_SHL=50, + ARM64sh_SHR, + ARM64sh_SAR + } + ARM64ShiftOp; + +typedef + enum { + ARM64un_NEG=60, + ARM64un_NOT, + ARM64un_CLZ, + } + ARM64UnaryOp; + +typedef + enum { + ARM64mul_PLAIN=70, /* lo64(64 * 64) */ + ARM64mul_ZX, /* hi64(64 *u 64) */ + ARM64mul_SX /* hi64(64 *s 64) */ + } + ARM64MulOp; + +typedef + /* These characterise an integer-FP conversion, but don't imply any + particular direction. 
*/ + enum { + ARM64cvt_F32_I32S=80, + ARM64cvt_F64_I32S, + ARM64cvt_F32_I64S, + ARM64cvt_F64_I64S, + ARM64cvt_F32_I32U, + ARM64cvt_F64_I32U, + ARM64cvt_F32_I64U, + ARM64cvt_F64_I64U, + ARM64cvt_INVALID + } + ARM64CvtOp; + +typedef + enum { + ARM64fpb_ADD=100, + ARM64fpb_SUB, + ARM64fpb_MUL, + ARM64fpb_DIV, + ARM64fpb_INVALID + } + ARM64FpBinOp; + +typedef + enum { + ARM64fpu_NEG=110, + ARM64fpu_ABS, + ARM64fpu_SQRT, + ARM64fpu_RINT, + ARM64fpu_INVALID + } + ARM64FpUnaryOp; + +typedef + enum { + ARM64vecb_ADD64x2=120, + ARM64vecb_ADD32x4, + ARM64vecb_ADD16x8, + ARM64vecb_ADD8x16, + ARM64vecb_SUB64x2, + ARM64vecb_SUB32x4, + ARM64vecb_SUB16x8, + ARM64vecb_SUB8x16, + ARM64vecb_MUL32x4, + ARM64vecb_MUL16x8, + ARM64vecb_MUL8x16, + ARM64vecb_FADD64x2, + ARM64vecb_FSUB64x2, + ARM64vecb_FMUL64x2, + ARM64vecb_FDIV64x2, + ARM64vecb_FADD32x4, + ARM64vecb_FSUB32x4, + ARM64vecb_FMUL32x4, + ARM64vecb_FDIV32x4, + ARM64vecb_UMAX32x4, + ARM64vecb_UMAX16x8, + ARM64vecb_UMAX8x16, + ARM64vecb_UMIN32x4, + ARM64vecb_UMIN16x8, + ARM64vecb_UMIN8x16, + ARM64vecb_SMAX32x4, + ARM64vecb_SMAX16x8, + ARM64vecb_SMAX8x16, + ARM64vecb_SMIN32x4, + ARM64vecb_SMIN16x8, + ARM64vecb_SMIN8x16, + ARM64vecb_AND, + ARM64vecb_ORR, + ARM64vecb_XOR, + ARM64vecb_CMEQ64x2, + ARM64vecb_CMEQ32x4, + ARM64vecb_CMEQ16x8, + ARM64vecb_CMEQ8x16, + ARM64vecb_CMHI64x2, /* >u */ + ARM64vecb_CMHI32x4, + ARM64vecb_CMHI16x8, + ARM64vecb_CMHI8x16, + ARM64vecb_CMGT64x2, /* >s */ + ARM64vecb_CMGT32x4, + ARM64vecb_CMGT16x8, + ARM64vecb_CMGT8x16, + ARM64vecb_FCMEQ64x2, + ARM64vecb_FCMEQ32x4, + ARM64vecb_FCMGE64x2, + ARM64vecb_FCMGE32x4, + ARM64vecb_FCMGT64x2, + ARM64vecb_FCMGT32x4, + ARM64vecb_TBL1, + ARM64vecb_INVALID + } + ARM64VecBinOp; + +typedef + enum { + ARM64vecu_FNEG64x2=300, + ARM64vecu_FNEG32x4, + ARM64vecu_FABS64x2, + ARM64vecu_FABS32x4, + ARM64vecu_NOT, + ARM64vecu_INVALID + } + ARM64VecUnaryOp; + +typedef + enum { + ARM64vecsh_USHR64x2=350, + ARM64vecsh_USHR32x4, + ARM64vecsh_USHR16x8, + ARM64vecsh_USHR8x16, + ARM64vecsh_SSHR64x2, + ARM64vecsh_SSHR32x4, + ARM64vecsh_SSHR16x8, + ARM64vecsh_SSHR8x16, + ARM64vecsh_SHL64x2, + ARM64vecsh_SHL32x4, + ARM64vecsh_SHL16x8, + ARM64vecsh_SHL8x16, + ARM64vecsh_INVALID + } + ARM64VecShiftOp; + +//ZZ extern const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ); +//ZZ +//ZZ typedef +//ZZ enum { +//ZZ ARMneon_VAND=90, +//ZZ ARMneon_VORR, +//ZZ ARMneon_VXOR, +//ZZ ARMneon_VADD, +//ZZ ARMneon_VADDFP, +//ZZ ARMneon_VRHADDS, +//ZZ ARMneon_VRHADDU, +//ZZ ARMneon_VPADDFP, +//ZZ ARMneon_VABDFP, +//ZZ ARMneon_VSUB, +//ZZ ARMneon_VSUBFP, +//ZZ ARMneon_VMAXU, +//ZZ ARMneon_VMAXS, +//ZZ ARMneon_VMAXF, +//ZZ ARMneon_VMINU, +//ZZ ARMneon_VMINS, +//ZZ ARMneon_VMINF, +//ZZ ARMneon_VQADDU, +//ZZ ARMneon_VQADDS, +//ZZ ARMneon_VQSUBU, +//ZZ ARMneon_VQSUBS, +//ZZ ARMneon_VCGTU, +//ZZ ARMneon_VCGTS, +//ZZ ARMneon_VCGEU, +//ZZ ARMneon_VCGES, +//ZZ ARMneon_VCGTF, +//ZZ ARMneon_VCGEF, +//ZZ ARMneon_VCEQ, +//ZZ ARMneon_VCEQF, +//ZZ ARMneon_VEXT, +//ZZ ARMneon_VMUL, +//ZZ ARMneon_VMULFP, +//ZZ ARMneon_VMULLU, +//ZZ ARMneon_VMULLS, +//ZZ ARMneon_VMULP, +//ZZ ARMneon_VMULLP, +//ZZ ARMneon_VQDMULH, +//ZZ ARMneon_VQRDMULH, +//ZZ ARMneon_VPADD, +//ZZ ARMneon_VPMINU, +//ZZ ARMneon_VPMINS, +//ZZ ARMneon_VPMINF, +//ZZ ARMneon_VPMAXU, +//ZZ ARMneon_VPMAXS, +//ZZ ARMneon_VPMAXF, +//ZZ ARMneon_VTBL, +//ZZ ARMneon_VQDMULL, +//ZZ ARMneon_VRECPS, +//ZZ ARMneon_VRSQRTS, +//ZZ /* ... 
*/ +//ZZ } +//ZZ ARMNeonBinOp; +//ZZ +//ZZ typedef +//ZZ enum { +//ZZ ARMneon_VSHL=150, +//ZZ ARMneon_VSAL, /* Yah, not SAR but SAL */ +//ZZ ARMneon_VQSHL, +//ZZ ARMneon_VQSAL +//ZZ } +//ZZ ARMNeonShiftOp; +//ZZ +//ZZ typedef +//ZZ enum { +//ZZ ARMneon_COPY=160, +//ZZ ARMneon_COPYLU, +//ZZ ARMneon_COPYLS, +//ZZ ARMneon_COPYN, +//ZZ ARMneon_COPYQNSS, +//ZZ ARMneon_COPYQNUS, +//ZZ ARMneon_COPYQNUU, +//ZZ ARMneon_NOT, +//ZZ ARMneon_EQZ, +//ZZ ARMneon_DUP, +//ZZ ARMneon_PADDLS, +//ZZ ARMneon_PADDLU, +//ZZ ARMneon_CNT, +//ZZ ARMneon_CLZ, +//ZZ ARMneon_CLS, +//ZZ ARMneon_VCVTxFPxINT, +//ZZ ARMneon_VQSHLNSS, +//ZZ ARMneon_VQSHLNUU, +//ZZ ARMneon_VQSHLNUS, +//ZZ ARMneon_VCVTFtoU, +//ZZ ARMneon_VCVTFtoS, +//ZZ ARMneon_VCVTUtoF, +//ZZ ARMneon_VCVTStoF, +//ZZ ARMneon_VCVTFtoFixedU, +//ZZ ARMneon_VCVTFtoFixedS, +//ZZ ARMneon_VCVTFixedUtoF, +//ZZ ARMneon_VCVTFixedStoF, +//ZZ ARMneon_VCVTF16toF32, +//ZZ ARMneon_VCVTF32toF16, +//ZZ ARMneon_REV16, +//ZZ ARMneon_REV32, +//ZZ ARMneon_REV64, +//ZZ ARMneon_ABS, +//ZZ ARMneon_VNEGF, +//ZZ ARMneon_VRECIP, +//ZZ ARMneon_VRECIPF, +//ZZ ARMneon_VABSFP, +//ZZ ARMneon_VRSQRTEFP, +//ZZ ARMneon_VRSQRTE +//ZZ /* ... */ +//ZZ } +//ZZ ARMNeonUnOp; +//ZZ +//ZZ typedef +//ZZ enum { +//ZZ ARMneon_SETELEM=200, +//ZZ ARMneon_GETELEMU, +//ZZ ARMneon_GETELEMS, +//ZZ ARMneon_VDUP, +//ZZ } +//ZZ ARMNeonUnOpS; +//ZZ +//ZZ typedef +//ZZ enum { +//ZZ ARMneon_TRN=210, +//ZZ ARMneon_ZIP, +//ZZ ARMneon_UZP +//ZZ /* ... */ +//ZZ } +//ZZ ARMNeonDualOp; +//ZZ +//ZZ extern const HChar* showARMNeonBinOp ( ARMNeonBinOp op ); +//ZZ extern const HChar* showARMNeonUnOp ( ARMNeonUnOp op ); +//ZZ extern const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ); +//ZZ extern const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ); +//ZZ extern const HChar* showARMNeonDualOp ( ARMNeonDualOp op ); +//ZZ extern const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ); +//ZZ extern const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ); +//ZZ extern const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ); +//ZZ extern const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ); +//ZZ extern const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ); + +typedef + enum { + /* baseline */ + ARM64in_Arith=1220, + ARM64in_Cmp, + ARM64in_Logic, + ARM64in_Test, + ARM64in_Shift, + ARM64in_Unary, + ARM64in_MovI, /* int reg-reg move */ + ARM64in_Imm64, + ARM64in_LdSt64, + ARM64in_LdSt32, /* w/ ZX loads */ + ARM64in_LdSt16, /* w/ ZX loads */ + ARM64in_LdSt8, /* w/ ZX loads */ + ARM64in_XDirect, /* direct transfer to GA */ + ARM64in_XIndir, /* indirect transfer to GA */ + ARM64in_XAssisted, /* assisted transfer to GA */ + ARM64in_CSel, + ARM64in_Call, + ARM64in_AddToSP, /* move SP by small, signed constant */ + ARM64in_FromSP, /* move SP to integer register */ + ARM64in_Mul, + ARM64in_LdrEX, + ARM64in_StrEX, + ARM64in_MFence, +//ZZ ARMin_CLREX, + /* ARM64in_V*: scalar ops involving vector registers */ + ARM64in_VLdStS, /* 32-bit FP load/store, with imm offset */ + ARM64in_VLdStD, /* 64-bit FP load/store, with imm offset */ + ARM64in_VLdStQ, + ARM64in_VCvtI2F, + ARM64in_VCvtF2I, + ARM64in_VCvtSD, + ARM64in_VUnaryD, + ARM64in_VUnaryS, + ARM64in_VBinD, + ARM64in_VBinS, + ARM64in_VCmpD, + ARM64in_VCmpS, + ARM64in_FPCR, + /* ARM64in_V*V: vector ops on vector registers */ + ARM64in_VBinV, + ARM64in_VUnaryV, + ARM64in_VNarrowV, + ARM64in_VShiftImmV, +//ZZ ARMin_VAluS, +//ZZ ARMin_VCMovD, +//ZZ ARMin_VCMovS, +//ZZ ARMin_VXferD, +//ZZ ARMin_VXferS, +//ZZ ARMin_VCvtID, +//ZZ /* Neon */ +//ZZ ARMin_NLdStD, +//ZZ ARMin_NUnary, 
+//ZZ ARMin_NUnaryS, +//ZZ ARMin_NDual, +//ZZ ARMin_NBinary, +//ZZ ARMin_NBinaryS, +//ZZ ARMin_NShift, +//ZZ ARMin_NShl64, // special case 64-bit shift of Dreg by immediate + ARM64in_VImmQ, + ARM64in_VDfromX, /* Move an Xreg to a Dreg */ + ARM64in_VQfromXX, /* Move 2 Xregs to a Qreg */ + ARM64in_VXfromQ, /* Move half a Qreg to an Xreg */ + ARM64in_VMov, /* vector reg-reg move, 16, 8 or 4 bytes */ + /* infrastructure */ + ARM64in_EvCheck, /* Event check */ +//ZZ ARMin_ProfInc /* 64-bit profile counter increment */ + } + ARM64InstrTag; + +/* Destinations are on the LEFT (first operand) */ + +typedef + struct { + ARM64InstrTag tag; + union { + /* --- INTEGER INSTRUCTIONS --- */ + /* 64 bit ADD/SUB reg, reg or uimm12<<{0,12} */ + struct { + HReg dst; + HReg argL; + ARM64RIA* argR; + Bool isAdd; + } Arith; + /* 64 or 32 bit CMP reg, reg or aimm (SUB and set flags) */ + struct { + HReg argL; + ARM64RIA* argR; + Bool is64; + } Cmp; + /* 64 bit AND/OR/XOR reg, reg or bitfield-immediate */ + struct { + HReg dst; + HReg argL; + ARM64RIL* argR; + ARM64LogicOp op; + } Logic; + /* 64 bit TST reg, reg or bimm (AND and set flags) */ + struct { + HReg argL; + ARM64RIL* argR; + } Test; + /* 64 bit SHL/SHR/SAR, 2nd arg is reg or imm */ + struct { + HReg dst; + HReg argL; + ARM64RI6* argR; + ARM64ShiftOp op; + } Shift; + /* NOT/NEG/CLZ, 64 bit only */ + struct { + HReg dst; + HReg src; + ARM64UnaryOp op; + } Unary; + /* MOV dst, src -- reg-reg move for integer registers */ + struct { + HReg dst; + HReg src; + } MovI; + /* Pseudo-insn; make a 64-bit immediate */ + struct { + HReg dst; + ULong imm64; + } Imm64; + /* 64-bit load or store */ + struct { + Bool isLoad; + HReg rD; + ARM64AMode* amode; + } LdSt64; + /* zx-32-to-64-bit load, or 32-bit store */ + struct { + Bool isLoad; + HReg rD; + ARM64AMode* amode; + } LdSt32; + /* zx-16-to-64-bit load, or 16-bit store */ + struct { + Bool isLoad; + HReg rD; + ARM64AMode* amode; + } LdSt16; + /* zx-8-to-64-bit load, or 8-bit store */ + struct { + Bool isLoad; + HReg rD; + ARM64AMode* amode; + } LdSt8; + /* Update the guest PC value, then exit requesting to chain + to it. May be conditional. Urr, use of Addr64 implicitly + assumes that wordsize(guest) == wordsize(host). */ + struct { + Addr64 dstGA; /* next guest address */ + ARM64AMode* amPC; /* amode in guest state for PC */ + ARM64CondCode cond; /* can be ARM64cc_AL */ + Bool toFastEP; /* chain to the slow or fast point? */ + } XDirect; + /* Boring transfer to a guest address not known at JIT time. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + ARM64AMode* amPC; + ARM64CondCode cond; /* can be ARM64cc_AL */ + } XIndir; + /* Assisted transfer to a guest address, most general case. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + ARM64AMode* amPC; + ARM64CondCode cond; /* can be ARM64cc_AL */ + IRJumpKind jk; + } XAssisted; + /* CSEL: dst = if cond then argL else argR. cond may be anything. */ + struct { + HReg dst; + HReg argL; + HReg argR; + ARM64CondCode cond; + } CSel; + /* Pseudo-insn. Call target (an absolute address), on given + condition (which could be ARM64cc_AL). */ + struct { + RetLoc rloc; /* where the return value will be */ + HWord target; + ARM64CondCode cond; + Int nArgRegs; /* # regs carrying args: 0 .. 8 */ + } Call; + /* move SP by small, signed constant */ + struct { + Int simm; /* needs to be 0 % 16 and in the range -4095 + .. 
4095 inclusive */ + } AddToSP; + /* move SP to integer register */ + struct { + HReg dst; + } FromSP; + /* Integer multiply, with 3 variants: + (PLAIN) lo64(64 * 64) + (ZX) hi64(64 *u 64) + (SX) hi64(64 *s 64) + */ + struct { + HReg dst; + HReg argL; + HReg argR; + ARM64MulOp op; + } Mul; + /* LDXR{,H,B} x2, [x4] */ + struct { + Int szB; /* 1, 2, 4 or 8 */ + } LdrEX; + /* STXR{,H,B} w0, x2, [x4] */ + struct { + Int szB; /* 1, 2, 4 or 8 */ + } StrEX; + /* Mem fence. An insn which fences all loads and stores as + much as possible before continuing. On ARM64 we emit the + sequence "dsb sy ; dmb sy ; isb sy", which is probably + total nuclear overkill, but better safe than sorry. */ + struct { + } MFence; +//ZZ /* A CLREX instruction. */ +//ZZ struct { +//ZZ } CLREX; + /* --- INSTRUCTIONS INVOLVING VECTOR REGISTERS --- */ + /* 32-bit Fp load/store */ + struct { + Bool isLoad; + HReg sD; + HReg rN; + UInt uimm12; /* 0 .. 16380 inclusive, 0 % 4 */ + } VLdStS; + /* 64-bit Fp load/store */ + struct { + Bool isLoad; + HReg dD; + HReg rN; + UInt uimm12; /* 0 .. 32760 inclusive, 0 % 8 */ + } VLdStD; + /* 128-bit Vector load/store. */ + struct { + Bool isLoad; + HReg rQ; // data + HReg rN; // address + } VLdStQ; + /* Scalar conversion of int to float. */ + struct { + ARM64CvtOp how; + HReg rD; // dst, a D or S register + HReg rS; // src, a W or X register + } VCvtI2F; + /* Scalar conversion of float to int, w/ specified RM. */ + struct { + ARM64CvtOp how; + HReg rD; // dst, a W or X register + HReg rS; // src, a D or S register + UChar armRM; // ARM encoded RM: + // 00=nearest, 01=+inf, 10=-inf, 11=zero + } VCvtF2I; + /* Convert between 32-bit and 64-bit FP values (both + ways). (FCVT) */ + struct { + Bool sToD; /* True: F32->F64. False: F64->F32 */ + HReg dst; + HReg src; + } VCvtSD; + /* 64-bit FP unary */ + struct { + ARM64FpUnaryOp op; + HReg dst; + HReg src; + } VUnaryD; + /* 32-bit FP unary */ + struct { + ARM64FpUnaryOp op; + HReg dst; + HReg src; + } VUnaryS; + /* 64-bit FP binary arithmetic */ + struct { + ARM64FpBinOp op; + HReg dst; + HReg argL; + HReg argR; + } VBinD; + /* 32-bit FP binary arithmetic */ + struct { + ARM64FpBinOp op; + HReg dst; + HReg argL; + HReg argR; + } VBinS; + /* 64-bit FP compare */ + struct { + HReg argL; + HReg argR; + } VCmpD; + /* 32-bit FP compare */ + struct { + HReg argL; + HReg argR; + } VCmpS; + /* Move a 32-bit value to/from the FPCR */ + struct { + Bool toFPCR; + HReg iReg; + } FPCR; + /* binary vector operation on vector registers */ + struct { + ARM64VecBinOp op; + HReg dst; + HReg argL; + HReg argR; + } VBinV; + /* unary vector operation on vector registers */ + struct { + ARM64VecUnaryOp op; + HReg dst; + HReg arg; + } VUnaryV; + /* vector narrowing, Q -> Q. Result goes in the bottom half + of dst and the top half is zeroed out. Iow is XTN. */ + struct { + UInt dszBlg2; // 0: 16to8_x8 1: 32to16_x4 2: 64to32_x2 + HReg dst; // Q reg + HReg src; // Q reg + } VNarrowV; + /* Vector shift by immediate. |amt| needs to be > 0 and < + implied lane size of |op|. Zero shifts and out of range + shifts are not allowed. */ + struct { + ARM64VecShiftOp op; + HReg dst; + HReg src; + UInt amt; + } VShiftImmV; +//ZZ /* 32-bit FP binary arithmetic */ +//ZZ struct { +//ZZ ARMVfpOp op; +//ZZ HReg dst; +//ZZ HReg argL; +//ZZ HReg argR; +//ZZ } VAluS; +//ZZ /* 64-bit FP mov src to dst on the given condition, which may +//ZZ not be ARMcc_AL. 
*/ +//ZZ struct { +//ZZ ARMCondCode cond; +//ZZ HReg dst; +//ZZ HReg src; +//ZZ } VCMovD; +//ZZ /* 32-bit FP mov src to dst on the given condition, which may +//ZZ not be ARMcc_AL. */ +//ZZ struct { +//ZZ ARMCondCode cond; +//ZZ HReg dst; +//ZZ HReg src; +//ZZ } VCMovS; +//ZZ /* Transfer a VFP D reg to/from two integer registers (VMOV) */ +//ZZ struct { +//ZZ Bool toD; +//ZZ HReg dD; +//ZZ HReg rHi; +//ZZ HReg rLo; +//ZZ } VXferD; +//ZZ /* Transfer a VFP S reg to/from an integer register (VMOV) */ +//ZZ struct { +//ZZ Bool toS; +//ZZ HReg fD; +//ZZ HReg rLo; +//ZZ } VXferS; +//ZZ /* Convert between 32-bit ints and 64-bit FP values (both ways +//ZZ and both signednesses). (FSITOD, FUITOD, FTOSID, FTOUID) */ +//ZZ struct { +//ZZ Bool iToD; /* True: I32->F64. False: F64->I32 */ +//ZZ Bool syned; /* True: I32 is signed. False: I32 is unsigned */ +//ZZ HReg dst; +//ZZ HReg src; +//ZZ } VCvtID; +//ZZ /* Neon data processing instruction: 3 registers of the same +//ZZ length */ +//ZZ struct { +//ZZ ARMNeonBinOp op; +//ZZ HReg dst; +//ZZ HReg argL; +//ZZ HReg argR; +//ZZ UInt size; +//ZZ Bool Q; +//ZZ } NBinary; +//ZZ struct { +//ZZ ARMNeonBinOp op; +//ZZ ARMNRS* dst; +//ZZ ARMNRS* argL; +//ZZ ARMNRS* argR; +//ZZ UInt size; +//ZZ Bool Q; +//ZZ } NBinaryS; +//ZZ struct { +//ZZ ARMNeonShiftOp op; +//ZZ HReg dst; +//ZZ HReg argL; +//ZZ HReg argR; +//ZZ UInt size; +//ZZ Bool Q; +//ZZ } NShift; +//ZZ struct { +//ZZ HReg dst; +//ZZ HReg src; +//ZZ UInt amt; /* 1..63 only */ +//ZZ } NShl64; +//ZZ struct { +//ZZ Bool isLoad; +//ZZ HReg dD; +//ZZ ARMAModeN *amode; +//ZZ } NLdStD +//ZZ struct { +//ZZ ARMNeonUnOpS op; +//ZZ ARMNRS* dst; +//ZZ ARMNRS* src; +//ZZ UInt size; +//ZZ Bool Q; +//ZZ } NUnaryS; +//ZZ struct { +//ZZ ARMNeonUnOp op; +//ZZ HReg dst; +//ZZ HReg src; +//ZZ UInt size; +//ZZ Bool Q; +//ZZ } NUnary; +//ZZ /* Takes two arguments and modifies them both. */ +//ZZ struct { +//ZZ ARMNeonDualOp op; +//ZZ HReg arg1; +//ZZ HReg arg2; +//ZZ UInt size; +//ZZ Bool Q; +//ZZ } NDual; + struct { + HReg rQ; + UShort imm; /* Same 1-bit-per-byte encoding as IR */ + } VImmQ; + struct { + HReg rD; + HReg rX; + } VDfromX; + struct { + HReg rQ; + HReg rXhi; + HReg rXlo; + } VQfromXX; + struct { + HReg rX; + HReg rQ; + UInt laneNo; /* either 0 or 1 */ + } VXfromQ; + /* MOV dst, src -- reg-reg move for vector registers */ + struct { + UInt szB; // 16=mov qD,qS; 8=mov dD,dS; 4=mov sD,sS + HReg dst; + HReg src; + } VMov; + struct { + ARM64AMode* amCounter; + ARM64AMode* amFailAddr; + } EvCheck; +//ZZ struct { +//ZZ /* No fields. The address of the counter to inc is +//ZZ installed later, post-translation, by patching it in, +//ZZ as it is not known at translation time. 
*/ +//ZZ } ProfInc; + } ARM64in; + } + ARM64Instr; + +//ZZ +extern ARM64Instr* ARM64Instr_Arith ( HReg, HReg, ARM64RIA*, Bool isAdd ); +extern ARM64Instr* ARM64Instr_Cmp ( HReg, ARM64RIA*, Bool is64 ); +extern ARM64Instr* ARM64Instr_Logic ( HReg, HReg, ARM64RIL*, ARM64LogicOp ); +extern ARM64Instr* ARM64Instr_Test ( HReg, ARM64RIL* ); +extern ARM64Instr* ARM64Instr_Shift ( HReg, HReg, ARM64RI6*, ARM64ShiftOp ); +extern ARM64Instr* ARM64Instr_Unary ( HReg, HReg, ARM64UnaryOp ); +//ZZ extern ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg, ARMRI84* ); +extern ARM64Instr* ARM64Instr_MovI ( HReg, HReg ); +extern ARM64Instr* ARM64Instr_Imm64 ( HReg, ULong ); +extern ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg, ARM64AMode* ); +extern ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg, ARM64AMode* ); +extern ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg, ARM64AMode* ); +extern ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg, ARM64AMode* ); +//ZZ extern ARMInstr* ARMInstr_Ld8S ( ARMCondCode, HReg, ARMAMode2* ); +extern ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC, + ARM64CondCode cond, Bool toFastEP ); +extern ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC, + ARM64CondCode cond ); +extern ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC, + ARM64CondCode cond, IRJumpKind jk ); +extern ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR, + ARM64CondCode cond ); +extern ARM64Instr* ARM64Instr_Call ( ARM64CondCode, HWord, Int nArgRegs, + RetLoc rloc ); +extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ); +extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ); +extern ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR, + ARM64MulOp op ); +extern ARM64Instr* ARM64Instr_LdrEX ( Int szB ); +extern ARM64Instr* ARM64Instr_StrEX ( Int szB ); +extern ARM64Instr* ARM64Instr_MFence ( void ); +//ZZ extern ARMInstr* ARMInstr_CLREX ( void ); +extern ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, + UInt uimm12 /* 0 .. 16380, 0 % 4 */ ); +extern ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, + UInt uimm12 /* 0 .. 
32760, 0 % 8 */ ); +extern ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ); +extern ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ); +extern ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS, + UChar armRM ); +extern ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ); +extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ); +extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ); +extern ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg, HReg, HReg ); +extern ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, HReg, HReg, HReg ); +extern ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ); +extern ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ); +extern ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ); +extern ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op, HReg, HReg, HReg ); +extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg ); +extern ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src ); +extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op, + HReg dst, HReg src, UInt amt ); +//ZZ extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg ); +//ZZ extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src ); +//ZZ extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src ); +//ZZ extern ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ); +//ZZ extern ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ); +//ZZ extern ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned, +//ZZ HReg dst, HReg src ); +//ZZ extern ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg, ARMAModeN* ); +//ZZ extern ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp, HReg, HReg, UInt, Bool ); +//ZZ extern ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS, ARMNRS*, ARMNRS*, +//ZZ UInt, Bool ); +//ZZ extern ARMInstr* ARMInstr_NDual ( ARMNeonDualOp, HReg, HReg, UInt, Bool ); +//ZZ extern ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp, HReg, HReg, HReg, +//ZZ UInt, Bool ); +//ZZ extern ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp, HReg, HReg, HReg, +//ZZ UInt, Bool ); +//ZZ extern ARMInstr* ARMInstr_NShl64 ( HReg, HReg, UInt ); +extern ARM64Instr* ARM64Instr_VImmQ ( HReg, UShort ); +extern ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ); +extern ARM64Instr* ARM64Instr_VQfromXX( HReg rQ, HReg rXhi, HReg rXlo ); +extern ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ); +extern ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ); + +extern ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter, + ARM64AMode* amFailAddr ); +//ZZ extern ARMInstr* ARMInstr_ProfInc ( void ); + +extern void ppARM64Instr ( ARM64Instr* ); + + +/* Some functions that insulate the register allocator from details + of the underlying instruction set. 
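As one concrete illustration of that insulation, the move-coalescing query typically just pattern-matches the plain reg-reg copy instruction and reports its operands. The following is a minimal sketch, assuming only ARM64in_MovI is reported; the _sketch suffix marks it as hypothetical, since the real isMove_ARM64Instr in this patch may also handle other cases (for example vector moves).

   static Bool isMove_ARM64Instr_sketch ( ARM64Instr* i, HReg* src, HReg* dst )
   {
      // Only integer reg-reg copies are reported as coalescable moves;
      // anything else is treated as a real instruction.
      if (i->tag == ARM64in_MovI) {
         *src = i->ARM64in.MovI.src;
         *dst = i->ARM64in.MovI.dst;
         return True;
      }
      return False;
   }
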
*/ +extern void getRegUsage_ARM64Instr ( HRegUsage*, ARM64Instr*, Bool ); +extern void mapRegs_ARM64Instr ( HRegRemap*, ARM64Instr*, Bool ); +extern Bool isMove_ARM64Instr ( ARM64Instr*, HReg*, HReg* ); +extern Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, ARM64Instr* i, + Bool mode64, + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ); + +extern void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, + HReg rreg, Int offset, Bool ); +extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, + HReg rreg, Int offset, Bool ); + +extern void getAllocableRegs_ARM64 ( Int*, HReg** ); +extern HInstrArray* iselSB_ARM64 ( IRSB*, + VexArch, + VexArchInfo*, + VexAbiInfo*, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ); + +/* How big is an event check? This is kind of a kludge because it + depends on the offsets of host_EvC_FAILADDR and + host_EvC_COUNTER. */ +extern Int evCheckSzB_ARM64 ( void ); + +/* Perform a chaining and unchaining of an XDirect jump. */ +extern VexInvalRange chainXDirect_ARM64 ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ); + +extern VexInvalRange unchainXDirect_ARM64 ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ); + +//ZZ /* Patch the counter location into an existing ProfInc point. */ +//ZZ extern VexInvalRange patchProfInc_ARM ( void* place_to_patch, +//ZZ ULong* location_of_counter ); + + +#endif /* ndef __VEX_HOST_ARM64_DEFS_H */ + +/*---------------------------------------------------------------*/ +/*--- end host_arm64_defs.h ---*/ +/*---------------------------------------------------------------*/ Index: priv/host_arm64_isel.c =================================================================== --- priv/host_arm64_isel.c (.../tags/VEX_3_9_0) (revision 0) +++ priv/host_arm64_isel.c (.../trunk) (revision 2863) @@ -0,0 +1,7058 @@ + +/*---------------------------------------------------------------*/ +/*--- begin host_arm64_isel.c ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2013-2013 OpenWorks + info@open-works.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#include "libvex_basictypes.h" +#include "libvex_ir.h" +#include "libvex.h" +#include "ir_match.h" + +#include "main_util.h" +#include "main_globals.h" +#include "host_generic_regs.h" +#include "host_generic_simd64.h" // for 32-bit SIMD helpers +#include "host_arm64_defs.h" + + +//ZZ /*---------------------------------------------------------*/ +//ZZ /*--- ARMvfp control word stuff ---*/ +//ZZ /*---------------------------------------------------------*/ +//ZZ +//ZZ /* Vex-generated code expects to run with the FPU set as follows: all +//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV +//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough, +//ZZ this corresponds to a FPSCR value of zero. +//ZZ +//ZZ fpscr should therefore be zero on entry to Vex-generated code, and +//ZZ should be unchanged at exit. (Or at least the bottom 28 bits +//ZZ should be zero). +//ZZ */ +//ZZ +//ZZ #define DEFAULT_FPSCR 0 + + +/*---------------------------------------------------------*/ +/*--- ISelEnv ---*/ +/*---------------------------------------------------------*/ + +/* This carries around: + + - A mapping from IRTemp to IRType, giving the type of any IRTemp we + might encounter. This is computed before insn selection starts, + and does not change. + + - A mapping from IRTemp to HReg. This tells the insn selector + which virtual register is associated with each IRTemp temporary. + This is computed before insn selection starts, and does not + change. We expect this mapping to map precisely the same set of + IRTemps as the type mapping does. + + |vregmap| holds the primary register for the IRTemp. + |vregmapHI| is only used for 128-bit integer-typed + IRTemps. It holds the identity of a second + 64-bit virtual HReg, which holds the high half + of the value. + + - The code array, that is, the insns selected so far. + + - A counter, for generating new virtual registers. + + - The host hardware capabilities word. This is set at the start + and does not change. + + - A Bool for indicating whether we may generate chain-me + instructions for control flow transfers, or whether we must use + XAssisted. + + - The maximum guest address of any guest insn in this block. + Actually, the address of the highest-addressed byte from any insn + in this block. Is set at the start and does not change. This is + used for detecting jumps which are definitely forward-edges from + this block, and therefore can be made (chained) to the fast entry + point of the destination, thereby avoiding the destination's + event check. + + - An IRExpr*, which may be NULL, holding the IR expression (an + IRRoundingMode-encoded value) to which the FPU's rounding mode + was most recently set. Setting to NULL is always safe. Used to + avoid redundant settings of the FPU's rounding mode, as + described in set_FPCR_rounding_mode below. + + Note, this is all (well, mostly) host-independent. +*/ + +typedef + struct { + /* Constant -- are set at the start and do not change. */ + IRTypeEnv* type_env; + + HReg* vregmap; + HReg* vregmapHI; + Int n_vregmap; + + UInt hwcaps; + + Bool chainingAllowed; + Addr64 max_ga; + + /* These are modified as we go along. 
*/ + HInstrArray* code; + Int vreg_ctr; + + IRExpr* previous_rm; + } + ISelEnv; + +static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) +{ + vassert(tmp >= 0); + vassert(tmp < env->n_vregmap); + return env->vregmap[tmp]; +} + +static void addInstr ( ISelEnv* env, ARM64Instr* instr ) +{ + addHInstr(env->code, instr); + if (vex_traceflags & VEX_TRACE_VCODE) { + ppARM64Instr(instr); + vex_printf("\n"); + } +} + +static HReg newVRegI ( ISelEnv* env ) +{ + HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/); + env->vreg_ctr++; + return reg; +} + +static HReg newVRegD ( ISelEnv* env ) +{ + HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/); + env->vreg_ctr++; + return reg; +} + +//ZZ static HReg newVRegF ( ISelEnv* env ) +//ZZ { +//ZZ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/); +//ZZ env->vreg_ctr++; +//ZZ return reg; +//ZZ } + +static HReg newVRegV ( ISelEnv* env ) +{ + HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/); + env->vreg_ctr++; + return reg; +} + +//ZZ /* These are duplicated in guest_arm_toIR.c */ +//ZZ static IRExpr* unop ( IROp op, IRExpr* a ) +//ZZ { +//ZZ return IRExpr_Unop(op, a); +//ZZ } +//ZZ +//ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) +//ZZ { +//ZZ return IRExpr_Binop(op, a1, a2); +//ZZ } +//ZZ +//ZZ static IRExpr* bind ( Int binder ) +//ZZ { +//ZZ return IRExpr_Binder(binder); +//ZZ } + + +/*---------------------------------------------------------*/ +/*--- ISEL: Forward declarations ---*/ +/*---------------------------------------------------------*/ + +/* These are organised as iselXXX and iselXXX_wrk pairs. The + iselXXX_wrk do the real work, but are not to be called directly. + For each XXX, iselXXX calls its iselXXX_wrk counterpart, then + checks that all returned registers are virtual. You should not + call the _wrk version directly. + + Because some forms of ARM64 memory amodes are implicitly scaled by + the access size, iselIntExpr_AMode takes an IRType which tells it + the type of the access for which the amode is to be used. This + type needs to be correct, else you'll get incorrect code. 
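To make the wrapper convention above concrete, the public entry point for the 64-bit integer case is normally nothing more than a sanity-checking shim around its _wrk partner. This is a minimal sketch (named with a _sketch suffix, since the real iselIntExpr_R may carry further checks), assuming the standard VEX helpers hregClass and hregIsVirtual:

   static HReg iselIntExpr_R_sketch ( ISelEnv* env, IRExpr* e )
   {
      HReg r = iselIntExpr_R_wrk(env, e);
      // Sanity-check what the _wrk routine handed back: it must be a
      // 64-bit integer class register, and it must be virtual.
      vassert(hregClass(r) == HRcInt64);
      vassert(hregIsVirtual(r));
      return r;
   }
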
+*/ +static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, + IRExpr* e, IRType dty ); +static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, + IRExpr* e, IRType dty ); + +static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e ); +static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e ); + +static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e ); +static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e ); + +static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e ); +static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e ); + +static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ); +static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ); + +static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); +static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); + +static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, + ISelEnv* env, IRExpr* e ); +static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo, + ISelEnv* env, IRExpr* e ); + + +//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, +//ZZ ISelEnv* env, IRExpr* e ); +//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, +//ZZ ISelEnv* env, IRExpr* e ); + +static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); +static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ); + +static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); +static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); + +//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e ); +//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e ); + +static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ); +static HReg iselV128Expr ( ISelEnv* env, IRExpr* e ); + +static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 ); + + +/*---------------------------------------------------------*/ +/*--- ISEL: Misc helpers ---*/ +/*---------------------------------------------------------*/ + +/* Generate an amode suitable for a 64-bit sized access relative to + the baseblock register (X21). This generates an RI12 amode, which + means its scaled by the access size, which is why the access size + -- 64 bit -- is stated explicitly here. Consequently |off| needs + to be divisible by 8. */ +static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off ) +{ + vassert(off < (8 << 12)); /* otherwise it's unrepresentable */ + vassert((off & 7) == 0); /* ditto */ + return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/); +} + +/* Ditto, for 32 bit accesses. */ +static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off ) +{ + vassert(off < (4 << 12)); /* otherwise it's unrepresentable */ + vassert((off & 3) == 0); /* ditto */ + return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/); +} + +/* Ditto, for 16 bit accesses. */ +static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off ) +{ + vassert(off < (2 << 12)); /* otherwise it's unrepresentable */ + vassert((off & 1) == 0); /* ditto */ + return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/); +} + +/* Ditto, for 8 bit accesses. 
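A small usage example of the scaling convention above; the offset 24 is made up purely for illustration and is not taken from the real guest state layout.

   // A 64-bit guest-state slot at byte offset 24 (hypothetical offset).
   // RI12 stores the scaled immediate, so uimm12 = 24 >> 3 = 3 with szB = 8;
   // the eventual LDR/STR encoding multiplies it back up: 3 * 8 = 24.
   ARM64AMode* am = mk_baseblock_64bit_access_amode(24);
   // ...which is equivalent to ARM64AMode_RI12(hregARM64_X21(), 3, 8).
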
*/ +static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off ) +{ + vassert(off < (1 << 12)); /* otherwise it's unrepresentable */ + return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/); +} + +static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off ) +{ + vassert(off < (1<<12)); + HReg r = newVRegI(env); + addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(), + ARM64RIA_I12(off,0), True/*isAdd*/)); + return r; +} + +static HReg get_baseblock_register ( void ) +{ + return hregARM64_X21(); +} + +/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in + a new register, and return the new register. */ +static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src ) +{ + HReg dst = newVRegI(env); + ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */ + addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND)); + return dst; +} + +/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in + a new register, and return the new register. */ +static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src ) +{ + HReg dst = newVRegI(env); + ARM64RI6* n48 = ARM64RI6_I6(48); + addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL)); + addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR)); + return dst; +} + +/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in + a new register, and return the new register. */ +static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src ) +{ + HReg dst = newVRegI(env); + ARM64RI6* n48 = ARM64RI6_I6(48); + addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL)); + addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR)); + return dst; +} + +/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in + a new register, and return the new register. */ +static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src ) +{ + HReg dst = newVRegI(env); + ARM64RI6* n32 = ARM64RI6_I6(32); + addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL)); + addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR)); + return dst; +} + +/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in + a new register, and return the new register. */ +static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src ) +{ + HReg dst = newVRegI(env); + ARM64RI6* n56 = ARM64RI6_I6(56); + addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); + addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR)); + return dst; +} + +static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src ) +{ + HReg dst = newVRegI(env); + ARM64RI6* n56 = ARM64RI6_I6(56); + addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); + addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR)); + return dst; +} + +/* Is this IRExpr_Const(IRConst_U64(0)) ? */ +static Bool isZeroU64 ( IRExpr* e ) { + if (e->tag != Iex_Const) return False; + IRConst* con = e->Iex.Const.con; + vassert(con->tag == Ico_U64); + return con->Ico.U64 == 0; +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: FP rounding mode helpers ---*/ +/*---------------------------------------------------------*/ + +/* Set the FP rounding mode: 'mode' is an I32-typed expression + denoting a value in the range 0 .. 3, indicating a round mode + encoded as per type IRRoundingMode -- the first four values only + (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the PPC + FSCR to have the same rounding. + + For speed & simplicity, we're setting the *entire* FPCR here. + + Setting the rounding mode is expensive. 
So this function tries to + avoid repeatedly setting the rounding mode to the same thing by + first comparing 'mode' to the 'mode' tree supplied in the previous + call to this function, if any. (The previous value is stored in + env->previous_rm.) If 'mode' is a single IR temporary 't' and + env->previous_rm is also just 't', then the setting is skipped. + + This is safe because of the SSA property of IR: an IR temporary can + only be defined once and so will have the same value regardless of + where it appears in the block. Cool stuff, SSA. + + A safety condition: all attempts to set the RM must be aware of + this mechanism - by being routed through the functions here. + + Of course this only helps if blocks where the RM is set more than + once and it is set to the same value each time, *and* that value is + held in the same IR temporary each time. In order to assure the + latter as much as possible, the IR optimiser takes care to do CSE + on any block with any sign of floating point activity. +*/ +static +void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode ) +{ + vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32); + + /* Do we need to do anything? */ + if (env->previous_rm + && env->previous_rm->tag == Iex_RdTmp + && mode->tag == Iex_RdTmp + && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) { + /* no - setting it to what it was before. */ + vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32); + return; + } + + /* No luck - we better set it, and remember what we set it to. */ + env->previous_rm = mode; + + /* Only supporting the rounding-mode bits - the rest of FPCR is set + to zero - so we can set the whole register at once (faster). */ + + /* This isn't simple, because 'mode' carries an IR rounding + encoding, and we need to translate that to an ARM64 FP one: + The IR encoding: + 00 to nearest (the default) + 10 to +infinity + 01 to -infinity + 11 to zero + The ARM64 FP encoding: + 00 to nearest + 01 to +infinity + 10 to -infinity + 11 to zero + Easy enough to do; just swap the two bits. + */ + HReg irrm = iselIntExpr_R(env, mode); + HReg tL = newVRegI(env); + HReg tR = newVRegI(env); + HReg t3 = newVRegI(env); + /* tL = irrm << 1; + tR = irrm >> 1; if we're lucky, these will issue together + tL &= 2; + tR &= 1; ditto + t3 = tL | tR; + t3 <<= 22; + fmxr fpscr, t3 + */ + ARM64RIL* ril_one = mb_mkARM64RIL_I(1); + ARM64RIL* ril_two = mb_mkARM64RIL_I(2); + vassert(ril_one && ril_two); + addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL)); + addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR)); + addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND)); + addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND)); + addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR)); + addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL)); + addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3)); +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Function call helpers ---*/ +/*---------------------------------------------------------*/ + +/* Used only in doHelperCall. See big comment in doHelperCall re + handling of register-parameter args. This function figures out + whether evaluation of an expression might require use of a fixed + register. If in doubt return True (safe but suboptimal). 
+*/ +static +Bool mightRequireFixedRegs ( IRExpr* e ) +{ + if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) { + // These are always "safe" -- either a copy of SP in some + // arbitrary vreg, or a copy of x21, respectively. + return False; + } + /* Else it's a "normal" expression. */ + switch (e->tag) { + case Iex_RdTmp: case Iex_Const: case Iex_Get: + return False; + default: + return True; + } +} + + +/* Do a complete function call. |guard| is a Ity_Bit expression + indicating whether or not the call happens. If guard==NULL, the + call is unconditional. |retloc| is set to indicate where the + return value is after the call. The caller (of this fn) must + generate code to add |stackAdjustAfterCall| to the stack pointer + after the call is done. Returns True iff it managed to handle this + combination of arg/return types, else returns False. */ + +static +Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall, + /*OUT*/RetLoc* retloc, + ISelEnv* env, + IRExpr* guard, + IRCallee* cee, IRType retTy, IRExpr** args ) +{ + ARM64CondCode cc; + HReg argregs[ARM64_N_ARGREGS]; + HReg tmpregs[ARM64_N_ARGREGS]; + Bool go_fast; + Int n_args, i, nextArgReg; + ULong target; + + vassert(ARM64_N_ARGREGS == 8); + + /* Set default returns. We'll update them later if needed. */ + *stackAdjustAfterCall = 0; + *retloc = mk_RetLoc_INVALID(); + + /* These are used for cross-checking that IR-level constraints on + the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */ + UInt nVECRETs = 0; + UInt nBBPTRs = 0; + + /* Marshal args for a call and do the call. + + This function only deals with a tiny set of possibilities, which + cover all helpers in practice. The restrictions are that only + arguments in registers are supported, hence only + ARM64_N_REGPARMS x 64 integer bits in total can be passed. In + fact the only supported arg type is I64. + + The return type can be I{64,32} or V128. In the V128 case, it + is expected that |args| will contain the special node + IRExpr_VECRET(), in which case this routine generates code to + allocate space on the stack for the vector return value. Since + we are not passing any scalars on the stack, it is enough to + preallocate the return space before marshalling any arguments, + in this case. + + |args| may also contain IRExpr_BBPTR(), in which case the + value in x21 is passed as the corresponding argument. + + Generating code which is both efficient and correct when + parameters are to be passed in registers is difficult, for the + reasons elaborated in detail in comments attached to + doHelperCall() in priv/host-x86/isel.c. Here, we use a variant + of the method described in those comments. + + The problem is split into two cases: the fast scheme and the + slow scheme. In the fast scheme, arguments are computed + directly into the target (real) registers. This is only safe + when we can be sure that computation of each argument will not + trash any real registers set by computation of any other + argument. + + In the slow scheme, all args are first computed into vregs, and + once they are all done, they are moved to the relevant real + regs. This always gives correct code, but it also gives a bunch + of vreg-to-rreg moves which are usually redundant but are hard + for the register allocator to get rid of. + + To decide which scheme to use, all argument expressions are + first examined. If they are all so simple that it is clear they + will be evaluated without use of any fixed registers, use the + fast scheme, else use the slow scheme. 
Note also that only + unconditional calls may use the fast scheme, since having to + compute a condition expression could itself trash real + registers. + + Note this requires being able to examine an expression and + determine whether or not evaluation of it might use a fixed + register. That requires knowledge of how the rest of this insn + selector works. Currently just the following 3 are regarded as + safe -- hopefully they cover the majority of arguments in + practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. + */ + + /* Note that the cee->regparms field is meaningless on ARM64 hosts + (since there is only one calling convention) and so we always + ignore it. */ + + n_args = 0; + for (i = 0; args[i]; i++) { + IRExpr* arg = args[i]; + if (UNLIKELY(arg->tag == Iex_VECRET)) { + nVECRETs++; + } else if (UNLIKELY(arg->tag == Iex_BBPTR)) { + nBBPTRs++; + } + n_args++; + } + + /* If this fails, the IR is ill-formed */ + vassert(nBBPTRs == 0 || nBBPTRs == 1); + + /* If we have a VECRET, allocate space on the stack for the return + value, and record the stack pointer after that. */ + HReg r_vecRetAddr = INVALID_HREG; + if (nVECRETs == 1) { + vassert(retTy == Ity_V128 || retTy == Ity_V256); + vassert(retTy != Ity_V256); // we don't handle that yet (if ever) + r_vecRetAddr = newVRegI(env); + addInstr(env, ARM64Instr_AddToSP(-16)); + addInstr(env, ARM64Instr_FromSP(r_vecRetAddr)); + } else { + // If either of these fail, the IR is ill-formed + vassert(retTy != Ity_V128 && retTy != Ity_V256); + vassert(nVECRETs == 0); + } + + argregs[0] = hregARM64_X0(); + argregs[1] = hregARM64_X1(); + argregs[2] = hregARM64_X2(); + argregs[3] = hregARM64_X3(); + argregs[4] = hregARM64_X4(); + argregs[5] = hregARM64_X5(); + argregs[6] = hregARM64_X6(); + argregs[7] = hregARM64_X7(); + + tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG; + tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG; + + /* First decide which scheme (slow or fast) is to be used. First + assume the fast scheme, and select slow if any contraindications + (wow) appear. */ + + go_fast = True; + + if (guard) { + if (guard->tag == Iex_Const + && guard->Iex.Const.con->tag == Ico_U1 + && guard->Iex.Const.con->Ico.U1 == True) { + /* unconditional */ + } else { + /* Not manifestly unconditional -- be conservative. */ + go_fast = False; + } + } + + if (go_fast) { + for (i = 0; i < n_args; i++) { + if (mightRequireFixedRegs(args[i])) { + go_fast = False; + break; + } + } + } + + if (go_fast) { + if (retTy == Ity_V128 || retTy == Ity_V256) + go_fast = False; + } + + /* At this point the scheme to use has been established. Generate + code to get the arg values into the argument rregs. If we run + out of arg regs, give up. */ + + if (go_fast) { + + /* FAST SCHEME */ + nextArgReg = 0; + + for (i = 0; i < n_args; i++) { + IRExpr* arg = args[i]; + + IRType aTy = Ity_INVALID; + if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) + aTy = typeOfIRExpr(env->type_env, args[i]); + + if (nextArgReg >= ARM64_N_ARGREGS) + return False; /* out of argregs */ + + if (aTy == Ity_I64) { + addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], + iselIntExpr_R(env, args[i]) )); + nextArgReg++; + } + else if (arg->tag == Iex_BBPTR) { + vassert(0); //ATC + addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], + hregARM64_X21() )); + nextArgReg++; + } + else if (arg->tag == Iex_VECRET) { + // because of the go_fast logic above, we can't get here, + // since vector return values makes us use the slow path + // instead. 
+ vassert(0); + } + else + return False; /* unhandled arg type */ + } + + /* Fast scheme only applies for unconditional calls. Hence: */ + cc = ARM64cc_AL; + + } else { + + /* SLOW SCHEME; move via temporaries */ + nextArgReg = 0; + + for (i = 0; i < n_args; i++) { + IRExpr* arg = args[i]; + + IRType aTy = Ity_INVALID; + if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) + aTy = typeOfIRExpr(env->type_env, args[i]); + + if (nextArgReg >= ARM64_N_ARGREGS) + return False; /* out of argregs */ + + if (aTy == Ity_I64) { + tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); + nextArgReg++; + } + else if (arg->tag == Iex_BBPTR) { + vassert(0); //ATC + tmpregs[nextArgReg] = hregARM64_X21(); + nextArgReg++; + } + else if (arg->tag == Iex_VECRET) { + vassert(!hregIsInvalid(r_vecRetAddr)); + tmpregs[nextArgReg] = r_vecRetAddr; + nextArgReg++; + } + else + return False; /* unhandled arg type */ + } + + /* Now we can compute the condition. We can't do it earlier + because the argument computations could trash the condition + codes. Be a bit clever to handle the common case where the + guard is 1:Bit. */ + cc = ARM64cc_AL; + if (guard) { + if (guard->tag == Iex_Const + && guard->Iex.Const.con->tag == Ico_U1 + && guard->Iex.Const.con->Ico.U1 == True) { + /* unconditional -- do nothing */ + } else { + cc = iselCondCode( env, guard ); + } + } + + /* Move the args to their final destinations. */ + for (i = 0; i < nextArgReg; i++) { + vassert(!(hregIsInvalid(tmpregs[i]))); + /* None of these insns, including any spill code that might + be generated, may alter the condition codes. */ + addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) ); + } + + } + + /* Should be assured by checks above */ + vassert(nextArgReg <= ARM64_N_ARGREGS); + + /* Do final checks, set the return values, and generate the call + instruction proper. */ + vassert(nBBPTRs == 0 || nBBPTRs == 1); + vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0); + vassert(*stackAdjustAfterCall == 0); + vassert(is_RetLoc_INVALID(*retloc)); + switch (retTy) { + case Ity_INVALID: + /* Function doesn't return a value. */ + *retloc = mk_RetLoc_simple(RLPri_None); + break; + case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: + *retloc = mk_RetLoc_simple(RLPri_Int); + break; + case Ity_V128: + *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); + *stackAdjustAfterCall = 16; + break; + case Ity_V256: + vassert(0); // ATC + *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); + *stackAdjustAfterCall = 32; + break; + default: + /* IR can denote other possible return types, but we don't + handle those here. */ + vassert(0); + } + + /* Finally, generate the call itself. This needs the *retloc value + set in the switch above, which is why it's at the end. */ + + /* nextArgReg doles out argument registers. Since these are + assigned in the order x0 .. x7, its numeric value at this point, + which must be between 0 and 8 inclusive, is going to be equal to + the number of arg regs in use for the call. Hence bake that + number into the call (we'll need to know it when doing register + allocation, to know what regs the call reads.) */ + + target = (HWord)Ptr_to_ULong(cee->addr); + addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc )); + + return True; /* success */ +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Integer expressions (64/32 bit) ---*/ +/*---------------------------------------------------------*/ + +/* Select insns for an integer-typed expression, and add them to the + code list. 
Return a reg holding the result. This reg will be a + virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you + want to modify it, ask for a new vreg, copy it in there, and modify + the copy. The register allocator will do its best to map both + vregs to the same real register, so the copies will often disappear + later in the game. + + This should handle expressions of 64- and 32-bit type. All results + are returned in a 64-bit register. For 32-bit expressions, the + upper 32 bits are arbitrary, so you should mask or sign extend + partial values if necessary. +*/ + +/* --------------------- AMode --------------------- */ + +/* Return an AMode which computes the value of the specified + expression, possibly also adding insns to the code list as a + result. The expression may only be a 64-bit one. +*/ + +static Bool isValidScale ( UChar scale ) +{ + switch (scale) { + case 1: case 2: case 4: case 8: /* case 16: ??*/ return True; + default: return False; + } +} + +static Bool sane_AMode ( ARM64AMode* am ) +{ + switch (am->tag) { + case ARM64am_RI9: + return + toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64 + && (hregIsVirtual(am->ARM64am.RI9.reg) + /* || sameHReg(am->ARM64am.RI9.reg, + hregARM64_X21()) */ ) + && am->ARM64am.RI9.simm9 >= -256 + && am->ARM64am.RI9.simm9 <= 255 ); + case ARM64am_RI12: + return + toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64 + && (hregIsVirtual(am->ARM64am.RI12.reg) + /* || sameHReg(am->ARM64am.RI12.reg, + hregARM64_X21()) */ ) + && am->ARM64am.RI12.uimm12 < 4096 + && isValidScale(am->ARM64am.RI12.szB) ); + case ARM64am_RR: + return + toBool( hregClass(am->ARM64am.RR.base) == HRcInt64 + && hregIsVirtual(am->ARM64am.RR.base) + && hregClass(am->ARM64am.RR.index) == HRcInt64 + && hregIsVirtual(am->ARM64am.RR.index) ); + default: + vpanic("sane_AMode: unknown ARM64 AMode1 tag"); + } +} + +static +ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty ) +{ + ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty); + vassert(sane_AMode(am)); + return am; +} + +static +ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty ) +{ + IRType ty = typeOfIRExpr(env->type_env,e); + vassert(ty == Ity_I64); + + ULong szBbits = 0; + switch (dty) { + case Ity_I64: szBbits = 3; break; + case Ity_I32: szBbits = 2; break; + case Ity_I16: szBbits = 1; break; + case Ity_I8: szBbits = 0; break; + default: vassert(0); + } + + /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since + we're going to create an amode suitable for LDU* or STU* + instructions, which use unscaled immediate offsets. */ + if (e->tag == Iex_Binop + && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64) + && e->Iex.Binop.arg2->tag == Iex_Const + && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) { + Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; + if (simm >= -255 && simm <= 255) { + /* Although the gating condition might seem to be + simm >= -256 && simm <= 255 + we will need to negate simm in the case where the op is Sub64. + Hence limit the lower value to -255 in order that its negation + is representable. 
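+            For example, Sub64(e, 255) is accepted here and becomes an
+            RI9 offset of -255, whereas Sub64(e, 256) fails the test and
+            falls through to the later cases, ending up computed into a
+            register.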
*/ + HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); + if (e->Iex.Binop.op == Iop_Sub64) simm = -simm; + return ARM64AMode_RI9(reg, (Int)simm); + } + } + + /* Add64(expr, uimm12 * transfer-size) */ + if (e->tag == Iex_Binop + && e->Iex.Binop.op == Iop_Add64 + && e->Iex.Binop.arg2->tag == Iex_Const + && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) { + ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; + ULong szB = 1 << szBbits; + if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */ + && (uimm >> szBbits) < 4096) { + HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); + return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB); + } + } + + /* Add64(expr1, expr2) */ + if (e->tag == Iex_Binop + && e->Iex.Binop.op == Iop_Add64) { + HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2); + return ARM64AMode_RR(reg1, reg2); + } + + /* Doesn't match anything in particular. Generate it into + a register and use that. */ + HReg reg = iselIntExpr_R(env, e); + return ARM64AMode_RI9(reg, 0); +} + +//ZZ /* --------------------- AModeV --------------------- */ +//ZZ +//ZZ /* Return an AModeV which computes the value of the specified +//ZZ expression, possibly also adding insns to the code list as a +//ZZ result. The expression may only be a 32-bit one. +//ZZ */ +//ZZ +//ZZ static Bool sane_AModeV ( ARMAModeV* am ) +//ZZ { +//ZZ return toBool( hregClass(am->reg) == HRcInt32 +//ZZ && hregIsVirtual(am->reg) +//ZZ && am->simm11 >= -1020 && am->simm11 <= 1020 +//ZZ && 0 == (am->simm11 & 3) ); +//ZZ } +//ZZ +//ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e); +//ZZ vassert(sane_AModeV(am)); +//ZZ return am; +//ZZ } +//ZZ +//ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ IRType ty = typeOfIRExpr(env->type_env,e); +//ZZ vassert(ty == Ity_I32); +//ZZ +//ZZ /* {Add32,Sub32}(expr, simm8 << 2) */ +//ZZ if (e->tag == Iex_Binop +//ZZ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) +//ZZ && e->Iex.Binop.arg2->tag == Iex_Const +//ZZ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { +//ZZ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; +//ZZ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) { +//ZZ HReg reg; +//ZZ if (e->Iex.Binop.op == Iop_Sub32) +//ZZ simm = -simm; +//ZZ reg = iselIntExpr_R(env, e->Iex.Binop.arg1); +//ZZ return mkARMAModeV(reg, simm); +//ZZ } +//ZZ } +//ZZ +//ZZ /* Doesn't match anything in particular. Generate it into +//ZZ a register and use that. */ +//ZZ { +//ZZ HReg reg = iselIntExpr_R(env, e); +//ZZ return mkARMAModeV(reg, 0); +//ZZ } +//ZZ +//ZZ } +//ZZ +//ZZ /* -------------------- AModeN -------------------- */ +//ZZ +//ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ return iselIntExpr_AModeN_wrk(env, e); +//ZZ } +//ZZ +//ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ HReg reg = iselIntExpr_R(env, e); +//ZZ return mkARMAModeN_R(reg); +//ZZ } +//ZZ +//ZZ +//ZZ /* --------------------- RI84 --------------------- */ +//ZZ +//ZZ /* Select instructions to generate 'e' into a RI84. If mayInv is +//ZZ true, then the caller will also accept an I84 form that denotes +//ZZ 'not e'. In this case didInv may not be NULL, and *didInv is set +//ZZ to True. 
This complication is so as to allow generation of an RI84 +//ZZ which is suitable for use in either an AND or BIC instruction, +//ZZ without knowing (before this call) which one. +//ZZ */ +//ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv, +//ZZ ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ ARMRI84* ri; +//ZZ if (mayInv) +//ZZ vassert(didInv != NULL); +//ZZ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e); +//ZZ /* sanity checks ... */ +//ZZ switch (ri->tag) { +//ZZ case ARMri84_I84: +//ZZ return ri; +//ZZ case ARMri84_R: +//ZZ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32); +//ZZ vassert(hregIsVirtual(ri->ARMri84.R.reg)); +//ZZ return ri; +//ZZ default: +//ZZ vpanic("iselIntExpr_RI84: unknown arm RI84 tag"); +//ZZ } +//ZZ } +//ZZ +//ZZ /* DO NOT CALL THIS DIRECTLY ! */ +//ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv, +//ZZ ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ IRType ty = typeOfIRExpr(env->type_env,e); +//ZZ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); +//ZZ +//ZZ if (didInv) *didInv = False; +//ZZ +//ZZ /* special case: immediate */ +//ZZ if (e->tag == Iex_Const) { +//ZZ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */ +//ZZ switch (e->Iex.Const.con->tag) { +//ZZ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; +//ZZ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; +//ZZ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; +//ZZ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)"); +//ZZ } +//ZZ if (fitsIn8x4(&u8, &u4, u)) { +//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 ); +//ZZ } +//ZZ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) { +//ZZ vassert(didInv); +//ZZ *didInv = True; +//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 ); +//ZZ } +//ZZ /* else fail, fall through to default case */ +//ZZ } +//ZZ +//ZZ /* default case: calculate into a register and return that */ +//ZZ { +//ZZ HReg r = iselIntExpr_R ( env, e ); +//ZZ return ARMRI84_R(r); +//ZZ } +//ZZ } + + +/* --------------------- RIA --------------------- */ + +/* Select instructions to generate 'e' into a RIA. */ + +static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e ) +{ + ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e); + /* sanity checks ... */ + switch (ri->tag) { + case ARM64riA_I12: + vassert(ri->ARM64riA.I12.imm12 < 4096); + vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12); + return ri; + case ARM64riA_R: + vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64); + vassert(hregIsVirtual(ri->ARM64riA.R.reg)); + return ri; + default: + vpanic("iselIntExpr_RIA: unknown arm RIA tag"); + } +} + +/* DO NOT CALL THIS DIRECTLY ! 
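+   (An arith immediate is a 12-bit value optionally shifted left by 12,
+   which is what the checks below test for: 0x3 encodes as imm12=3/shift=0
+   and 0x5000 as imm12=5/shift=12, whereas 0x1001 is not representable and
+   is computed into a register instead.)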
*/ +static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(env->type_env,e); + vassert(ty == Ity_I64 || ty == Ity_I32); + + /* special case: immediate */ + if (e->tag == Iex_Const) { + ULong u = 0xF000000ULL; /* invalid */ + switch (e->Iex.Const.con->tag) { + case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; + case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; + default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)"); + } + if (0 == (u & ~(0xFFFULL << 0))) + return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0); + if (0 == (u & ~(0xFFFULL << 12))) + return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12); + /* else fail, fall through to default case */ + } + + /* default case: calculate into a register and return that */ + { + HReg r = iselIntExpr_R ( env, e ); + return ARM64RIA_R(r); + } +} + + +/* --------------------- RIL --------------------- */ + +/* Select instructions to generate 'e' into a RIL. At this point we + have to deal with the strange bitfield-immediate encoding for logic + instructions. */ + + +// The following four functions +// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical +// are copied, with modifications, from +// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc +// which has the following copyright notice: +/* + Copyright 2013, ARM Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of ARM Limited nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +static Int CountLeadingZeros(ULong value, Int width) +{ + vassert(width == 32 || width == 64); + Int count = 0; + ULong bit_test = 1ULL << (width - 1); + while ((count < width) && ((bit_test & value) == 0)) { + count++; + bit_test >>= 1; + } + return count; +} + +static Int CountTrailingZeros(ULong value, Int width) +{ + vassert(width == 32 || width == 64); + Int count = 0; + while ((count < width) && (((value >> count) & 1) == 0)) { + count++; + } + return count; +} + +static Int CountSetBits(ULong value, Int width) +{ + // TODO: Other widths could be added here, as the implementation already + // supports them. 
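+   // Note: the count is at most 'width' (<= 64), so accumulating it in
+   // 'value' and returning it as an Int is fine.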
+ vassert(width == 32 || width == 64); + + // Mask out unused bits to ensure that they are not counted. + value &= (0xffffffffffffffffULL >> (64-width)); + + // Add up the set bits. + // The algorithm works by adding pairs of bit fields together iteratively, + // where the size of each bit field doubles each time. + // An example for an 8-bit value: + // Bits: h g f e d c b a + // \ | \ | \ | \ | + // value = h+g f+e d+c b+a + // \ | \ | + // value = h+g+f+e d+c+b+a + // \ | + // value = h+g+f+e+d+c+b+a + value = ((value >> 1) & 0x5555555555555555ULL) + + (value & 0x5555555555555555ULL); + value = ((value >> 2) & 0x3333333333333333ULL) + + (value & 0x3333333333333333ULL); + value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL) + + (value & 0x0f0f0f0f0f0f0f0fULL); + value = ((value >> 8) & 0x00ff00ff00ff00ffULL) + + (value & 0x00ff00ff00ff00ffULL); + value = ((value >> 16) & 0x0000ffff0000ffffULL) + + (value & 0x0000ffff0000ffffULL); + value = ((value >> 32) & 0x00000000ffffffffULL) + + (value & 0x00000000ffffffffULL); + + return value; +} + +static Bool isImmLogical ( /*OUT*/UInt* n, + /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r, + ULong value, UInt width ) +{ + // Test if a given value can be encoded in the immediate field of a + // logical instruction. + + // If it can be encoded, the function returns true, and values + // pointed to by n, imm_s and imm_r are updated with immediates + // encoded in the format required by the corresponding fields in the + // logical instruction. If it can not be encoded, the function + // returns false, and the values pointed to by n, imm_s and imm_r + // are undefined. + vassert(n != NULL && imm_s != NULL && imm_r != NULL); + vassert(width == 32 || width == 64); + + // Logical immediates are encoded using parameters n, imm_s and imm_r using + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. The pattern is rotated right by R, and repeated across a + // 32 or 64-bit value, depending on destination register width. + // + // To test if an arbitrary immediate can be encoded using this scheme, an + // iterative algorithm is used. + // + // TODO: This code does not consider using X/W register overlap to support + // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits + // are an encodable logical immediate. + + // 1. If the value has all set or all clear bits, it can't be encoded. + if ((value == 0) || (value == 0xffffffffffffffffULL) || + ((width == 32) && (value == 0xffffffff))) { + return False; + } + + UInt lead_zero = CountLeadingZeros(value, width); + UInt lead_one = CountLeadingZeros(~value, width); + UInt trail_zero = CountTrailingZeros(value, width); + UInt trail_one = CountTrailingZeros(~value, width); + UInt set_bits = CountSetBits(value, width); + + // The fixed bits in the immediate s field. + // If width == 64 (X reg), start at 0xFFFFFF80. + // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit + // widths won't be executed. + Int imm_s_fixed = (width == 64) ? -128 : -64; + Int imm_s_mask = 0x3F; + + for (;;) { + // 2. If the value is two bits wide, it can be encoded. 
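+      // (For instance, a residual 2-bit value of 0b01 yields n=0,
+      // imm_s=0x3C, imm_r=0, and 0b10 yields the same but with imm_r=1,
+      // i.e. the "0 11110s" row of the table above.)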
+ if (width == 2) { + *n = 0; + *imm_s = 0x3C; + *imm_r = (value & 3) - 1; + return True; + } + + *n = (width == 64) ? 1 : 0; + *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask); + if ((lead_zero + set_bits) == width) { + *imm_r = 0; + } else { + *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one; + } + + // 3. If the sum of leading zeros, trailing zeros and set bits is equal to + // the bit width of the value, it can be encoded. + if (lead_zero + trail_zero + set_bits == width) { + return True; + } + + // 4. If the sum of leading ones, trailing ones and unset bits in the + // value is equal to the bit width of the value, it can be encoded. + if (lead_one + trail_one + (width - set_bits) == width) { + return True; + } + + // 5. If the most-significant half of the bitwise value is equal to the + // least-significant half, return to step 2 using the least-significant + // half of the value. + ULong mask = (1ULL << (width >> 1)) - 1; + if ((value & mask) == ((value >> (width >> 1)) & mask)) { + width >>= 1; + set_bits >>= 1; + imm_s_fixed >>= 1; + continue; + } + + // 6. Otherwise, the value can't be encoded. + return False; + } +} + + +/* Create a RIL for the given immediate, if it is representable, or + return NULL if not. */ + +static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 ) +{ + UInt n = 0, imm_s = 0, imm_r = 0; + Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64); + if (!ok) return NULL; + vassert(n < 2 && imm_s < 64 && imm_r < 64); + return ARM64RIL_I13(n, imm_r, imm_s); +} + +/* So, finally .. */ + +static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e ) +{ + ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e); + /* sanity checks ... */ + switch (ri->tag) { + case ARM64riL_I13: + vassert(ri->ARM64riL.I13.bitN < 2); + vassert(ri->ARM64riL.I13.immR < 64); + vassert(ri->ARM64riL.I13.immS < 64); + return ri; + case ARM64riL_R: + vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64); + vassert(hregIsVirtual(ri->ARM64riL.R.reg)); + return ri; + default: + vpanic("iselIntExpr_RIL: unknown arm RIL tag"); + } +} + +/* DO NOT CALL THIS DIRECTLY ! */ +static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(env->type_env,e); + vassert(ty == Ity_I64 || ty == Ity_I32); + + /* special case: immediate */ + if (e->tag == Iex_Const) { + ARM64RIL* maybe = NULL; + if (ty == Ity_I64) { + vassert(e->Iex.Const.con->tag == Ico_U64); + maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64); + } else { + vassert(ty == Ity_I32); + vassert(e->Iex.Const.con->tag == Ico_U32); + UInt u32 = e->Iex.Const.con->Ico.U32; + ULong u64 = (ULong)u32; + /* First try with 32 leading zeroes. */ + maybe = mb_mkARM64RIL_I(u64); + /* If that doesn't work, try with 2 copies, since it doesn't + matter what winds up in the upper 32 bits. */ + if (!maybe) { + maybe = mb_mkARM64RIL_I((u64 << 32) | u64); + } + } + if (maybe) return maybe; + /* else fail, fall through to default case */ + } + + /* default case: calculate into a register and return that */ + { + HReg r = iselIntExpr_R ( env, e ); + return ARM64RIL_R(r); + } +} + + +/* --------------------- RI6 --------------------- */ + +/* Select instructions to generate 'e' into a RI6. */ + +static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e ) +{ + ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e); + /* sanity checks ... 
*/ + switch (ri->tag) { + case ARM64ri6_I6: + vassert(ri->ARM64ri6.I6.imm6 < 64); + vassert(ri->ARM64ri6.I6.imm6 > 0); + return ri; + case ARM64ri6_R: + vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64); + vassert(hregIsVirtual(ri->ARM64ri6.R.reg)); + return ri; + default: + vpanic("iselIntExpr_RI6: unknown arm RI6 tag"); + } +} + +/* DO NOT CALL THIS DIRECTLY ! */ +static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(env->type_env,e); + vassert(ty == Ity_I64 || ty == Ity_I8); + + /* special case: immediate */ + if (e->tag == Iex_Const) { + switch (e->Iex.Const.con->tag) { + case Ico_U8: { + UInt u = e->Iex.Const.con->Ico.U8; + if (u > 0 && u < 64) + return ARM64RI6_I6(u); + break; + default: + break; + } + } + /* else fail, fall through to default case */ + } + + /* default case: calculate into a register and return that */ + { + HReg r = iselIntExpr_R ( env, e ); + return ARM64RI6_R(r); + } +} + + +/* ------------------- CondCode ------------------- */ + +/* Generate code to evaluated a bit-typed expression, returning the + condition code which would correspond when the expression would + notionally have returned 1. */ + +static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) +{ + ARM64CondCode cc = iselCondCode_wrk(env,e); + vassert(cc != ARM64cc_NV); + return cc; +} + +static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) +{ + vassert(e); + vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); + + /* var */ + if (e->tag == Iex_RdTmp) { + HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp); + /* Cmp doesn't modify rTmp; so this is OK. */ + ARM64RIL* one = mb_mkARM64RIL_I(1); + vassert(one); + addInstr(env, ARM64Instr_Test(rTmp, one)); + return ARM64cc_NE; + } + + /* Not1(e) */ + if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { + /* Generate code for the arg, and negate the test condition */ + ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); + if (cc == ARM64cc_AL || cc == ARM64cc_NV) { + return ARM64cc_AL; + } else { + return 1 ^ cc; + } + } + + /* --- patterns rooted at: 64to1 --- */ + + if (e->tag == Iex_Unop + && e->Iex.Unop.op == Iop_64to1) { + HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg); + ARM64RIL* one = mb_mkARM64RIL_I(1); + vassert(one); /* '1' must be representable */ + addInstr(env, ARM64Instr_Test(rTmp, one)); + return ARM64cc_NE; + } + + /* --- patterns rooted at: CmpNEZ8 --- */ + + if (e->tag == Iex_Unop + && e->Iex.Unop.op == Iop_CmpNEZ8) { + HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); + ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF); + addInstr(env, ARM64Instr_Test(r1, xFF)); + return ARM64cc_NE; + } + + /* --- patterns rooted at: CmpNEZ64 --- */ + + if (e->tag == Iex_Unop + && e->Iex.Unop.op == Iop_CmpNEZ64) { + HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); + ARM64RIA* zero = ARM64RIA_I12(0,0); + addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/)); + return ARM64cc_NE; + } + + /* --- patterns rooted at: CmpNEZ32 --- */ + + if (e->tag == Iex_Unop + && e->Iex.Unop.op == Iop_CmpNEZ32) { + HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); + ARM64RIA* zero = ARM64RIA_I12(0,0); + addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/)); + return ARM64cc_NE; + } + + /* --- Cmp*64*(x,y) --- */ + if (e->tag == Iex_Binop + && (e->Iex.Binop.op == Iop_CmpEQ64 + || e->Iex.Binop.op == Iop_CmpNE64 + || e->Iex.Binop.op == Iop_CmpLT64S + || e->Iex.Binop.op == Iop_CmpLT64U + || e->Iex.Binop.op == Iop_CmpLE64S + || e->Iex.Binop.op == Iop_CmpLE64U)) { + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + ARM64RIA* argR = 
iselIntExpr_RIA(env, e->Iex.Binop.arg2); + addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/)); + switch (e->Iex.Binop.op) { + case Iop_CmpEQ64: return ARM64cc_EQ; + case Iop_CmpNE64: return ARM64cc_NE; + case Iop_CmpLT64S: return ARM64cc_LT; + case Iop_CmpLT64U: return ARM64cc_CC; + case Iop_CmpLE64S: return ARM64cc_LE; + case Iop_CmpLE64U: return ARM64cc_LS; + default: vpanic("iselCondCode(arm64): CmpXX64"); + } + } + + /* --- Cmp*32*(x,y) --- */ + if (e->tag == Iex_Binop + && (e->Iex.Binop.op == Iop_CmpEQ32 + || e->Iex.Binop.op == Iop_CmpNE32 + || e->Iex.Binop.op == Iop_CmpLT32S + || e->Iex.Binop.op == Iop_CmpLT32U + || e->Iex.Binop.op == Iop_CmpLE32S + || e->Iex.Binop.op == Iop_CmpLE32U)) { + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); + addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/)); + switch (e->Iex.Binop.op) { + case Iop_CmpEQ32: return ARM64cc_EQ; + case Iop_CmpNE32: return ARM64cc_NE; + case Iop_CmpLT32S: return ARM64cc_LT; + case Iop_CmpLT32U: return ARM64cc_CC; + case Iop_CmpLE32S: return ARM64cc_LE; + case Iop_CmpLE32U: return ARM64cc_LS; + default: vpanic("iselCondCode(arm64): CmpXX32"); + } + } + +//ZZ /* const */ +//ZZ /* Constant 1:Bit */ +//ZZ if (e->tag == Iex_Const) { +//ZZ HReg r; +//ZZ vassert(e->Iex.Const.con->tag == Ico_U1); +//ZZ vassert(e->Iex.Const.con->Ico.U1 == True +//ZZ || e->Iex.Const.con->Ico.U1 == False); +//ZZ r = newVRegI(env); +//ZZ addInstr(env, ARMInstr_Imm32(r, 0)); +//ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r))); +//ZZ return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE; +//ZZ } +//ZZ +//ZZ // JRS 2013-Jan-03: this seems completely nonsensical +//ZZ /* --- CasCmpEQ* --- */ +//ZZ /* Ist_Cas has a dummy argument to compare with, so comparison is +//ZZ always true. */ +//ZZ //if (e->tag == Iex_Binop +//ZZ // && (e->Iex.Binop.op == Iop_CasCmpEQ32 +//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ16 +//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ8)) { +//ZZ // return ARMcc_AL; +//ZZ //} + + ppIRExpr(e); + vpanic("iselCondCode"); +} + + +/* --------------------- Reg --------------------- */ + +static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) +{ + HReg r = iselIntExpr_R_wrk(env, e); + /* sanity checks ... */ +# if 0 + vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); +# endif + vassert(hregClass(r) == HRcInt64); + vassert(hregIsVirtual(r)); + return r; +} + +/* DO NOT CALL THIS DIRECTLY ! 
*/ +static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(env->type_env,e); + vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); + + switch (e->tag) { + + /* --------- TEMP --------- */ + case Iex_RdTmp: { + return lookupIRTemp(env, e->Iex.RdTmp.tmp); + } + + /* --------- LOAD --------- */ + case Iex_Load: { + HReg dst = newVRegI(env); + + if (e->Iex.Load.end != Iend_LE) + goto irreducible; + + if (ty == Ity_I64) { + ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); + addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode)); + return dst; + } + if (ty == Ity_I32) { + ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); + addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode)); + return dst; + } + if (ty == Ity_I16) { + ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); + addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode)); + return dst; + } + if (ty == Ity_I8) { + ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); + addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode)); + return dst; + } + break; + } + + /* --------- BINARY OP --------- */ + case Iex_Binop: { + + ARM64LogicOp lop = 0; /* invalid */ + ARM64ShiftOp sop = 0; /* invalid */ + + /* Special-case 0-x into a Neg instruction. Not because it's + particularly useful but more so as to give value flow using + this instruction, so as to check its assembly correctness for + implementation of Left32/Left64. */ + switch (e->Iex.Binop.op) { + case Iop_Sub64: + if (isZeroU64(e->Iex.Binop.arg1)) { + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg dst = newVRegI(env); + addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG)); + return dst; + } + break; + default: + break; + } + + /* ADD/SUB */ + switch (e->Iex.Binop.op) { + case Iop_Add64: case Iop_Add32: + case Iop_Sub64: case Iop_Sub32: { + Bool isAdd = e->Iex.Binop.op == Iop_Add64 + || e->Iex.Binop.op == Iop_Add32; + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); + addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd)); + return dst; + } + default: + break; + } + + /* AND/OR/XOR */ + switch (e->Iex.Binop.op) { + case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; + case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop; + case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop; + log_binop: { + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2); + addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop)); + return dst; + } + default: + break; + } + + /* SHL/SHR/SAR */ + switch (e->Iex.Binop.op) { + case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop; + case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop; + case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop; + sh_binop: { + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); + addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop)); + return dst; + } + case Iop_Shr32: + case Iop_Sar32: { + Bool zx = e->Iex.Binop.op == Iop_Shr32; + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); + HReg dst = zx ? 
widen_z_32_to_64(env, argL) + : widen_s_32_to_64(env, argL); + addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR)); + return dst; + } + default: break; + } + + /* MUL */ + if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) { + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg dst = newVRegI(env); + addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN)); + return dst; + } + + /* MULL */ + if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) { + Bool isS = e->Iex.Binop.op == Iop_MullS32; + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR); + HReg dst = newVRegI(env); + addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN)); + return dst; + } + + /* Handle misc other ops. */ + + if (e->Iex.Binop.op == Iop_Max32U) { + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg dst = newVRegI(env); + addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/)); + addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS)); + return dst; + } + + if (e->Iex.Binop.op == Iop_32HLto64) { + HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg lo32 = widen_z_32_to_64(env, lo32s); + HReg hi32 = newVRegI(env); + addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32), + ARM64sh_SHL)); + addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32), + ARM64lo_OR)); + return hi32; + } + + if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) { + Bool isD = e->Iex.Binop.op == Iop_CmpF64; + HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1); + HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2); + HReg dst = newVRegI(env); + HReg imm = newVRegI(env); + /* Do the compare (FCMP), which sets NZCV in PSTATE. Then + create in dst, the IRCmpF64Result encoded result. */ + addInstr(env, (isD ? 
ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR)); + addInstr(env, ARM64Instr_Imm64(dst, 0)); + addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ + addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ)); + addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT + addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI)); + addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT + addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT)); + addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN + addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS)); + return dst; + } + + { /* local scope */ + ARM64CvtOp cvt_op = ARM64cvt_INVALID; + Bool srcIsD = False; + switch (e->Iex.Binop.op) { + case Iop_F64toI64S: + cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break; + case Iop_F64toI64U: + cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break; + case Iop_F64toI32S: + cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break; + case Iop_F64toI32U: + cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break; + case Iop_F32toI32S: + cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break; + case Iop_F32toI32U: + cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break; + case Iop_F32toI64S: + cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break; + case Iop_F32toI64U: + cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break; + default: + break; + } + if (cvt_op != ARM64cvt_INVALID) { + /* This is all a bit dodgy, because we can't handle a + non-constant (not-known-at-JIT-time) rounding mode + indication. That's because there's no instruction + AFAICS that does this conversion but rounds according to + FPCR.RM, so we have to bake the rounding mode into the + instruction right now. But that should be OK because + (1) the front end attaches a literal Irrm_ value to the + conversion binop, and (2) iropt will never float that + off via CSE, into a literal. Hence we should always + have an Irrm_ value as the first arg. */ + IRExpr* arg1 = e->Iex.Binop.arg1; + if (arg1->tag != Iex_Const) goto irreducible; + IRConst* arg1con = arg1->Iex.Const.con; + vassert(arg1con->tag == Ico_U32); // else ill-typed IR + UInt irrm = arg1con->Ico.U32; + /* Find the ARM-encoded equivalent for |irrm|. */ + UInt armrm = 4; /* impossible */ + switch (irrm) { + case Irrm_NEAREST: armrm = 0; break; + case Irrm_NegINF: armrm = 2; break; + case Irrm_PosINF: armrm = 1; break; + case Irrm_ZERO: armrm = 3; break; + default: goto irreducible; + } + HReg src = (srcIsD ? 
iselDblExpr : iselFltExpr) + (env, e->Iex.Binop.arg2); + HReg dst = newVRegI(env); + addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm)); + return dst; + } + } /* local scope */ + +//ZZ if (e->Iex.Binop.op == Iop_GetElem8x8 +//ZZ || e->Iex.Binop.op == Iop_GetElem16x4 +//ZZ || e->Iex.Binop.op == Iop_GetElem32x2) { +//ZZ HReg res = newVRegI(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ UInt index, size; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM target supports GetElem with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break; +//ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break; +//ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), +//ZZ size, False)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ if (e->Iex.Binop.op == Iop_GetElem8x16 +//ZZ || e->Iex.Binop.op == Iop_GetElem16x8 +//ZZ || e->Iex.Binop.op == Iop_GetElem32x4) { +//ZZ HReg res = newVRegI(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ UInt index, size; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM target supports GetElem with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break; +//ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break; +//ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), +//ZZ size, True)); +//ZZ return res; +//ZZ } + + /* All cases involving host-side helper calls. 
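+         The Iop_Div* cases are passed to helpers which, as their
+         _w_arm_semantics names indicate, implement the ARM division
+         semantics (in particular, division by zero yields zero rather
+         than trapping).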
*/ + void* fn = NULL; + switch (e->Iex.Binop.op) { +//ZZ case Iop_Add16x2: +//ZZ fn = &h_generic_calc_Add16x2; break; +//ZZ case Iop_Sub16x2: +//ZZ fn = &h_generic_calc_Sub16x2; break; +//ZZ case Iop_HAdd16Ux2: +//ZZ fn = &h_generic_calc_HAdd16Ux2; break; +//ZZ case Iop_HAdd16Sx2: +//ZZ fn = &h_generic_calc_HAdd16Sx2; break; +//ZZ case Iop_HSub16Ux2: +//ZZ fn = &h_generic_calc_HSub16Ux2; break; +//ZZ case Iop_HSub16Sx2: +//ZZ fn = &h_generic_calc_HSub16Sx2; break; +//ZZ case Iop_QAdd16Sx2: +//ZZ fn = &h_generic_calc_QAdd16Sx2; break; +//ZZ case Iop_QAdd16Ux2: +//ZZ fn = &h_generic_calc_QAdd16Ux2; break; +//ZZ case Iop_QSub16Sx2: +//ZZ fn = &h_generic_calc_QSub16Sx2; break; +//ZZ case Iop_Add8x4: +//ZZ fn = &h_generic_calc_Add8x4; break; +//ZZ case Iop_Sub8x4: +//ZZ fn = &h_generic_calc_Sub8x4; break; +//ZZ case Iop_HAdd8Ux4: +//ZZ fn = &h_generic_calc_HAdd8Ux4; break; +//ZZ case Iop_HAdd8Sx4: +//ZZ fn = &h_generic_calc_HAdd8Sx4; break; +//ZZ case Iop_HSub8Ux4: +//ZZ fn = &h_generic_calc_HSub8Ux4; break; +//ZZ case Iop_HSub8Sx4: +//ZZ fn = &h_generic_calc_HSub8Sx4; break; +//ZZ case Iop_QAdd8Sx4: +//ZZ fn = &h_generic_calc_QAdd8Sx4; break; +//ZZ case Iop_QAdd8Ux4: +//ZZ fn = &h_generic_calc_QAdd8Ux4; break; +//ZZ case Iop_QSub8Sx4: +//ZZ fn = &h_generic_calc_QSub8Sx4; break; +//ZZ case Iop_QSub8Ux4: +//ZZ fn = &h_generic_calc_QSub8Ux4; break; +//ZZ case Iop_Sad8Ux4: +//ZZ fn = &h_generic_calc_Sad8Ux4; break; +//ZZ case Iop_QAdd32S: +//ZZ fn = &h_generic_calc_QAdd32S; break; +//ZZ case Iop_QSub32S: +//ZZ fn = &h_generic_calc_QSub32S; break; +//ZZ case Iop_QSub16Ux2: +//ZZ fn = &h_generic_calc_QSub16Ux2; break; + case Iop_DivU32: + fn = &h_calc_udiv32_w_arm_semantics; break; + case Iop_DivS32: + fn = &h_calc_sdiv32_w_arm_semantics; break; + case Iop_DivU64: + fn = &h_calc_udiv64_w_arm_semantics; break; + case Iop_DivS64: + fn = &h_calc_sdiv64_w_arm_semantics; break; + default: + break; + } + + if (fn) { + HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg res = newVRegI(env); + addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL)); + addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR)); + addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn), + 2, mk_RetLoc_simple(RLPri_Int) )); + addInstr(env, ARM64Instr_MovI(res, hregARM64_X0())); + return res; + } + + break; + } + + /* --------- UNARY OP --------- */ + case Iex_Unop: { + + switch (e->Iex.Unop.op) { + case Iop_16Uto64: { + /* This probably doesn't occur often enough to be worth + rolling the extension into the load. */ + IRExpr* arg = e->Iex.Unop.arg; + HReg src = iselIntExpr_R(env, arg); + HReg dst = widen_z_16_to_64(env, src); + return dst; + } + case Iop_32Uto64: { + IRExpr* arg = e->Iex.Unop.arg; + if (arg->tag == Iex_Load) { + /* This correctly zero extends because _LdSt32 is + defined to do a zero extending load. */ + HReg dst = newVRegI(env); + ARM64AMode* am + = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32); + addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); + return dst; + } + /* else be lame and mask it */ + HReg src = iselIntExpr_R(env, arg); + HReg dst = widen_z_32_to_64(env, src); + return dst; + } + case Iop_8Uto32: /* Just freeload on the 8Uto64 case */ + case Iop_8Uto64: { + IRExpr* arg = e->Iex.Unop.arg; + if (arg->tag == Iex_Load) { + /* This correctly zero extends because _LdSt8 is + defined to do a zero extending load. 
*/ + HReg dst = newVRegI(env); + ARM64AMode* am + = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8); + addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); + return dst; + } + /* else be lame and mask it */ + HReg src = iselIntExpr_R(env, arg); + HReg dst = widen_z_8_to_64(env, src); + return dst; + } + case Iop_128HIto64: { + HReg rHi, rLo; + iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); + return rHi; /* and abandon rLo */ + } + case Iop_8Sto32: case Iop_8Sto64: { + IRExpr* arg = e->Iex.Unop.arg; + HReg src = iselIntExpr_R(env, arg); + HReg dst = widen_s_8_to_64(env, src); + return dst; + } + case Iop_16Sto32: case Iop_16Sto64: { + IRExpr* arg = e->Iex.Unop.arg; + HReg src = iselIntExpr_R(env, arg); + HReg dst = widen_s_16_to_64(env, src); + return dst; + } + case Iop_32Sto64: { + IRExpr* arg = e->Iex.Unop.arg; + HReg src = iselIntExpr_R(env, arg); + HReg dst = widen_s_32_to_64(env, src); + return dst; + } + case Iop_Not32: + case Iop_Not64: { + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT)); + return dst; + } + case Iop_Clz64: { + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ)); + return dst; + } + case Iop_Left32: + case Iop_Left64: { + /* Left64(src) = src | -src. Left32 can use the same + implementation since in that case we don't care what + the upper 32 bits become. */ + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); + addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), + ARM64lo_OR)); + return dst; + } + case Iop_CmpwNEZ64: { + /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1 + = Left64(src) >>s 63 */ + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); + addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), + ARM64lo_OR)); + addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), + ARM64sh_SAR)); + return dst; + } + case Iop_CmpwNEZ32: { + /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF) + = Left64(src & 0xFFFFFFFF) >>s 63 */ + HReg dst = newVRegI(env); + HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg); + HReg src = widen_z_32_to_64(env, pre); + addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); + addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), + ARM64lo_OR)); + addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), + ARM64sh_SAR)); + return dst; + } + case Iop_V128to64: case Iop_V128HIto64: { + HReg dst = newVRegI(env); + HReg src = iselV128Expr(env, e->Iex.Unop.arg); + UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0; + addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo)); + return dst; + } + case Iop_1Sto32: + case Iop_1Sto64: { + /* As with the iselStmt case for 'tmp:I1 = expr', we could + do a lot better here if it ever became necessary. 
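+               (One obvious improvement would be to CSel directly between
+               an all-zeroes and an all-ones constant, which would make
+               the two shifts below unnecessary.)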
*/ + HReg zero = newVRegI(env); + HReg one = newVRegI(env); + HReg dst = newVRegI(env); + addInstr(env, ARM64Instr_Imm64(zero, 0)); + addInstr(env, ARM64Instr_Imm64(one, 1)); + ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); + addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); + addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), + ARM64sh_SHL)); + addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), + ARM64sh_SAR)); + return dst; + } + case Iop_NarrowUn16to8x8: + case Iop_NarrowUn32to16x4: + case Iop_NarrowUn64to32x2: { + HReg src = iselV128Expr(env, e->Iex.Unop.arg); + HReg tmp = newVRegV(env); + HReg dst = newVRegI(env); + UInt dszBlg2 = 3; /* illegal */ + switch (e->Iex.Unop.op) { + case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8 + case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4 + case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2 + default: vassert(0); + } + addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src)); + addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/)); + return dst; + } +//ZZ case Iop_64HIto32: { +//ZZ HReg rHi, rLo; +//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); +//ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */ +//ZZ } +//ZZ case Iop_64to32: { +//ZZ HReg rHi, rLo; +//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); +//ZZ return rLo; /* similar stupid comment to the above ... */ +//ZZ } +//ZZ case Iop_64to8: { +//ZZ HReg rHi, rLo; +//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); +//ZZ rHi = tHi; +//ZZ rLo = tLo; +//ZZ } else { +//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); +//ZZ } +//ZZ return rLo; +//ZZ } + + case Iop_1Uto64: { + /* 1Uto64(tmp). */ + HReg dst = newVRegI(env); + if (e->Iex.Unop.arg->tag == Iex_RdTmp) { + ARM64RIL* one = mb_mkARM64RIL_I(1); + HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp); + vassert(one); + addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND)); + } else { + /* CLONE-01 */ + HReg zero = newVRegI(env); + HReg one = newVRegI(env); + addInstr(env, ARM64Instr_Imm64(zero, 0)); + addInstr(env, ARM64Instr_Imm64(one, 1)); + ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); + addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); + } + return dst; + } +//ZZ case Iop_1Uto8: { +//ZZ HReg dst = newVRegI(env); +//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); +//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); +//ZZ return dst; +//ZZ } +//ZZ +//ZZ case Iop_1Sto32: { +//ZZ HReg dst = newVRegI(env); +//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); +//ZZ ARMRI5* amt = ARMRI5_I5(31); +//ZZ /* This is really rough. We could do much better here; +//ZZ perhaps mvn{cond} dst, #0 as the second insn? +//ZZ (same applies to 1Sto64) */ +//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); +//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); +//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); +//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); +//ZZ return dst; +//ZZ } +//ZZ +//ZZ case Iop_Clz32: { +//ZZ /* Count leading zeroes; easy on ARM. 
*/ +//ZZ HReg dst = newVRegI(env); +//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src)); +//ZZ return dst; +//ZZ } +//ZZ +//ZZ case Iop_CmpwNEZ32: { +//ZZ HReg dst = newVRegI(env); +//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src)); +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src))); +//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31))); +//ZZ return dst; +//ZZ } +//ZZ +//ZZ case Iop_ReinterpF32asI32: { +//ZZ HReg dst = newVRegI(env); +//ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst)); +//ZZ return dst; +//ZZ } + + case Iop_64to32: + case Iop_64to16: + case Iop_64to8: + /* These are no-ops. */ + return iselIntExpr_R(env, e->Iex.Unop.arg); + + default: + break; + } + +//ZZ /* All Unop cases involving host-side helper calls. */ +//ZZ void* fn = NULL; +//ZZ switch (e->Iex.Unop.op) { +//ZZ case Iop_CmpNEZ16x2: +//ZZ fn = &h_generic_calc_CmpNEZ16x2; break; +//ZZ case Iop_CmpNEZ8x4: +//ZZ fn = &h_generic_calc_CmpNEZ8x4; break; +//ZZ default: +//ZZ break; +//ZZ } +//ZZ +//ZZ if (fn) { +//ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ HReg res = newVRegI(env); +//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg)); +//ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), +//ZZ 1, RetLocInt )); +//ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0())); +//ZZ return res; +//ZZ } + + break; + } + + /* --------- GET --------- */ + case Iex_Get: { + if (ty == Ity_I64 + && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) { + HReg dst = newVRegI(env); + ARM64AMode* am + = mk_baseblock_64bit_access_amode(e->Iex.Get.offset); + addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am)); + return dst; + } + if (ty == Ity_I32 + && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) { + HReg dst = newVRegI(env); + ARM64AMode* am + = mk_baseblock_32bit_access_amode(e->Iex.Get.offset); + addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); + return dst; + } + if (ty == Ity_I16 + && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) { + HReg dst = newVRegI(env); + ARM64AMode* am + = mk_baseblock_16bit_access_amode(e->Iex.Get.offset); + addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am)); + return dst; + } + if (ty == Ity_I8 + /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) { + HReg dst = newVRegI(env); + ARM64AMode* am + = mk_baseblock_8bit_access_amode(e->Iex.Get.offset); + addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); + return dst; + } + break; + } + + /* --------- CCALL --------- */ + case Iex_CCall: { + HReg dst = newVRegI(env); + vassert(ty == e->Iex.CCall.retty); + + /* be very restrictive for now. Only 64-bit ints allowed for + args, and 64 bits for return type. Don't forget to change + the RetLoc if more types are allowed in future. */ + if (e->Iex.CCall.retty != Ity_I64) + goto irreducible; + + /* Marshal args, do the call, clear stack. 
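+            Only an RLPri_Int return location with no stack adjustment is
+            expected back, as the assertions below check.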
*/ + UInt addToSp = 0; + RetLoc rloc = mk_RetLoc_INVALID(); + Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, + e->Iex.CCall.cee, e->Iex.CCall.retty, + e->Iex.CCall.args ); + /* */ + if (ok) { + vassert(is_sane_RetLoc(rloc)); + vassert(rloc.pri == RLPri_Int); + vassert(addToSp == 0); + addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0())); + return dst; + } + /* else fall through; will hit the irreducible: label */ + } + + /* --------- LITERAL --------- */ + /* 64-bit literals */ + case Iex_Const: { + ULong u = 0; + HReg dst = newVRegI(env); + switch (e->Iex.Const.con->tag) { + case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; + case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; + case Ico_U16: u = e->Iex.Const.con->Ico.U16; break; + case Ico_U8: u = e->Iex.Const.con->Ico.U8; break; + default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)"); + } + addInstr(env, ARM64Instr_Imm64(dst, u)); + return dst; + } + + /* --------- MULTIPLEX --------- */ + case Iex_ITE: { + /* ITE(ccexpr, iftrue, iffalse) */ + if (ty == Ity_I64 || ty == Ity_I32) { + ARM64CondCode cc; + HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); + HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); + HReg dst = newVRegI(env); + cc = iselCondCode(env, e->Iex.ITE.cond); + addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc)); + return dst; + } + break; + } + + default: + break; + } /* switch (e->tag) */ + + /* We get here if no pattern matched. */ + irreducible: + ppIRExpr(e); + vpanic("iselIntExpr_R: cannot reduce tree"); +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Integer expressions (128 bit) ---*/ +/*---------------------------------------------------------*/ + +/* Compute a 128-bit value into a register pair, which is returned as + the first two parameters. As with iselIntExpr_R, these may be + either real or virtual regs; in any case they must not be changed + by subsequent code emitted by the caller. */ + +static void iselInt128Expr ( HReg* rHi, HReg* rLo, + ISelEnv* env, IRExpr* e ) +{ + iselInt128Expr_wrk(rHi, rLo, env, e); +# if 0 + vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); +# endif + vassert(hregClass(*rHi) == HRcInt64); + vassert(hregIsVirtual(*rHi)); + vassert(hregClass(*rLo) == HRcInt64); + vassert(hregIsVirtual(*rLo)); +} + +/* DO NOT CALL THIS DIRECTLY ! */ +static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, + ISelEnv* env, IRExpr* e ) +{ + vassert(e); + vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); + + /* --------- BINARY ops --------- */ + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { + /* 64 x 64 -> 128 multiply */ + case Iop_MullU64: + case Iop_MullS64: { + Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg dstLo = newVRegI(env); + HReg dstHi = newVRegI(env); + addInstr(env, ARM64Instr_Mul(dstLo, argL, argR, + ARM64mul_PLAIN)); + addInstr(env, ARM64Instr_Mul(dstHi, argL, argR, + syned ? ARM64mul_SX : ARM64mul_ZX)); + *rHi = dstHi; + *rLo = dstLo; + return; + } + /* 64HLto128(e1,e2) */ + case Iop_64HLto128: + *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); + *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); + return; + default: + break; + } + } /* if (e->tag == Iex_Binop) */ + + ppIRExpr(e); + vpanic("iselInt128Expr(arm64)"); +} + + +//ZZ /* -------------------- 64-bit -------------------- */ +//ZZ +//ZZ /* Compute a 64-bit value into a register pair, which is returned as +//ZZ the first two parameters. 
As with iselIntExpr_R, these may be +//ZZ either real or virtual regs; in any case they must not be changed +//ZZ by subsequent code emitted by the caller. */ +//ZZ +//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ iselInt64Expr_wrk(rHi, rLo, env, e); +//ZZ # if 0 +//ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); +//ZZ # endif +//ZZ vassert(hregClass(*rHi) == HRcInt32); +//ZZ vassert(hregIsVirtual(*rHi)); +//ZZ vassert(hregClass(*rLo) == HRcInt32); +//ZZ vassert(hregIsVirtual(*rLo)); +//ZZ } +//ZZ +//ZZ /* DO NOT CALL THIS DIRECTLY ! */ +//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ vassert(e); +//ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); +//ZZ +//ZZ /* 64-bit literal */ +//ZZ if (e->tag == Iex_Const) { +//ZZ ULong w64 = e->Iex.Const.con->Ico.U64; +//ZZ UInt wHi = toUInt(w64 >> 32); +//ZZ UInt wLo = toUInt(w64); +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ vassert(e->Iex.Const.con->tag == Ico_U64); +//ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi)); +//ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo)); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ /* read 64-bit IRTemp */ +//ZZ if (e->tag == Iex_RdTmp) { +//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ HReg tmp = iselNeon64Expr(env, e); +//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ } else { +//ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); +//ZZ } +//ZZ return; +//ZZ } +//ZZ +//ZZ /* 64-bit load */ +//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { +//ZZ HReg tLo, tHi, rA; +//ZZ vassert(e->Iex.Load.ty == Ity_I64); +//ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr); +//ZZ tHi = newVRegI(env); +//ZZ tLo = newVRegI(env); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, +//ZZ tHi, ARMAMode1_RI(rA, 4))); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, +//ZZ tLo, ARMAMode1_RI(rA, 0))); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ /* 64-bit GET */ +//ZZ if (e->tag == Iex_Get) { +//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0); +//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4); +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4)); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0)); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ /* --------- BINARY ops --------- */ +//ZZ if (e->tag == Iex_Binop) { +//ZZ switch (e->Iex.Binop.op) { +//ZZ +//ZZ /* 32 x 32 -> 64 multiply */ +//ZZ case Iop_MullS32: +//ZZ case Iop_MullU32: { +//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32 +//ZZ ? 
ARMmul_SX : ARMmul_ZX; +//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL)); +//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR)); +//ZZ addInstr(env, ARMInstr_Mul(mop)); +//ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1())); +//ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0())); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ case Iop_Or64: { +//ZZ HReg xLo, xHi, yLo, yHi; +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); +//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi))); +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo))); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ case Iop_Add64: { +//ZZ HReg xLo, xHi, yLo, yHi; +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); +//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo))); +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi))); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ /* 32HLto64(e1,e2) */ +//ZZ case Iop_32HLto64: { +//ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); +//ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); +//ZZ return; +//ZZ } +//ZZ +//ZZ default: +//ZZ break; +//ZZ } +//ZZ } +//ZZ +//ZZ /* --------- UNARY ops --------- */ +//ZZ if (e->tag == Iex_Unop) { +//ZZ switch (e->Iex.Unop.op) { +//ZZ +//ZZ /* ReinterpF64asI64 */ +//ZZ case Iop_ReinterpF64asI64: { +//ZZ HReg dstHi = newVRegI(env); +//ZZ HReg dstLo = newVRegI(env); +//ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo)); +//ZZ *rHi = dstHi; +//ZZ *rLo = dstLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ /* Left64(e) */ +//ZZ case Iop_Left64: { +//ZZ HReg yLo, yHi; +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ HReg zero = newVRegI(env); +//ZZ /* yHi:yLo = arg */ +//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); +//ZZ /* zero = 0 */ +//ZZ addInstr(env, ARMInstr_Imm32(zero, 0)); +//ZZ /* tLo = 0 - yLo, and set carry */ +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS, +//ZZ tLo, zero, ARMRI84_R(yLo))); +//ZZ /* tHi = 0 - yHi - carry */ +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC, +//ZZ tHi, zero, ARMRI84_R(yHi))); +//ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg' +//ZZ back in, so as to give the final result +//ZZ tHi:tLo = arg | -arg. 
*/ +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi))); +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo))); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ /* CmpwNEZ64(e) */ +//ZZ case Iop_CmpwNEZ64: { +//ZZ HReg srcLo, srcHi; +//ZZ HReg tmp1 = newVRegI(env); +//ZZ HReg tmp2 = newVRegI(env); +//ZZ /* srcHi:srcLo = arg */ +//ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); +//ZZ /* tmp1 = srcHi | srcLo */ +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, +//ZZ tmp1, srcHi, ARMRI84_R(srcLo))); +//ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */ +//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1)); +//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, +//ZZ tmp2, tmp2, ARMRI84_R(tmp1))); +//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, +//ZZ tmp2, tmp2, ARMRI5_I5(31))); +//ZZ *rHi = tmp2; +//ZZ *rLo = tmp2; +//ZZ return; +//ZZ } +//ZZ +//ZZ case Iop_1Sto64: { +//ZZ HReg dst = newVRegI(env); +//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); +//ZZ ARMRI5* amt = ARMRI5_I5(31); +//ZZ /* This is really rough. We could do much better here; +//ZZ perhaps mvn{cond} dst, #0 as the second insn? +//ZZ (same applies to 1Sto32) */ +//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); +//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); +//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); +//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); +//ZZ *rHi = dst; +//ZZ *rLo = dst; +//ZZ return; +//ZZ } +//ZZ +//ZZ default: +//ZZ break; +//ZZ } +//ZZ } /* if (e->tag == Iex_Unop) */ +//ZZ +//ZZ /* --------- MULTIPLEX --------- */ +//ZZ if (e->tag == Iex_ITE) { // VFD +//ZZ IRType tyC; +//ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo; +//ZZ ARMCondCode cc; +//ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond); +//ZZ vassert(tyC == Ity_I1); +//ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue); +//ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse); +//ZZ dstHi = newVRegI(env); +//ZZ dstLo = newVRegI(env); +//ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi)); +//ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo)); +//ZZ cc = iselCondCode(env, e->Iex.ITE.cond); +//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi))); +//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo))); +//ZZ *rHi = dstHi; +//ZZ *rLo = dstLo; +//ZZ return; +//ZZ } +//ZZ +//ZZ /* It is convenient sometimes to call iselInt64Expr even when we +//ZZ have NEON support (e.g. in do_helper_call we need 64-bit +//ZZ arguments as 2 x 32 regs). 
*/ +//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { +//ZZ HReg tHi = newVRegI(env); +//ZZ HReg tLo = newVRegI(env); +//ZZ HReg tmp = iselNeon64Expr(env, e); +//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); +//ZZ *rHi = tHi; +//ZZ *rLo = tLo; +//ZZ return ; +//ZZ } +//ZZ +//ZZ ppIRExpr(e); +//ZZ vpanic("iselInt64Expr"); +//ZZ } +//ZZ +//ZZ +//ZZ /*---------------------------------------------------------*/ +//ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/ +//ZZ /*---------------------------------------------------------*/ +//ZZ +//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ HReg r = iselNeon64Expr_wrk( env, e ); +//ZZ vassert(hregClass(r) == HRcFlt64); +//ZZ vassert(hregIsVirtual(r)); +//ZZ return r; +//ZZ } +//ZZ +//ZZ /* DO NOT CALL THIS DIRECTLY */ +//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e ) +//ZZ { +//ZZ IRType ty = typeOfIRExpr(env->type_env, e); +//ZZ MatchInfo mi; +//ZZ vassert(e); +//ZZ vassert(ty == Ity_I64); +//ZZ +//ZZ if (e->tag == Iex_RdTmp) { +//ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp); +//ZZ } +//ZZ +//ZZ if (e->tag == Iex_Const) { +//ZZ HReg rLo, rHi; +//ZZ HReg res = newVRegD(env); +//ZZ iselInt64Expr(&rHi, &rLo, env, e); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ /* 64-bit load */ +//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { +//ZZ HReg res = newVRegD(env); +//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr); +//ZZ vassert(ty == Ity_I64); +//ZZ addInstr(env, ARMInstr_NLdStD(True, res, am)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ /* 64-bit GET */ +//ZZ if (e->tag == Iex_Get) { +//ZZ HReg addr = newVRegI(env); +//ZZ HReg res = newVRegD(env); +//ZZ vassert(ty == Ity_I64); +//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset)); +//ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr))); +//ZZ return res; +//ZZ } +//ZZ +//ZZ /* --------- BINARY ops --------- */ +//ZZ if (e->tag == Iex_Binop) { +//ZZ switch (e->Iex.Binop.op) { +//ZZ +//ZZ /* 32 x 32 -> 64 multiply */ +//ZZ case Iop_MullS32: +//ZZ case Iop_MullU32: { +//ZZ HReg rLo, rHi; +//ZZ HReg res = newVRegD(env); +//ZZ iselInt64Expr(&rHi, &rLo, env, e); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_And64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND, +//ZZ res, argL, argR, 4, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Or64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, +//ZZ res, argL, argR, 4, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Xor64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, +//ZZ res, argL, argR, 4, False)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ /* 32HLto64(e1,e2) */ +//ZZ case Iop_32HLto64: { +//ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); +//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); +//ZZ HReg res = newVRegD(env); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_Add8x8: +//ZZ case Iop_Add16x4: +//ZZ case 
Iop_Add32x2: +//ZZ case Iop_Add64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Add8x8: size = 0; break; +//ZZ case Iop_Add16x4: size = 1; break; +//ZZ case Iop_Add32x2: size = 2; break; +//ZZ case Iop_Add64: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Add32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Recps32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Rsqrts32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ // These 6 verified 18 Apr 2013 +//ZZ case Iop_InterleaveHI32x2: +//ZZ case Iop_InterleaveLO32x2: +//ZZ case Iop_InterleaveOddLanes8x8: +//ZZ case Iop_InterleaveEvenLanes8x8: +//ZZ case Iop_InterleaveOddLanes16x4: +//ZZ case Iop_InterleaveEvenLanes16x4: { +//ZZ HReg rD = newVRegD(env); +//ZZ HReg rM = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ Bool resRd; // is the result in rD or rM ? +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break; +//ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break; +//ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break; +//ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break; +//ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break; +//ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); +//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False)); +//ZZ return resRd ? rD : rM; +//ZZ } +//ZZ +//ZZ // These 4 verified 18 Apr 2013 +//ZZ case Iop_InterleaveHI8x8: +//ZZ case Iop_InterleaveLO8x8: +//ZZ case Iop_InterleaveHI16x4: +//ZZ case Iop_InterleaveLO16x4: { +//ZZ HReg rD = newVRegD(env); +//ZZ HReg rM = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ Bool resRd; // is the result in rD or rM ? 
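/* ---- Editor's illustrative sketch (not part of the patch) ---- */
/* Every disabled ARM32 case in this region maps the IR lane width onto
   the backend's "size" field with the same pattern: 8-bit lanes -> 0,
   16 -> 1, 32 -> 2, 64 -> 3 (i.e. log2(laneBits) - 3).  A standalone
   helper showing just that mapping, for reference while reading the
   switches below; 'laneBitsToSize' is hypothetical and does not exist
   in this file. */
#include <assert.h>

static unsigned laneBitsToSize ( unsigned laneBits )
{
   unsigned size = 0;
   while ((8u << size) < laneBits && size < 3)
      size++;
   assert((8u << size) == laneBits);   /* only 8, 16, 32, 64 are valid */
   return size;                        /* 8->0, 16->1, 32->2, 64->3 */
}
/* ---- end sketch ---- */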
+//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break; +//ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break; +//ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break; +//ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); +//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False)); +//ZZ return resRd ? rD : rM; +//ZZ } +//ZZ +//ZZ // These 4 verified 18 Apr 2013 +//ZZ case Iop_CatOddLanes8x8: +//ZZ case Iop_CatEvenLanes8x8: +//ZZ case Iop_CatOddLanes16x4: +//ZZ case Iop_CatEvenLanes16x4: { +//ZZ HReg rD = newVRegD(env); +//ZZ HReg rM = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ Bool resRd; // is the result in rD or rM ? +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break; +//ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break; +//ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break; +//ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); +//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False)); +//ZZ return resRd ? rD : rM; +//ZZ } +//ZZ +//ZZ case Iop_QAdd8Ux8: +//ZZ case Iop_QAdd16Ux4: +//ZZ case Iop_QAdd32Ux2: +//ZZ case Iop_QAdd64Ux1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QAdd8Ux8: size = 0; break; +//ZZ case Iop_QAdd16Ux4: size = 1; break; +//ZZ case Iop_QAdd32Ux2: size = 2; break; +//ZZ case Iop_QAdd64Ux1: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QAdd8Sx8: +//ZZ case Iop_QAdd16Sx4: +//ZZ case Iop_QAdd32Sx2: +//ZZ case Iop_QAdd64Sx1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QAdd8Sx8: size = 0; break; +//ZZ case Iop_QAdd16Sx4: size = 1; break; +//ZZ case Iop_QAdd32Sx2: size = 2; break; +//ZZ case Iop_QAdd64Sx1: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sub8x8: +//ZZ case Iop_Sub16x4: +//ZZ case Iop_Sub32x2: +//ZZ case Iop_Sub64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Sub8x8: size = 0; break; +//ZZ case Iop_Sub16x4: size = 1; break; +//ZZ case Iop_Sub32x2: size = 2; break; +//ZZ case Iop_Sub64: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sub32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ 
HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSub8Ux8: +//ZZ case Iop_QSub16Ux4: +//ZZ case Iop_QSub32Ux2: +//ZZ case Iop_QSub64Ux1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSub8Ux8: size = 0; break; +//ZZ case Iop_QSub16Ux4: size = 1; break; +//ZZ case Iop_QSub32Ux2: size = 2; break; +//ZZ case Iop_QSub64Ux1: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSub8Sx8: +//ZZ case Iop_QSub16Sx4: +//ZZ case Iop_QSub32Sx2: +//ZZ case Iop_QSub64Sx1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSub8Sx8: size = 0; break; +//ZZ case Iop_QSub16Sx4: size = 1; break; +//ZZ case Iop_QSub32Sx2: size = 2; break; +//ZZ case Iop_QSub64Sx1: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Max8Ux8: +//ZZ case Iop_Max16Ux4: +//ZZ case Iop_Max32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Max8Ux8: size = 0; break; +//ZZ case Iop_Max16Ux4: size = 1; break; +//ZZ case Iop_Max32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Max8Sx8: +//ZZ case Iop_Max16Sx4: +//ZZ case Iop_Max32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Max8Sx8: size = 0; break; +//ZZ case Iop_Max16Sx4: size = 1; break; +//ZZ case Iop_Max32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Min8Ux8: +//ZZ case Iop_Min16Ux4: +//ZZ case Iop_Min32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Min8Ux8: size = 0; break; +//ZZ case Iop_Min16Ux4: size = 1; break; +//ZZ case Iop_Min32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Min8Sx8: +//ZZ case Iop_Min16Sx4: +//ZZ case Iop_Min32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Min8Sx8: size = 0; break; +//ZZ case Iop_Min16Sx4: size = 1; break; +//ZZ case Iop_Min32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, 
ARMInstr_NBinary(ARMneon_VMINS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sar8x8: +//ZZ case Iop_Sar16x4: +//ZZ case Iop_Sar32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ HReg argR2 = newVRegD(env); +//ZZ HReg zero = newVRegD(env); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Sar8x8: size = 0; break; +//ZZ case Iop_Sar16x4: size = 1; break; +//ZZ case Iop_Sar32x2: size = 2; break; +//ZZ case Iop_Sar64: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ argR2, zero, argR, size, False)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, +//ZZ res, argL, argR2, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sal8x8: +//ZZ case Iop_Sal16x4: +//ZZ case Iop_Sal32x2: +//ZZ case Iop_Sal64x1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Sal8x8: size = 0; break; +//ZZ case Iop_Sal16x4: size = 1; break; +//ZZ case Iop_Sal32x2: size = 2; break; +//ZZ case Iop_Sal64x1: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Shr8x8: +//ZZ case Iop_Shr16x4: +//ZZ case Iop_Shr32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ HReg argR2 = newVRegD(env); +//ZZ HReg zero = newVRegD(env); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Shr8x8: size = 0; break; +//ZZ case Iop_Shr16x4: size = 1; break; +//ZZ case Iop_Shr32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ argR2, zero, argR, size, False)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ res, argL, argR2, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Shl8x8: +//ZZ case Iop_Shl16x4: +//ZZ case Iop_Shl32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Shl8x8: size = 0; break; +//ZZ case Iop_Shl16x4: size = 1; break; +//ZZ case Iop_Shl32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QShl8x8: +//ZZ case Iop_QShl16x4: +//ZZ case Iop_QShl32x2: +//ZZ case Iop_QShl64x1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QShl8x8: size = 0; break; +//ZZ case Iop_QShl16x4: size = 1; break; +//ZZ case Iop_QShl32x2: size = 2; break; +//ZZ case Iop_QShl64x1: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSal8x8: +//ZZ case Iop_QSal16x4: +//ZZ case Iop_QSal32x2: +//ZZ case Iop_QSal64x1: { +//ZZ HReg res = 
newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSal8x8: size = 0; break; +//ZZ case Iop_QSal16x4: size = 1; break; +//ZZ case Iop_QSal32x2: size = 2; break; +//ZZ case Iop_QSal64x1: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QShlN8x8: +//ZZ case Iop_QShlN16x4: +//ZZ case Iop_QShlN32x2: +//ZZ case Iop_QShlN64x1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ UInt size, imm; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QShlN8x8: size = 8 | imm; break; +//ZZ case Iop_QShlN16x4: size = 16 | imm; break; +//ZZ case Iop_QShlN32x2: size = 32 | imm; break; +//ZZ case Iop_QShlN64x1: size = 64 | imm; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, +//ZZ res, argL, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QShlN8Sx8: +//ZZ case Iop_QShlN16Sx4: +//ZZ case Iop_QShlN32Sx2: +//ZZ case Iop_QShlN64Sx1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ UInt size, imm; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QShlN8Sx8: size = 8 | imm; break; +//ZZ case Iop_QShlN16Sx4: size = 16 | imm; break; +//ZZ case Iop_QShlN32Sx2: size = 32 | imm; break; +//ZZ case Iop_QShlN64Sx1: size = 64 | imm; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, +//ZZ res, argL, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSalN8x8: +//ZZ case Iop_QSalN16x4: +//ZZ case Iop_QSalN32x2: +//ZZ case Iop_QSalN64x1: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ UInt size, imm; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSalN8x8: size = 8 | imm; break; +//ZZ case Iop_QSalN16x4: size = 16 | imm; break; +//ZZ case Iop_QSalN32x2: size = 32 | imm; break; +//ZZ case Iop_QSalN64x1: size = 64 | imm; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, +//ZZ res, argL, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_ShrN8x8: +//ZZ case Iop_ShrN16x4: +//ZZ case Iop_ShrN32x2: +//ZZ case Iop_Shr64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg tmp = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); +//ZZ HReg argR2 = newVRegI(env); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_ShrN8x8: size = 0; break; +//ZZ case Iop_ShrN16x4: size = 1; break; 
+//ZZ case Iop_ShrN32x2: size = 2; break; +//ZZ case Iop_Shr64: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ res, argL, tmp, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_ShlN8x8: +//ZZ case Iop_ShlN16x4: +//ZZ case Iop_ShlN32x2: +//ZZ case Iop_Shl64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg tmp = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ /* special-case Shl64(x, imm8) since the Neon front +//ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}. */ +//ZZ if (e->Iex.Binop.op == Iop_Shl64 +//ZZ && e->Iex.Binop.arg2->tag == Iex_Const) { +//ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); +//ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ if (nshift >= 1 && nshift <= 63) { +//ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift)); +//ZZ return res; +//ZZ } +//ZZ /* else fall through to general case */ +//ZZ } +//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_ShlN8x8: size = 0; break; +//ZZ case Iop_ShlN16x4: size = 1; break; +//ZZ case Iop_ShlN32x2: size = 2; break; +//ZZ case Iop_Shl64: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, +//ZZ tmp, argR, 0, False)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ res, argL, tmp, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_SarN8x8: +//ZZ case Iop_SarN16x4: +//ZZ case Iop_SarN32x2: +//ZZ case Iop_Sar64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg tmp = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); +//ZZ HReg argR2 = newVRegI(env); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_SarN8x8: size = 0; break; +//ZZ case Iop_SarN16x4: size = 1; break; +//ZZ case Iop_SarN32x2: size = 2; break; +//ZZ case Iop_Sar64: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, +//ZZ res, argL, tmp, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpGT8Ux8: +//ZZ case Iop_CmpGT16Ux4: +//ZZ case Iop_CmpGT32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CmpGT8Ux8: size = 0; break; +//ZZ case Iop_CmpGT16Ux4: size = 1; break; +//ZZ case Iop_CmpGT32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpGT8Sx8: +//ZZ case Iop_CmpGT16Sx4: +//ZZ case Iop_CmpGT32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CmpGT8Sx8: size = 0; break; +//ZZ case Iop_CmpGT16Sx4: size = 1; break; +//ZZ case Iop_CmpGT32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpEQ8x8: +//ZZ case 
Iop_CmpEQ16x4: +//ZZ case Iop_CmpEQ32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CmpEQ8x8: size = 0; break; +//ZZ case Iop_CmpEQ16x4: size = 1; break; +//ZZ case Iop_CmpEQ32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Mul8x8: +//ZZ case Iop_Mul16x4: +//ZZ case Iop_Mul32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Mul8x8: size = 0; break; +//ZZ case Iop_Mul16x4: size = 1; break; +//ZZ case Iop_Mul32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Mul32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QDMulHi16Sx4: +//ZZ case Iop_QDMulHi32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QDMulHi16Sx4: size = 1; break; +//ZZ case Iop_QDMulHi32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_QRDMulHi16Sx4: +//ZZ case Iop_QRDMulHi32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QRDMulHi16Sx4: size = 1; break; +//ZZ case Iop_QRDMulHi32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_PwAdd8x8: +//ZZ case Iop_PwAdd16x4: +//ZZ case Iop_PwAdd32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwAdd8x8: size = 0; break; +//ZZ case Iop_PwAdd16x4: size = 1; break; +//ZZ case Iop_PwAdd32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwAdd32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMin8Ux8: +//ZZ case Iop_PwMin16Ux4: +//ZZ case Iop_PwMin32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ 
switch(e->Iex.Binop.op) { +//ZZ case Iop_PwMin8Ux8: size = 0; break; +//ZZ case Iop_PwMin16Ux4: size = 1; break; +//ZZ case Iop_PwMin32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMin8Sx8: +//ZZ case Iop_PwMin16Sx4: +//ZZ case Iop_PwMin32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwMin8Sx8: size = 0; break; +//ZZ case Iop_PwMin16Sx4: size = 1; break; +//ZZ case Iop_PwMin32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMax8Ux8: +//ZZ case Iop_PwMax16Ux4: +//ZZ case Iop_PwMax32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwMax8Ux8: size = 0; break; +//ZZ case Iop_PwMax16Ux4: size = 1; break; +//ZZ case Iop_PwMax32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMax8Sx8: +//ZZ case Iop_PwMax16Sx4: +//ZZ case Iop_PwMax32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwMax8Sx8: size = 0; break; +//ZZ case Iop_PwMax16Sx4: size = 1; break; +//ZZ case Iop_PwMax32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Perm8x8: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL, +//ZZ res, argL, argR, 0, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PolynomialMul8x8: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, +//ZZ res, argL, argR, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Max32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Min32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMax32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMin32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg 
argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpGT32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpGE32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpEQ32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_F32ToFixed32Ux2_RZ: +//ZZ case Iop_F32ToFixed32Sx2_RZ: +//ZZ case Iop_Fixed32UToF32x2_RN: +//ZZ case Iop_Fixed32SToF32x2_RN: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ ARMNeonUnOp op; +//ZZ UInt imm6; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant " +//ZZ "second argument less than 33 only\n"); +//ZZ } +//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ vassert(imm6 <= 32 && imm6 > 0); +//ZZ imm6 = 64 - imm6; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break; +//ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break; +//ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break; +//ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False)); +//ZZ return res; +//ZZ } +//ZZ /* +//ZZ FIXME: is this here or not? 
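/* ---- Editor's illustrative sketch (not part of the patch) ---- */
/* A scalar model of the F32 <-> Fixed32 lane conversions selected just
   above, under the usual reading of the IR names: "Fixed32 ... with n
   fractional bits", _RZ = truncate toward zero, _RN = round to nearest.
   'fbits' plays the role of the constant second argument, which the
   disabled ARM32 code checks is in 1..32 and encodes as 64 - fbits.
   Real hardware saturates on overflow; this sketch ignores out-of-range
   inputs, and both helper names are hypothetical. */
#include <math.h>
#include <stdint.h>

static int32_t f32ToFixed32S_RZ ( float x, int fbits )
{
   /* scale by 2^fbits, then truncate toward zero */
   return (int32_t)truncf(ldexpf(x, fbits));
}

static float fixed32SToF32_RN ( int32_t x, int fbits )
{
   /* int->float conversion rounds to nearest; the 2^-fbits scaling is exact */
   return ldexpf((float)x, -fbits);
}
/* ---- end sketch ---- */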
+//ZZ case Iop_VDup8x8: +//ZZ case Iop_VDup16x4: +//ZZ case Iop_VDup32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ UInt index; +//ZZ UInt imm4; +//ZZ UInt size = 0; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM supports Iop_VDup with constant " +//ZZ "second argument less than 16 only\n"); +//ZZ } +//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break; +//ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break; +//ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ if (imm4 >= 16) { +//ZZ vpanic("ARM supports Iop_VDup with constant " +//ZZ "second argument less than 16 only\n"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, +//ZZ res, argL, imm4, False)); +//ZZ return res; +//ZZ } +//ZZ */ +//ZZ default: +//ZZ break; +//ZZ } +//ZZ } +//ZZ +//ZZ /* --------- UNARY ops --------- */ +//ZZ if (e->tag == Iex_Unop) { +//ZZ switch (e->Iex.Unop.op) { +//ZZ +//ZZ /* 32Uto64 */ +//ZZ case Iop_32Uto64: { +//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ HReg rHi = newVRegI(env); +//ZZ HReg res = newVRegD(env); +//ZZ addInstr(env, ARMInstr_Imm32(rHi, 0)); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ /* 32Sto64 */ +//ZZ case Iop_32Sto64: { +//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ HReg rHi = newVRegI(env); +//ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo)); +//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31))); +//ZZ HReg res = newVRegD(env); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ /* The next 3 are pass-throughs */ +//ZZ /* ReinterpF64asI64 */ +//ZZ case Iop_ReinterpF64asI64: +//ZZ /* Left64(e) */ +//ZZ case Iop_Left64: +//ZZ /* CmpwNEZ64(e) */ +//ZZ case Iop_1Sto64: { +//ZZ HReg rLo, rHi; +//ZZ HReg res = newVRegD(env); +//ZZ iselInt64Expr(&rHi, &rLo, env, e); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_Not64: { +//ZZ DECLARE_PATTERN(p_veqz_8x8); +//ZZ DECLARE_PATTERN(p_veqz_16x4); +//ZZ DECLARE_PATTERN(p_veqz_32x2); +//ZZ DECLARE_PATTERN(p_vcge_8sx8); +//ZZ DECLARE_PATTERN(p_vcge_16sx4); +//ZZ DECLARE_PATTERN(p_vcge_32sx2); +//ZZ DECLARE_PATTERN(p_vcge_8ux8); +//ZZ DECLARE_PATTERN(p_vcge_16ux4); +//ZZ DECLARE_PATTERN(p_vcge_32ux2); +//ZZ DEFINE_PATTERN(p_veqz_8x8, +//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0)))); +//ZZ DEFINE_PATTERN(p_veqz_16x4, +//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0)))); +//ZZ DEFINE_PATTERN(p_veqz_32x2, +//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_8sx8, +//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_16sx4, +//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_32sx2, +//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_8ux8, +//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_16ux4, +//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_32ux2, +//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0)))); +//ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, 
mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, +//ZZ res, argL, argR, 0, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, +//ZZ res, argL, argR, 1, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, +//ZZ res, argL, argR, 0, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, +//ZZ res, argL, argR, 1, False)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, +//ZZ res, argL, argR, 2, False)); +//ZZ return res; +//ZZ } else { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False)); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ case Iop_Dup8x8: +//ZZ case Iop_Dup16x4: +//ZZ case Iop_Dup32x2: { +//ZZ HReg res, arg; +//ZZ UInt size; +//ZZ DECLARE_PATTERN(p_vdup_8x8); +//ZZ DECLARE_PATTERN(p_vdup_16x4); +//ZZ DECLARE_PATTERN(p_vdup_32x2); +//ZZ DEFINE_PATTERN(p_vdup_8x8, +//ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1)))); +//ZZ DEFINE_PATTERN(p_vdup_16x4, +//ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1)))); +//ZZ DEFINE_PATTERN(p_vdup_32x2, +//ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1)))); +//ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) { +//ZZ UInt index; +//ZZ UInt imm4; +//ZZ if (mi.bindee[1]->tag == Iex_Const && +//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { +//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; +//ZZ imm4 = (index << 1) + 1; +//ZZ if (index < 8) { +//ZZ res = newVRegD(env); +//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnaryS( +//ZZ ARMneon_VDUP, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, 
arg, index), +//ZZ imm4, False +//ZZ )); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) { +//ZZ UInt index; +//ZZ UInt imm4; +//ZZ if (mi.bindee[1]->tag == Iex_Const && +//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { +//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; +//ZZ imm4 = (index << 2) + 2; +//ZZ if (index < 4) { +//ZZ res = newVRegD(env); +//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnaryS( +//ZZ ARMneon_VDUP, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), +//ZZ imm4, False +//ZZ )); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) { +//ZZ UInt index; +//ZZ UInt imm4; +//ZZ if (mi.bindee[1]->tag == Iex_Const && +//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { +//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; +//ZZ imm4 = (index << 3) + 4; +//ZZ if (index < 2) { +//ZZ res = newVRegD(env); +//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnaryS( +//ZZ ARMneon_VDUP, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), +//ZZ imm4, False +//ZZ )); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ } +//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ res = newVRegD(env); +//ZZ switch (e->Iex.Unop.op) { +//ZZ case Iop_Dup8x8: size = 0; break; +//ZZ case Iop_Dup16x4: size = 1; break; +//ZZ case Iop_Dup32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Abs8x8: +//ZZ case Iop_Abs16x4: +//ZZ case Iop_Abs32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Abs8x8: size = 0; break; +//ZZ case Iop_Abs16x4: size = 1; break; +//ZZ case Iop_Abs32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Reverse64_8x8: +//ZZ case Iop_Reverse64_16x4: +//ZZ case Iop_Reverse64_32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Reverse64_8x8: size = 0; break; +//ZZ case Iop_Reverse64_16x4: size = 1; break; +//ZZ case Iop_Reverse64_32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Reverse32_8x8: +//ZZ case Iop_Reverse32_16x4: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Reverse32_8x8: size = 0; break; +//ZZ case Iop_Reverse32_16x4: size = 1; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Reverse16_8x8: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpwNEZ64: { +//ZZ HReg x_lsh = newVRegD(env); +//ZZ HReg x_rsh = newVRegD(env); +//ZZ HReg lsh_amt = newVRegD(env); +//ZZ HReg rsh_amt = newVRegD(env); +//ZZ HReg zero = newVRegD(env); +//ZZ HReg tmp = newVRegD(env); +//ZZ HReg tmp2 = newVRegD(env); +//ZZ HReg res = 
newVRegD(env); +//ZZ HReg x = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False)); +//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); +//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ rsh_amt, zero, lsh_amt, 2, False)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ x_lsh, x, lsh_amt, 3, False)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ x_rsh, x, rsh_amt, 3, False)); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, +//ZZ tmp, x_lsh, x_rsh, 0, False)); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, +//ZZ res, tmp, x, 0, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpNEZ8x8: +//ZZ case Iop_CmpNEZ16x4: +//ZZ case Iop_CmpNEZ32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg tmp = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size; +//ZZ switch (e->Iex.Unop.op) { +//ZZ case Iop_CmpNEZ8x8: size = 0; break; +//ZZ case Iop_CmpNEZ16x4: size = 1; break; +//ZZ case Iop_CmpNEZ32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_NarrowUn16to8x8: +//ZZ case Iop_NarrowUn32to16x4: +//ZZ case Iop_NarrowUn64to32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_NarrowUn16to8x8: size = 0; break; +//ZZ case Iop_NarrowUn32to16x4: size = 1; break; +//ZZ case Iop_NarrowUn64to32x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QNarrowUn16Sto8Sx8: +//ZZ case Iop_QNarrowUn32Sto16Sx4: +//ZZ case Iop_QNarrowUn64Sto32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break; +//ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break; +//ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QNarrowUn16Sto8Ux8: +//ZZ case Iop_QNarrowUn32Sto16Ux4: +//ZZ case Iop_QNarrowUn64Sto32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break; +//ZZ case Iop_QNarrowUn32Sto16Ux4: size = 1; break; +//ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QNarrowUn16Uto8Ux8: +//ZZ case Iop_QNarrowUn32Uto16Ux4: +//ZZ case Iop_QNarrowUn64Uto32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break; +//ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break; +//ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU, +//ZZ 
res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwAddL8Sx8: +//ZZ case Iop_PwAddL16Sx4: +//ZZ case Iop_PwAddL32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwAddL8Sx8: size = 0; break; +//ZZ case Iop_PwAddL16Sx4: size = 1; break; +//ZZ case Iop_PwAddL32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwAddL8Ux8: +//ZZ case Iop_PwAddL16Ux4: +//ZZ case Iop_PwAddL32Ux2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwAddL8Ux8: size = 0; break; +//ZZ case Iop_PwAddL16Ux4: size = 1; break; +//ZZ case Iop_PwAddL32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Cnt8x8: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Clz8Sx8: +//ZZ case Iop_Clz16Sx4: +//ZZ case Iop_Clz32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Clz8Sx8: size = 0; break; +//ZZ case Iop_Clz16Sx4: size = 1; break; +//ZZ case Iop_Clz32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Cls8Sx8: +//ZZ case Iop_Cls16Sx4: +//ZZ case Iop_Cls32Sx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Cls8Sx8: size = 0; break; +//ZZ case Iop_Cls16Sx4: size = 1; break; +//ZZ case Iop_Cls32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, +//ZZ res, arg, size, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_FtoI32Sx2_RZ: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, +//ZZ res, arg, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_FtoI32Ux2_RZ: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, +//ZZ res, arg, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_I32StoFx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, +//ZZ res, arg, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_I32UtoFx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, +//ZZ res, arg, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_F32toF16x4: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16, +//ZZ res, arg, 2, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Recip32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ addInstr(env, 
ARMInstr_NUnary(ARMneon_VRECIPF, +//ZZ res, argL, 0, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Recip32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, +//ZZ res, argL, 0, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Abs32Fx2: { +//ZZ DECLARE_PATTERN(p_vabd_32fx2); +//ZZ DEFINE_PATTERN(p_vabd_32fx2, +//ZZ unop(Iop_Abs32Fx2, +//ZZ binop(Iop_Sub32Fx2, +//ZZ bind(0), +//ZZ bind(1)))); +//ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP, +//ZZ res, argL, argR, 0, False)); +//ZZ return res; +//ZZ } else { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, +//ZZ res, arg, 0, False)); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ case Iop_Rsqrte32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, +//ZZ res, arg, 0, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Rsqrte32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, +//ZZ res, arg, 0, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Neg32Fx2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF, +//ZZ res, arg, 0, False)); +//ZZ return res; +//ZZ } +//ZZ default: +//ZZ break; +//ZZ } +//ZZ } /* if (e->tag == Iex_Unop) */ +//ZZ +//ZZ if (e->tag == Iex_Triop) { +//ZZ IRTriop *triop = e->Iex.Triop.details; +//ZZ +//ZZ switch (triop->op) { +//ZZ case Iop_Extract64: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg argL = iselNeon64Expr(env, triop->arg1); +//ZZ HReg argR = iselNeon64Expr(env, triop->arg2); +//ZZ UInt imm4; +//ZZ if (triop->arg3->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { +//ZZ vpanic("ARM target supports Iop_Extract64 with constant " +//ZZ "third argument less than 16 only\n"); +//ZZ } +//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8; +//ZZ if (imm4 >= 8) { +//ZZ vpanic("ARM target supports Iop_Extract64 with constant " +//ZZ "third argument less than 16 only\n"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, +//ZZ res, argL, argR, imm4, False)); +//ZZ return res; +//ZZ } +//ZZ case Iop_SetElem8x8: +//ZZ case Iop_SetElem16x4: +//ZZ case Iop_SetElem32x2: { +//ZZ HReg res = newVRegD(env); +//ZZ HReg dreg = iselNeon64Expr(env, triop->arg1); +//ZZ HReg arg = iselIntExpr_R(env, triop->arg3); +//ZZ UInt index, size; +//ZZ if (triop->arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) { +//ZZ vpanic("ARM target supports SetElem with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ index = triop->arg2->Iex.Const.con->Ico.U8; +//ZZ switch (triop->op) { +//ZZ case Iop_SetElem8x8: vassert(index < 8); size = 0; break; +//ZZ case Iop_SetElem16x4: vassert(index < 4); size = 1; break; +//ZZ case Iop_SetElem32x2: vassert(index < 2); size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False)); +//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM, +//ZZ mkARMNRS(ARMNRS_Scalar, res, index), +//ZZ mkARMNRS(ARMNRS_Reg, arg, 0), +//ZZ size, False)); +//ZZ 
return res; +//ZZ } +//ZZ default: +//ZZ break; +//ZZ } +//ZZ } +//ZZ +//ZZ /* --------- MULTIPLEX --------- */ +//ZZ if (e->tag == Iex_ITE) { // VFD +//ZZ HReg rLo, rHi; +//ZZ HReg res = newVRegD(env); +//ZZ iselInt64Expr(&rHi, &rLo, env, e); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ ppIRExpr(e); +//ZZ vpanic("iselNeon64Expr"); +//ZZ } + + +/*---------------------------------------------------------*/ +/*--- ISEL: Vector (NEON) expressions (128 bit) ---*/ +/*---------------------------------------------------------*/ + +static HReg iselV128Expr ( ISelEnv* env, IRExpr* e ) +{ + HReg r = iselV128Expr_wrk( env, e ); + vassert(hregClass(r) == HRcVec128); + vassert(hregIsVirtual(r)); + return r; +} + +/* DO NOT CALL THIS DIRECTLY */ +static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(env->type_env, e); + vassert(e); + vassert(ty == Ity_V128); + + if (e->tag == Iex_RdTmp) { + return lookupIRTemp(env, e->Iex.RdTmp.tmp); + } + + if (e->tag == Iex_Const) { + /* Only a very limited range of constants is handled. */ + vassert(e->Iex.Const.con->tag == Ico_V128); + UShort con = e->Iex.Const.con->Ico.V128; + if (con == 0x0000) { + HReg res = newVRegV(env); + addInstr(env, ARM64Instr_VImmQ(res, con)); + return res; + } + /* Unhandled */ + goto v128_expr_bad; + } + + if (e->tag == Iex_Load) { + HReg res = newVRegV(env); + HReg rN = iselIntExpr_R(env, e->Iex.Load.addr); + vassert(ty == Ity_V128); + addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN)); + return res; + } + + if (e->tag == Iex_Get) { + UInt offs = (UInt)e->Iex.Get.offset; + if (offs < (1<<12)) { + HReg addr = mk_baseblock_128bit_access_addr(env, offs); + HReg res = newVRegV(env); + vassert(ty == Ity_V128); + addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr)); + return res; + } + goto v128_expr_bad; + } + + if (e->tag == Iex_Unop) { + + /* Iop_ZeroHIXXofV128 cases */ + UShort imm16 = 0; + switch (e->Iex.Unop.op) { + case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break; + case Iop_ZeroHI96ofV128: imm16 = 0x000F; break; + case Iop_ZeroHI112ofV128: imm16 = 0x0003; break; + case Iop_ZeroHI120ofV128: imm16 = 0x0001; break; + default: break; + } + if (imm16 != 0) { + HReg src = iselV128Expr(env, e->Iex.Unop.arg); + HReg imm = newVRegV(env); + HReg res = newVRegV(env); + addInstr(env, ARM64Instr_VImmQ(imm, imm16)); + addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm)); + return res; + } + + /* Other cases */ + switch (e->Iex.Unop.op) { + case Iop_NotV128: + case Iop_Abs64Fx2: + case Iop_Abs32Fx4: + case Iop_Neg64Fx2: + case Iop_Neg32Fx4: { + HReg res = newVRegV(env); + HReg arg = iselV128Expr(env, e->Iex.Unop.arg); + ARM64VecUnaryOp op = ARM64vecu_INVALID; + switch (e->Iex.Unop.op) { + case Iop_NotV128: op = ARM64vecu_NOT; break; + case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break; + case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break; + case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break; + case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break; + default: vassert(0); + } + addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); + return res; + } + case Iop_CmpNEZ8x16: + case Iop_CmpNEZ16x8: + case Iop_CmpNEZ32x4: + case Iop_CmpNEZ64x2: { + HReg arg = iselV128Expr(env, e->Iex.Unop.arg); + HReg zero = newVRegV(env); + HReg res = newVRegV(env); + ARM64VecBinOp cmp = ARM64vecb_INVALID; + switch (e->Iex.Unop.op) { + case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break; + case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break; + case Iop_CmpNEZ16x8: cmp = 
ARM64vecb_CMEQ16x8; break; + case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break; + default: vassert(0); + } + // This is pretty feeble. Better: use CMP against zero + // and avoid the extra instruction and extra register. + addInstr(env, ARM64Instr_VImmQ(zero, 0x0000)); + addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero)); + addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res)); + return res; + } + +//ZZ case Iop_NotV128: { +//ZZ DECLARE_PATTERN(p_veqz_8x16); +//ZZ DECLARE_PATTERN(p_veqz_16x8); +//ZZ DECLARE_PATTERN(p_veqz_32x4); +//ZZ DECLARE_PATTERN(p_vcge_8sx16); +//ZZ DECLARE_PATTERN(p_vcge_16sx8); +//ZZ DECLARE_PATTERN(p_vcge_32sx4); +//ZZ DECLARE_PATTERN(p_vcge_8ux16); +//ZZ DECLARE_PATTERN(p_vcge_16ux8); +//ZZ DECLARE_PATTERN(p_vcge_32ux4); +//ZZ DEFINE_PATTERN(p_veqz_8x16, +//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0)))); +//ZZ DEFINE_PATTERN(p_veqz_16x8, +//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0)))); +//ZZ DEFINE_PATTERN(p_veqz_32x4, +//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_8sx16, +//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_16sx8, +//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_32sx4, +//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_8ux16, +//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_16ux8, +//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0)))); +//ZZ DEFINE_PATTERN(p_vcge_32ux4, +//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0)))); +//ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, +//ZZ res, argL, argR, 0, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, +//ZZ res, argL, argR, 1, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, +//ZZ res, argL, argR, 0, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) { +//ZZ HReg res = 
newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, +//ZZ res, argL, argR, 1, True)); +//ZZ return res; +//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); +//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } else { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True)); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ case Iop_Dup8x16: +//ZZ case Iop_Dup16x8: +//ZZ case Iop_Dup32x4: { +//ZZ HReg res, arg; +//ZZ UInt size; +//ZZ DECLARE_PATTERN(p_vdup_8x16); +//ZZ DECLARE_PATTERN(p_vdup_16x8); +//ZZ DECLARE_PATTERN(p_vdup_32x4); +//ZZ DEFINE_PATTERN(p_vdup_8x16, +//ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1)))); +//ZZ DEFINE_PATTERN(p_vdup_16x8, +//ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1)))); +//ZZ DEFINE_PATTERN(p_vdup_32x4, +//ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1)))); +//ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) { +//ZZ UInt index; +//ZZ UInt imm4; +//ZZ if (mi.bindee[1]->tag == Iex_Const && +//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { +//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; +//ZZ imm4 = (index << 1) + 1; +//ZZ if (index < 8) { +//ZZ res = newVRegV(env); +//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnaryS( +//ZZ ARMneon_VDUP, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), +//ZZ imm4, True +//ZZ )); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) { +//ZZ UInt index; +//ZZ UInt imm4; +//ZZ if (mi.bindee[1]->tag == Iex_Const && +//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { +//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; +//ZZ imm4 = (index << 2) + 2; +//ZZ if (index < 4) { +//ZZ res = newVRegV(env); +//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnaryS( +//ZZ ARMneon_VDUP, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), +//ZZ imm4, True +//ZZ )); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) { +//ZZ UInt index; +//ZZ UInt imm4; +//ZZ if (mi.bindee[1]->tag == Iex_Const && +//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { +//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; +//ZZ imm4 = (index << 3) + 4; +//ZZ if (index < 2) { +//ZZ res = newVRegV(env); +//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); +//ZZ addInstr(env, ARMInstr_NUnaryS( +//ZZ ARMneon_VDUP, +//ZZ mkARMNRS(ARMNRS_Reg, res, 0), +//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), +//ZZ imm4, True +//ZZ )); +//ZZ return res; +//ZZ } +//ZZ } +//ZZ } +//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ res = newVRegV(env); +//ZZ switch (e->Iex.Unop.op) { +//ZZ case Iop_Dup8x16: size = 0; break; +//ZZ case Iop_Dup16x8: size = 1; break; +//ZZ case Iop_Dup32x4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Abs8x16: +//ZZ case Iop_Abs16x8: +//ZZ case Iop_Abs32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Abs8x16: 
size = 0; break; +//ZZ case Iop_Abs16x8: size = 1; break; +//ZZ case Iop_Abs32x4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Reverse64_8x16: +//ZZ case Iop_Reverse64_16x8: +//ZZ case Iop_Reverse64_32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Reverse64_8x16: size = 0; break; +//ZZ case Iop_Reverse64_16x8: size = 1; break; +//ZZ case Iop_Reverse64_32x4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64, +//ZZ res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Reverse32_8x16: +//ZZ case Iop_Reverse32_16x8: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Reverse32_8x16: size = 0; break; +//ZZ case Iop_Reverse32_16x8: size = 1; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32, +//ZZ res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Reverse16_8x16: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16, +//ZZ res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpNEZ64x2: { +//ZZ HReg x_lsh = newVRegV(env); +//ZZ HReg x_rsh = newVRegV(env); +//ZZ HReg lsh_amt = newVRegV(env); +//ZZ HReg rsh_amt = newVRegV(env); +//ZZ HReg zero = newVRegV(env); +//ZZ HReg tmp = newVRegV(env); +//ZZ HReg tmp2 = newVRegV(env); +//ZZ HReg res = newVRegV(env); +//ZZ HReg x = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True)); +//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); +//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ rsh_amt, zero, lsh_amt, 2, True)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ x_lsh, x, lsh_amt, 3, True)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ x_rsh, x, rsh_amt, 3, True)); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, +//ZZ tmp, x_lsh, x_rsh, 0, True)); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, +//ZZ res, tmp, x, 0, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Widen8Uto16x8: +//ZZ case Iop_Widen16Uto32x4: +//ZZ case Iop_Widen32Uto64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size; +//ZZ switch (e->Iex.Unop.op) { +//ZZ case Iop_Widen8Uto16x8: size = 0; break; +//ZZ case Iop_Widen16Uto32x4: size = 1; break; +//ZZ case Iop_Widen32Uto64x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU, +//ZZ res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Widen8Sto16x8: +//ZZ case Iop_Widen16Sto32x4: +//ZZ case Iop_Widen32Sto64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ UInt size; +//ZZ switch (e->Iex.Unop.op) { +//ZZ case Iop_Widen8Sto16x8: size = 0; break; +//ZZ case Iop_Widen16Sto32x4: size = 1; break; +//ZZ case Iop_Widen32Sto64x2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS, +//ZZ res, arg, size, True)); 
+//ZZ return res; +//ZZ } +//ZZ case Iop_PwAddL8Sx16: +//ZZ case Iop_PwAddL16Sx8: +//ZZ case Iop_PwAddL32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwAddL8Sx16: size = 0; break; +//ZZ case Iop_PwAddL16Sx8: size = 1; break; +//ZZ case Iop_PwAddL32Sx4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, +//ZZ res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwAddL8Ux16: +//ZZ case Iop_PwAddL16Ux8: +//ZZ case Iop_PwAddL32Ux4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwAddL8Ux16: size = 0; break; +//ZZ case Iop_PwAddL16Ux8: size = 1; break; +//ZZ case Iop_PwAddL32Ux4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, +//ZZ res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Cnt8x16: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Clz8Sx16: +//ZZ case Iop_Clz16Sx8: +//ZZ case Iop_Clz32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Clz8Sx16: size = 0; break; +//ZZ case Iop_Clz16Sx8: size = 1; break; +//ZZ case Iop_Clz32Sx4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Cls8Sx16: +//ZZ case Iop_Cls16Sx8: +//ZZ case Iop_Cls32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Cls8Sx16: size = 0; break; +//ZZ case Iop_Cls16Sx8: size = 1; break; +//ZZ case Iop_Cls32Sx4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_FtoI32Sx4_RZ: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, +//ZZ res, arg, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_FtoI32Ux4_RZ: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, +//ZZ res, arg, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_I32StoFx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, +//ZZ res, arg, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_I32UtoFx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, +//ZZ res, arg, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_F16toF32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32, +//ZZ res, arg, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Recip32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, +//ZZ res, argL, 0, True)); +//ZZ return res; +//ZZ } +//ZZ 
case Iop_Recip32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, +//ZZ res, argL, 0, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Rsqrte32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, +//ZZ res, argL, 0, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Rsqrte32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, +//ZZ res, argL, 0, True)); +//ZZ return res; +//ZZ } + /* ... */ + default: + break; + } /* switch on the unop */ + } /* if (e->tag == Iex_Unop) */ + + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { + case Iop_64HLtoV128: { + HReg res = newVRegV(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, ARM64Instr_VQfromXX(res, argL, argR)); + return res; + } +//ZZ case Iop_AndV128: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND, +//ZZ res, argL, argR, 4, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_OrV128: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, +//ZZ res, argL, argR, 4, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_XorV128: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, +//ZZ res, argL, argR, 4, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Add8x16: +//ZZ case Iop_Add16x8: +//ZZ case Iop_Add32x4: + case Iop_AndV128: + case Iop_OrV128: + case Iop_XorV128: + case Iop_Max32Ux4: + case Iop_Max16Ux8: + case Iop_Max8Ux16: + case Iop_Min32Ux4: + case Iop_Min16Ux8: + case Iop_Min8Ux16: + case Iop_Max32Sx4: + case Iop_Max16Sx8: + case Iop_Max8Sx16: + case Iop_Min32Sx4: + case Iop_Min16Sx8: + case Iop_Min8Sx16: + case Iop_Add64x2: + case Iop_Add32x4: + case Iop_Add16x8: + case Iop_Add8x16: + case Iop_Sub64x2: + case Iop_Sub32x4: + case Iop_Sub16x8: + case Iop_Sub8x16: + case Iop_Mul32x4: + case Iop_Mul16x8: + case Iop_Mul8x16: + case Iop_CmpEQ64x2: + case Iop_CmpEQ32x4: + case Iop_CmpEQ16x8: + case Iop_CmpEQ8x16: + case Iop_CmpGT64Ux2: + case Iop_CmpGT32Ux4: + case Iop_CmpGT16Ux8: + case Iop_CmpGT8Ux16: + case Iop_CmpGT64Sx2: + case Iop_CmpGT32Sx4: + case Iop_CmpGT16Sx8: + case Iop_CmpGT8Sx16: + case Iop_CmpEQ64Fx2: + case Iop_CmpEQ32Fx4: + case Iop_CmpLE64Fx2: + case Iop_CmpLE32Fx4: + case Iop_CmpLT64Fx2: + case Iop_CmpLT32Fx4: + case Iop_Perm8x16: + { + HReg res = newVRegV(env); + HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); + HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); + Bool sw = False; + ARM64VecBinOp op = ARM64vecb_INVALID; + switch (e->Iex.Binop.op) { + case Iop_AndV128: op = ARM64vecb_AND; break; + case Iop_OrV128: op = ARM64vecb_ORR; break; + case Iop_XorV128: op = ARM64vecb_XOR; break; + case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; + case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; + case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; + case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; + case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; + 
case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; + case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; + case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; + case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break; + case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; + case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; + case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break; + case Iop_Add64x2: op = ARM64vecb_ADD64x2; break; + case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; + case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; + case Iop_Add8x16: op = ARM64vecb_ADD8x16; break; + case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; + case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; + case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; + case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break; + case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; + case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; + case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break; + case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; + case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break; + case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break; + case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break; + case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break; + case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break; + case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break; + case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break; + case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break; + case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break; + case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break; + case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break; + case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break; + case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break; + case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break; + case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break; + case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break; + case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break; + case Iop_Perm8x16: op = ARM64vecb_TBL1; break; + default: vassert(0); + } + if (sw) { + addInstr(env, ARM64Instr_VBinV(op, res, argR, argL)); + } else { + addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); + } + return res; + } +//ZZ case Iop_Add32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Recps32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Rsqrts32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ // These 6 verified 18 Apr 2013 +//ZZ case Iop_InterleaveEvenLanes8x16: +//ZZ case Iop_InterleaveOddLanes8x16: +//ZZ case Iop_InterleaveEvenLanes16x8: +//ZZ case Iop_InterleaveOddLanes16x8: +//ZZ case Iop_InterleaveEvenLanes32x4: +//ZZ case Iop_InterleaveOddLanes32x4: { +//ZZ HReg rD = newVRegV(env); +//ZZ HReg rM = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = 
iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ Bool resRd; // is the result in rD or rM ? +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break; +//ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break; +//ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break; +//ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break; +//ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break; +//ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); +//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True)); +//ZZ return resRd ? rD : rM; +//ZZ } +//ZZ +//ZZ // These 6 verified 18 Apr 2013 +//ZZ case Iop_InterleaveHI8x16: +//ZZ case Iop_InterleaveLO8x16: +//ZZ case Iop_InterleaveHI16x8: +//ZZ case Iop_InterleaveLO16x8: +//ZZ case Iop_InterleaveHI32x4: +//ZZ case Iop_InterleaveLO32x4: { +//ZZ HReg rD = newVRegV(env); +//ZZ HReg rM = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ Bool resRd; // is the result in rD or rM ? +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break; +//ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break; +//ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break; +//ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break; +//ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break; +//ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); +//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True)); +//ZZ return resRd ? rD : rM; +//ZZ } +//ZZ +//ZZ // These 6 verified 18 Apr 2013 +//ZZ case Iop_CatOddLanes8x16: +//ZZ case Iop_CatEvenLanes8x16: +//ZZ case Iop_CatOddLanes16x8: +//ZZ case Iop_CatEvenLanes16x8: +//ZZ case Iop_CatOddLanes32x4: +//ZZ case Iop_CatEvenLanes32x4: { +//ZZ HReg rD = newVRegV(env); +//ZZ HReg rM = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ Bool resRd; // is the result in rD or rM ? +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break; +//ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break; +//ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break; +//ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break; +//ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break; +//ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); +//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True)); +//ZZ return resRd ? 
rD : rM; +//ZZ } +//ZZ +//ZZ case Iop_QAdd8Ux16: +//ZZ case Iop_QAdd16Ux8: +//ZZ case Iop_QAdd32Ux4: +//ZZ case Iop_QAdd64Ux2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QAdd8Ux16: size = 0; break; +//ZZ case Iop_QAdd16Ux8: size = 1; break; +//ZZ case Iop_QAdd32Ux4: size = 2; break; +//ZZ case Iop_QAdd64Ux2: size = 3; break; +//ZZ default: +//ZZ ppIROp(e->Iex.Binop.op); +//ZZ vpanic("Illegal element size in VQADDU"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QAdd8Sx16: +//ZZ case Iop_QAdd16Sx8: +//ZZ case Iop_QAdd32Sx4: +//ZZ case Iop_QAdd64Sx2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QAdd8Sx16: size = 0; break; +//ZZ case Iop_QAdd16Sx8: size = 1; break; +//ZZ case Iop_QAdd32Sx4: size = 2; break; +//ZZ case Iop_QAdd64Sx2: size = 3; break; +//ZZ default: +//ZZ ppIROp(e->Iex.Binop.op); +//ZZ vpanic("Illegal element size in VQADDS"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sub8x16: +//ZZ case Iop_Sub16x8: +//ZZ case Iop_Sub32x4: +//ZZ case Iop_Sub64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Sub8x16: size = 0; break; +//ZZ case Iop_Sub16x8: size = 1; break; +//ZZ case Iop_Sub32x4: size = 2; break; +//ZZ case Iop_Sub64x2: size = 3; break; +//ZZ default: +//ZZ ppIROp(e->Iex.Binop.op); +//ZZ vpanic("Illegal element size in VSUB"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sub32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSub8Ux16: +//ZZ case Iop_QSub16Ux8: +//ZZ case Iop_QSub32Ux4: +//ZZ case Iop_QSub64Ux2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSub8Ux16: size = 0; break; +//ZZ case Iop_QSub16Ux8: size = 1; break; +//ZZ case Iop_QSub32Ux4: size = 2; break; +//ZZ case Iop_QSub64Ux2: size = 3; break; +//ZZ default: +//ZZ ppIROp(e->Iex.Binop.op); +//ZZ vpanic("Illegal element size in VQSUBU"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSub8Sx16: +//ZZ case Iop_QSub16Sx8: +//ZZ case Iop_QSub32Sx4: +//ZZ case Iop_QSub64Sx2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSub8Sx16: size = 0; break; +//ZZ case Iop_QSub16Sx8: size = 1; break; +//ZZ case Iop_QSub32Sx4: size = 2; break; +//ZZ case Iop_QSub64Sx2: size = 3; break; +//ZZ 
default: +//ZZ ppIROp(e->Iex.Binop.op); +//ZZ vpanic("Illegal element size in VQSUBS"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Max8Ux16: +//ZZ case Iop_Max16Ux8: +//ZZ case Iop_Max32Ux4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Max8Ux16: size = 0; break; +//ZZ case Iop_Max16Ux8: size = 1; break; +//ZZ case Iop_Max32Ux4: size = 2; break; +//ZZ default: vpanic("Illegal element size in VMAXU"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Max8Sx16: +//ZZ case Iop_Max16Sx8: +//ZZ case Iop_Max32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Max8Sx16: size = 0; break; +//ZZ case Iop_Max16Sx8: size = 1; break; +//ZZ case Iop_Max32Sx4: size = 2; break; +//ZZ default: vpanic("Illegal element size in VMAXU"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Min8Ux16: +//ZZ case Iop_Min16Ux8: +//ZZ case Iop_Min32Ux4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Min8Ux16: size = 0; break; +//ZZ case Iop_Min16Ux8: size = 1; break; +//ZZ case Iop_Min32Ux4: size = 2; break; +//ZZ default: vpanic("Illegal element size in VMAXU"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Min8Sx16: +//ZZ case Iop_Min16Sx8: +//ZZ case Iop_Min32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Min8Sx16: size = 0; break; +//ZZ case Iop_Min16Sx8: size = 1; break; +//ZZ case Iop_Min32Sx4: size = 2; break; +//ZZ default: vpanic("Illegal element size in VMAXU"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sar8x16: +//ZZ case Iop_Sar16x8: +//ZZ case Iop_Sar32x4: +//ZZ case Iop_Sar64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ HReg argR2 = newVRegV(env); +//ZZ HReg zero = newVRegV(env); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Sar8x16: size = 0; break; +//ZZ case Iop_Sar16x8: size = 1; break; +//ZZ case Iop_Sar32x4: size = 2; break; +//ZZ case Iop_Sar64x2: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ argR2, zero, argR, size, True)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, +//ZZ res, argL, argR2, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Sal8x16: +//ZZ case Iop_Sal16x8: +//ZZ case Iop_Sal32x4: +//ZZ case Iop_Sal64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = 
iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Sal8x16: size = 0; break; +//ZZ case Iop_Sal16x8: size = 1; break; +//ZZ case Iop_Sal32x4: size = 2; break; +//ZZ case Iop_Sal64x2: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Shr8x16: +//ZZ case Iop_Shr16x8: +//ZZ case Iop_Shr32x4: +//ZZ case Iop_Shr64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ HReg argR2 = newVRegV(env); +//ZZ HReg zero = newVRegV(env); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Shr8x16: size = 0; break; +//ZZ case Iop_Shr16x8: size = 1; break; +//ZZ case Iop_Shr32x4: size = 2; break; +//ZZ case Iop_Shr64x2: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, +//ZZ argR2, zero, argR, size, True)); +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ res, argL, argR2, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Shl8x16: +//ZZ case Iop_Shl16x8: +//ZZ case Iop_Shl32x4: +//ZZ case Iop_Shl64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_Shl8x16: size = 0; break; +//ZZ case Iop_Shl16x8: size = 1; break; +//ZZ case Iop_Shl32x4: size = 2; break; +//ZZ case Iop_Shl64x2: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QShl8x16: +//ZZ case Iop_QShl16x8: +//ZZ case Iop_QShl32x4: +//ZZ case Iop_QShl64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QShl8x16: size = 0; break; +//ZZ case Iop_QShl16x8: size = 1; break; +//ZZ case Iop_QShl32x4: size = 2; break; +//ZZ case Iop_QShl64x2: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSal8x16: +//ZZ case Iop_QSal16x8: +//ZZ case Iop_QSal32x4: +//ZZ case Iop_QSal64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSal8x16: size = 0; break; +//ZZ case Iop_QSal16x8: size = 1; break; +//ZZ case Iop_QSal32x4: size = 2; break; +//ZZ case Iop_QSal64x2: size = 3; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QShlN8x16: +//ZZ case Iop_QShlN16x8: +//ZZ case Iop_QShlN32x4: +//ZZ case Iop_QShlN64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ UInt size, imm; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch 
(e->Iex.Binop.op) { +//ZZ case Iop_QShlN8x16: size = 8 | imm; break; +//ZZ case Iop_QShlN16x8: size = 16 | imm; break; +//ZZ case Iop_QShlN32x4: size = 32 | imm; break; +//ZZ case Iop_QShlN64x2: size = 64 | imm; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, +//ZZ res, argL, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QShlN8Sx16: +//ZZ case Iop_QShlN16Sx8: +//ZZ case Iop_QShlN32Sx4: +//ZZ case Iop_QShlN64Sx2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ UInt size, imm; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM taget supports Iop_QShlNASxB with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QShlN8Sx16: size = 8 | imm; break; +//ZZ case Iop_QShlN16Sx8: size = 16 | imm; break; +//ZZ case Iop_QShlN32Sx4: size = 32 | imm; break; +//ZZ case Iop_QShlN64Sx2: size = 64 | imm; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, +//ZZ res, argL, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_QSalN8x16: +//ZZ case Iop_QSalN16x8: +//ZZ case Iop_QSalN32x4: +//ZZ case Iop_QSalN64x2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ UInt size, imm; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " +//ZZ "second argument only\n"); +//ZZ } +//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_QSalN8x16: size = 8 | imm; break; +//ZZ case Iop_QSalN16x8: size = 16 | imm; break; +//ZZ case Iop_QSalN32x4: size = 32 | imm; break; +//ZZ case Iop_QSalN64x2: size = 64 | imm; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, +//ZZ res, argL, size, True)); +//ZZ return res; +//ZZ } + case Iop_ShrN64x2: + case Iop_ShrN32x4: + case Iop_ShrN16x8: + case Iop_ShrN8x16: + case Iop_SarN64x2: + case Iop_SarN32x4: + case Iop_SarN16x8: + case Iop_SarN8x16: + case Iop_ShlN64x2: + case Iop_ShlN32x4: + case Iop_ShlN16x8: + case Iop_ShlN8x16: + { + IRExpr* argL = e->Iex.Binop.arg1; + IRExpr* argR = e->Iex.Binop.arg2; + if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { + UInt amt = argR->Iex.Const.con->Ico.U8; + UInt limit = 0; + ARM64VecShiftOp op = ARM64vecsh_INVALID; + switch (e->Iex.Binop.op) { + case Iop_ShrN64x2: + op = ARM64vecsh_USHR64x2; limit = 63; break; + case Iop_ShrN32x4: + op = ARM64vecsh_USHR32x4; limit = 31; break; + case Iop_ShrN16x8: + op = ARM64vecsh_USHR16x8; limit = 15; break; + case Iop_ShrN8x16: + op = ARM64vecsh_USHR8x16; limit = 7; break; + case Iop_SarN64x2: + op = ARM64vecsh_SSHR64x2; limit = 63; break; + case Iop_SarN32x4: + op = ARM64vecsh_SSHR32x4; limit = 31; break; + case Iop_SarN16x8: + op = ARM64vecsh_SSHR16x8; limit = 15; break; + case Iop_SarN8x16: + op = ARM64vecsh_SSHR8x16; limit = 7; break; + case Iop_ShlN64x2: + op = ARM64vecsh_SHL64x2; limit = 63; break; + case Iop_ShlN32x4: + op = ARM64vecsh_SHL32x4; limit = 31; break; + case Iop_ShlN16x8: + op = ARM64vecsh_SHL16x8; limit = 15; break; + case Iop_ShlN8x16: + op = ARM64vecsh_SHL8x16; limit = 7; break; + default: + vassert(0); + } + if (op != ARM64vecsh_INVALID && amt >= 0 && amt <= limit) { + HReg src = 
iselV128Expr(env, argL); + HReg dst = newVRegV(env); + if (amt > 0) { + addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt)); + } else { + dst = src; + } + return dst; + } + } + /* else fall out; this is unhandled */ + break; + } +//ZZ case Iop_CmpGT8Ux16: +//ZZ case Iop_CmpGT16Ux8: +//ZZ case Iop_CmpGT32Ux4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CmpGT8Ux16: size = 0; break; +//ZZ case Iop_CmpGT16Ux8: size = 1; break; +//ZZ case Iop_CmpGT32Ux4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpGT8Sx16: +//ZZ case Iop_CmpGT16Sx8: +//ZZ case Iop_CmpGT32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CmpGT8Sx16: size = 0; break; +//ZZ case Iop_CmpGT16Sx8: size = 1; break; +//ZZ case Iop_CmpGT32Sx4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpEQ8x16: +//ZZ case Iop_CmpEQ16x8: +//ZZ case Iop_CmpEQ32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size; +//ZZ switch (e->Iex.Binop.op) { +//ZZ case Iop_CmpEQ8x16: size = 0; break; +//ZZ case Iop_CmpEQ16x8: size = 1; break; +//ZZ case Iop_CmpEQ32x4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Mul8x16: +//ZZ case Iop_Mul16x8: +//ZZ case Iop_Mul32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Mul8x16: size = 0; break; +//ZZ case Iop_Mul16x8: size = 1; break; +//ZZ case Iop_Mul32x4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Mul32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Mull8Ux8: +//ZZ case Iop_Mull16Ux4: +//ZZ case Iop_Mull32Ux2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Mull8Ux8: size = 0; break; +//ZZ case Iop_Mull16Ux4: size = 1; break; +//ZZ case Iop_Mull32Ux2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_Mull8Sx8: +//ZZ case Iop_Mull16Sx4: +//ZZ case Iop_Mull32Sx2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ 
UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_Mull8Sx8: size = 0; break; +//ZZ case Iop_Mull16Sx4: size = 1; break; +//ZZ case Iop_Mull32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_QDMulHi16Sx8: +//ZZ case Iop_QDMulHi32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QDMulHi16Sx8: size = 1; break; +//ZZ case Iop_QDMulHi32Sx4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_QRDMulHi16Sx8: +//ZZ case Iop_QRDMulHi32Sx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QRDMulHi16Sx8: size = 1; break; +//ZZ case Iop_QRDMulHi32Sx4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_QDMulLong16Sx4: +//ZZ case Iop_QDMulLong32Sx2: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_QDMulLong16Sx4: size = 1; break; +//ZZ case Iop_QDMulLong32Sx2: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PolynomialMul8x16: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Max32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_Min32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMax32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_PwMin32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpGT32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, +//ZZ res, 
argL, argR, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpGE32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_CmpEQ32Fx4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, +//ZZ res, argL, argR, 2, True)); +//ZZ return res; +//ZZ } +//ZZ +//ZZ case Iop_PolynomialMull8x8: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP, +//ZZ res, argL, argR, size, True)); +//ZZ return res; +//ZZ } +//ZZ case Iop_F32ToFixed32Ux4_RZ: +//ZZ case Iop_F32ToFixed32Sx4_RZ: +//ZZ case Iop_Fixed32UToF32x4_RN: +//ZZ case Iop_Fixed32SToF32x4_RN: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ ARMNeonUnOp op; +//ZZ UInt imm6; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant " +//ZZ "second argument less than 33 only\n"); +//ZZ } +//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ vassert(imm6 <= 32 && imm6 > 0); +//ZZ imm6 = 64 - imm6; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break; +//ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break; +//ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break; +//ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True)); +//ZZ return res; +//ZZ } +//ZZ /* +//ZZ FIXME remove if not used +//ZZ case Iop_VDup8x16: +//ZZ case Iop_VDup16x8: +//ZZ case Iop_VDup32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); +//ZZ UInt imm4; +//ZZ UInt index; +//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { +//ZZ vpanic("ARM supports Iop_VDup with constant " +//ZZ "second argument less than 16 only\n"); +//ZZ } +//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break; +//ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break; +//ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ if (imm4 >= 16) { +//ZZ vpanic("ARM supports Iop_VDup with constant " +//ZZ "second argument less than 16 only\n"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, +//ZZ res, argL, imm4, True)); +//ZZ return res; +//ZZ } +//ZZ */ +//ZZ case Iop_PwAdd8x16: +//ZZ case Iop_PwAdd16x8: +//ZZ case Iop_PwAdd32x4: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); +//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); +//ZZ UInt size = 0; +//ZZ switch(e->Iex.Binop.op) { +//ZZ case Iop_PwAdd8x16: size = 0; break; +//ZZ case Iop_PwAdd16x8: size = 1; break; +//ZZ case Iop_PwAdd32x4: size = 2; break; +//ZZ default: vassert(0); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, +//ZZ res, argL, argR, size, True)); +//ZZ return 
res; +//ZZ } + /* ... */ + default: + break; + } /* switch on the binop */ + } /* if (e->tag == Iex_Binop) */ + + if (e->tag == Iex_Triop) { + IRTriop* triop = e->Iex.Triop.details; + ARM64VecBinOp vecbop = ARM64vecb_INVALID; + switch (triop->op) { + case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break; + case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break; + case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break; + case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break; + case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break; + case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break; + case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break; + case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break; + default: break; + } + if (vecbop != ARM64vecb_INVALID) { + HReg argL = iselV128Expr(env, triop->arg2); + HReg argR = iselV128Expr(env, triop->arg3); + HReg dst = newVRegV(env); + set_FPCR_rounding_mode(env, triop->arg1); + addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR)); + return dst; + } + +//ZZ switch (triop->op) { +//ZZ case Iop_ExtractV128: { +//ZZ HReg res = newVRegV(env); +//ZZ HReg argL = iselNeonExpr(env, triop->arg1); +//ZZ HReg argR = iselNeonExpr(env, triop->arg2); +//ZZ UInt imm4; +//ZZ if (triop->arg3->tag != Iex_Const || +//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { +//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant " +//ZZ "third argument less than 16 only\n"); +//ZZ } +//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8; +//ZZ if (imm4 >= 16) { +//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant " +//ZZ "third argument less than 16 only\n"); +//ZZ } +//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, +//ZZ res, argL, argR, imm4, True)); +//ZZ return res; +//ZZ } +//ZZ default: +//ZZ break; +//ZZ } + } + +//ZZ if (e->tag == Iex_ITE) { // VFD +//ZZ ARMCondCode cc; +//ZZ HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue); +//ZZ HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse); +//ZZ HReg dst = newVRegV(env); +//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True)); +//ZZ cc = iselCondCode(env, e->Iex.ITE.cond); +//ZZ addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0)); +//ZZ return dst; +//ZZ } + + v128_expr_bad: + ppIRExpr(e); + vpanic("iselV128Expr_wrk"); +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Floating point expressions (64 bit) ---*/ +/*---------------------------------------------------------*/ + +/* Compute a 64-bit floating point value into a register, the identity + of which is returned. As with iselIntExpr_R, the reg may be either + real or virtual; in any case it must not be changed by subsequent + code emitted by the caller. 
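+   (Editorial sketch, not part of the original patch; it only illustrates
+   the contract stated above, using constructors that already appear in
+   this file, and "someF64Expr" is just a placeholder name.  A caller that
+   needs an F64 value does roughly
+
+      HReg rD = iselDblExpr(env, someF64Expr);
+      // rD may then be used as a read-only source operand, e.g. fed to
+      // ARM64Instr_VUnaryD or ARM64Instr_VBinD, but must never be written
+      // afterwards: for an Iex_RdTmp the very same virtual register is
+      // handed out to every user of that temp.
+   )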
*/ + +static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) +{ + HReg r = iselDblExpr_wrk( env, e ); +# if 0 + vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); +# endif + vassert(hregClass(r) == HRcFlt64); + vassert(hregIsVirtual(r)); + return r; +} + +/* DO NOT CALL THIS DIRECTLY */ +static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(env->type_env,e); + vassert(e); + vassert(ty == Ity_F64); + + if (e->tag == Iex_RdTmp) { + return lookupIRTemp(env, e->Iex.RdTmp.tmp); + } + + if (e->tag == Iex_Const) { + IRConst* con = e->Iex.Const.con; + if (con->tag == Ico_F64i) { + HReg src = newVRegI(env); + HReg dst = newVRegD(env); + addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i)); + addInstr(env, ARM64Instr_VDfromX(dst, src)); + return dst; + } + } + + if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { + vassert(e->Iex.Load.ty == Ity_F64); + HReg addr = iselIntExpr_R(env, e->Iex.Load.addr); + HReg res = newVRegD(env); + addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0)); + return res; + } + + if (e->tag == Iex_Get) { + Int offs = e->Iex.Get.offset; + if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) { + HReg rD = newVRegD(env); + HReg rN = get_baseblock_register(); + addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs)); + return rD; + } + } + + if (e->tag == Iex_Unop) { + switch (e->Iex.Unop.op) { +//ZZ case Iop_ReinterpI64asF64: { +//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { +//ZZ return iselNeon64Expr(env, e->Iex.Unop.arg); +//ZZ } else { +//ZZ HReg srcHi, srcLo; +//ZZ HReg dst = newVRegD(env); +//ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo)); +//ZZ return dst; +//ZZ } +//ZZ } + case Iop_NegF64: { + HReg src = iselDblExpr(env, e->Iex.Unop.arg); + HReg dst = newVRegD(env); + addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src)); + return dst; + } + case Iop_AbsF64: { + HReg src = iselDblExpr(env, e->Iex.Unop.arg); + HReg dst = newVRegD(env); + addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src)); + return dst; + } + case Iop_F32toF64: { + HReg src = iselFltExpr(env, e->Iex.Unop.arg); + HReg dst = newVRegD(env); + addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src)); + return dst; + } + case Iop_I32UtoF64: + case Iop_I32StoF64: { + /* Rounding mode is not involved here, since the + conversion can always be done without loss of + precision. */ + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + HReg dst = newVRegD(env); + Bool syned = e->Iex.Unop.op == Iop_I32StoF64; + ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U; + addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src)); + return dst; + } + default: + break; + } + } + + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { + case Iop_RoundF64toInt: { + HReg src = iselDblExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src)); + return dst; + } + case Iop_SqrtF64: { + HReg src = iselDblExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src)); + return dst; + } + case Iop_I64StoF64: + case Iop_I64UtoF64: { + ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64 + ? 
ARM64cvt_F64_I64S : ARM64cvt_F64_I64U; + HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + HReg dstS = newVRegD(env); + addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI)); + return dstS; + } + default: + break; + } + } + + if (e->tag == Iex_Triop) { + IRTriop* triop = e->Iex.Triop.details; + ARM64FpBinOp dblop = ARM64fpb_INVALID; + switch (triop->op) { + case Iop_DivF64: dblop = ARM64fpb_DIV; break; + case Iop_MulF64: dblop = ARM64fpb_MUL; break; + case Iop_SubF64: dblop = ARM64fpb_SUB; break; + case Iop_AddF64: dblop = ARM64fpb_ADD; break; + default: break; + } + if (dblop != ARM64fpb_INVALID) { + HReg argL = iselDblExpr(env, triop->arg2); + HReg argR = iselDblExpr(env, triop->arg3); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, triop->arg1); + addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR)); + return dst; + } + } + +//ZZ if (e->tag == Iex_ITE) { // VFD +//ZZ if (ty == Ity_F64 +//ZZ && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { +//ZZ HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue); +//ZZ HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse); +//ZZ HReg dst = newVRegD(env); +//ZZ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1)); +//ZZ ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond); +//ZZ addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0)); +//ZZ return dst; +//ZZ } +//ZZ } + + ppIRExpr(e); + vpanic("iselDblExpr_wrk"); +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Floating point expressions (32 bit) ---*/ +/*---------------------------------------------------------*/ + +/* Compute a 32-bit floating point value into a register, the identity + of which is returned. As with iselIntExpr_R, the reg may be either + real or virtual; in any case it must not be changed by subsequent + code emitted by the caller. Values are generated into HRcFlt64 + registers despite the values themselves being Ity_F32s. */ + +static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) +{ + HReg r = iselFltExpr_wrk( env, e ); +# if 0 + vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); +# endif + vassert(hregClass(r) == HRcFlt64); + vassert(hregIsVirtual(r)); + return r; +} + +/* DO NOT CALL THIS DIRECTLY */ +static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) +{ + IRType ty = typeOfIRExpr(env->type_env,e); + vassert(e); + vassert(ty == Ity_F32); + + if (e->tag == Iex_RdTmp) { + return lookupIRTemp(env, e->Iex.RdTmp.tmp); + } + + if (e->tag == Iex_Const) { + /* This is something of a kludge. Since a 32 bit floating point + zero is just .. all zeroes, just create a 64 bit zero word + and transfer it. This avoids having to create a SfromW + instruction for this specific case. 
*/ + IRConst* con = e->Iex.Const.con; + if (con->tag == Ico_F32i && con->Ico.F32i == 0) { + HReg src = newVRegI(env); + HReg dst = newVRegD(env); + addInstr(env, ARM64Instr_Imm64(src, 0)); + addInstr(env, ARM64Instr_VDfromX(dst, src)); + return dst; + } + } + +//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { +//ZZ ARMAModeV* am; +//ZZ HReg res = newVRegF(env); +//ZZ vassert(e->Iex.Load.ty == Ity_F32); +//ZZ am = iselIntExpr_AModeV(env, e->Iex.Load.addr); +//ZZ addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am)); +//ZZ return res; +//ZZ } + + if (e->tag == Iex_Get) { + Int offs = e->Iex.Get.offset; + if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) { + HReg rD = newVRegD(env); + HReg rN = get_baseblock_register(); + addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs)); + return rD; + } + } + + if (e->tag == Iex_Unop) { + switch (e->Iex.Unop.op) { +//ZZ case Iop_ReinterpI32asF32: { +//ZZ HReg dst = newVRegF(env); +//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); +//ZZ addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src)); +//ZZ return dst; +//ZZ } + case Iop_NegF32: { + HReg src = iselFltExpr(env, e->Iex.Unop.arg); + HReg dst = newVRegD(env); + addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src)); + return dst; + } + case Iop_AbsF32: { + HReg src = iselFltExpr(env, e->Iex.Unop.arg); + HReg dst = newVRegD(env); + addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src)); + return dst; + } + default: + break; + } + } + + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { + case Iop_RoundF32toInt: { + HReg src = iselFltExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src)); + return dst; + } + case Iop_SqrtF32: { + HReg src = iselFltExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src)); + return dst; + } + case Iop_F64toF32: { + HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + HReg dstS = newVRegD(env); + addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD)); + return dstS; + } + case Iop_I32UtoF32: + case Iop_I32StoF32: + case Iop_I64UtoF32: + case Iop_I64StoF32: { + ARM64CvtOp cvt_op = ARM64cvt_INVALID; + switch (e->Iex.Binop.op) { + case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break; + case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break; + case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break; + case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break; + default: vassert(0); + } + HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + HReg dstS = newVRegD(env); + addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI)); + return dstS; + } + default: + break; + } + } + + if (e->tag == Iex_Triop) { + IRTriop* triop = e->Iex.Triop.details; + ARM64FpBinOp sglop = ARM64fpb_INVALID; + switch (triop->op) { + case Iop_DivF32: sglop = ARM64fpb_DIV; break; + case Iop_MulF32: sglop = ARM64fpb_MUL; break; + case Iop_SubF32: sglop = ARM64fpb_SUB; break; + case Iop_AddF32: sglop = ARM64fpb_ADD; break; + default: break; + } + if (sglop != ARM64fpb_INVALID) { + HReg argL = iselFltExpr(env, triop->arg2); + HReg argR = iselFltExpr(env, triop->arg3); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, triop->arg1); + addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR)); + return dst; + } + } + +//ZZ +//ZZ if (e->tag == Iex_ITE) { // 
VFD +//ZZ if (ty == Ity_F32 +//ZZ && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { +//ZZ ARMCondCode cc; +//ZZ HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue); +//ZZ HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse); +//ZZ HReg dst = newVRegF(env); +//ZZ addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1)); +//ZZ cc = iselCondCode(env, e->Iex.ITE.cond); +//ZZ addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0)); +//ZZ return dst; +//ZZ } +//ZZ } + + ppIRExpr(e); + vpanic("iselFltExpr_wrk"); +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Statements ---*/ +/*---------------------------------------------------------*/ + +static void iselStmt ( ISelEnv* env, IRStmt* stmt ) +{ + if (vex_traceflags & VEX_TRACE_VCODE) { + vex_printf("\n-- "); + ppIRStmt(stmt); + vex_printf("\n"); + } + switch (stmt->tag) { + + /* --------- STORE --------- */ + /* little-endian write to memory */ + case Ist_Store: { + IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); + IREndness end = stmt->Ist.Store.end; + + if (tya != Ity_I64 || end != Iend_LE) + goto stmt_fail; + + if (tyd == Ity_I64) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); + ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); + addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_I32) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); + ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); + addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_I16) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); + ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); + addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_I8) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); + ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); + addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_V128) { + HReg qD = iselV128Expr(env, stmt->Ist.Store.data); + HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); + addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); + return; + } + if (tyd == Ity_F64) { + HReg dD = iselDblExpr(env, stmt->Ist.Store.data); + HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); + addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0)); + return; + } + if (tyd == Ity_F32) { + HReg sD = iselFltExpr(env, stmt->Ist.Store.data); + HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); + addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0)); + return; + } + +//ZZ if (tyd == Ity_I16) { +//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); +//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr); +//ZZ addInstr(env, ARMInstr_LdSt16(ARMcc_AL, +//ZZ False/*!isLoad*/, +//ZZ False/*!isSignedLoad*/, rD, am)); +//ZZ return; +//ZZ } +//ZZ if (tyd == Ity_I8) { +//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); +//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr); +//ZZ addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am)); +//ZZ return; +//ZZ } +//ZZ if (tyd == Ity_I64) { +//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { +//ZZ HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data); +//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); +//ZZ addInstr(env, ARMInstr_NLdStD(False, dD, am)); +//ZZ } else { +//ZZ HReg rDhi, rDlo, rA; +//ZZ 
iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data); +//ZZ rA = iselIntExpr_R(env, stmt->Ist.Store.addr); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi, +//ZZ ARMAMode1_RI(rA,4))); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo, +//ZZ ARMAMode1_RI(rA,0))); +//ZZ } +//ZZ return; +//ZZ } +//ZZ if (tyd == Ity_F64) { +//ZZ HReg dD = iselDblExpr(env, stmt->Ist.Store.data); +//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); +//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am)); +//ZZ return; +//ZZ } +//ZZ if (tyd == Ity_F32) { +//ZZ HReg fD = iselFltExpr(env, stmt->Ist.Store.data); +//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); +//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am)); +//ZZ return; +//ZZ } +//ZZ if (tyd == Ity_V128) { +//ZZ HReg qD = iselNeonExpr(env, stmt->Ist.Store.data); +//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); +//ZZ addInstr(env, ARMInstr_NLdStQ(False, qD, am)); +//ZZ return; +//ZZ } + + break; + } + +//ZZ /* --------- CONDITIONAL STORE --------- */ +//ZZ /* conditional little-endian write to memory */ +//ZZ case Ist_StoreG: { +//ZZ IRStoreG* sg = stmt->Ist.StoreG.details; +//ZZ IRType tya = typeOfIRExpr(env->type_env, sg->addr); +//ZZ IRType tyd = typeOfIRExpr(env->type_env, sg->data); +//ZZ IREndness end = sg->end; +//ZZ +//ZZ if (tya != Ity_I32 || end != Iend_LE) +//ZZ goto stmt_fail; +//ZZ +//ZZ switch (tyd) { +//ZZ case Ity_I8: +//ZZ case Ity_I32: { +//ZZ HReg rD = iselIntExpr_R(env, sg->data); +//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr); +//ZZ ARMCondCode cc = iselCondCode(env, sg->guard); +//ZZ addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U) +//ZZ (cc, False/*!isLoad*/, rD, am)); +//ZZ return; +//ZZ } +//ZZ case Ity_I16: { +//ZZ HReg rD = iselIntExpr_R(env, sg->data); +//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr); +//ZZ ARMCondCode cc = iselCondCode(env, sg->guard); +//ZZ addInstr(env, ARMInstr_LdSt16(cc, +//ZZ False/*!isLoad*/, +//ZZ False/*!isSignedLoad*/, rD, am)); +//ZZ return; +//ZZ } +//ZZ default: +//ZZ break; +//ZZ } +//ZZ break; +//ZZ } +//ZZ +//ZZ /* --------- CONDITIONAL LOAD --------- */ +//ZZ /* conditional little-endian load from memory */ +//ZZ case Ist_LoadG: { +//ZZ IRLoadG* lg = stmt->Ist.LoadG.details; +//ZZ IRType tya = typeOfIRExpr(env->type_env, lg->addr); +//ZZ IREndness end = lg->end; +//ZZ +//ZZ if (tya != Ity_I32 || end != Iend_LE) +//ZZ goto stmt_fail; +//ZZ +//ZZ switch (lg->cvt) { +//ZZ case ILGop_8Uto32: +//ZZ case ILGop_Ident32: { +//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt); +//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr); +//ZZ HReg rD = lookupIRTemp(env, lg->dst); +//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt)); +//ZZ ARMCondCode cc = iselCondCode(env, lg->guard); +//ZZ addInstr(env, (lg->cvt == ILGop_Ident32 ? 
ARMInstr_LdSt32 +//ZZ : ARMInstr_LdSt8U) +//ZZ (cc, True/*isLoad*/, rD, am)); +//ZZ return; +//ZZ } +//ZZ case ILGop_16Sto32: +//ZZ case ILGop_16Uto32: +//ZZ case ILGop_8Sto32: { +//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt); +//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr); +//ZZ HReg rD = lookupIRTemp(env, lg->dst); +//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt)); +//ZZ ARMCondCode cc = iselCondCode(env, lg->guard); +//ZZ if (lg->cvt == ILGop_8Sto32) { +//ZZ addInstr(env, ARMInstr_Ld8S(cc, rD, am)); +//ZZ } else { +//ZZ vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32); +//ZZ Bool sx = lg->cvt == ILGop_16Sto32; +//ZZ addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am)); +//ZZ } +//ZZ return; +//ZZ } +//ZZ default: +//ZZ break; +//ZZ } +//ZZ break; +//ZZ } + + /* --------- PUT --------- */ + /* write guest state, fixed offset */ + case Ist_Put: { + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); + UInt offs = (UInt)stmt->Ist.Put.offset; + if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); + ARM64AMode* am = mk_baseblock_64bit_access_amode(offs); + addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); + ARM64AMode* am = mk_baseblock_32bit_access_amode(offs); + addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); + ARM64AMode* am = mk_baseblock_16bit_access_amode(offs); + addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_I8 && offs < (1<<12)) { + HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); + ARM64AMode* am = mk_baseblock_8bit_access_amode(offs); + addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); + return; + } + if (tyd == Ity_V128 && offs < (1<<12)) { + HReg qD = iselV128Expr(env, stmt->Ist.Put.data); + HReg addr = mk_baseblock_128bit_access_addr(env, offs); + addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); + return; + } + if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) { + HReg dD = iselDblExpr(env, stmt->Ist.Put.data); + HReg bbp = get_baseblock_register(); + addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs)); + return; + } + if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) { + HReg dD = iselFltExpr(env, stmt->Ist.Put.data); + HReg bbp = get_baseblock_register(); + addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs)); + return; + } + +//ZZ if (tyd == Ity_I64) { +//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { +//ZZ HReg addr = newVRegI(env); +//ZZ HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data); +//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), +//ZZ stmt->Ist.Put.offset)); +//ZZ addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr))); +//ZZ } else { +//ZZ HReg rDhi, rDlo; +//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), +//ZZ stmt->Ist.Put.offset + 0); +//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), +//ZZ stmt->Ist.Put.offset + 4); +//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, +//ZZ rDhi, am4)); +//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, +//ZZ rDlo, am0)); +//ZZ } +//ZZ return; +//ZZ } +//ZZ if (tyd == Ity_F64) { +//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4. 
+//ZZ // In which case we'll have to generate more longwinded code. +//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); +//ZZ HReg rD = iselDblExpr(env, stmt->Ist.Put.data); +//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am)); +//ZZ return; +//ZZ } +//ZZ if (tyd == Ity_F32) { +//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4. +//ZZ // In which case we'll have to generate more longwinded code. +//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); +//ZZ HReg rD = iselFltExpr(env, stmt->Ist.Put.data); +//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am)); +//ZZ return; +//ZZ } + break; + } + + /* --------- TMP --------- */ + /* assign value to temporary */ + case Ist_WrTmp: { + IRTemp tmp = stmt->Ist.WrTmp.tmp; + IRType ty = typeOfIRTemp(env->type_env, tmp); + + if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { + /* We could do a lot better here. But for the time being: */ + HReg dst = lookupIRTemp(env, tmp); + HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data); + addInstr(env, ARM64Instr_MovI(dst, rD)); + return; + } + if (ty == Ity_I1) { + /* Here, we are generating a I1 value into a 64 bit register. + Make sure the value in the register is only zero or one, + but no other. This allows optimisation of the + 1Uto64(tmp:I1) case, by making it simply a copy of the + register holding 'tmp'. The point being that the value in + the register holding 'tmp' can only have been created + here. LATER: that seems dangerous; safer to do 'tmp & 1' + in that case. Also, could do this just with a single CINC + insn. */ + /* CLONE-01 */ + HReg zero = newVRegI(env); + HReg one = newVRegI(env); + HReg dst = lookupIRTemp(env, tmp); + addInstr(env, ARM64Instr_Imm64(zero, 0)); + addInstr(env, ARM64Instr_Imm64(one, 1)); + ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data); + addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); + return; + } + if (ty == Ity_F64) { + HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); + HReg dst = lookupIRTemp(env, tmp); + addInstr(env, ARM64Instr_VMov(8, dst, src)); + return; + } + if (ty == Ity_F32) { + HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); + HReg dst = lookupIRTemp(env, tmp); + addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); + return; + } + if (ty == Ity_V128) { + HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); + HReg dst = lookupIRTemp(env, tmp); + addInstr(env, ARM64Instr_VMov(16, dst, src)); + return; + } + break; + } + + /* --------- Call to DIRTY helper --------- */ + /* call complex ("dirty") helper function */ + case Ist_Dirty: { + IRDirty* d = stmt->Ist.Dirty.details; + + /* Figure out the return type, if any. */ + IRType retty = Ity_INVALID; + if (d->tmp != IRTemp_INVALID) + retty = typeOfIRTemp(env->type_env, d->tmp); + + Bool retty_ok = False; + switch (retty) { + case Ity_INVALID: /* function doesn't return anything */ + case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: + case Ity_V128: + retty_ok = True; break; + default: + break; + } + if (!retty_ok) + break; /* will go to stmt_fail: */ + + /* Marshal args, do the call, and set the return value to 0x555..555 + if this is a conditional call that returns a value and the + call is skipped. */ + UInt addToSp = 0; + RetLoc rloc = mk_RetLoc_INVALID(); + doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); + vassert(is_sane_RetLoc(rloc)); + + /* Now figure out what to do with the returned value, if any. */ + switch (retty) { + case Ity_INVALID: { + /* No return value. 
Nothing to do. */ + vassert(d->tmp == IRTemp_INVALID); + vassert(rloc.pri == RLPri_None); + vassert(addToSp == 0); + return; + } + case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: { + vassert(rloc.pri == RLPri_Int); + vassert(addToSp == 0); + /* The returned value is in x0. Park it in the register + associated with tmp. */ + HReg dst = lookupIRTemp(env, d->tmp); + addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) ); + return; + } + case Ity_V128: { + /* The returned value is on the stack, and *retloc tells + us where. Fish it off the stack and then move the + stack pointer upwards to clear it, as directed by + doHelperCall. */ + vassert(rloc.pri == RLPri_V128SpRel); + vassert(rloc.spOff < 256); // stay sane + vassert(addToSp >= 16); // ditto + vassert(addToSp < 256); // ditto + HReg dst = lookupIRTemp(env, d->tmp); + HReg tmp = newVRegI(env); // the address of the returned value + addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP + addInstr(env, ARM64Instr_Arith(tmp, tmp, + ARM64RIA_I12((UShort)rloc.spOff, 0), + True/*isAdd*/ )); + addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp)); + addInstr(env, ARM64Instr_AddToSP(addToSp)); + return; + } + default: + /*NOTREACHED*/ + vassert(0); + } + break; + } + + /* --------- Load Linked and Store Conditional --------- */ + case Ist_LLSC: { + if (stmt->Ist.LLSC.storedata == NULL) { + /* LL */ + IRTemp res = stmt->Ist.LLSC.result; + IRType ty = typeOfIRTemp(env->type_env, res); + if (ty == Ity_I64 || ty == Ity_I32 + || ty == Ity_I16 || ty == Ity_I8) { + Int szB = 0; + HReg r_dst = lookupIRTemp(env, res); + HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + switch (ty) { + case Ity_I8: szB = 1; break; + case Ity_I16: szB = 2; break; + case Ity_I32: szB = 4; break; + case Ity_I64: szB = 8; break; + default: vassert(0); + } + addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); + addInstr(env, ARM64Instr_LdrEX(szB)); + addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); + return; + } + goto stmt_fail; + } else { + /* SC */ + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); + if (tyd == Ity_I64 || tyd == Ity_I32 + || tyd == Ity_I16 || tyd == Ity_I8) { + Int szB = 0; + HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); + HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + switch (tyd) { + case Ity_I8: szB = 1; break; + case Ity_I16: szB = 2; break; + case Ity_I32: szB = 4; break; + case Ity_I64: szB = 8; break; + default: vassert(0); + } + addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); + addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); + addInstr(env, ARM64Instr_StrEX(szB)); + } else { + goto stmt_fail; + } + /* now r0 is 1 if failed, 0 if success. Change to IR + conventions (0 is fail, 1 is success). Also transfer + result to r_res. */ + IRTemp res = stmt->Ist.LLSC.result; + IRType ty = typeOfIRTemp(env->type_env, res); + HReg r_res = lookupIRTemp(env, res); + ARM64RIL* one = mb_mkARM64RIL_I(1); + vassert(ty == Ity_I1); + vassert(one); + addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one, + ARM64lo_XOR)); + /* And be conservative -- mask off all but the lowest bit. 
*/ + addInstr(env, ARM64Instr_Logic(r_res, r_res, one, + ARM64lo_AND)); + return; + } + break; + } + + /* --------- MEM FENCE --------- */ + case Ist_MBE: + switch (stmt->Ist.MBE.event) { + case Imbe_Fence: + addInstr(env, ARM64Instr_MFence()); + return; +//ZZ case Imbe_CancelReservation: +//ZZ addInstr(env, ARMInstr_CLREX()); +//ZZ return; + default: + break; + } + break; + + /* --------- INSTR MARK --------- */ + /* Doesn't generate any executable code ... */ + case Ist_IMark: + return; + + /* --------- NO-OP --------- */ + case Ist_NoOp: + return; + + /* --------- EXIT --------- */ + case Ist_Exit: { + if (stmt->Ist.Exit.dst->tag != Ico_U64) + vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value"); + + ARM64CondCode cc + = iselCondCode(env, stmt->Ist.Exit.guard); + ARM64AMode* amPC + = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP); + + /* Case: boring transfer to known address */ + if (stmt->Ist.Exit.jk == Ijk_Boring + /*ATC || stmt->Ist.Exit.jk == Ijk_Call */ + /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) { + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "Y" : ","); + addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, + amPC, cc, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. */ + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring)); + } + return; + } + +//ZZ /* Case: assisted transfer to arbitrary address */ +//ZZ switch (stmt->Ist.Exit.jk) { +//ZZ /* Keep this list in sync with that in iselNext below */ +//ZZ case Ijk_ClientReq: +//ZZ case Ijk_NoDecode: +//ZZ case Ijk_NoRedir: +//ZZ case Ijk_Sys_syscall: +//ZZ case Ijk_InvalICache: +//ZZ case Ijk_Yield: +//ZZ { +//ZZ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); +//ZZ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, +//ZZ stmt->Ist.Exit.jk)); +//ZZ return; +//ZZ } +//ZZ default: +//ZZ break; +//ZZ } + + /* Do we ever expect to see any other kind? */ + goto stmt_fail; + } + + default: break; + } + stmt_fail: + ppIRStmt(stmt); + vpanic("iselStmt"); +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Basic block terminators (Nexts) ---*/ +/*---------------------------------------------------------*/ + +static void iselNext ( ISelEnv* env, + IRExpr* next, IRJumpKind jk, Int offsIP ) +{ + if (vex_traceflags & VEX_TRACE_VCODE) { + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); + ppIRJumpKind(jk); + vex_printf( "\n"); + } + + /* Case: boring transfer to known address */ + if (next->tag == Iex_Const) { + IRConst* cdst = next->Iex.Const.con; + vassert(cdst->tag == Ico_U64); + if (jk == Ijk_Boring || jk == Ijk_Call) { + /* Boring transfer to known address */ + ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr64)cdst->Ico.U64) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "X" : "."); + addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64, + amPC, ARM64cc_AL, + toFastEP)); + } else { + /* .. very occasionally .. 
*/ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. */ + HReg r = iselIntExpr_R(env, next); + addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, + Ijk_Boring)); + } + return; + } + } + + /* Case: call/return (==boring) transfer to any address */ + switch (jk) { + case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { + HReg r = iselIntExpr_R(env, next); + ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); + if (env->chainingAllowed) { + addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL)); + } else { + addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, + Ijk_Boring)); + } + return; + } + default: + break; + } + + /* Case: assisted transfer to arbitrary address */ + switch (jk) { + /* Keep this list in sync with that for Ist_Exit above */ + case Ijk_ClientReq: + case Ijk_NoDecode: + case Ijk_NoRedir: + case Ijk_Sys_syscall: + case Ijk_InvalICache: + case Ijk_FlushDCache: +//ZZ case Ijk_Yield: + { + HReg r = iselIntExpr_R(env, next); + ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); + addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk)); + return; + } + default: + break; + } + + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); + ppIRJumpKind(jk); + vex_printf( "\n"); + vassert(0); // are we expecting any other kind? +} + + +/*---------------------------------------------------------*/ +/*--- Insn selector top-level ---*/ +/*---------------------------------------------------------*/ + +/* Translate an entire SB to arm64 code. */ + +HInstrArray* iselSB_ARM64 ( IRSB* bb, + VexArch arch_host, + VexArchInfo* archinfo_host, + VexAbiInfo* vbi/*UNUSED*/, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ) +{ + Int i, j; + HReg hreg, hregHI; + ISelEnv* env; + UInt hwcaps_host = archinfo_host->hwcaps; + ARM64AMode *amCounter, *amFailAddr; + + /* sanity ... */ + vassert(arch_host == VexArchARM64); + + /* guard against unexpected space regressions */ + vassert(sizeof(ARM64Instr) <= 32); + + /* Make up an initial environment to use. */ + env = LibVEX_Alloc(sizeof(ISelEnv)); + env->vreg_ctr = 0; + + /* Set up output code array. */ + env->code = newHInstrArray(); + + /* Copy BB's type env. */ + env->type_env = bb->tyenv; + + /* Make up an IRTemp -> virtual HReg mapping. This doesn't + change as we go along. */ + env->n_vregmap = bb->tyenv->types_used; + env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); + env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); + + /* and finally ... */ + env->chainingAllowed = chainingAllowed; + env->hwcaps = hwcaps_host; + env->previous_rm = NULL; + env->max_ga = max_ga; + + /* For each IR temporary, allocate a suitably-kinded virtual + register. 
*/ + j = 0; + for (i = 0; i < env->n_vregmap; i++) { + hregHI = hreg = INVALID_HREG; + switch (bb->tyenv->types[i]) { + case Ity_I1: + case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: + hreg = mkHReg(j++, HRcInt64, True); + break; + case Ity_I128: + hreg = mkHReg(j++, HRcInt64, True); + hregHI = mkHReg(j++, HRcInt64, True); + break; + case Ity_F32: // we'll use HRcFlt64 regs for F32 too + case Ity_F64: + hreg = mkHReg(j++, HRcFlt64, True); + break; + case Ity_V128: + hreg = mkHReg(j++, HRcVec128, True); + break; + default: + ppIRType(bb->tyenv->types[i]); + vpanic("iselBB(arm64): IRTemp type"); + } + env->vregmap[i] = hreg; + env->vregmapHI[i] = hregHI; + } + env->vreg_ctr = j; + + /* The very first instruction must be an event check. */ + amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter); + amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr); + addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr)); + + /* Possibly a block counter increment (for profiling). At this + point we don't know the address of the counter, so just pretend + it is zero. It will have to be patched later, but before this + translation is used, by a call to LibVEX_patchProfCtr. */ + if (addProfInc) { + vassert(0); + //addInstr(env, ARM64Instr_ProfInc()); + } + + /* Ok, finally we can iterate over the statements. */ + for (i = 0; i < bb->stmts_used; i++) + iselStmt(env, bb->stmts[i]); + + iselNext(env, bb->next, bb->jumpkind, bb->offsIP); + + /* record the number of vregs we used. */ + env->code->n_vregs = env->vreg_ctr; + return env->code; +} + + +/*---------------------------------------------------------------*/ +/*--- end host_arm64_isel.c ---*/ +/*---------------------------------------------------------------*/ Index: priv/host_arm_defs.c =================================================================== --- priv/host_arm_defs.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_arm_defs.c (.../trunk) (revision 2863) @@ -790,6 +790,7 @@ case ARMneon_VTBL: return "vtbl"; case ARMneon_VRECPS: return "vrecps"; case ARMneon_VRSQRTS: return "vrecps"; + case ARMneon_INVALID: return "??invalid??"; /* ... */ default: vpanic("showARMNeonBinOp"); } @@ -3334,7 +3335,7 @@ //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; Index: priv/host_arm_defs.h =================================================================== --- priv/host_arm_defs.h (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_arm_defs.h (.../trunk) (revision 2863) @@ -468,6 +468,7 @@ ARMneon_VQDMULL, ARMneon_VRECPS, ARMneon_VRSQRTS, + ARMneon_INVALID /* ... 
*/ } ARMNeonBinOp; Index: priv/host_arm_isel.c =================================================================== --- priv/host_arm_isel.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_arm_isel.c (.../trunk) (revision 2863) @@ -4254,26 +4254,11 @@ return res; } case Iop_Abs32Fx4: { - DECLARE_PATTERN(p_vabd_32fx4); - DEFINE_PATTERN(p_vabd_32fx4, - unop(Iop_Abs32Fx4, - binop(Iop_Sub32Fx4, - bind(0), - bind(1)))); - if (matchIRExpr(&mi, p_vabd_32fx4, e)) { - HReg res = newVRegV(env); - HReg argL = iselNeonExpr(env, mi.bindee[0]); - HReg argR = iselNeonExpr(env, mi.bindee[1]); - addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP, - res, argL, argR, 0, True)); - return res; - } else { - HReg res = newVRegV(env); - HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); - addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, - res, argL, 0, True)); - return res; - } + HReg res = newVRegV(env); + HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); + addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, + res, argL, 0, True)); + return res; } case Iop_Rsqrte32Fx4: { HReg res = newVRegV(env); @@ -4457,15 +4442,6 @@ res, argL, argR, size, True)); return res; } - case Iop_Add32Fx4: { - HReg res = newVRegV(env); - HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); - HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); - UInt size = 0; - addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, - res, argL, argR, size, True)); - return res; - } case Iop_Recps32Fx4: { HReg res = newVRegV(env); HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); @@ -4632,15 +4608,6 @@ res, argL, argR, size, True)); return res; } - case Iop_Sub32Fx4: { - HReg res = newVRegV(env); - HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); - HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); - UInt size = 0; - addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, - res, argL, argR, size, True)); - return res; - } case Iop_QSub8Ux16: case Iop_QSub16Ux8: case Iop_QSub32Ux4: @@ -5083,15 +5050,6 @@ res, argL, argR, size, True)); return res; } - case Iop_Mul32Fx4: { - HReg res = newVRegV(env); - HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); - HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); - UInt size = 0; - addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, - res, argL, argR, size, True)); - return res; - } case Iop_Mull8Ux8: case Iop_Mull16Ux4: case Iop_Mull32Ux2: { @@ -5352,6 +5310,23 @@ res, argL, argR, imm4, True)); return res; } + case Iop_Mul32Fx4: + case Iop_Sub32Fx4: + case Iop_Add32Fx4: { + HReg res = newVRegV(env); + HReg argL = iselNeonExpr(env, triop->arg2); + HReg argR = iselNeonExpr(env, triop->arg3); + UInt size = 0; + ARMNeonBinOp op = ARMneon_INVALID; + switch (triop->op) { + case Iop_Mul32Fx4: op = ARMneon_VMULFP; break; + case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break; + case Iop_Add32Fx4: op = ARMneon_VADDFP; break; + default: vassert(0); + } + addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True)); + return res; + } default: break; } @@ -6218,7 +6193,7 @@ case Ijk_NoDecode: case Ijk_NoRedir: case Ijk_Sys_syscall: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Yield: { HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); @@ -6310,7 +6285,7 @@ case Ijk_NoDecode: case Ijk_NoRedir: case Ijk_Sys_syscall: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Yield: { HReg r = iselIntExpr_R(env, next); Index: priv/host_generic_reg_alloc2.c =================================================================== --- priv/host_generic_reg_alloc2.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_generic_reg_alloc2.c (.../trunk) (revision 2863) @@ -399,9 
+399,9 @@ not at each insn processed. */ Bool do_sanity_check; - vassert(0 == (guest_sizeB % 32)); - vassert(0 == (LibVEX_N_SPILL_BYTES % 32)); - vassert(0 == (N_SPILL64S % 4)); + vassert(0 == (guest_sizeB % 16)); + vassert(0 == (LibVEX_N_SPILL_BYTES % 16)); + vassert(0 == (N_SPILL64S % 2)); /* The live range numbers are signed shorts, and so limiting the number of insns to 15000 comfortably guards against them Index: priv/host_generic_simd64.c =================================================================== --- priv/host_generic_simd64.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_generic_simd64.c (.../trunk) (revision 2863) @@ -1553,7 +1553,11 @@ /* ----------------------------------------------------- */ /* Signed and unsigned integer division, that behave like - the ARMv7 UDIV ansd SDIV instructions. */ + the ARMv7 UDIV ansd SDIV instructions. + + sdiv32 also behaves like 64-bit v8 SDIV on w-regs. + udiv32 also behaves like 64-bit v8 UDIV on w-regs. +*/ /* ----------------------------------------------------- */ UInt h_calc_udiv32_w_arm_semantics ( UInt x, UInt y ) @@ -1564,11 +1568,19 @@ return x / y; } +ULong h_calc_udiv64_w_arm_semantics ( ULong x, ULong y ) +{ + // Division by zero --> zero + if (UNLIKELY(y == 0)) return 0; + // C requires rounding towards zero, which is also what we need. + return x / y; +} + Int h_calc_sdiv32_w_arm_semantics ( Int x, Int y ) { // Division by zero --> zero if (UNLIKELY(y == 0)) return 0; - // The single case that produces an unpresentable result + // The single case that produces an unrepresentable result if (UNLIKELY( ((UInt)x) == ((UInt)0x80000000) && ((UInt)y) == ((UInt)0xFFFFFFFF) )) return (Int)(UInt)0x80000000; @@ -1579,7 +1591,22 @@ return x / y; } +Long h_calc_sdiv64_w_arm_semantics ( Long x, Long y ) +{ + // Division by zero --> zero + if (UNLIKELY(y == 0)) return 0; + // The single case that produces an unrepresentable result + if (UNLIKELY( ((ULong)x) == ((ULong)0x8000000000000000ULL ) + && ((ULong)y) == ((ULong)0xFFFFFFFFFFFFFFFFULL ) )) + return (Long)(ULong)0x8000000000000000ULL; + // Else return the result rounded towards zero. C89 says + // this is implementation defined (in the signed case), but gcc + // promises to round towards zero. Nevertheless, at startup, + // in main_main.c, do a check for that. + return x / y; +} + /*---------------------------------------------------------------*/ /*--- end host_generic_simd64.c ---*/ /*---------------------------------------------------------------*/ Index: priv/host_generic_simd64.h =================================================================== --- priv/host_generic_simd64.h (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_generic_simd64.h (.../trunk) (revision 2863) @@ -166,9 +166,12 @@ // Signed and unsigned integer division, that behave like // the ARMv7 UDIV and SDIV instructions. 
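Illustrative sketch, not part of the patch itself: the new 64-bit helpers h_calc_udiv64_w_arm_semantics / h_calc_sdiv64_w_arm_semantics above follow the ARMv8 UDIV/SDIV conventions -- division by zero yields zero, and the single signed case that overflows, INT64_MIN / -1, returns INT64_MIN rather than trapping. A minimal standalone C restatement of those edge cases (the names udiv64_arm/sdiv64_arm are invented for this example) is:

#include <assert.h>

typedef unsigned long long ULong;
typedef long long Long;

/* Same contract as h_calc_udiv64_w_arm_semantics above. */
static ULong udiv64_arm ( ULong x, ULong y )
{
   if (y == 0) return 0;            /* UDIV: divide by zero --> 0 */
   return x / y;                    /* else C division already rounds towards zero */
}

/* Same contract as h_calc_sdiv64_w_arm_semantics above. */
static Long sdiv64_arm ( Long x, Long y )
{
   if (y == 0) return 0;            /* SDIV: divide by zero --> 0 */
   if ((ULong)x == 0x8000000000000000ULL && y == -1LL)
      return x;                     /* the one unrepresentable case: INT64_MIN */
   return x / y;
}

int main ( void )
{
   assert(udiv64_arm(12345ULL, 0ULL) == 0);
   assert(sdiv64_arm(-7LL, 0LL) == 0);
   assert(sdiv64_arm((Long)0x8000000000000000ULL, -1LL)
          == (Long)0x8000000000000000ULL);
   assert(sdiv64_arm(-7LL, 2LL) == -3LL);   /* rounds towards zero, like SDIV */
   return 0;
}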
-extern UInt h_calc_udiv32_w_arm_semantics ( UInt, UInt ); -extern Int h_calc_sdiv32_w_arm_semantics ( Int, Int ); +extern UInt h_calc_udiv32_w_arm_semantics ( UInt, UInt ); +extern ULong h_calc_udiv64_w_arm_semantics ( ULong, ULong ); +extern Int h_calc_sdiv32_w_arm_semantics ( Int, Int ); +extern Long h_calc_sdiv64_w_arm_semantics ( Long, Long ); + #endif /* ndef __VEX_HOST_GENERIC_SIMD64_H */ /*---------------------------------------------------------------*/ Index: priv/host_mips_defs.c =================================================================== --- priv/host_mips_defs.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_mips_defs.c (.../trunk) (revision 2863) @@ -37,7 +37,7 @@ #include "host_mips_defs.h" /* guest_COND offset. */ -#define COND_OFFSET(__mode64) (__mode64 ? 612 : 316) +#define COND_OFFSET(__mode64) (__mode64 ? 612 : 448) /* Register number for guest state pointer in host code. */ #define GuestSP 23 @@ -81,7 +81,7 @@ /* But specific for real regs. */ vassert(hregClass(reg) == HRcInt32 || hregClass(reg) == HRcInt64 || - hregClass(reg) == HRcFlt32 || hregClass(reg) == HRcFlt64); + hregClass(reg) == HRcFlt32 || hregClass(reg) == HRcFlt64); /* But specific for real regs. */ switch (hregClass(reg)) { @@ -91,7 +91,6 @@ vex_printf("%s", ireg32_names[r]); return; case HRcInt64: - vassert(mode64); r = hregNumber (reg); vassert (r >= 0 && r < 32); vex_printf ("%s", ireg32_names[r]); @@ -773,6 +772,12 @@ case Mfp_CVTWD: ret = "cvt.w.d"; break; + case Mfp_CVTLD: + ret = "cvt.l.d"; + break; + case Mfp_CVTLS: + ret = "cvt.l.s"; + break; case Mfp_TRUWD: ret = "trunc.w.d"; break; @@ -797,10 +802,20 @@ case Mfp_CEILLD: ret = "ceil.l.d"; break; - case Mfp_CMP: - ret = "C.cond.d"; + case Mfp_CMP_UN: + ret = "c.un.d"; break; + case Mfp_CMP_EQ: + ret = "c.eq.d"; + break; + case Mfp_CMP_LT: + ret = "c.lt.d"; + break; + case Mfp_CMP_NGT: + ret = "c.ngt.d"; + break; default: + vex_printf("Unknown op: %d", op); vpanic("showMIPSFpOp"); break; } @@ -1497,8 +1512,7 @@ } -MIPSInstr *MIPSInstr_FpCompare(MIPSFpOp op, HReg dst, HReg srcL, HReg srcR, - UChar cond1) +MIPSInstr *MIPSInstr_FpCompare(MIPSFpOp op, HReg dst, HReg srcL, HReg srcR) { MIPSInstr *i = LibVEX_Alloc(sizeof(MIPSInstr)); i->tag = Min_FpCompare; @@ -1506,7 +1520,6 @@ i->Min.FpCompare.dst = dst; i->Min.FpCompare.srcL = srcL; i->Min.FpCompare.srcR = srcR; - i->Min.FpCompare.cond1 = cond1; return i; } @@ -1811,7 +1824,6 @@ ppHRegMIPS(i->Min.FpCompare.srcL, mode64); vex_printf(","); ppHRegMIPS(i->Min.FpCompare.srcR, mode64); - vex_printf(" cond: %c", i->Min.FpCompare.cond1); return; case Min_FpMulAcc: vex_printf("%s ", showMIPSFpOp(i->Min.FpMulAcc.op)); @@ -1864,7 +1876,7 @@ return; } case Min_FpGpMove: { - vex_printf("%s", showMIPSFpGpMoveOp(i->Min.FpGpMove.op)); + vex_printf("%s ", showMIPSFpGpMoveOp(i->Min.FpGpMove.op)); ppHRegMIPS(i->Min.FpGpMove.dst, mode64); vex_printf(", "); ppHRegMIPS(i->Min.FpGpMove.src, mode64); @@ -2101,7 +2113,7 @@ addHRegUse(u, HRmRead, i->Min.FpGpMove.src); return; case Min_MoveCond: - addHRegUse(u, HRmWrite, i->Min.MoveCond.dst); + addHRegUse(u, HRmModify, i->Min.MoveCond.dst); addHRegUse(u, HRmRead, i->Min.MoveCond.src); addHRegUse(u, HRmRead, i->Min.MoveCond.cond); return; @@ -2380,7 +2392,6 @@ static UChar fregNo(HReg r, Bool mode64) { UInt n; - vassert(hregClass(r) == (mode64 ? 
HRcFlt64 : HRcFlt32)); vassert(!hregIsVirtual(r)); n = hregNumber(r); vassert(n <= 31); @@ -2390,7 +2401,6 @@ static UChar dregNo(HReg r) { UInt n; - vassert(hregClass(r) == HRcFlt64); vassert(!hregIsVirtual(r)); n = hregNumber(r); vassert(n <= 31); @@ -3455,8 +3465,9 @@ case Ijk_EmFail: trcval = VEX_TRC_JMP_EMFAIL; break; /* case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; */ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + case Ijk_SigILL: trcval = VEX_TRC_JMP_SIGILL; break; case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; /* case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; */ case Ijk_SigBUS: trcval = VEX_TRC_JMP_SIGBUS; break; @@ -3886,8 +3897,13 @@ p = mkFormR(p, 0x11, 0x15, 0, fr_src, fr_dst, 0x20); break; case Mfp_CVTLS: - fr_dst = fregNo(i->Min.FpConvert.dst, mode64); - fr_src = dregNo(i->Min.FpConvert.src); + if (mode64) { + fr_dst = fregNo(i->Min.FpConvert.dst, mode64); + fr_src = dregNo(i->Min.FpConvert.src); + } else { + fr_dst = dregNo(i->Min.FpConvert.dst); + fr_src = fregNo(i->Min.FpConvert.src, mode64); + } p = mkFormR(p, 0x11, 0x10, 0, fr_src, fr_dst, 0x25); break; case Mfp_CVTLD: @@ -3973,19 +3989,35 @@ } case Min_FpCompare: { - UInt r_dst = iregNo(i->Min.FpCompare.dst, mode64); + UInt r_dst = iregNo(i->Min.FpCompare.dst, mode64); UInt fr_srcL = dregNo(i->Min.FpCompare.srcL); UInt fr_srcR = dregNo(i->Min.FpCompare.srcR); + UInt op; switch (i->Min.FpConvert.op) { - case Mfp_CMP: - p = mkFormR(p, 0x11, 0x11, fr_srcL, fr_srcR, 0, - (i->Min.FpCompare.cond1 + 48)); - p = mkFormR(p, 0x11, 0x2, r_dst, 31, 0, 0); + case Mfp_CMP_UN: + op = 1; break; + case Mfp_CMP_EQ: + op = 2; + break; + case Mfp_CMP_LT: + op = 12; + break; + case Mfp_CMP_NGT: + op = 15; + break; default: goto bad; } + /* c.cond.d fr_srcL, fr_srcR + cfc1 r_dst, $31 + srl r_dst, r_dst, 23 + andi r_dst, r_dst, 1 */ + p = mkFormR(p, 0x11, 0x11, fr_srcL, fr_srcR, 0, op + 48); + p = mkFormR(p, 0x11, 0x2, r_dst, 31, 0, 0); + p = mkFormS(p, 0, r_dst, 0, r_dst, 23, 2); + p = mkFormI(p, 12, r_dst, r_dst, 1); goto done; } Index: priv/host_mips_defs.h =================================================================== --- priv/host_mips_defs.h (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_mips_defs.h (.../trunk) (revision 2863) @@ -366,9 +366,12 @@ Mfp_CVTSD, Mfp_CVTSW, Mfp_CVTWD, Mfp_CVTWS, Mfp_CVTDL, Mfp_CVTSL, Mfp_CVTLS, Mfp_CVTLD, Mfp_TRULS, Mfp_TRULD, Mfp_TRUWS, Mfp_TRUWD, Mfp_FLOORWS, Mfp_FLOORWD, Mfp_ROUNDWS, Mfp_ROUNDWD, - Mfp_CVTDW, Mfp_CMP, Mfp_CEILWS, Mfp_CEILWD, Mfp_CEILLS, Mfp_CEILLD, - Mfp_CVTDS, Mfp_ROUNDLD, Mfp_FLOORLD + Mfp_CVTDW, Mfp_CEILWS, Mfp_CEILWD, Mfp_CEILLS, Mfp_CEILLD, Mfp_CVTDS, + Mfp_ROUNDLD, Mfp_FLOORLD, + /* FP compare */ + Mfp_CMP_UN, Mfp_CMP_EQ, Mfp_CMP_LT, Mfp_CMP_NGT + } MIPSFpOp; extern const HChar *showMIPSFpOp(MIPSFpOp); @@ -664,7 +667,7 @@ HReg src2, HReg src3 ); extern MIPSInstr *MIPSInstr_FpConvert(MIPSFpOp op, HReg dst, HReg src); extern MIPSInstr *MIPSInstr_FpCompare(MIPSFpOp op, HReg dst, HReg srcL, - HReg srcR, UChar cond1); + HReg srcR); extern MIPSInstr *MIPSInstr_FpMulAcc(MIPSFpOp op, HReg dst, HReg srcML, HReg srcMR, HReg srcAcc); extern MIPSInstr *MIPSInstr_FpLdSt(Bool isLoad, UChar sz, HReg, MIPSAMode *); Index: priv/host_mips_isel.c =================================================================== --- priv/host_mips_isel.c (.../tags/VEX_3_9_0) (revision 2863) +++ 
priv/host_mips_isel.c (.../trunk) (revision 2863) @@ -47,12 +47,14 @@ ZERO0 Reserved GPR12:22 Allocateable 23 GuestStatePointer - 23 Allocateable SP StackFramePointer RA LinkRegister */ static Bool mode64 = False; +/* Host CPU has FPU and 32 dbl. prec. FP registers. */ +static Bool fp_mode64 = False; + /* GPR register class for mips32/64 */ #define HRcGPR(__mode64) (__mode64 ? HRcInt64 : HRcInt32) @@ -60,7 +62,7 @@ #define HRcFPR(__mode64) (__mode64 ? HRcFlt64 : HRcFlt32) /* guest_COND offset */ -#define COND_OFFSET(__mode64) (__mode64 ? 612 : 316) +#define COND_OFFSET(__mode64) (__mode64 ? 612 : 448) /*---------------------------------------------------------*/ /*--- ISelEnv ---*/ @@ -117,6 +119,7 @@ UInt hwcaps; Bool mode64; + Bool fp_mode64; Bool chainingAllowed; Addr64 max_ga; @@ -180,7 +183,7 @@ static HReg newVRegF(ISelEnv * env) { - HReg reg = mkHReg(env->vreg_ctr, HRcFPR(env->mode64), + HReg reg = mkHReg(env->vreg_ctr, HRcFPR(env->fp_mode64), True /*virtual reg */ ); env->vreg_ctr++; return reg; @@ -230,12 +233,13 @@ static MIPSRH *iselWordExpr_RH_wrk(ISelEnv * env, Bool syned, IRExpr * e); static MIPSRH *iselWordExpr_RH(ISelEnv * env, Bool syned, IRExpr * e); -/* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter being an immediate in - the range 1 .. 31 inclusive. Used for doing shift amounts. */ +/* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter being an + immediate in the range 1 .. 31 inclusive. Used for doing shift amounts. */ static MIPSRH *iselWordExpr_RH5u_wrk(ISelEnv * env, IRExpr * e); static MIPSRH *iselWordExpr_RH5u(ISelEnv * env, IRExpr * e); -/* In 64-bit mode ONLY */ +/* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter being an + immediate in the range 1 .. 63 inclusive. Used for doing shift amounts. */ static MIPSRH *iselWordExpr_RH6u_wrk(ISelEnv * env, IRExpr * e); static MIPSRH *iselWordExpr_RH6u(ISelEnv * env, IRExpr * e); @@ -1119,29 +1123,24 @@ /* Create in dst, the IRCmpF64Result encoded result. 
*/ /* chech for EQ */ - addInstr(env, MIPSInstr_FpCompare(Mfp_CMP, tmp, r_srcL, r_srcR, - toUChar(2))); - addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, r_ccMIPS, tmp, - MIPSRH_Imm(False, 22))); + addInstr(env, MIPSInstr_FpCompare(Mfp_CMP_EQ, tmp, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, r_ccMIPS, tmp, + MIPSRH_Imm(False, 1))); /* chech for UN */ - addInstr(env, MIPSInstr_FpCompare(Mfp_CMP, tmp, r_srcL, r_srcR, - toUChar(1))); - addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tmp, tmp, - MIPSRH_Imm(False, 23))); + addInstr(env, MIPSInstr_FpCompare(Mfp_CMP_UN, tmp, r_srcL, r_srcR)); addInstr(env, MIPSInstr_Alu(Malu_OR, r_ccMIPS, r_ccMIPS, MIPSRH_Reg(tmp))); /* chech for LT */ - addInstr(env, MIPSInstr_FpCompare(Mfp_CMP, tmp, r_srcL, r_srcR, - toUChar(12))); - addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tmp, - tmp, MIPSRH_Imm(False, 21))); + addInstr(env, MIPSInstr_FpCompare(Mfp_CMP_LT, tmp, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, tmp, + tmp, MIPSRH_Imm(False, 2))); addInstr(env, MIPSInstr_Alu(Malu_OR, r_ccMIPS, r_ccMIPS, MIPSRH_Reg(tmp))); /* chech for GT */ - addInstr(env, MIPSInstr_FpCompare(Mfp_CMP, tmp, r_srcL, r_srcR, - toUChar(15))); - addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tmp, tmp, - MIPSRH_Imm(False, 20))); + addInstr(env, MIPSInstr_FpCompare(Mfp_CMP_NGT, + tmp, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, tmp, tmp, + MIPSRH_Imm(False, 3))); addInstr(env, MIPSInstr_Alu(Malu_NOR, tmp, tmp, MIPSRH_Reg(tmp))); addInstr(env, MIPSInstr_Alu(Malu_AND, tmp, tmp, @@ -1789,34 +1788,14 @@ if ((ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ((ty == Ity_I64))) && typeOfIRExpr(env->type_env, e->Iex.ITE.cond) == Ity_I1) { + HReg r_dst = iselWordExpr_R(env, e->Iex.ITE.iffalse); + HReg r1 = iselWordExpr_R(env, e->Iex.ITE.iftrue); + HReg r_cond = iselWordExpr_R(env, e->Iex.ITE.cond); /* - * r_dst = cond && r1 - * cond = not(cond) - * tmp = cond && r0 - * r_dst = tmp + r_dst + * r_dst = r0 + * movn r_dst, r1, r_cond */ - HReg r0 = iselWordExpr_R(env, e->Iex.ITE.iffalse); - HReg r1 = iselWordExpr_R(env, e->Iex.ITE.iftrue); - HReg r_cond_1 = iselWordExpr_R(env, e->Iex.ITE.cond); - HReg r_cond = newVRegI(env); - HReg mask = newVRegI(env); - HReg r_dst = newVRegI(env); - HReg r_tmp = newVRegI(env); - HReg r_tmp1 = newVRegI(env); - HReg r_cond_neg = newVRegI(env); - /* r_cond = 0 - r_cond_1 */ - addInstr(env, MIPSInstr_LI(mask, 0x0)); - addInstr(env, MIPSInstr_Alu(Malu_SUB, r_cond, - mask, MIPSRH_Reg(r_cond_1))); - - addInstr(env, MIPSInstr_Alu(Malu_AND, r_tmp, r_cond, MIPSRH_Reg(r1))); - addInstr(env, MIPSInstr_Alu(Malu_NOR, r_cond_neg, r_cond, - MIPSRH_Reg(r_cond))); - addInstr(env, MIPSInstr_Alu(Malu_AND, r_tmp1, r_cond_neg, - MIPSRH_Reg(r0))); - addInstr(env, MIPSInstr_Alu(Malu_ADD, r_dst, r_tmp, - MIPSRH_Reg(r_tmp1))); - + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, r_dst, r1, r_cond)); return r_dst; } break; @@ -2009,7 +1988,6 @@ static MIPSRH *iselWordExpr_RH6u ( ISelEnv * env, IRExpr * e ) { MIPSRH *ri; - vassert(env->mode64); ri = iselWordExpr_RH6u_wrk(env, e); /* sanity checks ... */ switch (ri->tag) { @@ -2436,7 +2414,8 @@ /* Check if borrow is nedded. 
*/ addInstr(env, MIPSInstr_Cmp(False, size32, borrow, xLo, yLo, cc)); - addInstr(env, MIPSInstr_Alu(Malu_ADD, yHi, yHi, MIPSRH_Reg(borrow))); + addInstr(env, MIPSInstr_Alu(Malu_ADD, yHi, yHi, + MIPSRH_Reg(borrow))); addInstr(env, MIPSInstr_Alu(Malu_SUB, tHi, xHi, MIPSRH_Reg(yHi))); *rHi = tHi; @@ -2505,177 +2484,309 @@ } case Iop_Shr64: { - HReg xLo, xHi; - HReg tLo = newVRegI(env); - HReg tLo1 = newVRegI(env); - HReg tHi = newVRegI(env); - HReg tmp = newVRegI(env); - HReg tmp2 = newVRegI(env); - HReg tmp3 = newVRegI(env); - HReg mask = newVRegI(env); - HReg tMask = newVRegI(env); - HReg discard = newVRegI(env); - HReg discard1 = newVRegI(env); +#if defined (_MIPSEL) + /* 64-bit logical shift right based on what gcc generates: + : + nor v0, zero, a2 + sll a3, a1, 0x1 + sllv a3, a3, v0 + srlv v0, a0, a2 + srlv v1, a1, a2 + andi a0, a2, 0x20 + or v0, a3, v0 + movn v0, v1, a0 + jr ra + movn v1, zero, a0 + */ + HReg a0, a1; + HReg a0tmp = newVRegI(env); + HReg a2 = newVRegI(env); + HReg a3 = newVRegI(env); + HReg v0 = newVRegI(env); + HReg v1 = newVRegI(env); + HReg zero = newVRegI(env); + MIPSRH *sa = NULL; - /* We assume any literal values are on the second operand. */ - iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); - MIPSRH *ri_srcR = NULL; - MIPSRH *ri_srcR_sub = NULL; + iselInt64Expr(&a1, &a0, env, e->Iex.Binop.arg1); + sa = iselWordExpr_RH6u(env, e->Iex.Binop.arg2); - ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2); - ri_srcR_sub = iselWordExpr_RH(env, True /*signed */ , - e->Iex.Binop.arg2); + if (sa->tag == Mrh_Imm) { + addInstr(env, MIPSInstr_LI(a2, sa->Mrh.Imm.imm16)); + } + else { + addInstr(env, MIPSInstr_Alu(Malu_AND, a2, sa->Mrh.Reg.reg, + MIPSRH_Imm(False, 0x3f))); + } - /* Steps: - 1. Take shift-amount (arg2) least significant bits from upper - half of 64bit input value (arg1) - 2. Shift upper half - 3. Shift lower half - 4. Put discarded bits (those from step 1) to most significant - bit positions of lower half */ + addInstr(env, MIPSInstr_LI(zero, 0x00000000)); + /* nor v0, zero, a2 */ + addInstr(env, MIPSInstr_Alu(Malu_NOR, v0, zero, MIPSRH_Reg(a2))); + /* sll a3, a1, 0x1 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + a3, a1, MIPSRH_Imm(False, 0x1))); + /* sllv a3, a3, v0 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + a3, a3, MIPSRH_Reg(v0))); + /* srlv v0, a0, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /* 32bit shift */, + v0, a0, MIPSRH_Reg(a2))); + /* srlv v1, a1, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /* 32bit shift */, + v1, a1, MIPSRH_Reg(a2))); + /* andi a0, a2, 0x20 */ + addInstr(env, MIPSInstr_Alu(Malu_AND, a0tmp, a2, + MIPSRH_Imm(False, 0x20))); + /* or v0, a3, v0 */ + addInstr(env, MIPSInstr_Alu(Malu_OR, v0, a3, MIPSRH_Reg(v0))); - /* Mask for extraction of bits that will be discarded. */ - addInstr(env, MIPSInstr_LI(tmp, 0xffffffff)); - addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /*32bit shift */, - tMask, tmp, ri_srcR)); - addInstr(env, MIPSInstr_Alu(Malu_NOR, mask, - tMask, MIPSRH_Reg(tMask))); + /* movn v0, v1, a0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, v0, v1, a0tmp)); + /* movn v1, zero, a0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, v1, zero, a0tmp)); - /* Extraction of bits that will be discarded. */ - addInstr(env, MIPSInstr_Alu(Malu_AND, discard, xHi, - MIPSRH_Reg(mask))); - /* Position discarded bits to most significant bit positions. 
*/ - addInstr(env, MIPSInstr_LI(tmp3, 32)); - addInstr(env, MIPSInstr_Alu(Malu_SUB, tmp2, - tmp3, ri_srcR_sub)); - addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /*32bit shift */, - discard1, discard, MIPSRH_Reg(tmp2))); + *rHi = v1; + *rLo = v0; + return; +#elif defined (_MIPSEB) + /* 64-bit logical shift right based on what gcc generates: + : + nor v0, zero, a2 + sll a3, a0, 0x1 + sllv a3, a3, v0 + srlv v1, a1, a2 + andi v0, a2, 0x20 + or v1, a3, v1 + srlv a2, a0, a2 + movn v1, a2, v0 + movn a2, zero, v0 + jr ra + move v0, a2 + */ + HReg a0, a1; + HReg a2 = newVRegI(env); + HReg a2tmp = newVRegI(env); + HReg a3 = newVRegI(env); + HReg v0 = newVRegI(env); + HReg v1 = newVRegI(env); + HReg zero = newVRegI(env); + MIPSRH *sa = NULL; - addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, - tHi, xHi, ri_srcR)); - addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, - tLo1, xLo, ri_srcR)); + iselInt64Expr(&a0, &a1, env, e->Iex.Binop.arg1); + sa = iselWordExpr_RH6u(env, e->Iex.Binop.arg2); - addInstr(env, MIPSInstr_Alu(Malu_OR, tLo, - tLo1, MIPSRH_Reg(discard1))); - *rHi = tHi; - *rLo = tLo; + if (sa->tag == Mrh_Imm) { + addInstr(env, MIPSInstr_LI(a2, sa->Mrh.Imm.imm16)); + } + else { + addInstr(env, MIPSInstr_Alu(Malu_AND, a2, sa->Mrh.Reg.reg, + MIPSRH_Imm(False, 0x3f))); + } + + addInstr(env, MIPSInstr_LI(zero, 0x00000000)); + /* nor v0, zero, a2 */ + addInstr(env, MIPSInstr_Alu(Malu_NOR, v0, zero, MIPSRH_Reg(a2))); + /* sll a3, a0, 0x1 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + a3, a0, MIPSRH_Imm(False, 0x1))); + /* sllv a3, a3, v0 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + a3, a3, MIPSRH_Reg(v0))); + /* srlv v1, a1, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /* 32bit shift */, + v1, a1, MIPSRH_Reg(a2))); + /* andi v0, a2, 0x20 */ + addInstr(env, MIPSInstr_Alu(Malu_AND, v0, a2, + MIPSRH_Imm(False, 0x20))); + /* or v1, a3, v1 */ + addInstr(env, MIPSInstr_Alu(Malu_OR, v1, a3, MIPSRH_Reg(v1))); + /* srlv a2, a0, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /* 32bit shift */, + a2tmp, a0, MIPSRH_Reg(a2))); + + /* movn v1, a2, v0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, v1, a2tmp, v0)); + /* movn a2, zero, v0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, a2tmp, zero, v0)); + /* move v0, a2 */ + addInstr(env, mk_iMOVds_RR(v0, a2tmp)); + + *rHi = v0; + *rLo = v1; return; +#endif } + case Iop_Shl64: { - HReg xLo, xHi; - HReg tLo = newVRegI(env); - HReg tHi1 = newVRegI(env); - HReg tHi = newVRegI(env); - HReg tmp = newVRegI(env); - HReg tmp2 = newVRegI(env); - HReg tmp3 = newVRegI(env); - HReg mask = newVRegI(env); - HReg tMask = newVRegI(env); - HReg discard = newVRegI(env); - HReg discard1 = newVRegI(env); + /* 64-bit shift left based on what gcc generates: + : + nor v0,zero,a2 + srl a3,a0,0x1 + srlv a3,a3,v0 + sllv v1,a1,a2 + andi v0,a2,0x20 + or v1,a3,v1 + sllv a2,a0,a2 + movn v1,a2,v0 + movn a2,zero,v0 + jr ra + move v0,a2 + */ + HReg a0, a1; + HReg a2 = newVRegI(env); + HReg a3 = newVRegI(env); + HReg v0 = newVRegI(env); + HReg v1 = newVRegI(env); + HReg zero = newVRegI(env); + MIPSRH *sa = NULL; - /* We assume any literal values are on the second operand. 
*/ - iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); - MIPSRH *ri_srcR = NULL; - MIPSRH *ri_srcR_sub = NULL; + iselInt64Expr(&a1, &a0, env, e->Iex.Binop.arg1); + sa = iselWordExpr_RH6u(env, e->Iex.Binop.arg2); - ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2); - ri_srcR_sub = iselWordExpr_RH(env, True /*signed */ , - e->Iex.Binop.arg2); + if (sa->tag == Mrh_Imm) { + addInstr(env, MIPSInstr_LI(a2, sa->Mrh.Imm.imm16)); + } + else { + addInstr(env, MIPSInstr_Alu(Malu_AND, a2, sa->Mrh.Reg.reg, + MIPSRH_Imm(False, 0x3f))); + } - /* Steps: - 1. Take shift-amount (arg2) most significant bits from lower - half of 64bit input value (arg1) - 2. Shift lower half - 3. Shift upper half - 4. Put discarded bits (those from step 1) to least significant - bit positions of upper half */ + addInstr(env, MIPSInstr_LI(zero, 0x00000000)); + /* nor v0, zero, a2 */ + addInstr(env, MIPSInstr_Alu(Malu_NOR, v0, zero, MIPSRH_Reg(a2))); + /* srl a3, a0, 0x1 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /* 32bit shift */, + a3, a0, MIPSRH_Imm(False, 0x1))); + /* srlv a3, a3, v0 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /* 32bit shift */, + a3, a3, MIPSRH_Reg(v0))); + /* sllv v1, a1, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + v1, a1, MIPSRH_Reg(a2))); + /* andi v0, a2, 0x20 */ + addInstr(env, MIPSInstr_Alu(Malu_AND, v0, a2, + MIPSRH_Imm(False, 0x20))); + /* or v1, a3, v1 */ + addInstr(env, MIPSInstr_Alu(Malu_OR, v1, a3, MIPSRH_Reg(v1))); + /* sllv a2, a0, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + a2, a0, MIPSRH_Reg(a2))); - /* Mask for extraction of bits that will be discarded. */ - addInstr(env, MIPSInstr_LI(tmp, 0xffffffff)); - addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, - tMask, tmp, ri_srcR)); - addInstr(env, MIPSInstr_Alu(Malu_NOR, mask, - tMask, MIPSRH_Reg(tMask))); + /* movn v1, a2, v0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, v1, a2, v0)); + /* movn a2, zero, v0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, a2, zero, v0)); + addInstr(env, mk_iMOVds_RR(v0, a2)); - /* Extraction of bits that will be discarded. */ - addInstr(env, MIPSInstr_Alu(Malu_AND, discard, xLo, - MIPSRH_Reg(mask))); - /* Position discarded bits to least significant bit positions. 
*/ - addInstr(env, MIPSInstr_LI(tmp3, 32)); - addInstr(env, MIPSInstr_Alu(Malu_SUB, tmp2, - tmp3, ri_srcR_sub)); - addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, - discard1, discard, MIPSRH_Reg(tmp2))); + *rHi = v1; + *rLo = v0; + return; + } - addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /*32bit shift */, - tHi1, xHi, ri_srcR)); - addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /*32bit shift */, - tLo, xLo, ri_srcR)); + case Iop_Sar64: { + /* 64-bit arithmetic shift right based on what gcc generates: + : + nor v0, zero, a2 + sll a3, a1, 0x1 + sllv a3, a3, v0 + srlv v0, a0, a2 + srav v1, a1, a2 + andi a0, a2, 0x20 + sra a1, a1, 0x1f + or v0, a3, v0 + movn v0, v1, a0 + jr ra + movn v1, a1, a0 + */ + HReg a0, a1; + HReg a0tmp = newVRegI(env); + HReg a1tmp = newVRegI(env); + HReg a2 = newVRegI(env); + HReg a3 = newVRegI(env); + HReg v0 = newVRegI(env); + HReg v1 = newVRegI(env); + HReg zero = newVRegI(env); + MIPSRH *sa = NULL; - addInstr(env, MIPSInstr_Alu(Malu_OR, tHi, - tHi1, MIPSRH_Reg(discard1))); - *rHi = tHi; - *rLo = tLo; + iselInt64Expr(&a1, &a0, env, e->Iex.Binop.arg1); + sa = iselWordExpr_RH6u(env, e->Iex.Binop.arg2); + + if (sa->tag == Mrh_Imm) { + addInstr(env, MIPSInstr_LI(a2, sa->Mrh.Imm.imm16)); + } + else { + addInstr(env, MIPSInstr_Alu(Malu_AND, a2, sa->Mrh.Reg.reg, + MIPSRH_Imm(False, 0x3f))); + } + + addInstr(env, MIPSInstr_LI(zero, 0x00000000)); + /* nor v0, zero, a2 */ + addInstr(env, MIPSInstr_Alu(Malu_NOR, v0, zero, MIPSRH_Reg(a2))); + /* sll a3, a1, 0x1 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + a3, a1, MIPSRH_Imm(False, 0x1))); + /* sllv a3, a3, v0 */ + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /* 32bit shift */, + a3, a3, MIPSRH_Reg(v0))); + /* srlv v0, a0, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /* 32bit shift */, + v0, a0, MIPSRH_Reg(a2))); + /* srav v1, a1, a2 */ + addInstr(env, MIPSInstr_Shft(Mshft_SRA, True /* 32bit shift */, + v1, a1, MIPSRH_Reg(a2))); + /* andi a0, a2, 0x20 */ + addInstr(env, MIPSInstr_Alu(Malu_AND, a0tmp, a2, + MIPSRH_Imm(False, 0x20))); + /* sra a1, a1, 0x1f */ + addInstr(env, MIPSInstr_Shft(Mshft_SRA, True /* 32bit shift */, + a1tmp, a1, MIPSRH_Imm(False, 0x1f))); + /* or v0, a3, v0 */ + addInstr(env, MIPSInstr_Alu(Malu_OR, v0, a3, MIPSRH_Reg(v0))); + + /* movn v0, v1, a0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, v0, v1, a0tmp)); + /* movn v1, a1, a0 */ + addInstr(env, MIPSInstr_MoveCond(MMoveCond_movn, v1, a1tmp, a0tmp)); + + *rHi = v1; + *rLo = v0; return; } - case Iop_Sar64: { - HReg xLo, xHi; - HReg tLo = newVRegI(env); - HReg tLo1 = newVRegI(env); - HReg tHi = newVRegI(env); - HReg tmp = newVRegI(env); - HReg tmp2 = newVRegI(env); - HReg tmp3 = newVRegI(env); - HReg mask = newVRegI(env); - HReg tMask = newVRegI(env); - HReg discard = newVRegI(env); - HReg discard1 = newVRegI(env); - /* We assume any literal values are on the second operand. */ - iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); - MIPSRH *ri_srcR = NULL; - MIPSRH *ri_srcR_sub = NULL; + case Iop_F32toI64S: { + HReg tmpD = newVRegD(env); + HReg valF = iselFltExpr(env, e->Iex.Binop.arg2); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + MIPSAMode *am_addr; - ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2); - ri_srcR_sub = iselWordExpr_RH(env, True /*signed */ , - e->Iex.Binop.arg2); + /* CVTLS tmpD, valF */ + set_MIPS_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_FpConvert(Mfp_CVTLS, tmpD, valF)); + set_MIPS_rounding_default(env); - /* Steps: - 1. 
Take shift-amount (arg2) least significant bits from upper - half of 64bit input value (arg1) - 2. Shift upper half - 3. Shift lower half - 4. Put discarded bits (those from step 1) to most significant - bit positions of lower half */ + sub_from_sp(env, 16); /* Move SP down 16 bytes */ + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); - /* Mask for extraction of bits that will be discarded. */ - addInstr(env, MIPSInstr_LI(tmp, 0xffffffff)); - addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /*32bit shift */, - tMask, tmp, ri_srcR)); - addInstr(env, MIPSInstr_Alu(Malu_NOR, mask, - tMask, MIPSRH_Reg(tMask))); + /* store as F64 */ + addInstr(env, MIPSInstr_FpLdSt(False /*store */ , 8, tmpD, + am_addr)); + /* load as 2xI32 */ +#if defined (_MIPSEL) + addInstr(env, MIPSInstr_Load(4, tLo, am_addr, mode64)); + addInstr(env, MIPSInstr_Load(4, tHi, nextMIPSAModeFloat(am_addr), + mode64)); +#elif defined (_MIPSEB) + addInstr(env, MIPSInstr_Load(4, tHi, am_addr, mode64)); + addInstr(env, MIPSInstr_Load(4, tLo, nextMIPSAModeFloat(am_addr), + mode64)); +#endif - /* Extraction of bits that will be discarded. */ - addInstr(env, MIPSInstr_Alu(Malu_AND, discard, xHi, - MIPSRH_Reg(mask))); - /* Position discarded bits to most significant bit positions. */ - addInstr(env, MIPSInstr_LI(tmp3, 32)); - addInstr(env, MIPSInstr_Alu(Malu_SUB, tmp2, - tmp3, ri_srcR_sub)); - addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /*32bit shift */, - discard1, discard, MIPSRH_Reg(tmp2))); + /* Reset SP */ + add_to_sp(env, 16); - addInstr(env, MIPSInstr_Shft(Mshft_SRA, True /*32bit shift */, - tHi, xHi, ri_srcR)); - addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, - tLo1, xLo, ri_srcR)); - - addInstr(env, MIPSInstr_Alu(Malu_OR, tLo, - tLo1, MIPSRH_Reg(discard1))); *rHi = tHi; *rLo = tLo; + return; } @@ -2695,7 +2806,7 @@ addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, tmp, src, MIPSRH_Imm(False, 31))); - addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tmp, src, + addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tmp, tmp, MIPSRH_Imm(False, 31))); addInstr(env, mk_iMOVds_RR(tHi, tmp)); @@ -2748,35 +2859,31 @@ } case Iop_Left64: { - HReg yLo, yHi, borrow; + HReg yHi, yLo; HReg tHi = newVRegI(env); HReg tLo = newVRegI(env); + HReg tmp = newVRegI(env); + HReg tmp1 = newVRegI(env); + HReg tmp2 = newVRegI(env); HReg zero = newVRegI(env); - Bool size32 = True; MIPSCondCode cc = MIPScc_LO; - borrow = newVRegI(env); - /* yHi:yLo = arg */ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); /* zero = 0 */ addInstr(env, MIPSInstr_LI(zero, 0x00000000)); - /* tLo = 0 - yLo */ - addInstr(env, MIPSInstr_Alu(Malu_SUB, tLo, zero, MIPSRH_Reg(yLo))); + /* tmp2:tmp1 = 0 - (yHi:yLo)*/ + addInstr(env, MIPSInstr_Alu(Malu_SUB, tmp2, zero, MIPSRH_Reg(yLo))); + addInstr(env, MIPSInstr_Cmp(False, True, tmp1, zero, tmp2, cc)); + addInstr(env, MIPSInstr_Alu(Malu_SUB, tmp, zero, MIPSRH_Reg(yHi))); + addInstr(env, MIPSInstr_Alu(Malu_SUB, tmp1, tmp, MIPSRH_Reg(tmp1))); - /* Check if borrow is needed. */ - addInstr(env, MIPSInstr_Cmp(False, size32, borrow, zero, yLo, cc)); - - /* tHi = 0 - (yHi + borrow) */ - addInstr(env, MIPSInstr_Alu(Malu_ADD, - yHi, yHi, MIPSRH_Reg(borrow))); - addInstr(env, MIPSInstr_Alu(Malu_SUB, tHi, zero, MIPSRH_Reg(yHi))); - /* So now we have tHi:tLo = -arg. To finish off, or 'arg' + /* So now we have tmp2:tmp1 = -arg. To finish off, or 'arg' back in, so as to give the final result tHi:tLo = arg | -arg. 
*/ - addInstr(env, MIPSInstr_Alu(Malu_OR, tHi, tHi, MIPSRH_Reg(yHi))); - addInstr(env, MIPSInstr_Alu(Malu_OR, tLo, tLo, MIPSRH_Reg(yLo))); + addInstr(env, MIPSInstr_Alu(Malu_OR, tHi, yHi, MIPSRH_Reg(tmp1))); + addInstr(env, MIPSInstr_Alu(Malu_OR, tLo, yLo, MIPSRH_Reg(tmp2))); *rHi = tHi; *rLo = tLo; return; @@ -2865,7 +2972,7 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e) { IRType ty = typeOfIRExpr(env->type_env, e); - vassert(ty == Ity_F32 || (ty == Ity_F64 && mode64)); + vassert(ty == Ity_F32 || (ty == Ity_F64 && fp_mode64)); if (e->tag == Iex_RdTmp) { return lookupIRTemp(env, e->Iex.RdTmp.tmp); @@ -2872,26 +2979,31 @@ } if (e->tag == Iex_Load) { - MIPSAMode *am_addr; - HReg r_dst = newVRegF(env); vassert(e->Iex.Load.ty == Ity_F32 - || (e->Iex.Load.ty == Ity_F64 && mode64)); - am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, ty); - if (mode64 && e->Iex.Load.ty == Ity_F64) + || (e->Iex.Load.ty == Ity_F64 && fp_mode64)); + HReg r_dst; + MIPSAMode *am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, ty); + if (e->Iex.Load.ty == Ity_F64) { + r_dst = newVRegD(env); addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, r_dst, am_addr)); - else + } else { + r_dst = newVRegF(env); addInstr(env, MIPSInstr_FpLdSt(True /*load */, 4, r_dst, am_addr)); + } return r_dst; } if (e->tag == Iex_Get) { - HReg r_dst = newVRegF(env); MIPSAMode *am_addr = MIPSAMode_IR(e->Iex.Get.offset, GuestStatePointer(mode64)); - if (mode64) + HReg r_dst; + if (e->Iex.Load.ty == Ity_F64) { + r_dst = newVRegD(env); addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, r_dst, am_addr)); - else + } else { + r_dst = newVRegF(env); addInstr(env, MIPSInstr_FpLdSt(True /*load */, 4, r_dst, am_addr)); + } return r_dst; } @@ -2908,7 +3020,7 @@ return r_dst; } case Iop_F32toF64: { - vassert(mode64); + vassert(fp_mode64); HReg src = iselFltExpr(env, e->Iex.Unop.arg); HReg dst = newVRegD(env); @@ -2916,24 +3028,29 @@ return dst; } case Iop_ReinterpI64asF64: { - vassert(mode64); - HReg fr_src = iselWordExpr_R(env, e->Iex.Unop.arg); - HReg r_dst = newVRegF(env); - - /* Move Doubleword to Floating Point - dmtc1 r_dst, fr_src */ - addInstr(env, MIPSInstr_FpGpMove(MFpGpMove_dmtc1, r_dst, fr_src)); - + HReg r_dst; + if (mode64) { + HReg fr_src = iselWordExpr_R(env, e->Iex.Unop.arg); + r_dst = newVRegF(env); + /* Move Doubleword to Floating Point + dmtc1 r_dst, fr_src */ + addInstr(env, MIPSInstr_FpGpMove(MFpGpMove_dmtc1, r_dst, fr_src)); + } else { + HReg Hi, Lo; + r_dst = newVRegD(env); + iselInt64Expr(&Hi, &Lo, env, e->Iex.Unop.arg); + r_dst = mk_LoadRR32toFPR(env, Hi, Lo); /* 2*I32 -> F64 */ + } return r_dst; } case Iop_I32StoF64: { - vassert(mode64); + vassert(fp_mode64); HReg dst = newVRegF(env); HReg tmp = newVRegF(env); HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); /* Move Word to Floating Point - mtc1 tmp1, r_src */ + mtc1 tmp, r_src */ addInstr(env, MIPSInstr_FpGpMove(MFpGpMove_mtc1, tmp, r_src)); /* and do convert */ @@ -3010,7 +3127,7 @@ op = Mfp_DIVS; break; case Iop_DivF64: - vassert(mode64); + vassert(fp_mode64); op = Mfp_DIVD; break; case Iop_MulF32: @@ -3017,7 +3134,7 @@ op = Mfp_MULS; break; case Iop_MulF64: - vassert(mode64); + vassert(fp_mode64); op = Mfp_MULD; break; case Iop_AddF32: @@ -3024,7 +3141,7 @@ op = Mfp_ADDS; break; case Iop_AddF64: - vassert(mode64); + vassert(fp_mode64); op = Mfp_ADDD; break; case Iop_SubF32: @@ -3031,7 +3148,7 @@ op = Mfp_SUBS; break; case Iop_SubF64: - vassert(mode64); + vassert(fp_mode64); op = Mfp_SUBD; break; default: @@ -3101,24 +3218,30 @@ case Iop_I64StoF64: { HReg r_dst = 
newVRegF(env); - MIPSAMode *am_addr; - HReg fr_src = iselWordExpr_R(env, e->Iex.Binop.arg2); - HReg tmp = newVRegF(env); + HReg tmp, fr_src; + if (mode64) { + tmp = newVRegF(env); + fr_src = iselWordExpr_R(env, e->Iex.Binop.arg2); + /* Move SP down 8 bytes */ + sub_from_sp(env, 8); + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); - /* Move SP down 8 bytes */ - sub_from_sp(env, 8); - am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + /* store as I64 */ + addInstr(env, MIPSInstr_Store(8, am_addr, fr_src, mode64)); - /* store as I64 */ - addInstr(env, MIPSInstr_Store(8, am_addr, fr_src, mode64)); + /* load as Ity_F64 */ + addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, tmp, am_addr)); - /* load as Ity_F64 */ - addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, tmp, am_addr)); + /* Reset SP */ + add_to_sp(env, 8); + } else { + HReg Hi, Lo; + tmp = newVRegD(env); + iselInt64Expr(&Hi, &Lo, env, e->Iex.Binop.arg2); + tmp = mk_LoadRR32toFPR(env, Hi, Lo); /* 2*I32 -> F64 */ + } - /* Reset SP */ - add_to_sp(env, 8); - set_MIPS_rounding_mode(env, e->Iex.Binop.arg1); addInstr(env, MIPSInstr_FpConvert(Mfp_CVTDL, r_dst, tmp)); set_MIPS_rounding_default(env); @@ -3128,24 +3251,30 @@ case Iop_I64StoF32: { HReg r_dst = newVRegF(env); - MIPSAMode *am_addr; - HReg fr_src = iselWordExpr_R(env, e->Iex.Binop.arg2); - HReg tmp = newVRegF(env); + HReg fr_src, tmp; + if (mode64) { + tmp = newVRegF(env); + fr_src = iselWordExpr_R(env, e->Iex.Binop.arg2); + /* Move SP down 8 bytes */ + sub_from_sp(env, 8); + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); - /* Move SP down 8 bytes */ - sub_from_sp(env, 8); - am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + /* store as I64 */ + addInstr(env, MIPSInstr_Store(8, am_addr, fr_src, mode64)); - /* store as I64 */ - addInstr(env, MIPSInstr_Store(8, am_addr, fr_src, mode64)); + /* load as Ity_F64 */ + addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, tmp, am_addr)); - /* load as Ity_F64 */ - addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, tmp, am_addr)); + /* Reset SP */ + add_to_sp(env, 8); + } else { + HReg Hi, Lo; + tmp = newVRegD(env); + iselInt64Expr(&Hi, &Lo, env, e->Iex.Binop.arg2); + tmp = mk_LoadRR32toFPR(env, Hi, Lo); /* 2*I32 -> F64 */ + } - /* Reset SP */ - add_to_sp(env, 8); - set_MIPS_rounding_mode(env, e->Iex.Binop.arg1); addInstr(env, MIPSInstr_FpConvert(Mfp_CVTSL, r_dst, tmp)); set_MIPS_rounding_default(env); @@ -3155,7 +3284,6 @@ case Iop_SqrtF32: case Iop_SqrtF64: { - /* first arg is rounding mode; we ignore it. */ Bool sz32 = e->Iex.Binop.op == Iop_SqrtF32; HReg src = iselFltExpr(env, e->Iex.Binop.arg2); HReg dst = newVRegF(env); @@ -3368,26 +3496,22 @@ if (e->tag == Iex_Binop) { switch (e->Iex.Binop.op) { case Iop_RoundF64toInt: { - HReg valD = iselDblExpr(env, e->Iex.Binop.arg2); - MIPSRH *fmt = iselWordExpr_RH(env, False, e->Iex.Binop.arg1); - HReg valD1 = newVRegD(env); + HReg src = iselDblExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegD(env); - if (fmt->Mrh.Imm.imm16 == 0x3) - addInstr(env, MIPSInstr_FpConvert(Mfp_TRULD, valD1, valD)); - else if (fmt->Mrh.Imm.imm16 == 0x2) - addInstr(env, MIPSInstr_FpConvert(Mfp_CEILLD, valD1, valD)); - else if (fmt->Mrh.Imm.imm16 == 0x0) - addInstr(env, MIPSInstr_FpConvert(Mfp_ROUNDLD, valD1, valD)); - else - vassert(0); - return valD1; + set_MIPS_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_FpConvert(Mfp_CVTLD, dst, src)); + set_MIPS_rounding_default(env); + + return dst; } case Iop_SqrtF64: { - /* first arg is rounding mode; we ignore it. 
*/ HReg src = iselDblExpr(env, e->Iex.Binop.arg2); HReg dst = newVRegD(env); + set_MIPS_rounding_mode(env, e->Iex.Binop.arg1); addInstr(env, MIPSInstr_FpUnary(Mfp_SQRTD, dst, src)); + set_MIPS_rounding_default(env); return dst; } @@ -3412,6 +3536,9 @@ case Iop_DivF64: op = Mfp_DIVD; break; + case Iop_DivF32: + op = Mfp_DIVS; + break; case Iop_MulF64: op = Mfp_MULD; break; @@ -3424,7 +3551,9 @@ default: vassert(0); } + set_MIPS_rounding_mode(env, e->Iex.Triop.details->arg1); addInstr(env, MIPSInstr_FpBinary(op, dst, argL, argR)); + set_MIPS_rounding_default(env); return dst; } default: @@ -3872,7 +4001,7 @@ case Ijk_SigFPE_IntDiv: case Ijk_SigFPE_IntOvf: case Ijk_Sys_syscall: - case Ijk_TInval: + case Ijk_InvalICache: { HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); addInstr(env, MIPSInstr_XAssisted(r, amPC, cc, @@ -3971,11 +4100,12 @@ case Ijk_NoDecode: case Ijk_NoRedir: case Ijk_SigBUS: + case Ijk_SigILL: case Ijk_SigTRAP: case Ijk_SigFPE_IntDiv: case Ijk_SigFPE_IntOvf: case Ijk_Sys_syscall: - case Ijk_TInval: { + case Ijk_InvalICache: { HReg r = iselWordExpr_R(env, next); MIPSAMode* amPC = MIPSAMode_IR(offsIP, GuestStatePointer(env->mode64)); addInstr(env, MIPSInstr_XAssisted(r, amPC, MIPScc_AL, jk)); @@ -4021,11 +4151,16 @@ || VEX_PRID_COMP_NETLOGIC); mode64 = arch_host != VexArchMIPS32; +#if (__mips_fpr==64) + fp_mode64 = ((VEX_MIPS_REV(hwcaps_host) == VEX_PRID_CPU_32FPR) + || arch_host == VexArchMIPS64); +#endif /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); env->vreg_ctr = 0; env->mode64 = mode64; + env->fp_mode64 = fp_mode64; /* Set up output code array. */ env->code = newHInstrArray(); @@ -4090,6 +4225,7 @@ default: ppIRType(bb->tyenv->types[i]); vpanic("iselBB(mips): IRTemp type"); + break; } env->vregmap[i] = hreg; env->vregmapHI[i] = hregHI; Index: priv/host_ppc_defs.c =================================================================== --- priv/host_ppc_defs.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_ppc_defs.c (.../trunk) (revision 2863) @@ -4270,7 +4270,7 @@ case Ijk_EmFail: trcval = VEX_TRC_JMP_EMFAIL; break; //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; Index: priv/host_ppc_isel.c =================================================================== --- priv/host_ppc_isel.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_ppc_isel.c (.../trunk) (revision 2863) @@ -4929,11 +4929,8 @@ } } - case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4; - case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4; case Iop_Max32Fx4: fpop = Pavfp_MAXF; goto do_32Fx4; case Iop_Min32Fx4: fpop = Pavfp_MINF; goto do_32Fx4; - case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4; case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4; case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4; case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4; @@ -5213,6 +5210,25 @@ return dst; } + case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm; + case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm; + case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm; + do_32Fx4_with_rm: + { + HReg argL = iselVecExpr(env, triop->arg2); + HReg argR = iselVecExpr(env, triop->arg3); + HReg dst = 
newVRegV(env); + /* FIXME: this is bogus, in the sense that Altivec ignores + FPSCR.RM, at least for some FP operations. So setting the + RM is pointless. This is only really correct in the case + where the RM is known, at JIT time, to be Irrm_NEAREST, + since -- at least for Altivec FP add/sub/mul -- the + emitted insn is hardwired to round to nearest. */ + set_FPU_rounding_mode(env, triop->arg1); + addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR)); + return dst; + } + default: break; } /* switch (e->Iex.Triop.op) */ @@ -5746,7 +5762,7 @@ case Ijk_SigBUS: case Ijk_SigTRAP: case Ijk_Sys_syscall: - case Ijk_TInval: + case Ijk_InvalICache: { HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, @@ -5846,7 +5862,7 @@ case Ijk_SigBUS: case Ijk_SigTRAP: case Ijk_Sys_syscall: - case Ijk_TInval: + case Ijk_InvalICache: { HReg r = iselWordExpr_R(env, next); PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64)); Index: priv/host_s390_defs.c =================================================================== --- priv/host_s390_defs.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_s390_defs.c (.../trunk) (revision 2863) @@ -273,7 +273,11 @@ } -/* Construct an AMODE for accessing the guest state at OFFSET */ +/* Construct an AMODE for accessing the guest state at OFFSET. + OFFSET can be at most 3 * sizeof(VexGuestS390XState) + LibVEX_N_SPILL_BYTES + which may be too large for a B12 addressing mode. + Use a B20 amode as a fallback which will be safe for any offset. +*/ s390_amode * s390_amode_for_guest_state(Int offset) { @@ -280,6 +284,9 @@ if (fits_unsigned_12bit(offset)) return s390_amode_b12(offset, s390_hreg_guest_state_pointer()); + if (fits_signed_20bit(offset)) + return s390_amode_b20(offset, s390_hreg_guest_state_pointer()); + vpanic("invalid guest state offset"); } @@ -458,7 +465,6 @@ s390_amode *am; vassert(offsetB >= 0); - vassert(offsetB <= (1 << 12)); /* because we use b12 amode */ vassert(!hregIsVirtual(rreg)); *i1 = *i2 = NULL; @@ -485,7 +491,6 @@ s390_amode *am; vassert(offsetB >= 0); - vassert(offsetB <= (1 << 12)); /* because we use b12 amode */ vassert(!hregIsVirtual(rreg)); *i1 = *i2 = NULL; @@ -5861,7 +5866,6 @@ } else { /* From 16 bytes to smaller size */ vassert(is_valid_fp128_regpair(op_hi, op_lo)); - vassert(hregIsInvalid(dst_lo)); } insn->tag = S390_INSN_BFP_CONVERT; @@ -5891,11 +5895,11 @@ s390_insn * -s390_insn_bfp128_convert_from(UChar size, s390_bfp_conv_t tag, HReg dst, - HReg op_hi, HReg op_lo, +s390_insn_bfp128_convert_from(UChar size, s390_bfp_conv_t tag, HReg dst_hi, + HReg dst_lo, HReg op_hi, HReg op_lo, s390_bfp_round_t rounding_mode) { - return s390_insn_bfp128_convert(size, tag, dst, INVALID_HREG, op_hi, op_lo, + return s390_insn_bfp128_convert(size, tag, dst_hi, dst_lo, op_hi, op_lo, rounding_mode); } @@ -6192,7 +6196,6 @@ } else { /* From 16 bytes to smaller size */ vassert(is_valid_fp128_regpair(op_hi, op_lo)); - vassert(hregIsInvalid(dst_lo)); } insn->tag = S390_INSN_DFP_CONVERT; @@ -6222,11 +6225,11 @@ s390_insn * -s390_insn_dfp128_convert_from(UChar size, s390_dfp_conv_t tag, HReg dst, - HReg op_hi, HReg op_lo, +s390_insn_dfp128_convert_from(UChar size, s390_dfp_conv_t tag, HReg dst_hi, + HReg dst_lo, HReg op_hi, HReg op_lo, s390_dfp_round_t rounding_mode) { - return s390_insn_dfp128_convert(size, tag, dst, INVALID_HREG, op_hi, op_lo, + return s390_insn_dfp128_convert(size, tag, dst_hi, dst_lo, op_hi, op_lo, rounding_mode); } @@ -6461,7 +6464,7 @@ case Ijk_EmFail: return 
"EmFail"; case Ijk_NoDecode: return "NoDecode"; case Ijk_MapFail: return "MapFail"; - case Ijk_TInval: return "Invalidate"; + case Ijk_InvalICache: return "Invalidate"; case Ijk_NoRedir: return "NoRedir"; case Ijk_SigTRAP: return "SigTRAP"; case Ijk_SigSEGV: return "SigSEGV"; @@ -9780,7 +9783,7 @@ case Ijk_EmFail: trcval = VEX_TRC_JMP_EMFAIL; break; case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; Index: priv/host_s390_defs.h =================================================================== --- priv/host_s390_defs.h (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_s390_defs.h (.../trunk) (revision 2863) @@ -665,8 +665,8 @@ s390_insn *s390_insn_bfp128_convert_to(UChar size, s390_bfp_conv_t, HReg dst_hi, HReg dst_lo, HReg op); s390_insn *s390_insn_bfp128_convert_from(UChar size, s390_bfp_conv_t, - HReg dst, HReg op_hi, HReg op_lo, - s390_bfp_round_t); + HReg dst_hi, HReg dst_lo, HReg op_hi, + HReg op_lo, s390_bfp_round_t); s390_insn *s390_insn_dfp_binop(UChar size, s390_dfp_binop_t, HReg dst, HReg op2, HReg op3, s390_dfp_round_t rounding_mode); @@ -699,8 +699,8 @@ s390_insn *s390_insn_dfp128_convert_to(UChar size, s390_dfp_conv_t, HReg dst_hi, HReg dst_lo, HReg op); s390_insn *s390_insn_dfp128_convert_from(UChar size, s390_dfp_conv_t, - HReg dst, HReg op_hi, HReg op_lo, - s390_dfp_round_t); + HReg dst_hi, HReg dst_lo, HReg op_hi, + HReg op_lo, s390_dfp_round_t); s390_insn *s390_insn_dfp128_reround(UChar size, HReg dst_hi, HReg dst_lo, HReg op2, HReg op3_hi, HReg op3_lo, s390_dfp_round_t); Index: priv/host_s390_isel.c =================================================================== --- priv/host_s390_isel.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_s390_isel.c (.../trunk) (revision 2863) @@ -1257,7 +1257,8 @@ addInstr(env, s390_insn_move(8, f15, op_lo)); rounding_mode = get_bfp_rounding_mode(env, arg1); - addInstr(env, s390_insn_bfp128_convert_from(size, conv, res, f13, f15, + addInstr(env, s390_insn_bfp128_convert_from(size, conv, res, + INVALID_HREG, f13, f15, rounding_mode)); return res; } @@ -1290,7 +1291,8 @@ addInstr(env, s390_insn_move(8, f15, op_lo)); rounding_mode = get_dfp_rounding_mode(env, arg1); - addInstr(env, s390_insn_dfp128_convert_from(size, dconv, res, f13, + addInstr(env, s390_insn_dfp128_convert_from(size, dconv, res, + INVALID_HREG, f13, f15, rounding_mode)); return res; } @@ -2455,7 +2457,7 @@ case Iop_F128toF64: case Iop_F128toF32: { - HReg op_hi, op_lo, f13, f15; + HReg op_hi, op_lo, f12, f13, f14, f15; s390_bfp_round_t rounding_mode; conv = op == Iop_F128toF32 ? S390_BFP_F128_TO_F32 @@ -2463,8 +2465,10 @@ s390_isel_float128_expr(&op_hi, &op_lo, env, left); - /* We use non-virtual registers as pairs (f13, f15) */ + /* We use non-virtual registers as pairs (f13, f15) and (f12, f14)) */ + f12 = make_fpr(12); f13 = make_fpr(13); + f14 = make_fpr(14); f15 = make_fpr(15); /* operand --> (f13, f15) */ @@ -2471,7 +2475,8 @@ addInstr(env, s390_insn_move(8, f13, op_hi)); addInstr(env, s390_insn_move(8, f15, op_lo)); - dst = newVRegF(env); + /* result --> (f12, f14) */ + /* load-rounded has a rounding mode field when the floating point extension facility is installed. 
*/ if (s390_host_has_fpext) { @@ -2480,8 +2485,12 @@ set_bfp_rounding_mode_in_fpc(env, irrm); rounding_mode = S390_BFP_ROUND_PER_FPC; } - addInstr(env, s390_insn_bfp128_convert_from(size, conv, dst, f13, f15, - rounding_mode)); + + addInstr(env, s390_insn_bfp128_convert_from(size, conv, f12, f14, + f13, f15, rounding_mode)); + dst = newVRegF(env); + addInstr(env, s390_insn_move(8, dst, f12)); + return dst; } } @@ -3044,7 +3053,7 @@ } case Iop_D128toD64: { - HReg op_hi, op_lo, f13, f15; + HReg op_hi, op_lo, f12, f13, f14, f15; s390_dfp_round_t rounding_mode; conv = S390_DFP_D128_TO_D64; @@ -3051,8 +3060,10 @@ s390_isel_dfp128_expr(&op_hi, &op_lo, env, left); - /* We use non-virtual registers as pairs (f13, f15) */ + /* We use non-virtual registers as pairs (f13, f15) and (f12, f14) */ + f12 = make_fpr(12); f13 = make_fpr(13); + f14 = make_fpr(14); f15 = make_fpr(15); /* operand --> (f13, f15) */ @@ -3059,7 +3070,8 @@ addInstr(env, s390_insn_move(8, f13, op_hi)); addInstr(env, s390_insn_move(8, f15, op_lo)); - dst = newVRegF(env); + /* result --> (f12, f14) */ + /* load-rounded has a rounding mode field when the floating point extension facility is installed. */ if (s390_host_has_fpext) { @@ -3068,8 +3080,11 @@ set_dfp_rounding_mode_in_fpc(env, irrm); rounding_mode = S390_DFP_ROUND_PER_FPC_0; } - addInstr(env, s390_insn_dfp128_convert_from(size, conv, dst, f13, f15, - rounding_mode)); + addInstr(env, s390_insn_dfp128_convert_from(size, conv, f12, f14, + f13, f15, rounding_mode)); + dst = newVRegF(env); + addInstr(env, s390_insn_move(8, dst, f12)); + return dst; } @@ -3921,7 +3936,7 @@ case Ijk_EmFail: case Ijk_EmWarn: case Ijk_NoDecode: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoRedir: @@ -4036,7 +4051,7 @@ case Ijk_EmFail: case Ijk_EmWarn: case Ijk_NoDecode: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoRedir: Index: priv/host_x86_defs.c =================================================================== --- priv/host_x86_defs.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_x86_defs.c (.../trunk) (revision 2863) @@ -2023,11 +2023,25 @@ case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; case Xfp_MOV: break; - case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */ - *p++ = 0xD9; *p++ = 0xF2; /* fptan */ - *p++ = 0xD9; *p++ = 0xF7; /* fincstp */ - break; - default: vpanic("do_fop1_st: unknown op"); + case Xfp_TAN: + /* fptan pushes 1.0 on the FP stack, except when the argument + is out of range. Hence we have to do the instruction, + then inspect C2 to see if there is an out of range + condition. If there is, we skip the fincstp that is used + by the in-range case to get rid of this extra 1.0 + value. 
*/ + p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */ + *p++ = 0xD9; *p++ = 0xF2; // fptan + *p++ = 0x50; // pushl %eax + *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax + *p++ = 0x66; *p++ = 0xA9; + *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax + *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp + *p++ = 0xD9; *p++ = 0xF7; // fincstp + *p++ = 0x58; // after_fincstp: popl %eax + break; + default: + vpanic("do_fop1_st: unknown op"); } return p; } @@ -2539,7 +2553,7 @@ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; Index: priv/host_x86_isel.c =================================================================== --- priv/host_x86_isel.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/host_x86_isel.c (.../trunk) (revision 2863) @@ -3147,6 +3147,11 @@ HReg src = iselDblExpr(env, e->Iex.Binop.arg2); /* XXXROUNDINGFIXME */ /* set roundingmode here */ + /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition + codes. I don't think that matters, since this insn + selector never generates such an instruction intervening + between an flag-setting instruction and a flag-using + instruction. */ addInstr(env, X86Instr_FpUnary(fpop,src,res)); if (fpop != Xfp_SQRT && fpop != Xfp_NEG && fpop != Xfp_ABS) @@ -3554,12 +3559,8 @@ case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4; case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4; case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4; - case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4; - case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4; case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4; case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4; - case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4; - case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4; do_32Fx4: { HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); @@ -3574,12 +3575,8 @@ case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2; case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2; case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2; - case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2; - case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2; case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2; case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2; - case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2; - case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2; do_64Fx2: { HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); @@ -3790,6 +3787,50 @@ } /* switch (e->Iex.Binop.op) */ } /* if (e->tag == Iex_Binop) */ + + if (e->tag == Iex_Triop) { + IRTriop *triop = e->Iex.Triop.details; + switch (triop->op) { + + case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm; + case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm; + case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm; + case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm; + do_32Fx4_w_rm: + { + HReg argL = iselVecExpr(env, triop->arg2); + HReg argR = iselVecExpr(env, triop->arg3); + HReg dst = newVRegV(env); + addInstr(env, mk_vMOVsd_RR(argL, dst)); + /* XXXROUNDINGFIXME */ + /* set roundingmode here */ + addInstr(env, X86Instr_Sse32Fx4(op, argR, dst)); + return dst; + } + + case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm; + case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm; + 
case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm; + case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm; + do_64Fx2_w_rm: + { + HReg argL = iselVecExpr(env, triop->arg2); + HReg argR = iselVecExpr(env, triop->arg3); + HReg dst = newVRegV(env); + REQUIRE_SSE2; + addInstr(env, mk_vMOVsd_RR(argL, dst)); + /* XXXROUNDINGFIXME */ + /* set roundingmode here */ + addInstr(env, X86Instr_Sse64Fx2(op, argR, dst)); + return dst; + } + + default: + break; + } /* switch (triop->op) */ + } /* if (e->tag == Iex_Triop) */ + + if (e->tag == Iex_ITE) { // VFD HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue); HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse); @@ -4244,8 +4285,9 @@ case Ijk_Sys_int128: case Ijk_Sys_int129: case Ijk_Sys_int130: + case Ijk_Sys_syscall: case Ijk_Sys_sysenter: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Yield: { HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); @@ -4342,8 +4384,9 @@ case Ijk_Sys_int128: case Ijk_Sys_int129: case Ijk_Sys_int130: + case Ijk_Sys_syscall: case Ijk_Sys_sysenter: - case Ijk_TInval: + case Ijk_InvalICache: case Ijk_Yield: { HReg r = iselIntExpr_R(env, next); Index: priv/ir_defs.c =================================================================== --- priv/ir_defs.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/ir_defs.c (.../trunk) (revision 2863) @@ -640,6 +640,7 @@ case Iop_Recps32Fx2: vex_printf("VRecps32Fx2"); return; case Iop_Recps32Fx4: vex_printf("VRecps32Fx4"); return; case Iop_Abs32Fx4: vex_printf("Abs32Fx4"); return; + case Iop_Abs64Fx2: vex_printf("Abs64Fx2"); return; case Iop_Rsqrts32Fx4: vex_printf("VRsqrts32Fx4"); return; case Iop_Rsqrts32Fx2: vex_printf("VRsqrts32Fx2"); return; @@ -685,6 +686,7 @@ case Iop_CmpLE64F0x2: vex_printf("CmpLE64F0x2"); return; case Iop_CmpUN64F0x2: vex_printf("CmpUN64F0x2"); return; + case Iop_Neg64Fx2: vex_printf("Neg64Fx2"); return; case Iop_Neg32Fx4: vex_printf("Neg32Fx4"); return; case Iop_Neg32Fx2: vex_printf("Neg32Fx2"); return; @@ -695,6 +697,11 @@ case Iop_64UtoV128: vex_printf("64UtoV128"); return; case Iop_SetV128lo64: vex_printf("SetV128lo64"); return; + case Iop_ZeroHI64ofV128: vex_printf("ZeroHI64ofV128"); return; + case Iop_ZeroHI96ofV128: vex_printf("ZeroHI96ofV128"); return; + case Iop_ZeroHI112ofV128: vex_printf("ZeroHI112ofV128"); return; + case Iop_ZeroHI120ofV128: vex_printf("ZeroHI120ofV128"); return; + case Iop_32UtoV128: vex_printf("32UtoV128"); return; case Iop_V128to32: vex_printf("V128to32"); return; case Iop_SetV128lo32: vex_printf("SetV128lo32"); return; @@ -1405,8 +1412,10 @@ case Ijk_EmFail: vex_printf("EmFail"); break; case Ijk_NoDecode: vex_printf("NoDecode"); break; case Ijk_MapFail: vex_printf("MapFail"); break; - case Ijk_TInval: vex_printf("Invalidate"); break; + case Ijk_InvalICache: vex_printf("InvalICache"); break; + case Ijk_FlushDCache: vex_printf("FlushDCache"); break; case Ijk_NoRedir: vex_printf("NoRedir"); break; + case Ijk_SigILL: vex_printf("SigILL"); break; case Ijk_SigTRAP: vex_printf("SigTRAP"); break; case Ijk_SigSEGV: vex_printf("SigSEGV"); break; case Ijk_SigBUS: vex_printf("SigBUS"); break; @@ -2734,7 +2743,7 @@ case Iop_RoundF32x4_RP: case Iop_RoundF32x4_RN: case Iop_RoundF32x4_RZ: - case Iop_Abs32Fx4: + case Iop_Abs64Fx2: case Iop_Abs32Fx4: case Iop_Rsqrte32Fx4: case Iop_Rsqrte32x4: UNARY(Ity_V128, Ity_V128); @@ -2789,19 +2798,19 @@ case Iop_CmpEQ64F0x2: case Iop_CmpLT64F0x2: case Iop_CmpLE32F0x4: case Iop_CmpUN32F0x4: case Iop_CmpLE64F0x2: case Iop_CmpUN64F0x2: - case Iop_Add32Fx4: case Iop_Add32F0x4: - case Iop_Add64Fx2: case 
Iop_Add64F0x2: - case Iop_Div32Fx4: case Iop_Div32F0x4: - case Iop_Div64Fx2: case Iop_Div64F0x2: + case Iop_Add32F0x4: + case Iop_Add64F0x2: + case Iop_Div32F0x4: + case Iop_Div64F0x2: case Iop_Max32Fx4: case Iop_Max32F0x4: case Iop_PwMax32Fx4: case Iop_PwMin32Fx4: case Iop_Max64Fx2: case Iop_Max64F0x2: case Iop_Min32Fx4: case Iop_Min32F0x4: case Iop_Min64Fx2: case Iop_Min64F0x2: - case Iop_Mul32Fx4: case Iop_Mul32F0x4: - case Iop_Mul64Fx2: case Iop_Mul64F0x2: - case Iop_Sub32Fx4: case Iop_Sub32F0x4: - case Iop_Sub64Fx2: case Iop_Sub64F0x2: + case Iop_Mul32F0x4: + case Iop_Mul64F0x2: + case Iop_Sub32F0x4: + case Iop_Sub64F0x2: case Iop_AndV128: case Iop_OrV128: case Iop_XorV128: case Iop_Add8x16: case Iop_Add16x8: case Iop_Add32x4: case Iop_Add64x2: @@ -2900,10 +2909,12 @@ case Iop_Reverse64_8x16: case Iop_Reverse64_16x8: case Iop_Reverse64_32x4: case Iop_Reverse32_8x16: case Iop_Reverse32_16x8: case Iop_Reverse16_8x16: - case Iop_Neg32Fx4: + case Iop_Neg64Fx2: case Iop_Neg32Fx4: case Iop_Abs8x16: case Iop_Abs16x8: case Iop_Abs32x4: case Iop_CipherSV128: case Iop_PwBitMtxXpose64x2: + case Iop_ZeroHI64ofV128: case Iop_ZeroHI96ofV128: + case Iop_ZeroHI112ofV128: case Iop_ZeroHI120ofV128: UNARY(Ity_V128, Ity_V128); case Iop_ShlV128: case Iop_ShrV128: @@ -2966,7 +2977,7 @@ case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2: BINARY(Ity_I64, Ity_I64, Ity_V128); - /* s390 specific */ + /* s390 specific */ case Iop_MAddF32: case Iop_MSubF32: QUATERNARY(ity_RMode,Ity_F32,Ity_F32,Ity_F32, Ity_F32); @@ -2984,6 +2995,18 @@ case Iop_DivF128: TERNARY(ity_RMode,Ity_F128,Ity_F128, Ity_F128); + case Iop_Add64Fx2: case Iop_Sub64Fx2: + case Iop_Mul64Fx2: case Iop_Div64Fx2: + case Iop_Add32Fx4: case Iop_Sub32Fx4: + case Iop_Mul32Fx4: case Iop_Div32Fx4: + TERNARY(ity_RMode,Ity_V128,Ity_V128, Ity_V128); + + case Iop_Add64Fx4: case Iop_Sub64Fx4: + case Iop_Mul64Fx4: case Iop_Div64Fx4: + case Iop_Add32Fx8: case Iop_Sub32Fx8: + case Iop_Mul32Fx8: case Iop_Div32Fx8: + TERNARY(ity_RMode,Ity_V256,Ity_V256, Ity_V256); + case Iop_NegF128: case Iop_AbsF128: UNARY(Ity_F128, Ity_F128); @@ -3203,10 +3226,6 @@ case Iop_64x4toV256: QUATERNARY(Ity_I64, Ity_I64, Ity_I64, Ity_I64, Ity_V256); - case Iop_Add64Fx4: case Iop_Sub64Fx4: - case Iop_Mul64Fx4: case Iop_Div64Fx4: - case Iop_Add32Fx8: case Iop_Sub32Fx8: - case Iop_Mul32Fx8: case Iop_Div32Fx8: case Iop_AndV256: case Iop_OrV256: case Iop_XorV256: case Iop_Max32Fx8: case Iop_Min32Fx8: @@ -4465,6 +4484,17 @@ } } +IRType integerIRTypeOfSize ( Int szB ) +{ + switch (szB) { + case 8: return Ity_I64; + case 4: return Ity_I32; + case 2: return Ity_I16; + case 1: return Ity_I8; + default: vpanic("integerIRTypeOfSize"); + } +} + IRExpr* mkIRExpr_HWord ( HWord hw ) { vassert(sizeof(void*) == sizeof(HWord)); Index: priv/ir_opt.c =================================================================== --- priv/ir_opt.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/ir_opt.c (.../trunk) (revision 2863) @@ -1178,6 +1178,30 @@ && e->Iex.Const.con->Ico.U32 == 0); } +/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */ +static Bool isZeroU64 ( IRExpr* e ) +{ + return toBool( e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U64 + && e->Iex.Const.con->Ico.U64 == 0); +} + +/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */ +static Bool isZeroV128 ( IRExpr* e ) +{ + return toBool( e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_V128 + && e->Iex.Const.con->Ico.V128 == 0x0000); +} + +/* Is this literally IRExpr_Const(IRConst_V256(0)) ? 
*/ +static Bool isZeroV256 ( IRExpr* e ) +{ + return toBool( e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_V256 + && e->Iex.Const.con->Ico.V256 == 0x00000000); +} + /* Is this an integer constant with value 0 ? */ static Bool isZeroU ( IRExpr* e ) { @@ -1224,9 +1248,11 @@ case Iop_Xor16: return IRExpr_Const(IRConst_U16(0)); case Iop_Sub32: case Iop_Xor32: return IRExpr_Const(IRConst_U32(0)); + case Iop_And64: case Iop_Sub64: case Iop_Xor64: return IRExpr_Const(IRConst_U64(0)); - case Iop_XorV128: return IRExpr_Const(IRConst_V128(0)); + case Iop_XorV128: + case Iop_AndV128: return IRExpr_Const(IRConst_V128(0)); default: vpanic("mkZeroOfPrimopResultType: bad primop"); } } @@ -1990,6 +2016,17 @@ } break; } + /* Same reasoning for the 256-bit version. */ + case Iop_V128HLtoV256: { + IRExpr* argHi = e->Iex.Binop.arg1; + IRExpr* argLo = e->Iex.Binop.arg2; + if (isZeroV128(argHi) && isZeroV128(argLo)) { + e2 = IRExpr_Const(IRConst_V256(0)); + } else { + goto unhandled; + } + break; + } /* -- V128 stuff -- */ case Iop_InterleaveLO8x16: { @@ -2114,6 +2151,13 @@ break; } break; + case Iop_Sub8x16: + /* Sub8x16(x,0) ==> x */ + if (isZeroV128(e->Iex.Binop.arg2)) { + e2 = e->Iex.Binop.arg1; + break; + } + break; case Iop_And64: case Iop_And32: @@ -2149,6 +2193,19 @@ e2 = e->Iex.Binop.arg1; break; } + /* Deal with either arg zero. Could handle other And + cases here too. */ + if (e->Iex.Binop.op == Iop_And64 + && (isZeroU64(e->Iex.Binop.arg1) + || isZeroU64(e->Iex.Binop.arg2))) { + e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); + break; + } else if (e->Iex.Binop.op == Iop_AndV128 + && (isZeroV128(e->Iex.Binop.arg1) + || isZeroV128(e->Iex.Binop.arg2))) { + e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); + break; + } break; case Iop_OrV128: @@ -2158,6 +2215,29 @@ e2 = e->Iex.Binop.arg1; break; } + /* OrV128(t,0) ==> t */ + if (e->Iex.Binop.op == Iop_OrV128) { + if (isZeroV128(e->Iex.Binop.arg2)) { + e2 = e->Iex.Binop.arg1; + break; + } + if (isZeroV128(e->Iex.Binop.arg1)) { + e2 = e->Iex.Binop.arg2; + break; + } + } + /* OrV256(t,0) ==> t */ + if (e->Iex.Binop.op == Iop_OrV256) { + if (isZeroV256(e->Iex.Binop.arg2)) { + e2 = e->Iex.Binop.arg1; + break; + } + //Disabled because there's no known test case right now. 
+ //if (isZeroV256(e->Iex.Binop.arg1)) { + // e2 = e->Iex.Binop.arg2; + // break; + //} + } break; case Iop_Xor8: Index: priv/main_main.c =================================================================== --- priv/main_main.c (.../tags/VEX_3_9_0) (revision 2863) +++ priv/main_main.c (.../trunk) (revision 2863) @@ -38,6 +38,7 @@ #include "libvex_guest_x86.h" #include "libvex_guest_amd64.h" #include "libvex_guest_arm.h" +#include "libvex_guest_arm64.h" #include "libvex_guest_ppc32.h" #include "libvex_guest_ppc64.h" #include "libvex_guest_s390x.h" @@ -53,6 +54,7 @@ #include "host_amd64_defs.h" #include "host_ppc_defs.h" #include "host_arm_defs.h" +#include "host_arm64_defs.h" #include "host_s390_defs.h" #include "host_mips_defs.h" @@ -60,6 +62,7 @@ #include "guest_x86_defs.h" #include "guest_amd64_defs.h" #include "guest_arm_defs.h" +#include "guest_arm64_defs.h" #include "guest_ppc_defs.h" #include "guest_s390_defs.h" #include "guest_mips_defs.h" @@ -89,6 +92,7 @@ void LibVEX_default_VexControl ( /*OUT*/ VexControl* vcon ) { + vex_bzero(vcon, sizeof(*vcon)); vcon->iropt_verbosity = 0; vcon->iropt_level = 2; vcon->iropt_register_updates = VexRegUpdUnwindregsAtMemAccess; @@ -233,7 +237,7 @@ HInstrArray* vcode; HInstrArray* rcode; Int i, j, k, out_used, guest_sizeB; - Int offB_TISTART, offB_TILEN, offB_GUEST_IP, szB_GUEST_IP; + Int offB_CMSTART, offB_CMLEN, offB_GUEST_IP, szB_GUEST_IP; Int offB_HOST_EvC_COUNTER, offB_HOST_EvC_FAILADDR; UChar insn_bytes[128]; IRType guest_word_type; @@ -259,8 +263,8 @@ disInstrFn = NULL; guest_word_type = Ity_INVALID; host_word_type = Ity_INVALID; - offB_TISTART = 0; - offB_TILEN = 0; + offB_CMSTART = 0; + offB_CMLEN = 0; offB_GUEST_IP = 0; szB_GUEST_IP = 0; offB_HOST_EvC_COUNTER = 0; @@ -417,6 +421,30 @@ vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps)); break; + case VexArchARM64: + mode64 = True; + getAllocableRegs_ARM64 ( &n_available_real_regs, + &available_real_regs ); + isMove = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_ARM64Instr; + getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) + getRegUsage_ARM64Instr; + mapRegs = (void(*)(HRegRemap*,HInstr*, Bool)) + mapRegs_ARM64Instr; + genSpill = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) + genSpill_ARM64; + genReload = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) + genReload_ARM64; + ppInstr = (void(*)(HInstr*, Bool)) ppARM64Instr; + ppReg = (void(*)(HReg)) ppHRegARM64; + iselSB = iselSB_ARM64; + emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool, + void*,void*,void*,void*)) + emit_ARM64Instr; + host_is_bigendian = False; + host_word_type = Ity_I64; + vassert(are_valid_hwcaps(VexArchARM64, vta->archinfo_host.hwcaps)); + break; + case VexArchMIPS32: mode64 = False; getAllocableRegs_MIPS ( &n_available_real_regs, @@ -479,8 +507,8 @@ guest_sizeB = sizeof(VexGuestX86State); guest_word_type = Ity_I32; guest_layout = &x86guest_layout; - offB_TISTART = offsetof(VexGuestX86State,guest_TISTART); - offB_TILEN = offsetof(VexGuestX86State,guest_TILEN); + offB_CMSTART = offsetof(VexGuestX86State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestX86State,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestX86State,guest_EIP); szB_GUEST_IP = sizeof( ((VexGuestX86State*)0)->guest_EIP ); offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER); @@ -487,8 +515,8 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestX86State) % 16); - vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4); - 
vassert(sizeof( ((VexGuestX86State*)0)->guest_TILEN ) == 4); + vassert(sizeof( ((VexGuestX86State*)0)->guest_CMSTART) == 4); + vassert(sizeof( ((VexGuestX86State*)0)->guest_CMLEN ) == 4); vassert(sizeof( ((VexGuestX86State*)0)->guest_NRADDR ) == 4); break; @@ -499,8 +527,8 @@ guest_sizeB = sizeof(VexGuestAMD64State); guest_word_type = Ity_I64; guest_layout = &amd64guest_layout; - offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART); - offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN); + offB_CMSTART = offsetof(VexGuestAMD64State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestAMD64State,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestAMD64State,guest_RIP); szB_GUEST_IP = sizeof( ((VexGuestAMD64State*)0)->guest_RIP ); offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER); @@ -507,8 +535,8 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestAMD64State) % 16); - vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8); - vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN ) == 8); + vassert(sizeof( ((VexGuestAMD64State*)0)->guest_CMSTART ) == 8); + vassert(sizeof( ((VexGuestAMD64State*)0)->guest_CMLEN ) == 8); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_NRADDR ) == 8); break; @@ -519,8 +547,8 @@ guest_sizeB = sizeof(VexGuestPPC32State); guest_word_type = Ity_I32; guest_layout = &ppc32Guest_layout; - offB_TISTART = offsetof(VexGuestPPC32State,guest_TISTART); - offB_TILEN = offsetof(VexGuestPPC32State,guest_TILEN); + offB_CMSTART = offsetof(VexGuestPPC32State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestPPC32State,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestPPC32State,guest_CIA); szB_GUEST_IP = sizeof( ((VexGuestPPC32State*)0)->guest_CIA ); offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER); @@ -527,8 +555,8 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestPPC32State) % 16); - vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4); - vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TILEN ) == 4); + vassert(sizeof( ((VexGuestPPC32State*)0)->guest_CMSTART ) == 4); + vassert(sizeof( ((VexGuestPPC32State*)0)->guest_CMLEN ) == 4); vassert(sizeof( ((VexGuestPPC32State*)0)->guest_NRADDR ) == 4); break; @@ -539,8 +567,8 @@ guest_sizeB = sizeof(VexGuestPPC64State); guest_word_type = Ity_I64; guest_layout = &ppc64Guest_layout; - offB_TISTART = offsetof(VexGuestPPC64State,guest_TISTART); - offB_TILEN = offsetof(VexGuestPPC64State,guest_TILEN); + offB_CMSTART = offsetof(VexGuestPPC64State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestPPC64State,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestPPC64State,guest_CIA); szB_GUEST_IP = sizeof( ((VexGuestPPC64State*)0)->guest_CIA ); offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER); @@ -547,8 +575,8 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestPPC64State) % 16); - vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TISTART ) == 8); - vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TILEN ) == 8); + vassert(sizeof( ((VexGuestPPC64State*)0)->guest_CMSTART ) == 8); + vassert(sizeof( ((VexGuestPPC64State*)0)->guest_CMLEN ) == 8); vassert(sizeof( ((VexGuestPPC64State*)0)->guest_NRADDR ) == 8); vassert(sizeof( 
((VexGuestPPC64State*)0)->guest_NRADDR_GPR2) == 8); break; @@ -560,8 +588,8 @@ guest_sizeB = sizeof(VexGuestS390XState); guest_word_type = Ity_I64; guest_layout = &s390xGuest_layout; - offB_TISTART = offsetof(VexGuestS390XState,guest_TISTART); - offB_TILEN = offsetof(VexGuestS390XState,guest_TILEN); + offB_CMSTART = offsetof(VexGuestS390XState,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestS390XState,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestS390XState,guest_IA); szB_GUEST_IP = sizeof( ((VexGuestS390XState*)0)->guest_IA); offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER); @@ -568,8 +596,8 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchS390X, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestS390XState) % 16); - vassert(sizeof( ((VexGuestS390XState*)0)->guest_TISTART ) == 8); - vassert(sizeof( ((VexGuestS390XState*)0)->guest_TILEN ) == 8); + vassert(sizeof( ((VexGuestS390XState*)0)->guest_CMSTART ) == 8); + vassert(sizeof( ((VexGuestS390XState*)0)->guest_CMLEN ) == 8); vassert(sizeof( ((VexGuestS390XState*)0)->guest_NRADDR ) == 8); break; @@ -580,8 +608,8 @@ guest_sizeB = sizeof(VexGuestARMState); guest_word_type = Ity_I32; guest_layout = &armGuest_layout; - offB_TISTART = offsetof(VexGuestARMState,guest_TISTART); - offB_TILEN = offsetof(VexGuestARMState,guest_TILEN); + offB_CMSTART = offsetof(VexGuestARMState,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestARMState,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestARMState,guest_R15T); szB_GUEST_IP = sizeof( ((VexGuestARMState*)0)->guest_R15T ); offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER); @@ -588,11 +616,31 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestARMState) % 16); - vassert(sizeof( ((VexGuestARMState*)0)->guest_TISTART) == 4); - vassert(sizeof( ((VexGuestARMState*)0)->guest_TILEN ) == 4); + vassert(sizeof( ((VexGuestARMState*)0)->guest_CMSTART) == 4); + vassert(sizeof( ((VexGuestARMState*)0)->guest_CMLEN ) == 4); vassert(sizeof( ((VexGuestARMState*)0)->guest_NRADDR ) == 4); break; + case VexArchARM64: + preciseMemExnsFn = guest_arm64_state_requires_precise_mem_exns; + disInstrFn = disInstr_ARM64; + specHelper = guest_arm64_spechelper; + guest_sizeB = sizeof(VexGuestARM64State); + guest_word_type = Ity_I64; + guest_layout = &arm64Guest_layout; + offB_CMSTART = offsetof(VexGuestARM64State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestARM64State,guest_CMLEN); + offB_GUEST_IP = offsetof(VexGuestARM64State,guest_PC); + szB_GUEST_IP = sizeof( ((VexGuestARM64State*)0)->guest_PC ); + offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR); + vassert(are_valid_hwcaps(VexArchARM64, vta->archinfo_guest.hwcaps)); + vassert(0 == sizeof(VexGuestARM64State) % 16); + vassert(sizeof( ((VexGuestARM64State*)0)->guest_CMSTART) == 8); + vassert(sizeof( ((VexGuestARM64State*)0)->guest_CMLEN ) == 8); + vassert(sizeof( ((VexGuestARM64State*)0)->guest_NRADDR ) == 8); + break; + case VexArchMIPS32: preciseMemExnsFn = guest_mips32_state_requires_precise_mem_exns; disInstrFn = disInstr_MIPS; @@ -600,8 +648,8 @@ guest_sizeB = sizeof(VexGuestMIPS32State); guest_word_type = Ity_I32; guest_layout = &mips32Guest_layout; - offB_TISTART = offsetof(VexGuestMIPS32State,guest_TISTART); - offB_TILEN = offsetof(VexGuestMIPS32State,guest_TILEN); + 
offB_CMSTART = offsetof(VexGuestMIPS32State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestMIPS32State,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestMIPS32State,guest_PC); szB_GUEST_IP = sizeof( ((VexGuestMIPS32State*)0)->guest_PC ); offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER); @@ -608,8 +656,8 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchMIPS32, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestMIPS32State) % 16); - vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_TISTART) == 4); - vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_TILEN ) == 4); + vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_CMSTART) == 4); + vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_CMLEN ) == 4); vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_NRADDR ) == 4); break; @@ -620,8 +668,8 @@ guest_sizeB = sizeof(VexGuestMIPS64State); guest_word_type = Ity_I64; guest_layout = &mips64Guest_layout; - offB_TISTART = offsetof(VexGuestMIPS64State,guest_TISTART); - offB_TILEN = offsetof(VexGuestMIPS64State,guest_TILEN); + offB_CMSTART = offsetof(VexGuestMIPS64State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestMIPS64State,guest_CMLEN); offB_GUEST_IP = offsetof(VexGuestMIPS64State,guest_PC); szB_GUEST_IP = sizeof( ((VexGuestMIPS64State*)0)->guest_PC ); offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS64State,host_EvC_COUNTER); @@ -628,8 +676,8 @@ offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchMIPS64, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestMIPS64State) % 16); - vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_TISTART) == 8); - vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_TILEN ) == 8); + vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_CMSTART) == 8); + vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_CMLEN ) == 8); vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_NRADDR ) == 8); break; @@ -675,8 +723,8 @@ guest_word_type, vta->needs_self_check, vta->preamble_function, - offB_TISTART, - offB_TILEN, + offB_CMSTART, + offB_CMLEN, offB_GUEST_IP, szB_GUEST_IP ); @@ -958,6 +1006,8 @@ chainXDirect = chainXDirect_AMD64; break; case VexArchARM: chainXDirect = chainXDirect_ARM; break; + case VexArchARM64: + chainXDirect = chainXDirect_ARM64; break; case VexArchS390X: chainXDirect = chainXDirect_S390; break; case VexArchPPC32: @@ -999,6 +1049,8 @@ unchainXDirect = unchainXDirect_AMD64; break; case VexArchARM: unchainXDirect = unchainXDirect_ARM; break; + case VexArchARM64: + unchainXDirect = unchainXDirect_ARM64; break; case VexArchS390X: unchainXDirect = unchainXDirect_S390; break; case VexArchPPC32: @@ -1038,6 +1090,8 @@ cached = evCheckSzB_AMD64(); break; case VexArchARM: cached = evCheckSzB_ARM(); break; + case VexArchARM64: + cached = evCheckSzB_ARM64(); break; case VexArchS390X: cached = evCheckSzB_S390(); break; case VexArchPPC32: @@ -1152,6 +1206,7 @@ case VexArchX86: return "X86"; case VexArchAMD64: return "AMD64"; case VexArchARM: return "ARM"; + case VexArchARM64: return "ARM64"; case VexArchPPC32: return "PPC32"; case VexArchPPC64: return "PPC64"; case VexArchS390X: return "S390X"; @@ -1171,14 +1226,16 @@ /* Write default settings info *vai. 
*/ void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai ) { + vex_bzero(vai, sizeof(*vai)); vai->hwcaps = 0; vai->ppc_icache_line_szB = 0; vai->ppc_dcbz_szB = 0; vai->ppc_dcbzl_szB = 0; - + vai->arm64_dMinLine_lg2_szB = 0; + vai->arm64_iMinLine_lg2_szB = 0; vai->hwcache_info.num_levels = 0; vai->hwcache_info.num_caches = 0; - vai->hwcache_info.caches = NULL; + vai->hwcache_info.caches = NULL; vai->hwcache_info.icaches_maintain_coherence = True; // whatever } @@ -1185,6 +1242,7 @@ /* Write default settings info *vbi. */ void LibVEX_default_VexAbiInfo ( /*OUT*/VexAbiInfo* vbi ) { + vex_bzero(vbi, sizeof(*vbi)); vbi->guest_stack_redzone_size = 0; vbi->guest_amd64_assume_fs_is_zero = False; vbi->guest_amd64_assume_gs_is_0x60 = False; @@ -1381,6 +1439,15 @@ return NULL; } +static const HChar* show_hwcaps_arm64 ( UInt hwcaps ) +{ + /* Since there are no variants, just insist that hwcaps is zero, + and declare it invalid otherwise. */ + if (hwcaps == 0) + return "baseline"; + return NULL; +} + static const HChar* show_hwcaps_s390x ( UInt hwcaps ) { static const HChar prefix[] = "s390x"; @@ -1472,6 +1539,7 @@ case VexArchPPC32: return show_hwcaps_ppc32(hwcaps); case VexArchPPC64: return show_hwcaps_ppc64(hwcaps); case VexArchARM: return show_hwcaps_arm(hwcaps); + case VexArchARM64: return show_hwcaps_arm64(hwcaps); case VexArchS390X: return show_hwcaps_s390x(hwcaps); case VexArchMIPS32: return show_hwcaps_mips32(hwcaps); case VexArchMIPS64: return show_hwcaps_mips64(hwcaps); Index: pub/libvex.h =================================================================== --- pub/libvex.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex.h (.../trunk) (revision 2863) @@ -55,6 +55,7 @@ VexArchX86, VexArchAMD64, VexArchARM, + VexArchARM64, VexArchPPC32, VexArchPPC64, VexArchS390X, @@ -172,6 +173,9 @@ /* Get an ARM architecure level from HWCAPS */ #define VEX_ARM_ARCHLEVEL(x) ((x) & 0x3f) +/* ARM64: baseline capability is AArch64 v8. */ +/* (no definitions since no variants so far) */ + /* MIPS baseline capability */ /* Assigned Company values for bits 23:16 of the PRId Register (CP0 register 15, select 0). As of the MIPS32 and MIPS64 specs from @@ -196,10 +200,15 @@ #define VEX_PRID_IMP_34K 0x9500 #define VEX_PRID_IMP_74K 0x9700 +/* CPU has FPU and 32 dbl. prec. FP registers */ +#define VEX_PRID_CPU_32FPR 0x00000040 + /* Get MIPS Company ID from HWCAPS */ #define VEX_MIPS_COMP_ID(x) ((x) & 0x00FF0000) /* Get MIPS Processor ID from HWCAPS */ -#define VEX_MIPS_PROC_ID(x) ((x) & 0x0000FFFF) +#define VEX_MIPS_PROC_ID(x) ((x) & 0x0000FF00) +/* Get MIPS Revision from HWCAPS */ +#define VEX_MIPS_REV(x) ((x) & 0x000000FF) /* Check if the processor supports DSP ASE Rev 2. */ #define VEX_MIPS_PROC_DSP2(x) ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \ (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_74K)) @@ -213,6 +222,7 @@ extern const HChar* LibVEX_ppVexArch ( VexArch ); extern const HChar* LibVEX_ppVexHwCaps ( VexArch, UInt ); + /* The various kinds of caches */ typedef enum { DATA_CACHE, @@ -266,9 +276,14 @@ /* PPC32/PPC64 only: size of instruction cache line */ Int ppc_icache_line_szB; /* PPC32/PPC64 only: sizes zeroed by the dcbz/dcbzl instructions - * (bug#135264) */ + (bug#135264) */ UInt ppc_dcbz_szB; UInt ppc_dcbzl_szB; /* 0 means unsupported (SIGILL) */ + /* ARM64: I- and D- minimum line sizes in log2(bytes), as + obtained from ctr_el0.DminLine and .IminLine. For example, a + line size of 64 bytes would be encoded here as 6. 
*/ + UInt arm64_dMinLine_lg2_szB; + UInt arm64_iMinLine_lg2_szB; } VexArchInfo; @@ -516,7 +531,7 @@ typedef struct { /* Total size of the guest state, in bytes. Must be - 8-aligned. */ + 16-aligned. */ Int total_sizeB; /* Whereabouts is the stack pointer? */ Int offset_SP; @@ -907,13 +922,25 @@ ~~~~~ Same as ppc32. + arm32 + ~~~~~ + r8 is GSP. + + arm64 + ~~~~~ + r21 is GSP. + ALL GUEST ARCHITECTURES ~~~~~~~~~~~~~~~~~~~~~~~ - The guest state must contain two pseudo-registers, guest_TISTART - and guest_TILEN. These are used to pass the address of areas of - guest code, translations of which are to be invalidated, back to - the despatcher. Both pseudo-regs must have size equal to the guest - word size. + The guest state must contain two pseudo-registers, guest_CMSTART + and guest_CMLEN. These are used to specify guest address ranges, + either of code to be invalidated, when used in conjunction with + Ijk_InvalICache, or of d-cache ranges to be flushed, when used in + conjunction with Ijk_FlushDCache. In such cases, the two _CM + pseudo-regs should be filled in by the IR, and then an exit with + one of the two abovementioned Ijk_ kinds should happen, so that the + dispatcher can action them. Both pseudo-regs must have size equal + to the guest word size. The architecture must a third pseudo-register, guest_NRADDR, also guest-word-sized. This is used to record the unredirected guest Index: pub/libvex_basictypes.h =================================================================== --- pub/libvex_basictypes.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_basictypes.h (.../trunk) (revision 2863) @@ -192,28 +192,24 @@ # define VEX_HOST_WORDSIZE 4 # define VEX_REGPARM(_n) /* */ -#elif defined(__arm__) +#elif defined(__arm__) && !defined(__aarch64__) # define VEX_HOST_WORDSIZE 4 # define VEX_REGPARM(_n) /* */ -#elif defined(_AIX) && !defined(__64BIT__) -# define VEX_HOST_WORDSIZE 4 +#elif defined(__aarch64__) && !defined(__arm__) +# define VEX_HOST_WORDSIZE 8 # define VEX_REGPARM(_n) /* */ -#elif defined(_AIX) && defined(__64BIT__) +#elif defined(__s390x__) # define VEX_HOST_WORDSIZE 8 # define VEX_REGPARM(_n) /* */ -#elif defined(__s390x__) +#elif defined(__mips__) && (__mips == 64) # define VEX_HOST_WORDSIZE 8 # define VEX_REGPARM(_n) /* */ -#elif defined(__mips__) -#if (__mips==64) -# define VEX_HOST_WORDSIZE 8 -#else +#elif defined(__mips__) && (__mips != 64) # define VEX_HOST_WORDSIZE 4 -#endif # define VEX_REGPARM(_n) /* */ #else Index: pub/libvex_guest_amd64.h =================================================================== --- pub/libvex_guest_amd64.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_amd64.h (.../trunk) (revision 2863) @@ -138,8 +138,8 @@ compilation breakage. On amd64, these two fields are set to zero by LibVEX_GuestAMD64_initialise and then should be ignored forever thereafter. */ - ULong guest_TISTART; - ULong guest_TILEN; + ULong guest_CMSTART; + ULong guest_CMLEN; /* Used to record the unredirected guest address at the start of a translation whose start has been redirected. 
By reading Index: pub/libvex_guest_arm.h =================================================================== --- pub/libvex_guest_arm.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_arm.h (.../trunk) (revision 2863) @@ -94,9 +94,9 @@ /* Emulation notes */ UInt guest_EMNOTE; - /* For clflush: record start and length of area to invalidate */ - UInt guest_TISTART; - UInt guest_TILEN; + /* For clinval/clflush: record start and length of area */ + UInt guest_CMSTART; + UInt guest_CMLEN; /* Used to record the unredirected guest address at the start of a translation whose start has been redirected. By reading @@ -193,12 +193,8 @@ */ UInt guest_ITSTATE; - /* Padding to make it have an 32-aligned size */ + /* Padding to make it have an 16-aligned size */ UInt padding1; - UInt padding2; - UInt padding3; - UInt padding4; - UInt padding5; } VexGuestARMState; Index: pub/libvex_guest_arm64.h =================================================================== --- pub/libvex_guest_arm64.h (.../tags/VEX_3_9_0) (revision 0) +++ pub/libvex_guest_arm64.h (.../trunk) (revision 2863) @@ -0,0 +1,190 @@ + +/*---------------------------------------------------------------*/ +/*--- begin libvex_guest_arm64.h ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2013-2013 OpenWorks + info@open-works.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __LIBVEX_PUB_GUEST_ARM64_H +#define __LIBVEX_PUB_GUEST_ARM64_H + +#include "libvex_basictypes.h" + + +/*---------------------------------------------------------------*/ +/*--- Vex's representation of the ARM64 CPU state. ---*/ +/*---------------------------------------------------------------*/ + +typedef + struct { + /* Event check fail addr and counter. */ + /* 0 */ ULong host_EvC_FAILADDR; + /* 8 */ UInt host_EvC_COUNTER; + /* 12 */ UInt pad0; + /* 16 */ + ULong guest_X0; + ULong guest_X1; + ULong guest_X2; + ULong guest_X3; + ULong guest_X4; + ULong guest_X5; + ULong guest_X6; + ULong guest_X7; + ULong guest_X8; + ULong guest_X9; + ULong guest_X10; + ULong guest_X11; + ULong guest_X12; + ULong guest_X13; + ULong guest_X14; + ULong guest_X15; + ULong guest_X16; + ULong guest_X17; + ULong guest_X18; + ULong guest_X19; + ULong guest_X20; + ULong guest_X21; + ULong guest_X22; + ULong guest_X23; + ULong guest_X24; + ULong guest_X25; + ULong guest_X26; + ULong guest_X27; + ULong guest_X28; + ULong guest_X29; + ULong guest_X30; /* link register */ + ULong guest_XSP; + ULong guest_PC; + + /* 4-word thunk used to calculate N(sign) Z(zero) C(carry, + unsigned overflow) and V(signed overflow) flags. 
*/ + ULong guest_CC_OP; + ULong guest_CC_DEP1; + ULong guest_CC_DEP2; + ULong guest_CC_NDEP; + + /* User-space thread register? */ + ULong guest_TPIDR_EL0; + + /* FP/SIMD state */ + U128 guest_Q0; + U128 guest_Q1; + U128 guest_Q2; + U128 guest_Q3; + U128 guest_Q4; + U128 guest_Q5; + U128 guest_Q6; + U128 guest_Q7; + U128 guest_Q8; + U128 guest_Q9; + U128 guest_Q10; + U128 guest_Q11; + U128 guest_Q12; + U128 guest_Q13; + U128 guest_Q14; + U128 guest_Q15; + U128 guest_Q16; + U128 guest_Q17; + U128 guest_Q18; + U128 guest_Q19; + U128 guest_Q20; + U128 guest_Q21; + U128 guest_Q22; + U128 guest_Q23; + U128 guest_Q24; + U128 guest_Q25; + U128 guest_Q26; + U128 guest_Q27; + U128 guest_Q28; + U128 guest_Q29; + U128 guest_Q30; + U128 guest_Q31; + + /* Various pseudo-regs mandated by Vex or Valgrind. */ + /* Emulation notes */ + UInt guest_EMNOTE; + + /* For clflush/clinval: record start and length of area */ + ULong guest_CMSTART; + ULong guest_CMLEN; + + /* Used to record the unredirected guest address at the start of + a translation whose start has been redirected. By reading + this pseudo-register shortly afterwards, the translation can + find out what the corresponding no-redirection address was. + Note, this is only set for wrap-style redirects, not for + replace-style ones. */ + ULong guest_NRADDR; + + /* Needed for Darwin (but mandated for all guest architectures): + program counter at the last syscall insn (int 0x80/81/82, + sysenter, syscall, svc). Used when backing up to restart a + syscall that has been interrupted by a signal. */ + ULong guest_IP_AT_SYSCALL; + + /* The complete FPCR. Default value seems to be zero. We + ignore all bits except 23 and 22, which are the rounding + mode. The guest is unconstrained in what values it can write + to and read from this register, but the emulation only takes + note of bits 23 and 22. */ + UInt guest_FPCR; + + /* The complete FPSR. As with FPCR, the guest may write and + read any values here, and the emulation ignores it, with the + exception of bit 27 (QC, the sticky saturation bit) which + does get set when required. */ + UInt guest_FPSR; + + /* Padding to make it have an 16-aligned size */ + UInt pad_end_0; + ULong pad_end_1; + } + VexGuestARM64State; + + +/*---------------------------------------------------------------*/ +/*--- Utility functions for ARM64 guest stuff. ---*/ +/*---------------------------------------------------------------*/ + +/* ALL THE FOLLOWING ARE VISIBLE TO LIBRARY CLIENT */ + +/* Initialise all guest ARM64 state. */ + +extern +void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state ); + +/* Calculate the ARM64 flag state from the saved data, in the format + 32x0:n:z:c:v:28x0. 
*/ +extern +ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/ + const VexGuestARM64State* vex_state ); + +#endif /* ndef __LIBVEX_PUB_GUEST_ARM64_H */ + + +/*---------------------------------------------------------------*/ +/*--- libvex_guest_arm64.h ---*/ +/*---------------------------------------------------------------*/ Index: pub/libvex_guest_mips32.h =================================================================== --- pub/libvex_guest_mips32.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_mips32.h (.../trunk) (revision 2863) @@ -41,82 +41,82 @@ typedef struct { /* CPU Registers */ - /* 0 */ UInt guest_r0; /* Hardwired to 0 */ - /* 4 */ UInt guest_r1; /* Assembler temporary */ - /* 8 */ UInt guest_r2; /* Values for function returns ...*/ - /* 12 */ UInt guest_r3; /* ...and expression evaluation */ - /* 16 */ UInt guest_r4; /* Function arguments */ - /* 20 */ UInt guest_r5; - /* 24 */ UInt guest_r6; - /* 28 */ UInt guest_r7; - /* 32 */ UInt guest_r8; /* Temporaries */ - /* 36 */ UInt guest_r9; - /* 40 */ UInt guest_r10; - /* 44 */ UInt guest_r11; - /* 48 */ UInt guest_r12; - /* 52 */ UInt guest_r13; - /* 56 */ UInt guest_r14; - /* 60 */ UInt guest_r15; - /* 64 */ UInt guest_r16; /* Saved temporaries */ - /* 68 */ UInt guest_r17; - /* 72 */ UInt guest_r18; - /* 76 */ UInt guest_r19; - /* 80 */ UInt guest_r20; - /* 84 */ UInt guest_r21; - /* 88 */ UInt guest_r22; - /* 92 */ UInt guest_r23; - /* 96 */ UInt guest_r24; /* Temporaries */ - /* 100 */ UInt guest_r25; - /* 104 */ UInt guest_r26; /* Reserved for OS kernel */ - /* 108 */ UInt guest_r27; - /* 112 */ UInt guest_r28; /* Global pointer */ - /* 116 */ UInt guest_r29; /* Stack pointer */ - /* 120 */ UInt guest_r30; /* Frame pointer */ - /* 124 */ UInt guest_r31; /* Return address */ - /* 128 */ UInt guest_PC; /* Program counter */ - /* 132 */ UInt guest_HI;/* Multiply and divide register higher result */ - /* 136 */ UInt guest_LO;/* Multiply and divide register lower result */ + /* 0 */ UInt guest_r0; /* Hardwired to 0 */ + /* 4 */ UInt guest_r1; /* Assembler temporary */ + /* 8 */ UInt guest_r2; /* Values for function returns ...*/ + /* 12 */ UInt guest_r3; /* ...and expression evaluation */ + /* 16 */ UInt guest_r4; /* Function arguments */ + /* 20 */ UInt guest_r5; + /* 24 */ UInt guest_r6; + /* 28 */ UInt guest_r7; + /* 32 */ UInt guest_r8; /* Temporaries */ + /* 36 */ UInt guest_r9; + /* 40 */ UInt guest_r10; + /* 44 */ UInt guest_r11; + /* 48 */ UInt guest_r12; + /* 52 */ UInt guest_r13; + /* 56 */ UInt guest_r14; + /* 60 */ UInt guest_r15; + /* 64 */ UInt guest_r16; /* Saved temporaries */ + /* 68 */ UInt guest_r17; + /* 72 */ UInt guest_r18; + /* 76 */ UInt guest_r19; + /* 80 */ UInt guest_r20; + /* 84 */ UInt guest_r21; + /* 88 */ UInt guest_r22; + /* 92 */ UInt guest_r23; + /* 96 */ UInt guest_r24; /* Temporaries */ + /* 100 */ UInt guest_r25; + /* 104 */ UInt guest_r26; /* Reserved for OS kernel */ + /* 108 */ UInt guest_r27; + /* 112 */ UInt guest_r28; /* Global pointer */ + /* 116 */ UInt guest_r29; /* Stack pointer */ + /* 120 */ UInt guest_r30; /* Frame pointer */ + /* 124 */ UInt guest_r31; /* Return address */ + /* 128 */ UInt guest_PC; /* Program counter */ + /* 132 */ UInt guest_HI; /* Multiply and divide register higher result */ + /* 136 */ UInt guest_LO; /* Multiply and divide register lower result */ /* FPU Registers */ - /* 140 */ UInt guest_f0; /* Floting point general purpose registers */ - /* 144 */ UInt guest_f1; - /* 148 */ UInt guest_f2; - /* 152 */ UInt guest_f3; - /* 156 */ UInt guest_f4; - /* 
160 */ UInt guest_f5; - /* 164 */ UInt guest_f6; - /* 168 */ UInt guest_f7; - /* 172 */ UInt guest_f8; - /* 176 */ UInt guest_f9; - /* 180 */ UInt guest_f10; - /* 184 */ UInt guest_f11; - /* 188 */ UInt guest_f12; - /* 192 */ UInt guest_f13; - /* 196 */ UInt guest_f14; - /* 200 */ UInt guest_f15; - /* 204 */ UInt guest_f16; - /* 208 */ UInt guest_f17; - /* 212 */ UInt guest_f18; - /* 216 */ UInt guest_f19; - /* 220 */ UInt guest_f20; - /* 224 */ UInt guest_f21; - /* 228 */ UInt guest_f22; - /* 232 */ UInt guest_f23; - /* 236 */ UInt guest_f24; - /* 240 */ UInt guest_f25; - /* 244 */ UInt guest_f26; - /* 248 */ UInt guest_f27; - /* 252 */ UInt guest_f28; - /* 256 */ UInt guest_f29; - /* 260 */ UInt guest_f30; - /* 264 */ UInt guest_f31; - - /* 268 */ UInt guest_FIR; - /* 272 */ UInt guest_FCCR; - /* 276 */ UInt guest_FEXR; - /* 280 */ UInt guest_FENR; - /* 284 */ UInt guest_FCSR; + /* 144 */ ULong guest_f0; /* Floating point general purpose registers */ + /* 152 */ ULong guest_f1; + /* 160 */ ULong guest_f2; + /* 168 */ ULong guest_f3; + /* 176 */ ULong guest_f4; + /* 184 */ ULong guest_f5; + /* 192 */ ULong guest_f6; + /* 200 */ ULong guest_f7; + /* 208 */ ULong guest_f8; + /* 216 */ ULong guest_f9; + /* 224 */ ULong guest_f10; + /* 232 */ ULong guest_f11; + /* 240 */ ULong guest_f12; + /* 248 */ ULong guest_f13; + /* 256 */ ULong guest_f14; + /* 264 */ ULong guest_f15; + /* 272 */ ULong guest_f16; + /* 280 */ ULong guest_f17; + /* 288 */ ULong guest_f18; + /* 296 */ ULong guest_f19; + /* 304 */ ULong guest_f20; + /* 312 */ ULong guest_f21; + /* 320 */ ULong guest_f22; + /* 328 */ ULong guest_f23; + /* 336 */ ULong guest_f24; + /* 344 */ ULong guest_f25; + /* 352 */ ULong guest_f26; + /* 360 */ ULong guest_f27; + /* 368 */ ULong guest_f28; + /* 376 */ ULong guest_f29; + /* 384 */ ULong guest_f30; + /* 392 */ ULong guest_f31; + /* 400 */ UInt guest_FIR; + /* 404 */ UInt guest_FCCR; + /* 408 */ UInt guest_FEXR; + /* 412 */ UInt guest_FENR; + /* 416 */ UInt guest_FCSR; + /* TLS pointer for the thread. It's read-only in user space. On Linux it is set in user space by various thread-related syscalls. @@ -126,29 +126,28 @@ environments, the UserLocal register is a pointer to a thread-specific storage block. */ - /* 288 */ UInt guest_ULR; + /* 420 */ UInt guest_ULR; /* Emulation notes */ - UInt guest_EMNOTE; /* 292 */ + /* 424 */ UInt guest_EMNOTE; /* For clflush: record start and length of area to invalidate */ - UInt guest_TISTART; /* 296 */ - UInt guest_TILEN; /* 300 */ - UInt guest_NRADDR; /* 304 */ + /* 428 */ UInt guest_CMSTART; + /* 432 */ UInt guest_CMLEN; + /* 436 */ UInt guest_NRADDR; - UInt host_EvC_FAILADDR; /* 308 */ - UInt host_EvC_COUNTER; /* 312 */ - UInt guest_COND; /* 316 */ + /* 440 */ UInt host_EvC_FAILADDR; + /* 444 */ UInt host_EvC_COUNTER; + /* 448 */ UInt guest_COND; - UInt padding1; /* MIPS32 DSP ASE(r2) specific registers. */ - UInt guest_DSPControl; /* 324 */ - ULong guest_ac0; /* 328 */ - ULong guest_ac1; /* 336 */ - ULong guest_ac2; /* 344 */ - ULong guest_ac3; /* 352 */ - - UInt padding[6]; + /* 452 */ UInt guest_DSPControl; + /* 456 */ ULong guest_ac0; + /* 464 */ ULong guest_ac1; + /* 472 */ ULong guest_ac2; + /* 480 */ ULong guest_ac3; + + UInt padding; } VexGuestMIPS32State; /*---------------------------------------------------------------*/ /*--- Utility functions for MIPS32 guest stuff. 
---*/ Index: pub/libvex_guest_mips64.h =================================================================== --- pub/libvex_guest_mips64.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_mips64.h (.../trunk) (revision 2863) @@ -137,8 +137,8 @@ UInt guest_EMNOTE; /* 568 */ /* For clflush: record start and length of area to invalidate */ - ULong guest_TISTART; /* 576 */ - ULong guest_TILEN; /* 584 */ + ULong guest_CMSTART; /* 576 */ + ULong guest_CMLEN; /* 584 */ ULong guest_NRADDR; /* 592 */ @@ -145,7 +145,7 @@ ULong host_EvC_FAILADDR; /* 600 */ UInt host_EvC_COUNTER; /* 608 */ UInt guest_COND; /* 612 */ - UInt padding[6]; + UInt padding[2]; } VexGuestMIPS64State; /*---------------------------------------------------------------*/ Index: pub/libvex_guest_ppc32.h =================================================================== --- pub/libvex_guest_ppc32.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_ppc32.h (.../trunk) (revision 2863) @@ -210,8 +210,8 @@ /* 1196 */ UInt guest_EMNOTE; /* For icbi: record start and length of area to invalidate */ - /* 1200 */ UInt guest_TISTART; - /* 1204 */ UInt guest_TILEN; + /* 1200 */ UInt guest_CMSTART; + /* 1204 */ UInt guest_CMLEN; /* Used to record the unredirected guest address at the start of a translation whose start has been redirected. By reading @@ -242,7 +242,7 @@ /* 1368 */ ULong guest_TEXASR; // Transaction EXception And Summary Register /* 1376 */ ULong guest_TFIAR; // Transaction Failure Instruction Address Register - /* Padding to make it have an 8-aligned size */ + /* Padding to make it have an 16-aligned size */ /* 1384 */ UInt padding2; } Index: pub/libvex_guest_ppc64.h =================================================================== --- pub/libvex_guest_ppc64.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_ppc64.h (.../trunk) (revision 2863) @@ -252,8 +252,8 @@ /* 1340 */ UInt padding; /* For icbi: record start and length of area to invalidate */ - /* 1344 */ ULong guest_TISTART; - /* 1352 */ ULong guest_TILEN; + /* 1344 */ ULong guest_CMSTART; + /* 1352 */ ULong guest_CMLEN; /* Used to record the unredirected guest address at the start of a translation whose start has been redirected. By reading Index: pub/libvex_guest_s390x.h =================================================================== --- pub/libvex_guest_s390x.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_s390x.h (.../trunk) (revision 2863) @@ -132,8 +132,8 @@ /* See comments at bottom of libvex.h */ /* 384 */ ULong guest_NRADDR; - /* 392 */ ULong guest_TISTART; - /* 400 */ ULong guest_TILEN; + /* 392 */ ULong guest_CMSTART; + /* 400 */ ULong guest_CMLEN; /* Used when backing up to restart a syscall that has been interrupted by a signal. 
See also comment in @@ -148,11 +148,11 @@ /* 424 */ ULong host_EvC_FAILADDR; /*------------------------------------------------------------*/ -/*--- Force alignment to 32 bytes ---*/ +/*--- Force alignment to 16 bytes ---*/ /*------------------------------------------------------------*/ - /* 432 */ UChar padding[16]; + /* 432 */ UChar padding[0]; - /* 448 */ /* This is the size of the guest state */ + /* 432 */ /* This is the size of the guest state */ } VexGuestS390XState; Index: pub/libvex_guest_x86.h =================================================================== --- pub/libvex_guest_x86.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_guest_x86.h (.../trunk) (revision 2863) @@ -199,9 +199,9 @@ /* Emulation notes */ UInt guest_EMNOTE; - /* For clflush: record start and length of area to invalidate */ - UInt guest_TISTART; - UInt guest_TILEN; + /* For clflush/clinval: record start and length of area */ + UInt guest_CMSTART; + UInt guest_CMLEN; /* Used to record the unredirected guest address at the start of a translation whose start has been redirected. By reading @@ -220,8 +220,8 @@ been interrupted by a signal. */ UInt guest_IP_AT_SYSCALL; - /* Padding to make it have an 32-aligned size */ - UInt padding[5]; + /* Padding to make it have an 16-aligned size */ + UInt padding1; } VexGuestX86State; Index: pub/libvex_ir.h =================================================================== --- pub/libvex_ir.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_ir.h (.../trunk) (revision 2863) @@ -242,7 +242,11 @@ /* Get the size (in bytes) of an IRType */ extern Int sizeofIRType ( IRType ); +/* Translate 1/2/4/8 into Ity_I{8,16,32,64} respectively. Asserts on + any other input. */ +extern IRType integerIRTypeOfSize ( Int szB ); + /* ------------------ Endianness ------------------ */ /* IREndness is used in load IRExprs and store IRStmts. */ @@ -481,9 +485,11 @@ Iop_DivS32, // ditto, signed Iop_DivU64, // :: I64,I64 -> I64 (simple div, no mod) Iop_DivS64, // ditto, signed - Iop_DivU64E, // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low)) + Iop_DivU64E, // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) + // concat with 64 0's (low)) Iop_DivS64E, // ditto, signed - Iop_DivU32E, // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low)) + Iop_DivU32E, // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) + // concat with 32 0's (low)) Iop_DivS32E, // ditto, signed Iop_DivModU64to32, // :: I64,I32 -> I64 @@ -1240,8 +1246,8 @@ /* BCD arithmetic instructions, (V128, V128) -> V128 * The BCD format is the same as that used in the BCD<->DPB conversion - * routines, except using 124 digits (vs 60) plus the trailing 4-bit signed code. - * */ + * routines, except using 124 digits (vs 60) plus the trailing 4-bit + * signed code. */ Iop_BCDAdd, Iop_BCDSub, /* Conversion I64 -> D64 */ @@ -1254,8 +1260,10 @@ /* --- 32x4 vector FP --- */ + /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */ + Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4, + /* binary */ - Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4, Iop_Max32Fx4, Iop_Min32Fx4, Iop_Add32Fx2, Iop_Sub32Fx2, /* Note: For the following compares, the ppc and arm front-ends assume a @@ -1263,13 +1271,11 @@ Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4, Iop_CmpGT32Fx4, Iop_CmpGE32Fx4, - /* Vector Absolute */ - Iop_Abs32Fx4, - /* Pairwise Max and Min. See integer pairwise operations for details. 
*/ Iop_PwMax32Fx4, Iop_PwMin32Fx4, /* unary */ + Iop_Abs32Fx4, Iop_Sqrt32Fx4, Iop_RSqrt32Fx4, Iop_Neg32Fx4, @@ -1296,9 +1302,9 @@ /* Unlike the standard fp conversions, these irops take no rounding mode argument. Instead the irop trailers _R{M,P,N,Z} indicate the mode: {-inf, +inf, nearest, zero} respectively. */ - Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */ + Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */ Iop_FtoI32Ux4_RZ, Iop_FtoI32Sx4_RZ, /* F32x4 -> I32x4 */ - Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (with saturation) */ + Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (saturating) */ Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */ Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */ /* Fixed32 format is floating-point number with fixed number of fraction @@ -1326,14 +1332,21 @@ /* --- 64x2 vector FP --- */ + /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */ + Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2, + /* binary */ - Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2, Iop_Max64Fx2, Iop_Min64Fx2, Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2, /* unary */ - Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2, + Iop_Abs64Fx2, + Iop_Sqrt64Fx2, Iop_RSqrt64Fx2, + Iop_Neg64Fx2, + /* Vector Reciprocal Estimate */ + Iop_Recip64Fx2, + /* --- 64x2 lowest-lane-only scalar FP --- */ /* In binary cases, upper half is copied from first operand. In @@ -1357,6 +1370,12 @@ Iop_64UtoV128, Iop_SetV128lo64, + /* Copies lower 64/32/16/8 bits, zeroes out the rest. */ + Iop_ZeroHI64ofV128, // :: V128 -> V128 + Iop_ZeroHI96ofV128, // :: V128 -> V128 + Iop_ZeroHI112ofV128, // :: V128 -> V128 + Iop_ZeroHI120ofV128, // :: V128 -> V128 + /* 32 <-> 128 bit vector */ Iop_32UtoV128, Iop_V128to32, // :: V128 -> I32, lowest lane @@ -1405,8 +1424,8 @@ Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, /* Doubling saturating multiplication (long) (I64, I64) -> V128 */ Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2, - /* Plynomial multiplication treats it's arguments as coefficients of - polynoms over {0, 1}. */ + /* Polynomial multiplication treats its arguments as + coefficients of polynomials over {0, 1}. */ Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */ Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */ @@ -1519,7 +1538,8 @@ /* NARROWING (unary) -- narrow V128 into I64 */ Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2, - /* Saturating narrowing from signed source to signed/unsigned destination */ + /* Saturating narrowing from signed source to signed/unsigned + destination */ Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2, Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2, /* Saturating narrowing from unsigned source to unsigned destination */ @@ -1657,15 +1677,11 @@ Iop_SHA512, Iop_SHA256, /* ------------------ 256-bit SIMD FP. 
------------------ */ - Iop_Add64Fx4, - Iop_Sub64Fx4, - Iop_Mul64Fx4, - Iop_Div64Fx4, - Iop_Add32Fx8, - Iop_Sub32Fx8, - Iop_Mul32Fx8, - Iop_Div32Fx8, + /* ternary :: IRRoundingMode(I32) x V256 x V256 -> V256 */ + Iop_Add64Fx4, Iop_Sub64Fx4, Iop_Mul64Fx4, Iop_Div64Fx4, + Iop_Add32Fx8, Iop_Sub32Fx8, Iop_Mul32Fx8, Iop_Div32Fx8, + Iop_Sqrt32Fx8, Iop_Sqrt64Fx4, Iop_RSqrt32Fx8, @@ -1691,7 +1707,7 @@ Irrm_PosINF = 2, // Round to positive infinity Irrm_ZERO = 3, // Round toward zero Irrm_NEAREST_TIE_AWAY_0 = 4, // Round to nearest, ties away from 0 - Irrm_PREPARE_SHORTER = 5, // Round to prepare for storter + Irrm_PREPARE_SHORTER = 5, // Round to prepare for shorter // precision Irrm_AWAY_FROM_ZERO = 6, // Round to away from 0 Irrm_NEAREST_TIE_TOWARD_0 = 7 // Round to nearest, ties towards 0 @@ -2059,13 +2075,18 @@ /* This describes hints which can be passed to the dispatcher at guest control-flow transfer points. - Re Ijk_TInval: the guest state _must_ have two pseudo-registers, - guest_TISTART and guest_TILEN, which specify the start and length - of the region to be invalidated. These are both the size of a - guest word. It is the responsibility of the relevant toIR.c to - ensure that these are filled in with suitable values before issuing - a jump of kind Ijk_TInval. + Re Ijk_InvalICache and Ijk_FlushDCache: the guest state _must_ have + two pseudo-registers, guest_CMSTART and guest_CMLEN, which specify + the start and length of the region to be invalidated. CM stands + for "Cache Management". These are both the size of a guest word. + It is the responsibility of the relevant toIR.c to ensure that + these are filled in with suitable values before issuing a jump of + kind Ijk_InvalICache or Ijk_FlushDCache. + Ijk_InvalICache requests invalidation of translations taken from + the requested range. Ijk_FlushDCache requests flushing of the D + cache for the specified range. + Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a pseudo-register guest_EMNOTE, which is 32-bits regardless of the host or guest word size. That register should be made to hold a @@ -2093,8 +2114,10 @@ Ijk_EmFail, /* emulation critical (FATAL) error; give up */ Ijk_NoDecode, /* current instruction cannot be decoded */ Ijk_MapFail, /* Vex-provided address translation failed */ - Ijk_TInval, /* Invalidate translations before continuing. */ + Ijk_InvalICache, /* Inval icache for range [CMSTART, +CMLEN) */ + Ijk_FlushDCache, /* Flush dcache for range [CMSTART, +CMLEN) */ Ijk_NoRedir, /* Jump to un-redirected guest addr */ + Ijk_SigILL, /* current instruction synths SIGILL */ Ijk_SigTRAP, /* current instruction synths SIGTRAP */ Ijk_SigSEGV, /* current instruction synths SIGSEGV */ Ijk_SigBUS, /* current instruction synths SIGBUS */ @@ -2102,7 +2125,7 @@ Ijk_SigFPE_IntOvf, /* current instruction synths SIGFPE - IntOvf */ /* Unfortunately, various guest-dependent syscall kinds. They all mean: do a syscall before continuing. 
*/ - Ijk_Sys_syscall, /* amd64 'syscall', ppc 'sc', arm 'svc #0' */ + Ijk_Sys_syscall, /* amd64/x86 'syscall', ppc 'sc', arm 'svc #0' */ Ijk_Sys_int32, /* amd64/x86 'int $0x20' */ Ijk_Sys_int128, /* amd64/x86 'int $0x80' */ Ijk_Sys_int129, /* amd64/x86 'int $0x81' */ @@ -2849,12 +2872,12 @@ /*---------------------------------------------------------------*/ /*--- IR injection ---*/ /*---------------------------------------------------------------*/ + void vex_inject_ir(IRSB *, IREndness); #endif /* ndef __LIBVEX_IR_H */ - /*---------------------------------------------------------------*/ /*--- libvex_ir.h ---*/ /*---------------------------------------------------------------*/ Index: pub/libvex_trc_values.h =================================================================== --- pub/libvex_trc_values.h (.../tags/VEX_3_9_0) (revision 2863) +++ pub/libvex_trc_values.h (.../trunk) (revision 2863) @@ -46,15 +46,12 @@ These values should be 61 or above so as not to conflict with Valgrind's VG_TRC_ values, which are 60 or below. - - These values *must* be odd (have bit 0 set) because the dispatchers - (coregrind/m_dispatch/dispatch-*-*.S) use this fact to distinguish - a TRC value from the unchanged baseblock pointer -- which has 0 as - its lowest bit. */ -#define VEX_TRC_JMP_TINVAL 61 /* invalidate translations before - continuing */ +#define VEX_TRC_JMP_INVALICACHE 61 /* invalidate icache (translations) + before continuing */ +#define VEX_TRC_JMP_FLUSHDCACHE 103 /* flush dcache before continuing */ + #define VEX_TRC_JMP_NOREDIR 81 /* jump to undirected guest addr */ #define VEX_TRC_JMP_SIGTRAP 85 /* deliver trap (SIGTRAP) before continuing */ @@ -68,6 +65,9 @@ #define VEX_TRC_JMP_SIGFPE_INTOVF 99 /* deliver SIGFPE (integer overflow) before continuing */ +#define VEX_TRC_JMP_SIGILL 101 /* deliver SIGILL (Illegal instruction) + before continuing */ + #define VEX_TRC_JMP_EMWARN 63 /* deliver emulation warning before continuing */ #define VEX_TRC_JMP_EMFAIL 83 /* emulation fatal error; abort system */
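
Illustrative sketch (not part of the patch above): the libvex.h and libvex_ir.h comments in this change say that a front-end should fill in the renamed guest_CMSTART/guest_CMLEN pseudo-registers and then exit with Ijk_InvalICache or Ijk_FlushDCache so the dispatcher can act on the range. A minimal sketch of that pattern is below, assuming the public IR constructors from libvex_ir.h and the AMD64 guest layout; the function name and the 'irsb', 'start', 'len' and 'next_insn_addr' parameters are hypothetical.

#include <stddef.h>                   /* offsetof */
#include "libvex_ir.h"
#include "libvex_guest_amd64.h"

static void request_icache_inval ( IRSB* irsb, Addr64 start, ULong len,
                                   Addr64 next_insn_addr )
{
   /* Fill in the two cache-management pseudo-registers ... */
   addStmtToIRSB(
      irsb,
      IRStmt_Put(offsetof(VexGuestAMD64State, guest_CMSTART),
                 IRExpr_Const(IRConst_U64(start))));
   addStmtToIRSB(
      irsb,
      IRStmt_Put(offsetof(VexGuestAMD64State, guest_CMLEN),
                 IRExpr_Const(IRConst_U64(len))));

   /* ... then end the superblock with an Ijk_InvalICache exit, so the
      dispatcher discards translations overlapping [CMSTART, +CMLEN).
      A real front-end would also write the guest program counter
      before exiting. */
   irsb->next     = IRExpr_Const(IRConst_U64(next_insn_addr));
   irsb->jumpkind = Ijk_InvalICache;
}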
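
Illustrative sketch (not part of the patch above): the new VexArchInfo fields arm64_dMinLine_lg2_szB and arm64_iMinLine_lg2_szB hold the ARM64 minimum cache line sizes as log2(bytes), so 6 encodes 64 bytes. A small sketch of decoding them is below; 'vai' is a hypothetical, already-populated VexArchInfo, and plain stdio is used for output rather than any VEX facility.

#include <stdio.h>
#include "libvex.h"

static void show_arm64_line_sizes ( const VexArchInfo* vai )
{
   /* Each field is log2 of the line size in bytes (6 -> 64 bytes);
      LibVEX_default_VexArchInfo initialises both fields to 0. */
   UInt dszB = 1u << vai->arm64_dMinLine_lg2_szB;
   UInt iszB = 1u << vai->arm64_iMinLine_lg2_szB;
   printf("DminLine = %u bytes, IminLine = %u bytes\n", dszB, iszB);
}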
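
Illustrative sketch (not part of the patch above): with the revised libvex.h macros, a MIPS hwcaps word now splits into a company ID (bits 23:16), a processor ID (bits 15:8) and a revision (bits 7:0). The sketch below reads those fields back with the public macros; 'hwcaps' is a hypothetical value of the kind carried in VexArchInfo.hwcaps.

#include <stdio.h>
#include "libvex.h"

static void show_mips_hwcaps_fields ( UInt hwcaps )
{
   /* Company ID in bits 23:16, processor ID in bits 15:8, revision in
      bits 7:0, per VEX_MIPS_COMP_ID / VEX_MIPS_PROC_ID / VEX_MIPS_REV. */
   printf("company 0x%06x  proc 0x%04x  rev 0x%02x  dsp2:%s\n",
          VEX_MIPS_COMP_ID(hwcaps),
          VEX_MIPS_PROC_ID(hwcaps),
          VEX_MIPS_REV(hwcaps),
          VEX_MIPS_PROC_DSP2(hwcaps) ? "yes" : "no");
}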