Index: VEX/priv/guest_ppc_helpers.c =================================================================== --- VEX/priv/guest_ppc_helpers.c.orig +++ VEX/priv/guest_ppc_helpers.c @@ -385,74 +385,74 @@ void LibVEX_GuestPPC32_initialise ( /*OU vex_state->guest_GPR30 = 0; vex_state->guest_GPR31 = 0; - vex_state->guest_FPR0 = 0; - vex_state->guest_FPR1 = 0; - vex_state->guest_FPR2 = 0; - vex_state->guest_FPR3 = 0; - vex_state->guest_FPR4 = 0; - vex_state->guest_FPR5 = 0; - vex_state->guest_FPR6 = 0; - vex_state->guest_FPR7 = 0; - vex_state->guest_FPR8 = 0; - vex_state->guest_FPR9 = 0; - vex_state->guest_FPR10 = 0; - vex_state->guest_FPR11 = 0; - vex_state->guest_FPR12 = 0; - vex_state->guest_FPR13 = 0; - vex_state->guest_FPR14 = 0; - vex_state->guest_FPR15 = 0; - vex_state->guest_FPR16 = 0; - vex_state->guest_FPR17 = 0; - vex_state->guest_FPR18 = 0; - vex_state->guest_FPR19 = 0; - vex_state->guest_FPR20 = 0; - vex_state->guest_FPR21 = 0; - vex_state->guest_FPR22 = 0; - vex_state->guest_FPR23 = 0; - vex_state->guest_FPR24 = 0; - vex_state->guest_FPR25 = 0; - vex_state->guest_FPR26 = 0; - vex_state->guest_FPR27 = 0; - vex_state->guest_FPR28 = 0; - vex_state->guest_FPR29 = 0; - vex_state->guest_FPR30 = 0; - vex_state->guest_FPR31 = 0; /* Initialise the vector state. */ # define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0; - VECZERO(vex_state->guest_VR0 ); - VECZERO(vex_state->guest_VR1 ); - VECZERO(vex_state->guest_VR2 ); - VECZERO(vex_state->guest_VR3 ); - VECZERO(vex_state->guest_VR4 ); - VECZERO(vex_state->guest_VR5 ); - VECZERO(vex_state->guest_VR6 ); - VECZERO(vex_state->guest_VR7 ); - VECZERO(vex_state->guest_VR8 ); - VECZERO(vex_state->guest_VR9 ); - VECZERO(vex_state->guest_VR10); - VECZERO(vex_state->guest_VR11); - VECZERO(vex_state->guest_VR12); - VECZERO(vex_state->guest_VR13); - VECZERO(vex_state->guest_VR14); - VECZERO(vex_state->guest_VR15); - VECZERO(vex_state->guest_VR16); - VECZERO(vex_state->guest_VR17); - VECZERO(vex_state->guest_VR18); - VECZERO(vex_state->guest_VR19); - VECZERO(vex_state->guest_VR20); - VECZERO(vex_state->guest_VR21); - VECZERO(vex_state->guest_VR22); - VECZERO(vex_state->guest_VR23); - VECZERO(vex_state->guest_VR24); - VECZERO(vex_state->guest_VR25); - VECZERO(vex_state->guest_VR26); - VECZERO(vex_state->guest_VR27); - VECZERO(vex_state->guest_VR28); - VECZERO(vex_state->guest_VR29); - VECZERO(vex_state->guest_VR30); - VECZERO(vex_state->guest_VR31); + VECZERO(vex_state->guest_VSR0 ); + VECZERO(vex_state->guest_VSR1 ); + VECZERO(vex_state->guest_VSR2 ); + VECZERO(vex_state->guest_VSR3 ); + VECZERO(vex_state->guest_VSR4 ); + VECZERO(vex_state->guest_VSR5 ); + VECZERO(vex_state->guest_VSR6 ); + VECZERO(vex_state->guest_VSR7 ); + VECZERO(vex_state->guest_VSR8 ); + VECZERO(vex_state->guest_VSR9 ); + VECZERO(vex_state->guest_VSR10); + VECZERO(vex_state->guest_VSR11); + VECZERO(vex_state->guest_VSR12); + VECZERO(vex_state->guest_VSR13); + VECZERO(vex_state->guest_VSR14); + VECZERO(vex_state->guest_VSR15); + VECZERO(vex_state->guest_VSR16); + VECZERO(vex_state->guest_VSR17); + VECZERO(vex_state->guest_VSR18); + VECZERO(vex_state->guest_VSR19); + VECZERO(vex_state->guest_VSR20); + VECZERO(vex_state->guest_VSR21); + VECZERO(vex_state->guest_VSR22); + VECZERO(vex_state->guest_VSR23); + VECZERO(vex_state->guest_VSR24); + VECZERO(vex_state->guest_VSR25); + VECZERO(vex_state->guest_VSR26); + VECZERO(vex_state->guest_VSR27); + VECZERO(vex_state->guest_VSR28); + VECZERO(vex_state->guest_VSR29); + VECZERO(vex_state->guest_VSR30); + VECZERO(vex_state->guest_VSR31); + 
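+ /* VSR32..VSR63 back the 32 AltiVec vector registers VR0..VR31 (see vectorGuestRegOffset in guest_ppc_toIR.c). */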
VECZERO(vex_state->guest_VSR32); + VECZERO(vex_state->guest_VSR33); + VECZERO(vex_state->guest_VSR34); + VECZERO(vex_state->guest_VSR35); + VECZERO(vex_state->guest_VSR36); + VECZERO(vex_state->guest_VSR37); + VECZERO(vex_state->guest_VSR38); + VECZERO(vex_state->guest_VSR39); + VECZERO(vex_state->guest_VSR40); + VECZERO(vex_state->guest_VSR41); + VECZERO(vex_state->guest_VSR42); + VECZERO(vex_state->guest_VSR43); + VECZERO(vex_state->guest_VSR44); + VECZERO(vex_state->guest_VSR45); + VECZERO(vex_state->guest_VSR46); + VECZERO(vex_state->guest_VSR47); + VECZERO(vex_state->guest_VSR48); + VECZERO(vex_state->guest_VSR49); + VECZERO(vex_state->guest_VSR50); + VECZERO(vex_state->guest_VSR51); + VECZERO(vex_state->guest_VSR52); + VECZERO(vex_state->guest_VSR53); + VECZERO(vex_state->guest_VSR54); + VECZERO(vex_state->guest_VSR55); + VECZERO(vex_state->guest_VSR56); + VECZERO(vex_state->guest_VSR57); + VECZERO(vex_state->guest_VSR58); + VECZERO(vex_state->guest_VSR59); + VECZERO(vex_state->guest_VSR60); + VECZERO(vex_state->guest_VSR61); + VECZERO(vex_state->guest_VSR62); + VECZERO(vex_state->guest_VSR63); # undef VECZERO @@ -542,74 +542,73 @@ void LibVEX_GuestPPC64_initialise ( /*OU vex_state->guest_GPR30 = 0; vex_state->guest_GPR31 = 0; - vex_state->guest_FPR0 = 0; - vex_state->guest_FPR1 = 0; - vex_state->guest_FPR2 = 0; - vex_state->guest_FPR3 = 0; - vex_state->guest_FPR4 = 0; - vex_state->guest_FPR5 = 0; - vex_state->guest_FPR6 = 0; - vex_state->guest_FPR7 = 0; - vex_state->guest_FPR8 = 0; - vex_state->guest_FPR9 = 0; - vex_state->guest_FPR10 = 0; - vex_state->guest_FPR11 = 0; - vex_state->guest_FPR12 = 0; - vex_state->guest_FPR13 = 0; - vex_state->guest_FPR14 = 0; - vex_state->guest_FPR15 = 0; - vex_state->guest_FPR16 = 0; - vex_state->guest_FPR17 = 0; - vex_state->guest_FPR18 = 0; - vex_state->guest_FPR19 = 0; - vex_state->guest_FPR20 = 0; - vex_state->guest_FPR21 = 0; - vex_state->guest_FPR22 = 0; - vex_state->guest_FPR23 = 0; - vex_state->guest_FPR24 = 0; - vex_state->guest_FPR25 = 0; - vex_state->guest_FPR26 = 0; - vex_state->guest_FPR27 = 0; - vex_state->guest_FPR28 = 0; - vex_state->guest_FPR29 = 0; - vex_state->guest_FPR30 = 0; - vex_state->guest_FPR31 = 0; - /* Initialise the vector state. 
*/ # define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0; - VECZERO(vex_state->guest_VR0 ); - VECZERO(vex_state->guest_VR1 ); - VECZERO(vex_state->guest_VR2 ); - VECZERO(vex_state->guest_VR3 ); - VECZERO(vex_state->guest_VR4 ); - VECZERO(vex_state->guest_VR5 ); - VECZERO(vex_state->guest_VR6 ); - VECZERO(vex_state->guest_VR7 ); - VECZERO(vex_state->guest_VR8 ); - VECZERO(vex_state->guest_VR9 ); - VECZERO(vex_state->guest_VR10); - VECZERO(vex_state->guest_VR11); - VECZERO(vex_state->guest_VR12); - VECZERO(vex_state->guest_VR13); - VECZERO(vex_state->guest_VR14); - VECZERO(vex_state->guest_VR15); - VECZERO(vex_state->guest_VR16); - VECZERO(vex_state->guest_VR17); - VECZERO(vex_state->guest_VR18); - VECZERO(vex_state->guest_VR19); - VECZERO(vex_state->guest_VR20); - VECZERO(vex_state->guest_VR21); - VECZERO(vex_state->guest_VR22); - VECZERO(vex_state->guest_VR23); - VECZERO(vex_state->guest_VR24); - VECZERO(vex_state->guest_VR25); - VECZERO(vex_state->guest_VR26); - VECZERO(vex_state->guest_VR27); - VECZERO(vex_state->guest_VR28); - VECZERO(vex_state->guest_VR29); - VECZERO(vex_state->guest_VR30); - VECZERO(vex_state->guest_VR31); + VECZERO(vex_state->guest_VSR0 ); + VECZERO(vex_state->guest_VSR1 ); + VECZERO(vex_state->guest_VSR2 ); + VECZERO(vex_state->guest_VSR3 ); + VECZERO(vex_state->guest_VSR4 ); + VECZERO(vex_state->guest_VSR5 ); + VECZERO(vex_state->guest_VSR6 ); + VECZERO(vex_state->guest_VSR7 ); + VECZERO(vex_state->guest_VSR8 ); + VECZERO(vex_state->guest_VSR9 ); + VECZERO(vex_state->guest_VSR10); + VECZERO(vex_state->guest_VSR11); + VECZERO(vex_state->guest_VSR12); + VECZERO(vex_state->guest_VSR13); + VECZERO(vex_state->guest_VSR14); + VECZERO(vex_state->guest_VSR15); + VECZERO(vex_state->guest_VSR16); + VECZERO(vex_state->guest_VSR17); + VECZERO(vex_state->guest_VSR18); + VECZERO(vex_state->guest_VSR19); + VECZERO(vex_state->guest_VSR20); + VECZERO(vex_state->guest_VSR21); + VECZERO(vex_state->guest_VSR22); + VECZERO(vex_state->guest_VSR23); + VECZERO(vex_state->guest_VSR24); + VECZERO(vex_state->guest_VSR25); + VECZERO(vex_state->guest_VSR26); + VECZERO(vex_state->guest_VSR27); + VECZERO(vex_state->guest_VSR28); + VECZERO(vex_state->guest_VSR29); + VECZERO(vex_state->guest_VSR30); + VECZERO(vex_state->guest_VSR31); + VECZERO(vex_state->guest_VSR32); + VECZERO(vex_state->guest_VSR33); + VECZERO(vex_state->guest_VSR34); + VECZERO(vex_state->guest_VSR35); + VECZERO(vex_state->guest_VSR36); + VECZERO(vex_state->guest_VSR37); + VECZERO(vex_state->guest_VSR38); + VECZERO(vex_state->guest_VSR39); + VECZERO(vex_state->guest_VSR40); + VECZERO(vex_state->guest_VSR41); + VECZERO(vex_state->guest_VSR42); + VECZERO(vex_state->guest_VSR43); + VECZERO(vex_state->guest_VSR44); + VECZERO(vex_state->guest_VSR45); + VECZERO(vex_state->guest_VSR46); + VECZERO(vex_state->guest_VSR47); + VECZERO(vex_state->guest_VSR48); + VECZERO(vex_state->guest_VSR49); + VECZERO(vex_state->guest_VSR50); + VECZERO(vex_state->guest_VSR51); + VECZERO(vex_state->guest_VSR52); + VECZERO(vex_state->guest_VSR53); + VECZERO(vex_state->guest_VSR54); + VECZERO(vex_state->guest_VSR55); + VECZERO(vex_state->guest_VSR56); + VECZERO(vex_state->guest_VSR57); + VECZERO(vex_state->guest_VSR58); + VECZERO(vex_state->guest_VSR59); + VECZERO(vex_state->guest_VSR60); + VECZERO(vex_state->guest_VSR61); + VECZERO(vex_state->guest_VSR62); + VECZERO(vex_state->guest_VSR63); # undef VECZERO Index: VEX/priv/guest_ppc_toIR.c =================================================================== --- VEX/priv/guest_ppc_toIR.c.orig +++ 
VEX/priv/guest_ppc_toIR.c @@ -1,4 +1,5 @@ + /*--------------------------------------------------------------------*/ /*--- begin guest_ppc_toIR.c ---*/ /*--------------------------------------------------------------------*/ @@ -257,22 +258,52 @@ static UChar ifieldRegDS( UInt instr ) { return toUChar( IFIELD( instr, 21, 5 ) ); } +/* Extract XT (destination register) field, instr[0,25:21] */ +static UChar ifieldRegXT ( UInt instr ) +{ + UChar upper_bit = toUChar (IFIELD (instr, 0, 1)); + UChar lower_bits = toUChar (IFIELD (instr, 21, 5)); + return (upper_bit << 5) | lower_bits; +} + +/* Extract XS (store source register) field, instr[0,25:21] */ +static inline UChar ifieldRegXS ( UInt instr ) +{ + return ifieldRegXT ( instr ); +} + /* Extract RA (1st source register) field, instr[20:16] */ static UChar ifieldRegA ( UInt instr ) { return toUChar( IFIELD( instr, 16, 5 ) ); } +/* Extract XA (1st source register) field, instr[2,20:16] */ +static UChar ifieldRegXA ( UInt instr ) +{ + UChar upper_bit = toUChar (IFIELD (instr, 2, 1)); + UChar lower_bits = toUChar (IFIELD (instr, 16, 5)); + return (upper_bit << 5) | lower_bits; +} + /* Extract RB (2nd source register) field, instr[15:11] */ static UChar ifieldRegB ( UInt instr ) { return toUChar( IFIELD( instr, 11, 5 ) ); } +/* Extract XB (2nd source register) field, instr[1,15:11] */ +static UChar ifieldRegXB ( UInt instr ) +{ + UChar upper_bit = toUChar (IFIELD (instr, 1, 1)); + UChar lower_bits = toUChar (IFIELD (instr, 11, 5)); + return (upper_bit << 5) | lower_bits; +} + /* Extract RC (3rd source register) field, instr[10:6] */ static UChar ifieldRegC ( UInt instr ) { return toUChar( IFIELD( instr, 6, 5 ) ); } -/* Extract 2nd lowest bit, instr[1] */ +/* Extract bit 10, instr[10] */ static UChar ifieldBIT10 ( UInt instr ) { return toUChar( IFIELD( instr, 10, 1 ) ); } @@ -297,6 +328,16 @@ static UInt ifieldUIMM26 ( UInt instr ) return instr & 0x3FFFFFF; } +/* Extract DM field, instr[9:8] */ +static UChar ifieldDM ( UInt instr ) { + return toUChar( IFIELD( instr, 8, 2 ) ); +} + +/* Extract SHW field, instr[9:8] */ +static inline UChar ifieldSHW ( UInt instr ) +{ + return ifieldDM ( instr ); +} /*------------------------------------------------------------*/ /*--- Guest-state identifiers ---*/ @@ -506,6 +547,12 @@ static IRExpr* mkU64 ( ULong i ) return IRExpr_Const(IRConst_U64(i)); } +static IRExpr* mkV128 ( UShort i ) +{ + vassert(i == 0 || i == 0xffff); + return IRExpr_Const(IRConst_V128(i)); +} + /* This generates a normal (non load-linked) load. */ static IRExpr* loadBE ( IRType ty, IRExpr* addr ) { @@ -932,43 +979,44 @@ static void putIReg ( UInt archreg, IREx } +/* Floating point registers are mapped to VSX registers[0..31].
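Each FPR occupies doubleword 0 (the first eight bytes) of the corresponding VSR, so getFReg/putFReg address the same guest-state slots as the VSX accessors added below.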
*/ static Int floatGuestRegOffset ( UInt archreg ) { vassert(archreg < 32); switch (archreg) { - case 0: return offsetofPPCGuestState(guest_FPR0); - case 1: return offsetofPPCGuestState(guest_FPR1); - case 2: return offsetofPPCGuestState(guest_FPR2); - case 3: return offsetofPPCGuestState(guest_FPR3); - case 4: return offsetofPPCGuestState(guest_FPR4); - case 5: return offsetofPPCGuestState(guest_FPR5); - case 6: return offsetofPPCGuestState(guest_FPR6); - case 7: return offsetofPPCGuestState(guest_FPR7); - case 8: return offsetofPPCGuestState(guest_FPR8); - case 9: return offsetofPPCGuestState(guest_FPR9); - case 10: return offsetofPPCGuestState(guest_FPR10); - case 11: return offsetofPPCGuestState(guest_FPR11); - case 12: return offsetofPPCGuestState(guest_FPR12); - case 13: return offsetofPPCGuestState(guest_FPR13); - case 14: return offsetofPPCGuestState(guest_FPR14); - case 15: return offsetofPPCGuestState(guest_FPR15); - case 16: return offsetofPPCGuestState(guest_FPR16); - case 17: return offsetofPPCGuestState(guest_FPR17); - case 18: return offsetofPPCGuestState(guest_FPR18); - case 19: return offsetofPPCGuestState(guest_FPR19); - case 20: return offsetofPPCGuestState(guest_FPR20); - case 21: return offsetofPPCGuestState(guest_FPR21); - case 22: return offsetofPPCGuestState(guest_FPR22); - case 23: return offsetofPPCGuestState(guest_FPR23); - case 24: return offsetofPPCGuestState(guest_FPR24); - case 25: return offsetofPPCGuestState(guest_FPR25); - case 26: return offsetofPPCGuestState(guest_FPR26); - case 27: return offsetofPPCGuestState(guest_FPR27); - case 28: return offsetofPPCGuestState(guest_FPR28); - case 29: return offsetofPPCGuestState(guest_FPR29); - case 30: return offsetofPPCGuestState(guest_FPR30); - case 31: return offsetofPPCGuestState(guest_FPR31); + case 0: return offsetofPPCGuestState(guest_VSR0); + case 1: return offsetofPPCGuestState(guest_VSR1); + case 2: return offsetofPPCGuestState(guest_VSR2); + case 3: return offsetofPPCGuestState(guest_VSR3); + case 4: return offsetofPPCGuestState(guest_VSR4); + case 5: return offsetofPPCGuestState(guest_VSR5); + case 6: return offsetofPPCGuestState(guest_VSR6); + case 7: return offsetofPPCGuestState(guest_VSR7); + case 8: return offsetofPPCGuestState(guest_VSR8); + case 9: return offsetofPPCGuestState(guest_VSR9); + case 10: return offsetofPPCGuestState(guest_VSR10); + case 11: return offsetofPPCGuestState(guest_VSR11); + case 12: return offsetofPPCGuestState(guest_VSR12); + case 13: return offsetofPPCGuestState(guest_VSR13); + case 14: return offsetofPPCGuestState(guest_VSR14); + case 15: return offsetofPPCGuestState(guest_VSR15); + case 16: return offsetofPPCGuestState(guest_VSR16); + case 17: return offsetofPPCGuestState(guest_VSR17); + case 18: return offsetofPPCGuestState(guest_VSR18); + case 19: return offsetofPPCGuestState(guest_VSR19); + case 20: return offsetofPPCGuestState(guest_VSR20); + case 21: return offsetofPPCGuestState(guest_VSR21); + case 22: return offsetofPPCGuestState(guest_VSR22); + case 23: return offsetofPPCGuestState(guest_VSR23); + case 24: return offsetofPPCGuestState(guest_VSR24); + case 25: return offsetofPPCGuestState(guest_VSR25); + case 26: return offsetofPPCGuestState(guest_VSR26); + case 27: return offsetofPPCGuestState(guest_VSR27); + case 28: return offsetofPPCGuestState(guest_VSR28); + case 29: return offsetofPPCGuestState(guest_VSR29); + case 30: return offsetofPPCGuestState(guest_VSR30); + case 31: return offsetofPPCGuestState(guest_VSR31); default: break; } 
vpanic("floatGuestRegOffset(ppc)"); /*notreached*/ @@ -988,44 +1036,117 @@ static void putFReg ( UInt archreg, IREx stmt( IRStmt_Put(floatGuestRegOffset(archreg), e) ); } +static Int vsxGuestRegOffset ( UInt archreg ) +{ + vassert(archreg < 64); + switch (archreg) { + case 0: return offsetofPPCGuestState(guest_VSR0); + case 1: return offsetofPPCGuestState(guest_VSR1); + case 2: return offsetofPPCGuestState(guest_VSR2); + case 3: return offsetofPPCGuestState(guest_VSR3); + case 4: return offsetofPPCGuestState(guest_VSR4); + case 5: return offsetofPPCGuestState(guest_VSR5); + case 6: return offsetofPPCGuestState(guest_VSR6); + case 7: return offsetofPPCGuestState(guest_VSR7); + case 8: return offsetofPPCGuestState(guest_VSR8); + case 9: return offsetofPPCGuestState(guest_VSR9); + case 10: return offsetofPPCGuestState(guest_VSR10); + case 11: return offsetofPPCGuestState(guest_VSR11); + case 12: return offsetofPPCGuestState(guest_VSR12); + case 13: return offsetofPPCGuestState(guest_VSR13); + case 14: return offsetofPPCGuestState(guest_VSR14); + case 15: return offsetofPPCGuestState(guest_VSR15); + case 16: return offsetofPPCGuestState(guest_VSR16); + case 17: return offsetofPPCGuestState(guest_VSR17); + case 18: return offsetofPPCGuestState(guest_VSR18); + case 19: return offsetofPPCGuestState(guest_VSR19); + case 20: return offsetofPPCGuestState(guest_VSR20); + case 21: return offsetofPPCGuestState(guest_VSR21); + case 22: return offsetofPPCGuestState(guest_VSR22); + case 23: return offsetofPPCGuestState(guest_VSR23); + case 24: return offsetofPPCGuestState(guest_VSR24); + case 25: return offsetofPPCGuestState(guest_VSR25); + case 26: return offsetofPPCGuestState(guest_VSR26); + case 27: return offsetofPPCGuestState(guest_VSR27); + case 28: return offsetofPPCGuestState(guest_VSR28); + case 29: return offsetofPPCGuestState(guest_VSR29); + case 30: return offsetofPPCGuestState(guest_VSR30); + case 31: return offsetofPPCGuestState(guest_VSR31); + case 32: return offsetofPPCGuestState(guest_VSR32); + case 33: return offsetofPPCGuestState(guest_VSR33); + case 34: return offsetofPPCGuestState(guest_VSR34); + case 35: return offsetofPPCGuestState(guest_VSR35); + case 36: return offsetofPPCGuestState(guest_VSR36); + case 37: return offsetofPPCGuestState(guest_VSR37); + case 38: return offsetofPPCGuestState(guest_VSR38); + case 39: return offsetofPPCGuestState(guest_VSR39); + case 40: return offsetofPPCGuestState(guest_VSR40); + case 41: return offsetofPPCGuestState(guest_VSR41); + case 42: return offsetofPPCGuestState(guest_VSR42); + case 43: return offsetofPPCGuestState(guest_VSR43); + case 44: return offsetofPPCGuestState(guest_VSR44); + case 45: return offsetofPPCGuestState(guest_VSR45); + case 46: return offsetofPPCGuestState(guest_VSR46); + case 47: return offsetofPPCGuestState(guest_VSR47); + case 48: return offsetofPPCGuestState(guest_VSR48); + case 49: return offsetofPPCGuestState(guest_VSR49); + case 50: return offsetofPPCGuestState(guest_VSR50); + case 51: return offsetofPPCGuestState(guest_VSR51); + case 52: return offsetofPPCGuestState(guest_VSR52); + case 53: return offsetofPPCGuestState(guest_VSR53); + case 54: return offsetofPPCGuestState(guest_VSR54); + case 55: return offsetofPPCGuestState(guest_VSR55); + case 56: return offsetofPPCGuestState(guest_VSR56); + case 57: return offsetofPPCGuestState(guest_VSR57); + case 58: return offsetofPPCGuestState(guest_VSR58); + case 59: return offsetofPPCGuestState(guest_VSR59); + case 60: return offsetofPPCGuestState(guest_VSR60); + case 61: 
return offsetofPPCGuestState(guest_VSR61); + case 62: return offsetofPPCGuestState(guest_VSR62); + case 63: return offsetofPPCGuestState(guest_VSR63); + default: break; + } + vpanic("vsxGuestRegOffset(ppc)"); /*notreached*/ +} +/* Vector registers are mapped to VSX registers[32..63]. */ static Int vectorGuestRegOffset ( UInt archreg ) { vassert(archreg < 32); switch (archreg) { - case 0: return offsetofPPCGuestState(guest_VR0); - case 1: return offsetofPPCGuestState(guest_VR1); - case 2: return offsetofPPCGuestState(guest_VR2); - case 3: return offsetofPPCGuestState(guest_VR3); - case 4: return offsetofPPCGuestState(guest_VR4); - case 5: return offsetofPPCGuestState(guest_VR5); - case 6: return offsetofPPCGuestState(guest_VR6); - case 7: return offsetofPPCGuestState(guest_VR7); - case 8: return offsetofPPCGuestState(guest_VR8); - case 9: return offsetofPPCGuestState(guest_VR9); - case 10: return offsetofPPCGuestState(guest_VR10); - case 11: return offsetofPPCGuestState(guest_VR11); - case 12: return offsetofPPCGuestState(guest_VR12); - case 13: return offsetofPPCGuestState(guest_VR13); - case 14: return offsetofPPCGuestState(guest_VR14); - case 15: return offsetofPPCGuestState(guest_VR15); - case 16: return offsetofPPCGuestState(guest_VR16); - case 17: return offsetofPPCGuestState(guest_VR17); - case 18: return offsetofPPCGuestState(guest_VR18); - case 19: return offsetofPPCGuestState(guest_VR19); - case 20: return offsetofPPCGuestState(guest_VR20); - case 21: return offsetofPPCGuestState(guest_VR21); - case 22: return offsetofPPCGuestState(guest_VR22); - case 23: return offsetofPPCGuestState(guest_VR23); - case 24: return offsetofPPCGuestState(guest_VR24); - case 25: return offsetofPPCGuestState(guest_VR25); - case 26: return offsetofPPCGuestState(guest_VR26); - case 27: return offsetofPPCGuestState(guest_VR27); - case 28: return offsetofPPCGuestState(guest_VR28); - case 29: return offsetofPPCGuestState(guest_VR29); - case 30: return offsetofPPCGuestState(guest_VR30); - case 31: return offsetofPPCGuestState(guest_VR31); + case 0: return offsetofPPCGuestState(guest_VSR32); + case 1: return offsetofPPCGuestState(guest_VSR33); + case 2: return offsetofPPCGuestState(guest_VSR34); + case 3: return offsetofPPCGuestState(guest_VSR35); + case 4: return offsetofPPCGuestState(guest_VSR36); + case 5: return offsetofPPCGuestState(guest_VSR37); + case 6: return offsetofPPCGuestState(guest_VSR38); + case 7: return offsetofPPCGuestState(guest_VSR39); + case 8: return offsetofPPCGuestState(guest_VSR40); + case 9: return offsetofPPCGuestState(guest_VSR41); + case 10: return offsetofPPCGuestState(guest_VSR42); + case 11: return offsetofPPCGuestState(guest_VSR43); + case 12: return offsetofPPCGuestState(guest_VSR44); + case 13: return offsetofPPCGuestState(guest_VSR45); + case 14: return offsetofPPCGuestState(guest_VSR46); + case 15: return offsetofPPCGuestState(guest_VSR47); + case 16: return offsetofPPCGuestState(guest_VSR48); + case 17: return offsetofPPCGuestState(guest_VSR49); + case 18: return offsetofPPCGuestState(guest_VSR50); + case 19: return offsetofPPCGuestState(guest_VSR51); + case 20: return offsetofPPCGuestState(guest_VSR52); + case 21: return offsetofPPCGuestState(guest_VSR53); + case 22: return offsetofPPCGuestState(guest_VSR54); + case 23: return offsetofPPCGuestState(guest_VSR55); + case 24: return offsetofPPCGuestState(guest_VSR56); + case 25: return offsetofPPCGuestState(guest_VSR57); + case 26: return offsetofPPCGuestState(guest_VSR58); + case 27: return 
offsetofPPCGuestState(guest_VSR59); + case 28: return offsetofPPCGuestState(guest_VSR60); + case 29: return offsetofPPCGuestState(guest_VSR61); + case 30: return offsetofPPCGuestState(guest_VSR62); + case 31: return offsetofPPCGuestState(guest_VSR63); default: break; } vpanic("vextorGuestRegOffset(ppc)"); /*notreached*/ @@ -1045,6 +1166,22 @@ static void putVReg ( UInt archreg, IREx stmt( IRStmt_Put(vectorGuestRegOffset(archreg), e) ); } +/* Get contents of VSX guest register */ +static IRExpr* getVSReg ( UInt archreg ) +{ + vassert(archreg < 64); + return IRExpr_Get( vsxGuestRegOffset(archreg), Ity_V128 ); +} + +/* Ditto, but write to a VSX reg instead. */ +static void putVSReg ( UInt archreg, IRExpr* e ) +{ + vassert(archreg < 64); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); + stmt( IRStmt_Put(vsxGuestRegOffset(archreg), e) ); +} + + static Int guestCR321offset ( UInt cr ) { switch (cr) { @@ -1075,6 +1212,42 @@ static Int guestCR0offset ( UInt cr ) } } +/* Generate an IR sequence to do a popcount operation on the supplied + * IRTemp, and return an IRTemp holding the result. + */ +static IRTemp gen_POPCOUNT ( IRTemp src ) +{ + Int i, shift[6]; + IRTemp old = IRTemp_INVALID; + IRTemp nyu = IRTemp_INVALID; + IRTemp mask[6]; + + vassert(typeOfIRExpr(irsb->tyenv,mkexpr(src)) == Ity_I64); + for (i = 0; i < 6; i++) { + mask[i] = newTemp( Ity_I64 ); + shift[i] = 1 << i; + } + assign( mask[0], mkU64( 0x5555555555555555ULL ) ); + assign( mask[1], mkU64( 0x3333333333333333ULL ) ); + assign( mask[2], mkU64( 0x0F0F0F0F0F0F0F0FULL ) ); + assign( mask[3], mkU64( 0x00FF00FF00FF00FFULL ) ); + assign( mask[4], mkU64( 0x0000FFFF0000FFFFULL ) ); + assign( mask[5], mkU64( 0x00000000FFFFFFFFULL ) ); + old = src; + for (i = 0; i < 6; i++) { + nyu = newTemp( Ity_I64 ); + assign( nyu, + binop( Iop_Add64, + binop( Iop_And64, mkexpr( old ), mkexpr( mask[i] ) ), + binop( Iop_And64, + binop( Iop_Shr64, mkexpr( old ), mkU8( shift[i] ) ), + mkexpr( mask[i] ) ) ) ); + old = nyu; + } + return nyu; +} + + // ROTL(src32/64, rot_amt5/6) static IRExpr* /* :: Ity_I32/64 */ ROTL ( IRExpr* src, IRExpr* rot_amt ) @@ -2345,6 +2518,80 @@ static void putGST_field ( PPC_GST reg, } } +/*------------------------------------------------------------*/ +/* Helpers for VSX instructions that do floating point + * operations and need to determine if a src contains a + * special FP value. 
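+ * An IEEE-754 double has the bit layout [sign:1 | exponent:11 | fraction:52];
+ * the helpers below classify a value purely by testing the exponent and
+ * fraction fields of its 64-bit integer image.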
+ * + *------------------------------------------------------------*/ + +#define NONZERO_FRAC_MASK 0x000fffffffffffffULL +#define FP_FRAC_PART(x) binop( Iop_And64, \ + mkexpr( x ), \ + mkU64( NONZERO_FRAC_MASK ) ) + +// Returns exponent part of floating point as I32 +static IRExpr * fp_exp_part(IRTemp src) +{ + IRExpr * exp; + if (!mode64) + exp = binop( Iop_And32, binop( Iop_Shr32, unop( Iop_64HIto32, + mkexpr( src ) ), + mkU8( 20 ) ), mkU32( 0x7ff ) ); + else + exp = unop( Iop_64to32, + binop( Iop_And64, + binop( Iop_Shr64, mkexpr( src ), mkU8( 52 ) ), + mkU64( 0x7ff ) ) ); + return exp; +} + + +// Infinity: exp = 7ff and fraction is zero; s = 0/1 +static IRExpr * is_Inf(IRTemp src) +{ + IRExpr * Inf_exp, * hi32, * low32; + IRTemp frac_part = newTemp(Ity_I64); + + assign( frac_part, FP_FRAC_PART(src) ); + Inf_exp = binop( Iop_CmpEQ32, fp_exp_part( src ), mkU32( 0x7ff ) ); + hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) ); + low32 = unop( Iop_64to32, mkexpr( frac_part ) ); + return mkAND1( Inf_exp, binop( Iop_CmpEQ32, binop( Iop_Or32, low32, hi32 ), + mkU32( 0 ) ) ); +} + +// Zero: exp is zero and fraction is zero; s = 0/1 +static IRExpr * is_Zero(IRTemp src) +{ +#define SIGN_MASK 0x7fffffffffffffffULL + IRExpr * hi32, * low32; + IRTemp sign_less_part = newTemp(Ity_I64); + + assign( sign_less_part, binop( Iop_And64, mkexpr( src ), mkU64( SIGN_MASK ) ) ); + hi32 = unop( Iop_64HIto32, mkexpr( sign_less_part ) ); + low32 = unop( Iop_64to32, mkexpr( sign_less_part ) ); + return binop( Iop_CmpEQ32, binop( Iop_Or32, low32, hi32 ), + mkU32( 0 ) ); +} + +/* SNAN: s = 1/0; exp = 0x7ff; fraction is nonzero, with highest bit '1' + * QNAN: s = 1/0; exp = 0x7ff; fraction is nonzero, with highest bit '0' + * This function returns an IRExpr value of '1' for any type of NaN. + */ +static IRExpr * is_NaN(IRTemp src) +{ + IRExpr * NaN_exp, * hi32, * low32; + IRTemp frac_part = newTemp(Ity_I64); + + assign( frac_part, FP_FRAC_PART(src) ); + hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) ); + low32 = unop( Iop_64to32, mkexpr( frac_part ) ); + NaN_exp = binop( Iop_CmpEQ32, fp_exp_part( src ), mkU32( 0x7ffULL ) ); + + return mkAND1( NaN_exp, binop( Iop_CmpNE32, binop( Iop_Or32, low32, hi32 ), + mkU32( 0 ) ) ); +} /*------------------------------------------------------------*/ @@ -3264,6 +3511,13 @@ static Bool dis_int_logic ( UInt theInst putFReg( rS_addr, mkexpr(frA)); return True; } + case 0x1FA: // popcntd (population count doubleword) + { + DIP("popcntd r%u,r%u\n", rA_addr, rS_addr); + IRTemp result = gen_POPCOUNT(rS); + putIReg( rA_addr, mkexpr(result) ); + return True; + } default: vex_printf("dis_int_logic(ppc)(opc2)\n"); @@ -5485,7 +5739,23 @@ static Bool dis_int_ldst_rev ( UInt theI putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2), /* Signed */False) ); break; - + + case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed) + { + IRExpr * nextAddr; + IRTemp w3 = newTemp( Ity_I32 ); + IRTemp w4 = newTemp( Ity_I32 ); + DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + assign( w1, loadBE( Ity_I32, mkexpr( EA ) ) ); + assign( w2, gen_byterev32( w1 ) ); + nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), + ty == Ity_I64 ?
mkU64( 4 ) : mkU32( 4 ) ); + assign( w3, loadBE( Ity_I32, nextAddr ) ); + assign( w4, gen_byterev32( w3 ) ); + putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) ); + break; + } + case 0x396: // sthbrx (Store Half Word Byte-Reverse Indexed, PPC32 p523) DIP("sthbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) ); @@ -6059,6 +6329,17 @@ static Bool dis_fp_load ( UInt theInstr binop(Iop_32HLto64, mkexpr(iHi), mkexpr(iLo))) ); break; + case 0x377: // lfiwzx (Load floating-point as integer word, zero indexed) + { + IRTemp dw = newTemp( Ity_I64 ); + DIP("lfiwzx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr); + assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) ); + assign( iLo, loadBE(Ity_I32, mkexpr(EA)) ); + assign( dw, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( iLo ) ) ); + putFReg( frD_addr, unop( Iop_ReinterpI64asF64, mkexpr( dw ) ) ); + break; + } + default: vex_printf("dis_fp_load(ppc)(opc2)\n"); return False; @@ -6591,7 +6872,238 @@ static Bool dis_fp_multadd ( UInt theIns return True; } +/* + * fe_flag is set to 1 if any of the following conditions occurs: + * - The double-precision floating-point operand in register FRA is a NaN or an + * Infinity. + * - The double-precision floating-point operand in register FRB is a Zero, a + * NaN, or an Infinity. + * - e_b is less than or equal to -1022. + * - e_b is greater than or equal to 1021. + * - The double-precision floating-point operand in register FRA is not a zero + * and the difference, e_a - e_b, is greater than or equal to 1023. + * - The double-precision floating-point operand in register FRA is not a zero + * and the difference, e_a - e_b, is less than or equal to -1021. + * - The double-precision floating-point operand in register FRA is not a zero + * and e_a is less than or equal to -970 + * Otherwise fe_flag is set to 0. + * + * fg_flag is set to 1 if either of the following conditions occurs. + * - The double-precision floating-point operand in register FRA is an Infinity. + * - The double-precision floating-point operand in register FRB is a Zero, an + * Infinity, or a denormalized value. + * Otherwise fg_flag is set to 0. + * + */ +static Bool dis_fp_ftdiv ( UInt theInstr ) +{ + UChar opc1 = ifieldOPC(theInstr); + UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) ); + UChar b21to22 = toUChar( IFIELD( theInstr, 21, 2 ) ); + UChar frA_addr = ifieldRegA(theInstr); + UChar frB_addr = ifieldRegB(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + // The following temps are for holding intermediate results + IRTemp frA_I64 = newTemp(Ity_I64); + IRTemp frB_I64 = newTemp(Ity_I64); + IRTemp e_a = newTemp(Ity_I32); + IRTemp e_b = newTemp(Ity_I32); + IRTemp frA_exp_shR = newTemp(Ity_I32); + IRTemp frB_exp_shR = newTemp(Ity_I32); + + UInt bias = 1023; + + /* The following variables hold boolean results from tests + * that are OR'ed together for setting the fe_ and fg_ flags. + * For some cases, the booleans are used more than once, so + * I make those IRTemp's instead of IRExpr's. + */ + IRExpr * fraNaN, * frbNaN, * frbDenorm; + IRExpr * eb_LTE, * eb_GTE, * ea_eb_GTE, * ea_eb_LTE, * ea_LTE; + IRTemp fraInf_tmp = newTemp(Ity_I1); + IRTemp frbZero_tmp = newTemp(Ity_I1); + IRTemp frbInf_tmp = newTemp(Ity_I1); + IRTemp fraNotZero_tmp = newTemp(Ity_I1); + +/* The following are the flags that are set by OR'ing the results of + * all the tests done for ftdiv. These flags are the input to the specified CR.
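+ * Worked example (illustrative values only): for FRB = 2.0 the biased
+ * exponent field is 0x400, so e_b = 0x400 - 1023 = 1, and none of the
+ * e_b range tests below fire.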
+ */ + IRExpr * fe_flag, * fl_flag, * fg_flag; + + + if (opc1 != 0x3F || b21to22 != 0 || b0 != 0) { + vex_printf("dis_fp_ftdiv(ppc)(instr)\n"); + return False; + } + // Create temps that will be used throughout the following tests. + assign( frA_I64, unop( Iop_ReinterpF64asI64, getFReg( frA_addr ) ) ); + assign( frB_I64, unop( Iop_ReinterpF64asI64, getFReg( frB_addr ) ) ); + assign( frA_exp_shR, fp_exp_part( frA_I64 ) ); + assign( frB_exp_shR, fp_exp_part( frB_I64 ) ); + /* Let e_[a|b] be the unbiased exponent: i.e. exp - 1023. */ + assign(e_a, binop( Iop_Sub32, mkexpr(frA_exp_shR), mkU32( bias ) )); + assign(e_b, binop( Iop_Sub32, mkexpr(frB_exp_shR), mkU32( bias ) )); + + + ////////////////// fe_flag tests BEGIN ////////////////////// + /* We first do all tests that may result in setting fe_flag to '1'. */ + + /* + * Test if the double-precision floating-point operand in register FRA is + * a NaN: + * exp = 7ff and fraction is non-zero; s = 0/1 + */ + fraNaN = is_NaN(frA_I64); + /* + * Test if the double-precision floating-point operand in register FRA is + * an Infinity. + * exp = 7ff and fraction is zero; s = 0/1 + */ + assign(fraInf_tmp, is_Inf(frA_I64)); + + /* + * Test if the double-precision floating-point operand in register FRB is + * a NaN: + * exp = 7ff and fraction is non-zero; s = 0/1 + */ + frbNaN = is_NaN(frB_I64); + /* + * Test if the double-precision floating-point operand in register FRB is + * an Infinity. + * exp = 7ff and fraction is zero; s = 0/1 + */ + assign( frbInf_tmp, is_Inf(frB_I64) ); + /* + * Test if the double-precision floating-point operand in register FRB is + * a Zero. + * exp is zero and fraction is zero; s = 0/1 + */ + assign( frbZero_tmp, is_Zero(frB_I64) ); + + /* + * Test if e_b <= -1022 + */ + { + UInt test_value = 0xfffffc02; //Int test_value = -1022; + eb_LTE = binop(Iop_CmpLE32S, mkexpr(e_b), mkU32(test_value)); + } + + /* + * Test if e_b >= 1021 + * ==> 1021 < e_b + */ + { + Int test_value = 1021; + eb_GTE = binop(Iop_CmpLT32S, mkU32(test_value), mkexpr(e_b)); + } + + /* + * Test if FRA != Zero and (e_a - e_b) >= 1023 + * ==> FRA != Zero && (1023 < (e_a - e_b)) + */ + assign( fraNotZero_tmp, unop( Iop_Not1, is_Zero( frA_I64 ) ) ); + ea_eb_GTE = mkAND1( mkexpr( fraNotZero_tmp ), + binop( Iop_CmpLT32S, mkU32( bias ), + binop( Iop_Sub32, mkexpr( e_a ), + mkexpr( e_b ) ) ) ); + + /* + * Test if FRA != Zero and (e_a - e_b) <= -1021 + */ + { + UInt test_value = 0xfffffc03; //Int test_value = -1021; + + ea_eb_LTE = mkAND1( mkexpr( fraNotZero_tmp ), + binop( Iop_CmpLE32S, + binop( Iop_Sub32, + mkexpr( e_a ), + mkexpr( e_b ) ), + mkU32( test_value ) ) ); + } + + /* + * Test if FRA != Zero and e_a <= -970 + */ + { + UInt test_value = 0xfffffc36; //Int test_value = -970; + + ea_LTE = mkAND1( mkexpr( fraNotZero_tmp ), binop( Iop_CmpLE32S, + mkexpr( e_a ), + mkU32( test_value ) ) ); + } + ////////////////// fe_flag tests END ////////////////////// + + ////////////////// fg_flag tests BEGIN ////////////////////// + /* + * The following tests were already performed above in the fe_flag + * tests. So these conditions will result in both fe_ and fg_ flags + * being set. + * - Test if FRA is an Infinity + * - Test if FRB is Zero + * - Test if FRB is an Infinity + */ + + /* + * Test if FRB holds a denormalized value. A denormalized value is one where + * the exp is 0 and the fraction is non-zero.
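+ * (Illustrative example: the 64-bit image 0x0000000000000001, i.e. exponent
+ * field 0 and fraction 1, is the smallest positive denormal.)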
+ */ + { + IRExpr * hi32, * low32, * fraction_is_nonzero; + IRTemp frac_part = newTemp(Ity_I64); + + assign( frac_part, FP_FRAC_PART(frB_I64) ); + hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) ); + low32 = unop( Iop_64to32, mkexpr( frac_part ) ); + fraction_is_nonzero = binop( Iop_CmpNE32, binop( Iop_Or32, low32, hi32 ), + mkU32( 0 ) ); + frbDenorm = mkAND1( binop( Iop_CmpEQ32, mkexpr( frB_exp_shR ), + mkU32( 0x0ULL ) ), fraction_is_nonzero ); + + } + ////////////////// fg_flag tests END ////////////////////// + + ///////////////////////// + /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0 + * where fl_flag == 1 on ppc64. + */ + fl_flag = unop(Iop_Not32, mkU32(0xFFFFFE)); + + + fe_flag + = mkOR1( + fraNaN, + mkOR1( + mkexpr( fraInf_tmp ), + mkOR1( + mkexpr( frbZero_tmp ), + mkOR1( + frbNaN, + mkOR1( + mkexpr( frbInf_tmp ), + mkOR1( eb_LTE, + mkOR1( eb_GTE, + mkOR1( ea_eb_GTE, + mkOR1( ea_eb_LTE, + ea_LTE ) ) ) ) ) ) ) ) ); + + fe_flag = unop(Iop_1Uto32, fe_flag); + + fg_flag = mkOR1( mkexpr( fraInf_tmp ), mkOR1( mkexpr( frbZero_tmp ), + mkOR1( mkexpr( frbInf_tmp ), + frbDenorm ) ) ); + fg_flag = unop(Iop_1Uto32, fg_flag); + + putGST_field( PPC_GST_CR, binop( Iop_Or32, + binop( Iop_Or32, + binop( Iop_Shl32, fl_flag, mkU8( 3 ) ), + binop( Iop_Shl32, fg_flag, mkU8( 2 ) ) ), + binop( Iop_Shl32, fe_flag, mkU8( 1 ) ) ), crfD ); + + DIP("ftdiv crf%d,fr%u,fr%u\n", crfD, frA_addr, frB_addr); + return True; +} /* Floating Point Compare Instructions @@ -6721,13 +7233,34 @@ static Bool dis_fp_round ( UInt theInstr simulating exceptions, the exception status will appear to be zero. Hence cr1 should be cleared if this is a . form insn. */ Bool clear_CR1 = True; - - if (opc1 != 0x3F || b16to20 != 0) { + if ((!(opc1 == 0x3F || opc1 == 0x3B)) || b16to20 != 0) { vex_printf("dis_fp_round(ppc)(instr)\n"); return False; } assign( frB, getFReg(frB_addr)); + // TODO : add support here for fcfdus + if (opc1 == 0x3B) { + /* The fcfid[u]s instructions (from ISA 2.06) are a bit odd because + * they're very similar to the other instructions handled here, but have + * a different primary opcode. + */ + switch (opc2) { + case 0x34E: // fcfids (Float convert from signed DWord to single precision) + DIP("fcfids%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) ); + assign( frD, binop( Iop_RoundF64toF32, rm, binop( Iop_I64StoF64, rm, + mkexpr( r_tmp64 ) ) ) ); + goto putFR; + + case 0x3Ce: // fcfidus (Float convert from unsigned DWord to single precision) + DIP("fcfidus%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) ); + assign( frD, binop( Iop_I64UtoF32, rm, mkexpr( r_tmp64 ) ) ); + goto putFR; + } + } + switch (opc2) { case 0x00C: // frsp (Float Round to Single, PPC32 p423) @@ -6780,6 +7313,12 @@ static Bool dis_fp_round ( UInt theInstr binop(Iop_I64StoF64, rm, mkexpr(r_tmp64)) ); break; + case 0x3CE: // fcfidu (Float convert from unsigned DWord) + DIP("fcfidu%s fr%u,fr%u\n", flag_rC ? 
".":"", frD_addr, frB_addr); + assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) ); + assign( frD, binop( Iop_I64UtoF64, rm, mkexpr( r_tmp64 ) ) ); + break; + case 0x188: case 0x1A8: case 0x1C8: case 0x1E8: // frin, friz, frip, frim switch(opc2) { case 0x188: // frin (Floating Round to Integer Nearest) @@ -6830,7 +7369,7 @@ static Bool dis_fp_round ( UInt theInstr vex_printf("dis_fp_round(ppc)(opc2)\n"); return False; } - +putFR: putFReg( frD_addr, mkexpr(frD) ); if (set_FPRF) { @@ -7271,6 +7810,722 @@ static Bool dis_av_procctl ( UInt theIns } /* + * + */ +static Bool +dis_vx_conv ( UInt theInstr, UInt opc2 ) +{ + /* XX2-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar XT = ifieldRegXT( theInstr ); + UChar XB = ifieldRegXB( theInstr ); + IRTemp frB = newTemp(Ity_F64); + IRTemp r_tmp64 = newTemp(Ity_I64); + IRExpr* rm = get_IR_roundingmode(); + + if (opc1 != 0x3C) { + vex_printf( "dis_vx_conv(ppc)(instr)\n" ); + return False; + } + + assign(frB, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB )))); + /* For all the VSX convert instructions, the contents of doubleword element 1 + * of VSX[XT] are undefined after the operation; therefore, we can simply + * move the entire array element where it makes sense to do so. + */ + + switch (opc2) { + case 0x2B0: + // xscvdpsxds (VSX Scalar truncate Double-Precision to integer and Convert + // to Signed Integer Doubleword format with Saturate) + DIP("xscvdpsxds fr%u,fr%u\n", (UInt)XT, (UInt)XB); + putVSReg( XT, + binop( Iop_64HLtoV128, binop( Iop_F64toI64S, + mkU32( Irrm_ZERO ), + mkexpr( frB ) ), mkU64( 0 ) ) ); + break; + case 0x2F0: + // xscvsxddp (VSX Scalar Convert and round Signed Integer Doubleword to + // Double-Precision format) + DIP("xscvsxddp v%u,v%u\n", (UInt)XT, (UInt)XB); + + assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) ); + putVSReg( XT, + binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + binop( Iop_I64StoF64, rm, + mkexpr( r_tmp64 ) ) ), + mkU64( 0 ) ) ); + break; + case 0x2D0: + // xscvuxddp (VSX Scalar Convert and round Unsigned Integer Doubleword to + // Double-Precision format) + DIP("xscvuxddp v%u,v%u\n", (UInt)XT, (UInt)XB); + + assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) ); + putVSReg( XT, + binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + binop( Iop_I64UtoF64, rm, + mkexpr( r_tmp64 ) ) ), + mkU64( 0 ) ) ); + break; + + default: + vex_printf( "dis_vx_conv(ppc)(opc2)\n" ); + return False; + } + return True; +} + +/* + * VSX Scalar Floating Point Arithmetic Instructions + */ +static Bool +dis_vx_arith ( UInt theInstr, UInt opc2 ) +{ + /* XX3-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar XT = ifieldRegXT( theInstr ); + UChar XA = ifieldRegXA( theInstr ); + UChar XB = ifieldRegXB( theInstr ); + IRExpr* rm = get_IR_roundingmode(); + IRTemp frA = newTemp(Ity_F64); + IRTemp frB = newTemp(Ity_F64); + + if (opc1 != 0x3C) { + vex_printf( "dis_vx_arith(ppc)(instr)\n" ); + return False; + } + + assign(frA, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XA )))); + assign(frB, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB )))); + + /* For all the VSX sclar arithmetic instructions, the contents of doubleword element 1 + * of VSX[XT] are undefined after the operation; therefore, we can simply set + * element to zero where it makes sense to do so. 
*/ + switch (opc2) { + case 0x080: // xsadddp (VSX scalar add double-precision) + DIP("xsadddp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + triop( Iop_AddF64, rm, + mkexpr( frA ), + mkexpr( frB ) ) ), + mkU64( 0 ) ) ); + break; + case 0x0E0: // xsdivdp (VSX scalar divide double-precision) + DIP("xsdivdp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + triop( Iop_DivF64, rm, + mkexpr( frA ), + mkexpr( frB ) ) ), + mkU64( 0 ) ) ); + break; + case 0x084: case 0x0A4: // xsmaddadp, xsmaddmdp (VSX scalar multiply-add double-precision) + { + IRTemp frT = newTemp(Ity_F64); + Bool mdp = opc2 == 0x0A4; + DIP("xsmadd%sdp v%d,v%d,v%d\n", mdp ? "m" : "a", (UInt)XT, (UInt)XA, (UInt)XB); + assign( frT, unop( Iop_ReinterpI64asF64, unop( Iop_V128HIto64, + getVSReg( XT ) ) ) ); + putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + qop( Iop_MAddF64, rm, + mkexpr( frA ), + mkexpr( mdp ? frT : frB ), + mkexpr( mdp ? frB : frT ) ) ), + mkU64( 0 ) ) ); + break; + } + case 0x0C4: case 0x0E4: // xsmsubadp, xsmsubmdp (VSX scalar multiply-subtract double-precision) + { + IRTemp frT = newTemp(Ity_F64); + Bool mdp = opc2 == 0x0E4; + DIP("xsmsub%sdp v%d,v%d,v%d\n", mdp ? "m" : "a", (UInt)XT, (UInt)XA, (UInt)XB); + assign( frT, unop( Iop_ReinterpI64asF64, unop( Iop_V128HIto64, + getVSReg( XT ) ) ) ); + putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + qop( Iop_MSubF64, rm, + mkexpr( frA ), + mkexpr( mdp ? frT : frB ), + mkexpr( mdp ? frB : frT ) ) ), + mkU64( 0 ) ) ); + break; + } + case 0x284: case 0x2A4: // xsnmaddadp, xsnmaddmdp (VSX scalar negative multiply-add double-precision) + { + /* TODO: mpj -- Naturally, I expected to be able to leverage the implementation + * of fnmadd and use pretty much the same code. However, that code has a bug in the + * way it blindly negates the signbit, even if the floating point result is a NaN. + * So, the TODO is to fix fnmadd (which I'll do in a different patch). + */ + ULong signbit_mask = 0x8000000000000000ULL; + Bool mdp = opc2 == 0x2A4; + IRTemp frT = newTemp(Ity_F64); + IRTemp maddResult = newTemp(Ity_I64); + IRTemp negatedResult = newTemp(Ity_I64); + IRTemp signbit_32 = newTemp(Ity_I32); + IRTemp resultantSignbit = newTemp(Ity_I1); + + DIP("xsnmadd%sdp v%d,v%d,v%d\n", mdp ? "m" : "a", (UInt)XT, (UInt)XA, (UInt)XB); + assign( frT, unop( Iop_ReinterpI64asF64, unop( Iop_V128HIto64, + getVSReg( XT ) ) ) ); + assign( maddResult, unop( Iop_ReinterpF64asI64, qop( Iop_MAddF64, rm, + mkexpr( frA ), + mkexpr( mdp ? frT : frB ), + mkexpr( mdp ? frB : frT ) ) ) ); + assign( signbit_32, binop( Iop_Shr32, + unop( Iop_64HIto32, + binop( Iop_And64, mkexpr( maddResult ), + mkU64( signbit_mask ) ) ), + mkU8( 31 ) ) ); + /* We negate the signbit if and only if the intermediate result from the + * multiply-add was NOT a NaN. This is an XNOR predicate.
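+ * Truth table for the resulting sign bit:
+ *   old sign  is_NaN | new sign
+ *      0        0    |    1      (negated)
+ *      1        0    |    0      (negated)
+ *      0        1    |    0      (unchanged)
+ *      1        1    |    1      (unchanged)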
+ */ + assign( resultantSignbit, + unop( Iop_Not1, + binop( Iop_CmpEQ32, + binop( Iop_Xor32, mkexpr( signbit_32 ), + unop( Iop_1Uto32, + is_NaN( maddResult ) ) ), + mkU32( 1 ) ) ) ); + + assign( negatedResult, + binop( Iop_Or64, binop( Iop_And64, mkexpr( maddResult ), + mkU64( ~signbit_mask ) ), + binop( Iop_32HLto64, + binop( Iop_Shl32, + unop( Iop_1Uto32, + mkexpr( resultantSignbit ) ), + mkU8( 31 ) ), mkU32( 0 ) ) ) ); + + putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( negatedResult ), + mkU64( 0 ) ) ); + break; + } + case 0x0C0: // xsmuldp (VSX Scalar Multiply Double-Precision) + DIP("xsmuldp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + triop( Iop_MulF64, rm, + mkexpr( frA ), + mkexpr( frB ) ) ), + mkU64( 0 ) ) ); + break; + case 0x0A0: // xssubdp (VSX Scalar Subtract Double-Precision) + DIP("xssubdp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64, + triop( Iop_SubF64, rm, + mkexpr( frA ), + mkexpr( frB ) ) ), + mkU64( 0 ) ) ); + break; + + default: + vex_printf( "dis_vx_arith(ppc)(opc2)\n" ); + return False; + } + return True; +} + +/* + * VSX Floating Point Compare Instructions + */ +static Bool +dis_vx_cmp( UInt theInstr, UInt opc2 ) +{ + /* XX3-Form and XX2-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) ); + IRTemp ccPPC32 = newTemp(Ity_I32); + IRTemp ccIR = newTemp(Ity_I32); + UChar XA = ifieldRegXA ( theInstr ); + UChar XB = ifieldRegXB ( theInstr ); + IRTemp frA = newTemp(Ity_F64); + IRTemp frB = newTemp(Ity_F64); + + if (opc1 != 0x3C) { + vex_printf( "dis_vx_cmp(ppc)(instr)\n" ); + return False; + } + + assign(frA, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XA )))); + assign(frB, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB )))); + switch (opc2) { + case 0x08C: case 0x0AC: // xscmpudp, xscmpodp + /* Note: Differences between xscmpudp and xscmpodp are only in + * exception flag settings, which aren't supported anyway. */ + DIP("xscmp%sdp crf%d,fr%u,fr%u\n", opc2 == 0x08c ? 
"u" : "o", + crfD, (UInt)XA, (UInt)XB); + assign( ccIR, binop(Iop_CmpF64, mkexpr(frA), mkexpr(frB)) ); + + /* Map compare result from IR to PPC32 */ + /* + FP cmp result | PPC | IR + -------------------------- + UN | 0x1 | 0x45 + EQ | 0x2 | 0x40 + GT | 0x4 | 0x00 + LT | 0x8 | 0x01 + */ + + // ccPPC32 = Shl(1, (~(ccIR>>5) & 2) + // | ((ccIR ^ (ccIR>>6)) & 1) + assign( + ccPPC32, + binop( + Iop_Shl32, + mkU32(1), + unop( + Iop_32to8, + binop( + Iop_Or32, + binop( + Iop_And32, + unop( + Iop_Not32, + binop(Iop_Shr32, mkexpr(ccIR), mkU8(5)) + ), + mkU32(2) + ), + binop( + Iop_And32, + binop( + Iop_Xor32, + mkexpr(ccIR), + binop(Iop_Shr32, mkexpr(ccIR), mkU8(6)) + ), + mkU32(1) + ) + ) + ) + ) + ); + + putGST_field( PPC_GST_CR, mkexpr(ccPPC32), crfD ); + + break; + default: + vex_printf( "dis_vx_cmp(ppc)(opc2)\n" ); + return False; + } + return True; +} +/* + * VSX Move Instructions + */ +static Bool +dis_vx_move( UInt theInstr, UInt opc2 ) +{ + /* XX3-Form and XX2-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar XT = ifieldRegXT ( theInstr ); + UChar XA = ifieldRegXA ( theInstr ); + UChar XB = ifieldRegXB ( theInstr ); + IRTemp vA = newTemp( Ity_V128 ); + IRTemp vB = newTemp( Ity_V128 ); + + if (opc1 != 0x3C) { + vex_printf( "dis_vx_move(ppc)(instr)\n" ); + return False; + } + + assign( vA, getVSReg( XA ) ); + assign( vB, getVSReg( XB ) ); + + /* For all the VSX move instructions, the contents of doubleword element 1 + * of VSX[XT] are undefined after the operation; therefore, we can simply + * move the entire array element where it makes sense to do so. + */ + + switch (opc2) { + case 0x2B2: // xsabsdp (VSX scalar absolute value double-precision + { + /* Move abs val of dw 0 of VSX[XB] to dw 0 of VSX[XT]. */ + IRTemp absVal = newTemp(Ity_V128); + assign(absVal, binop(Iop_ShrV128, binop(Iop_ShlV128, mkexpr(vB), mkU8(1)), mkU8(1))); + DIP("xsabsdp v%d,v%d\n", (UInt)XT, (UInt)XB); + putVSReg(XT, mkexpr(absVal)); + break; + } + case 0x2C0: // xscpsgndp + { + /* Scalar copy sign double-precision */ + IRTemp vecA_signbit = newTemp(Ity_V128); + IRTemp vecB_no_signbit = newTemp(Ity_V128); + IRTemp vec_result = newTemp(Ity_V128); + DIP("xscpsgndp v%d,v%d v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128, + mkexpr( vB ), + mkU8( 1 ) ), + mkU8( 1 ) ) ); + assign( vecA_signbit, binop( Iop_ShlV128, binop( Iop_ShrV128, + mkexpr( vA ), + mkU8( 127 ) ), + mkU8( 127 ) ) ); + assign( vec_result, binop( Iop_OrV128, mkexpr(vecA_signbit), mkexpr( vecB_no_signbit ) ) ); + putVSReg(XT, mkexpr(vec_result)); + break; + } + case 0x2D2: // xsnabsdp + { + /* Scalar negative absolute value double-precision */ + IRTemp vec_neg_signbit = newTemp(Ity_V128); + DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB); + assign( vec_neg_signbit, unop( Iop_NotV128, binop( Iop_ShrV128, + mkV128( 0xffff ), + mkU8( 1 ) ) ) ); + putVSReg(XT, binop(Iop_OrV128, mkexpr(vec_neg_signbit), mkexpr(vB))); + break; + } + case 0x2F2: // xsnegdp + { + /* Scalar negate double-precision */ + IRTemp vecB_no_signbit = newTemp(Ity_V128); + IRTemp vecB_signbit_comp = newTemp(Ity_V128); + DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB); + assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128, + mkexpr( vB ), + mkU8( 1 ) ), + mkU8( 1 ) ) ); + assign( vecB_signbit_comp, binop( Iop_ShlV128, + unop( Iop_NotV128, + binop( Iop_ShrV128, + mkexpr( vB ), + mkU8( 127 ) ) ), + mkU8( 127 ) ) ); + putVSReg( XT, binop( Iop_OrV128, mkexpr( vecB_no_signbit ), + mkexpr( vecB_signbit_comp ) ) ); + break; + } + + 
default: + vex_printf( "dis_vx_move(ppc)(opc2)\n" ); + return False; + } + return True; +} + +/* + * VSX Logical Instructions + */ +static Bool +dis_vx_logic ( UInt theInstr, UInt opc2 ) +{ + /* XX3-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar XT = ifieldRegXT ( theInstr ); + UChar XA = ifieldRegXA ( theInstr ); + UChar XB = ifieldRegXB ( theInstr ); + IRTemp vA = newTemp( Ity_V128 ); + IRTemp vB = newTemp( Ity_V128 ); + + if (opc1 != 0x3C) { + vex_printf( "dis_vx_logic(ppc)(instr)\n" ); + return False; + } + + assign( vA, getVSReg( XA ) ); + assign( vB, getVSReg( XB ) ); + + switch (opc2) { + case 0x268: // xxlxor + DIP("xxlxor v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_XorV128, mkexpr( vA ), mkexpr( vB ) ) ); + break; + case 0x248: // xxlor + DIP("xxlor v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_OrV128, mkexpr( vA ), mkexpr( vB ) ) ); + break; + case 0x288: // xxlnor + DIP("xxlnor v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, unop( Iop_NotV128, binop( Iop_OrV128, mkexpr( vA ), + mkexpr( vB ) ) ) ); + break; + case 0x208: // xxland + DIP("xxland v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_AndV128, mkexpr( vA ), mkexpr( vB ) ) ); + break; + case 0x228: //xxlandc + DIP("xxlandc v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); + putVSReg( XT, binop( Iop_AndV128, mkexpr( vA ), unop( Iop_NotV128, + mkexpr( vB ) ) ) ); + break; + default: + vex_printf( "dis_vx_logic(ppc)(opc2)\n" ); + return False; + } + return True; +} + +/* + * VSX Load Instructions + * NOTE: VSX supports word-aligned storage access. + */ +static Bool +dis_vx_load ( UInt theInstr ) +{ + /* XX1-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar XT = ifieldRegXT ( theInstr ); + UChar rA_addr = ifieldRegA( theInstr ); + UChar rB_addr = ifieldRegB( theInstr ); + UInt opc2 = ifieldOPClo10( theInstr ); + + IRType ty = mode64 ? Ity_I64 : Ity_I32; + IRTemp EA = newTemp( ty ); + + if (opc1 != 0x1F) { + vex_printf( "dis_vx_load(ppc)(instr)\n" ); + return False; + } + + assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) ); + + switch (opc2) { + case 0x24C: // lxsdx + { + IRExpr * exp; + DIP("lxsdx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr); + exp = loadBE( Ity_I64, mkexpr( EA ) ); + // We need to pass an expression of type Ity_V128 with putVSReg, but the load + // we just performed is only a DW. But since the contents of VSR[XT] element 1 + // are undefined after this operation, we can just do a splat op. + putVSReg( XT, binop( Iop_64HLtoV128, exp, exp ) ); + break; + } + case 0x34C: // lxvd2x + { + IROp addOp = ty == Ity_I64 ? Iop_Add64 : Iop_Add32; + IRExpr * high, *low; + ULong ea_off = 8; + IRExpr* high_addr; + DIP("lxvd2x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr); + high = loadBE( Ity_I64, mkexpr( EA ) ); + high_addr = binop( addOp, mkexpr( EA ), ty == Ity_I64 ? 
mkU64( ea_off ) + : mkU32( ea_off ) ); + low = loadBE( Ity_I64, high_addr ); + putVSReg( XT, binop( Iop_64HLtoV128, high, low ) ); + break; + } + case 0x14C: // lxvdsx + { + IRTemp data = newTemp(Ity_I64); + DIP("lxvdsx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr); + assign( data, loadBE( Ity_I64, mkexpr( EA ) ) ); + putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( data ), mkexpr( data ) ) ); + break; + } + case 0x30C: + { + IRExpr * t3, *t2, *t1, *t0; + UInt ea_off = 0; + IRExpr* irx_addr; + + DIP("lxvw4x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr); + t3 = loadBE( Ity_I32, mkexpr( EA ) ); + ea_off += 4; + irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), + ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) ); + t2 = loadBE( Ity_I32, irx_addr ); + ea_off += 4; + irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), + ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) ); + t1 = loadBE( Ity_I32, irx_addr ); + ea_off += 4; + irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), + ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) ); + t0 = loadBE( Ity_I32, irx_addr ); + putVSReg( XT, binop( Iop_64HLtoV128, binop( Iop_32HLto64, t3, t2 ), + binop( Iop_32HLto64, t1, t0 ) ) ); + break; + } + default: + vex_printf( "dis_vx_load(ppc)(opc2)\n" ); + return False; + } + return True; +} + +/* + * VSX Store Instructions + * NOTE: VSX supports word-aligned storage access. + */ +static Bool +dis_vx_store ( UInt theInstr ) +{ + /* XX1-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar XS = ifieldRegXS( theInstr ); + UChar rA_addr = ifieldRegA( theInstr ); + UChar rB_addr = ifieldRegB( theInstr ); + IRTemp vS = newTemp( Ity_V128 ); + UInt opc2 = ifieldOPClo10( theInstr ); + + IRType ty = mode64 ? Ity_I64 : Ity_I32; + IRTemp EA = newTemp( ty ); + + if (opc1 != 0x1F) { + vex_printf( "dis_vx_store(ppc)(instr)\n" ); + return False; + } + + assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) ); + assign( vS, getVSReg( XS ) ); + + switch (opc2) { + case 0x2CC: + { + IRExpr * high64; + DIP("stxsdx %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr); + high64 = unop( Iop_V128HIto64, mkexpr( vS ) ); + storeBE( mkexpr( EA ), high64 ); + break; + } + case 0x3CC: + { + IRExpr * high64, *low64; + DIP("stxvd2x %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr); + high64 = unop( Iop_V128HIto64, mkexpr( vS ) ); + low64 = unop( Iop_V128to64, mkexpr( vS ) ); + storeBE( mkexpr( EA ), high64 ); + storeBE( binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), ty == Ity_I64 ? mkU64( 8 ) + : mkU32( 8 ) ), low64 ); + break; + } + case 0x38C: + { + UInt ea_off = 0; + IRExpr* irx_addr; + IRTemp hi64 = newTemp( Ity_I64 ); + IRTemp lo64 = newTemp( Ity_I64 ); + + DIP("stxvw4x %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr); + + // This instruction supports word-aligned stores, so EA may not be + // quad-word aligned. Therefore, do 4 individual word-size stores. + assign( hi64, unop( Iop_V128HIto64, mkexpr( vS ) ) ); + assign( lo64, unop( Iop_V128to64, mkexpr( vS ) ) ); + + storeBE( mkexpr( EA ), unop( Iop_64HIto32, mkexpr( hi64 ) ) ); + ea_off += 4; + irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), + ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) ); + storeBE( irx_addr, unop( Iop_64to32, mkexpr( hi64 ) ) ); + ea_off += 4; + irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), + ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) ); + storeBE( irx_addr, unop( Iop_64HIto32, mkexpr( lo64 ) ) ); + ea_off += 4; + irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), + ty == Ity_I64 ? 
mkU64( ea_off ) : mkU32( ea_off ) ); + storeBE( irx_addr, unop( Iop_64to32, mkexpr( lo64 ) ) ); + + break; + } + default: + vex_printf( "dis_vx_store(ppc)(opc2)\n" ); + return False; + } + return True; +} + +/* + * VSX Permute Instructions + */ +static Bool +dis_vx_permute( UInt theInstr, UInt opc2 ) +{ + /* XX3-Form */ + UChar opc1 = ifieldOPC( theInstr ); + UChar XT = ifieldRegXT ( theInstr ); + UChar XA = ifieldRegXA ( theInstr ); + UChar XB = ifieldRegXB ( theInstr ); + IRTemp vT = newTemp( Ity_V128 ); + IRTemp vA = newTemp( Ity_V128 ); + IRTemp vB = newTemp( Ity_V128 ); + + if (opc1 != 0x3C) { + vex_printf( "dis_vx_permute(ppc)(instr)\n" ); + return False; + } + + assign( vA, getVSReg( XA ) ); + assign( vB, getVSReg( XB ) ); + + switch (opc2) { + case 0x8: // xxsldwi (VSX Shift Left Double by Word Immediate) + { + UChar SHW = ifieldSHW ( theInstr ); + IRTemp result = newTemp(Ity_V128); + if ( SHW != 0 ) { + IRTemp hi = newTemp(Ity_V128); + IRTemp lo = newTemp(Ity_V128); + assign( hi, binop(Iop_ShlV128, mkexpr(vA), mkU8(SHW*32)) ); + assign( lo, binop(Iop_ShrV128, mkexpr(vB), mkU8(128-SHW*32)) ); + assign ( result, binop(Iop_OrV128, mkexpr(hi), mkexpr(lo)) ); + } else + assign ( result, mkexpr(vA) ); + DIP("xxsldwi v%d,v%d,v%d,%d\n", (UInt)XT, (UInt)XA, (UInt)XB, (UInt)SHW); + putVSReg( XT, mkexpr(result) ); + break; + } + case 0x28: // xxpermdi (VSX Permute Doubleword Immediate) + { + UChar DM = ifieldDM ( theInstr ); + IRTemp hi = newTemp(Ity_I64); + IRTemp lo = newTemp(Ity_I64); + + if (DM & 0x2) + assign( hi, unop(Iop_V128to64, mkexpr(vA)) ); + else + assign( hi, unop(Iop_V128HIto64, mkexpr(vA)) ); + + if (DM & 0x1) + assign( lo, unop(Iop_V128to64, mkexpr(vB)) ); + else + assign( lo, unop(Iop_V128HIto64, mkexpr(vB)) ); + + assign( vT, binop(Iop_64HLtoV128, mkexpr(hi), mkexpr(lo)) ); + + DIP("xxpermdi v%d,v%d,v%d,0x%x\n", (UInt)XT, (UInt)XA, (UInt)XB, (UInt)DM); + putVSReg( XT, mkexpr( vT ) ); + break; + } + case 0x48: // xxmrghw (VSX Merge High Word) + case 0xc8: // xxmrglw (VSX Merge Low Word) + { + char type = (opc2 == 0x48) ? 'h' : 'l'; + IROp word_op = (opc2 == 0x48) ?
Iop_V128HIto64 : Iop_V128to64;
+      IRTemp a64 = newTemp(Ity_I64);
+      IRTemp ahi32 = newTemp(Ity_I32);
+      IRTemp alo32 = newTemp(Ity_I32);
+      IRTemp b64 = newTemp(Ity_I64);
+      IRTemp bhi32 = newTemp(Ity_I32);
+      IRTemp blo32 = newTemp(Ity_I32);
+
+      assign( a64, unop(word_op, mkexpr(vA)) );
+      assign( ahi32, unop(Iop_64HIto32, mkexpr(a64)) );
+      assign( alo32, unop(Iop_64to32, mkexpr(a64)) );
+
+      assign( b64, unop(word_op, mkexpr(vB)) );
+      assign( bhi32, unop(Iop_64HIto32, mkexpr(b64)) );
+      assign( blo32, unop(Iop_64to32, mkexpr(b64)) );
+
+      assign( vT, binop(Iop_64HLtoV128,
+                        binop(Iop_32HLto64, mkexpr(ahi32), mkexpr(bhi32)),
+                        binop(Iop_32HLto64, mkexpr(alo32), mkexpr(blo32))) );
+
+      DIP("xxmrg%cw v%d,v%d,v%d\n", type, (UInt)XT, (UInt)XA, (UInt)XB);
+      putVSReg( XT, mkexpr( vT ) );
+      break;
+   }
+
+   default:
+      vex_printf( "dis_vx_permute(ppc)(opc2)\n" );
+      return False;
+   }
+   return True;
+}
+
+/* AltiVec Load Instructions */
 static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
@@ -7400,7 +8655,6 @@ static Bool dis_av_load ( VexAbiInfo* vb
    return True;
 }
 
-
 /* AltiVec Store Instructions */
@@ -9238,8 +10492,221 @@ static Bool dis_av_fp_convert ( UInt the
 }
 
 
+/* The 0x3C primary opcode (VSX category) uses several different forms of
+ * extended opcodes:
+ *   o XX2-form:
+ *      - [10:2] (IBM notation [21:29])
+ *   o XX3-form variants:
+ *      - variant 1: [10:3] (IBM notation [21:28])
+ *      - variant 2: [9:3]  (IBM notation [22:28])
+ *      - variant 3: [7:3]  (IBM notation [24:28])
+ *   o XX4-form:
+ *      - [10:6] (IBM notation [21:25])
+ *
+ * The XX2-form needs bit 0 masked from the standard extended opcode
+ * as returned by ifieldOPClo10; the XX3-form needs bits 0 and 1 masked;
+ * and the XX4-form needs bits 0 through 4 masked.  Additionally, the
+ * XX4 and XX3 (variants 2 and 3) forms need certain bits masked on the
+ * front end since their encoding does not begin at bit 21 like the
+ * standard format.
+ *
+ * The get_VSX60_opc2() function uses the vsx_insn array below to obtain
+ * the secondary opcode for such VSX instructions.
+ */
+
+
+struct vsx_insn {
+   UInt opcode;
+   Char * name;
+};
+
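To make the mask cascade concrete before the table: below is a small standalone sketch, illustrative only and not part of the patch; the worked encoding is my own example, and the stub lookup stands in for the binary search over vsx_all[]. xxpermdi with DM=3 and AX=BX=0 presents the full 10-bit field 0x1A8 to ifieldOPClo10, and only the XX3 variant-3 mask recovers the table opcode 0x28.

   #include <stdio.h>

   #define XX2_MASK   0x3FE   /* drop bit 0          */
   #define XX3_1_MASK 0x3FC   /* drop bits 1:0       */
   #define XX3_2_MASK 0x1FC   /* drop bits 1:0, 9    */
   #define XX3_3_MASK 0x07C   /* drop bits 1:0, 9:7  */
   #define XX4_MASK   0x3E0   /* drop bits 4:0       */

   /* Stub lookup: pretend the table holds only xxpermdi (0x28). */
   static int in_table ( unsigned int opc ) { return opc == 0x28; }

   int main ( void )
   {
      /* xxpermdi, DM=3, AX=BX=0: IBM bits 21..30 are 0 11 01010 0 0. */
      unsigned int opc2_full = 0x1A8;
      unsigned int masks[5] = { XX2_MASK, XX3_1_MASK, XX3_2_MASK,
                                XX3_3_MASK, XX4_MASK };
      int i;
      for (i = 0; i < 5; i++) {
         if (in_table( opc2_full & masks[i] )) {
            printf( "normalized opc2 = 0x%x\n", opc2_full & masks[i] );
            return 0;   /* prints "normalized opc2 = 0x28" */
         }
      }
      return 1;
   }

+// ATTENTION: Keep this array sorted on the opcode!!!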
+static struct vsx_insn vsx_all[] = { + { 0x8, "xxsldwi" }, + { 0x18, "xxsel" }, + { 0x28, "xxpermdi" }, + { 0x48, "xxmrghw" }, + { 0x80, "xsadddp" }, + { 0x84, "xsmaddadp" }, + { 0x8c, "xscmpudp" }, + { 0x90, "xscvdpuxws" }, + { 0x92, "xsrdpi" }, + { 0x94, "xsrsqrtedp" }, + { 0x96, "xssqrtdp" }, + { 0xa0, "xssubdp" }, + { 0xa4, "xsmaddmdp" }, + { 0xac, "xscmpodp" }, + { 0xb0, "xscvdpsxws" }, + { 0xb2, "xsrdpiz" }, + { 0xb4, "xsredp" }, + { 0xc0, "xsmuldp" }, + { 0xc4, "xsmsubadp" }, + { 0xc8, "xxmrglw" }, + { 0xd2, "xsrdpip" }, + { 0xd4, "xstsqrtdp" }, + { 0xd6, "xsrdpic" }, + { 0xe0, "xsdivdp" }, + { 0xe4, "xsmsubmdp" }, + { 0xf2, "xsrdpim" }, + { 0xf4, "xstdivdp" }, + { 0x100, "xvaddsp" }, + { 0x104, "xvmaddasp" }, + { 0x10c, "xvcmpeqsp" }, + { 0x110, "xvcvspuxws" }, + { 0x112, "xvrspi" }, + { 0x114, "xvrsqrtesp" }, + { 0x116, "xvsqrtsp" }, + { 0x120, "xvsubsp" }, + { 0x124, "xvmaddmsp" }, + { 0x12c, "xvcmpgtsp" }, + { 0x130, "xvcvspsxws" }, + { 0x132, "xvrspiz" }, + { 0x134, "xvresp" }, + { 0x140, "xvmulsp" }, + { 0x144, "xvmsubasp" }, + { 0x148, "xxspltw" }, + { 0x14c, "xvcmpgesp" }, + { 0x150, "xvcvuxwsp" }, + { 0x152, "xvrspip" }, + { 0x154, "xvtsqrtsp" }, + { 0x156, "xvrspic" }, + { 0x160, "xvdivsp" }, + { 0x164, "xvmsubmsp" }, + { 0x170, "xvcvsxwsp" }, + { 0x172, "xvrspim" }, + { 0x174, "xvtdivsp" }, + { 0x180, "xvadddp" }, + { 0x184, "xvmaddadp" }, + { 0x18c, "xvcmpeqdp" }, + { 0x190, "xvcvdpuxws" }, + { 0x192, "xvrdpi" }, + { 0x194, "xvrsqrtedp" }, + { 0x196, "xvsqrtdp" }, + { 0x1a0, "xvsubdp" }, + { 0x1a4, "xvmaddmdp" }, + { 0x1ac, "xvcmpgtdp" }, + { 0x1b0, "xvcvdpsxws" }, + { 0x1b2, "xvrdpiz" }, + { 0x1b4, "xvredp" }, + { 0x1c0, "xvmuldp" }, + { 0x1c4, "xvmsubadp" }, + { 0x1cc, "xvcmpgedp" }, + { 0x1d0, "xvcvuxwdp" }, + { 0x1d2, "xvrdpip" }, + { 0x1d4, "xvtsqrtdp" }, + { 0x1d6, "xvrdpic" }, + { 0x1e0, "xvdivdp" }, + { 0x1e4, "xvmsubmdp" }, + { 0x1f0, "xvcvsxwdp" }, + { 0x1f2, "xvrdpim" }, + { 0x1f4, "xvtdivdp" }, + { 0x208, "xxland" }, + { 0x212, "xscvdpsp" }, + { 0x228, "xxlandc" }, + { 0x248 , "xxlor" }, + { 0x268, "xxlxor" }, + { 0x280, "xsmaxdp" }, + { 0x284, "xsnmaddadp" }, + { 0x288, "xxlnor" }, + { 0x290, "xscvdpuxds" }, + { 0x292, "xscvspdp" }, + { 0x2a0, "xsmindp" }, + { 0x2a4, "xsnmaddmdp" }, + { 0x2b0, "xscvdpsxds" }, + { 0x2b2, "xsabsdp" }, + { 0x2c0, "xscpsgndp" }, + { 0x2c4, "xsnmsubadp" }, + { 0x2d0, "xscvuxddp" }, + { 0x2d2, "xsnabsdp" }, + { 0x2e4, "xsnmsubmdp" }, + { 0x2f0, "xscvsxddp" }, + { 0x2f2, "xsnegdp" }, + { 0x300, "xvmaxsp" }, + { 0x304, "xvnmaddasp" }, + { 0x30c, "xvcmpeqsp." }, + { 0x310, "xvcvspuxds" }, + { 0x312, "xvcvdpsp" }, + { 0x320, "xvminsp" }, + { 0x324, "xvnmaddmsp" }, + { 0x32c, "xvcmpgtsp." }, + { 0x330, "xvcvspsxds" }, + { 0x332, "xvabssp" }, + { 0x340, "xvcpsgnsp" }, + { 0x344, "xvnmsubasp" }, + { 0x34c, "xvcmpgesp." }, + { 0x350, "xvcvuxdsp" }, + { 0x352, "xvnabssp" }, + { 0x364, "xvnmsubmsp" }, + { 0x370, "xvcvsxdsp" }, + { 0x372, "xvnegsp" }, + { 0x380, "xvmaxdp" }, + { 0x384, "xvnmaddadp" }, + { 0x38c, "xvcmpeqdp." }, + { 0x390, "xvcvdpuxds" }, + { 0x392, "xvcvspdp" }, + { 0x3a0, "xvmindp" }, + { 0x3a4, "xvnmaddmdp" }, + { 0x3ac, "xvcmpgtdp." }, + { 0x3b0, "xvcvdpsxds" }, + { 0x3b2, "xvabsdp" }, + { 0x3c0, "xvcpsgndp" }, + { 0x3c4, "xvnmsubadp" }, + { 0x3cc, "xvcmpgedp." }, + { 0x3d0, "xvcvuxddp" }, + { 0x3d2, "xvnabsdp" }, + { 0x3e4, "xvnmsubmdp" }, + { 0x3f0, "xvcvsxddp" }, + { 0x3f2, "xvnegdp" } +}; +#define VSX_ALL_LEN 135 + +// ATTENTION: This search function assumes vsx_all array is sorted. 
+static Int findVSXextOpCode(UInt opcode)
+{
+   Int low, mid, high;
+   low = 0;
+   high = VSX_ALL_LEN - 1;
+   while (low <= high) {
+      mid = (low + high)/2;
+      if (opcode < vsx_all[mid].opcode)
+         high = mid - 1;
+      else if (opcode > vsx_all[mid].opcode)
+         low = mid + 1;
+      else
+         return mid;
+   }
+   return -1;
+}
+
+/* The full 10-bit extended opcode retrieved via ifieldOPClo10 is
+ * passed, and we then try to match it up with one of the above
+ * VSX forms.
+ */
+static UInt get_VSX60_opc2(UInt opc2_full)
+{
+#define XX2_MASK 0x000003FE
+#define XX3_1_MASK 0x000003FC
+#define XX3_2_MASK 0x000001FC
+#define XX3_3_MASK 0x0000007C
+#define XX4_MASK 0x000003E0
+   Int ret;
+   UInt vsxExtOpcode = 0;
+
+   if (( ret = findVSXextOpCode(opc2_full & XX2_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX3_1_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX3_2_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX3_3_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX4_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+
+   return vsxExtOpcode;
+}
 
 /*------------------------------------------------------------*/
 /*--- Disassemble a single instruction                      ---*/
@@ -9268,6 +10735,7 @@ DisResult disInstr_PPC_WRK (
    Bool      allow_V  = False;
    Bool      allow_FX = False;
    Bool      allow_GX = False;
+   Bool      allow_VX = False;
    UInt      hwcaps = archinfo->hwcaps;
    Long      delta;
@@ -9277,11 +10745,13 @@ DisResult disInstr_PPC_WRK (
       allow_V  = (0 != (hwcaps & VEX_HWCAPS_PPC64_V));
       allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC64_FX));
       allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX));
+      allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC64_VX));
    } else {
       allow_F  = (0 != (hwcaps & VEX_HWCAPS_PPC32_F));
       allow_V  = (0 != (hwcaps & VEX_HWCAPS_PPC32_V));
       allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC32_FX));
       allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX));
+      allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC32_VX));
    }
 
    /* The running delta */
@@ -9473,6 +10943,20 @@ DisResult disInstr_PPC_WRK (
 
    case 0x3B:
       if (!allow_F) goto decode_noF;
+      opc2 = ifieldOPClo10(theInstr);
+      switch (opc2) {
+         case 0x3CE: // fcfidus (implemented as native insn)
+            if (!allow_VX)
+               goto decode_noVX;
+            if (dis_fp_round( theInstr ))
+               goto decode_success;
+            goto decode_failure;
+         case 0x34E: // fcfids
+            if (dis_fp_round( theInstr ))
+               goto decode_success;
+            goto decode_failure;
+      }
+
      opc2 = IFIELD(theInstr, 1, 5);
      switch (opc2) {
      /* Floating Point Arith Instructions */
@@ -9499,12 +10983,64 @@ DisResult disInstr_PPC_WRK (
         if (!allow_GX) goto decode_noGX;
         if (dis_fp_arith(theInstr)) goto decode_success;
         goto decode_failure;
-
+     default:
+        goto decode_failure;
      }
      break;
 
+   case 0x3C: // VSX instructions (except load/store)
+   {
+      UInt vsxOpc2 = get_VSX60_opc2(opc2);
+      /* The vsxOpc2 returned is the "normalized" value, representing the
+       * instruction's secondary opcode as taken from the standard secondary
+       * opcode field [21:30] (IBM notation), even if the actual field
+       * is non-standard.  These normalized values are given in the opcode
+       * appendices of the ISA 2.06 document.
+ */ + if (vsxOpc2 == 0) + goto decode_failure; + + switch (vsxOpc2) { + case 0x8: case 0x28: case 0x48: case 0xc8: // xxsldwi, xxpermdi, xxmrghw, xxmrglw + if (dis_vx_permute(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x268: case 0x248: case 0x288: case 0x208: case 0x228: // xxlxor, xxlor, xxlnor, xxland, xxlandc + if (dis_vx_logic(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x2B2: // xsabsdp + if (dis_vx_move(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x2C0: // xscpsgndp + if (dis_vx_move(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x2D2: // xsnabsdp + if (dis_vx_move(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x2F2: // xsnegdp + if (dis_vx_move(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x08C: case 0x0AC: // xscmpudp, xscmpodp + if (dis_vx_cmp(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x080: case 0x0E0: // xsadddp, xsdivdp + case 0x084: case 0x0A4: // xsmaddadp, xsmaddmdp + case 0x0C4: case 0x0E4: // xsmsubadp, xsmsubmdp + case 0x284: case 0x2A4: // xsnmaddadp, xsnmaddmdp + case 0x0C0: // xsmuldp + case 0x0A0: // xssubdp + if (dis_vx_arith(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + case 0x2B0: case 0x2F0: case 0x2D0: // xscvdpsxds, xscvsxddp, xscvuxddp + if (dis_vx_conv(theInstr, vsxOpc2)) goto decode_success; + goto decode_failure; + + default: + goto decode_failure; + } + break; + } + /* 64bit Integer Stores */ case 0x3E: // std, stdu if (!mode64) goto decode_failure; @@ -9555,6 +11091,10 @@ DisResult disInstr_PPC_WRK ( if (dis_fp_cmp(theInstr)) goto decode_success; goto decode_failure; + case 0x080: // ftdiv + if (dis_fp_ftdiv(theInstr)) goto decode_success; + goto decode_failure; + /* Floating Point Rounding/Conversion Instructions */ case 0x00C: // frsp case 0x00E: // fctiw @@ -9564,6 +11104,11 @@ DisResult disInstr_PPC_WRK ( case 0x34E: // fcfid if (dis_fp_round(theInstr)) goto decode_success; goto decode_failure; + case 0x3CE: // fcfidu (implemented as native insn) + if (!allow_VX) goto decode_noVX; + if (dis_fp_round(theInstr)) goto decode_success; + goto decode_failure; + /* Power6 rounding stuff */ case 0x1E8: // frim @@ -9730,7 +11275,7 @@ DisResult disInstr_PPC_WRK ( /* Integer Load and Store with Byte Reverse Instructions */ case 0x316: case 0x216: case 0x396: // lhbrx, lwbrx, sthbrx - case 0x296: // stwbrx + case 0x296: case 0x214: // stwbrx, ldbrx if (dis_int_ldst_rev( theInstr )) goto decode_success; goto decode_failure; @@ -9815,6 +11360,11 @@ DisResult disInstr_PPC_WRK ( if (dis_fp_load( theInstr )) goto decode_success; goto decode_failure; + case 0x377: // lfiwzx + if (!allow_F) goto decode_noF; + if (dis_fp_load( theInstr )) goto decode_success; + goto decode_failure; + /* AltiVec instructions */ /* AV Cache Control - Data streams */ @@ -9838,6 +11388,26 @@ DisResult disInstr_PPC_WRK ( if (dis_av_store( theInstr )) goto decode_success; goto decode_failure; + /* VSX Load */ + case 0x24C: // lxsdx + case 0x34C: // lxvd2x + case 0x14C: // lxvdsx + case 0x30C: // lxvw4x + if (dis_vx_load( theInstr )) goto decode_success; + goto decode_failure; + + /* VSX Store */ + case 0x2CC: // stxsdx + case 0x3CC: // stxvd2x + case 0x38C: // stxvw4x + if (dis_vx_store( theInstr )) goto decode_success; + goto decode_failure; + + /* Miscellaneous ISA 2.06 instructions */ + case 0x1FA: // popcntd + if (dis_int_logic( theInstr )) goto decode_success; + goto decode_failure; + 
default: /* Deal with some other cases that we would otherwise have punted on. */ @@ -10023,6 +11593,10 @@ DisResult disInstr_PPC_WRK ( vassert(!allow_V); vex_printf("disInstr(ppc): declined to decode an AltiVec insn.\n"); goto decode_failure; + decode_noVX: + vassert(!allow_VX); + vex_printf("disInstr(ppc): declined to decode a VSX insn.\n"); + goto decode_failure; decode_noFX: vassert(!allow_FX); vex_printf("disInstr(ppc): " @@ -10105,10 +11679,10 @@ DisResult disInstr_PPC ( IRSB* ir /* do some sanity checks */ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V - | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX; + | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX; - mask64 = VEX_HWCAPS_PPC64_V - | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX; + mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX + | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX; if (mode64) { vassert((hwcaps_guest & mask32) == 0); Index: VEX/priv/host_ppc_defs.c =================================================================== --- VEX/priv/host_ppc_defs.c.orig +++ VEX/priv/host_ppc_defs.c @@ -962,12 +962,14 @@ PPCInstr* PPCInstr_FpRSP ( HReg dst, HRe i->Pin.FpRSP.src = src; return i; } -PPCInstr* PPCInstr_FpCftI ( Bool fromI, Bool int32, - HReg dst, HReg src ) { +PPCInstr* PPCInstr_FpCftI ( Bool fromI, Bool int32, Bool syned, + Bool dst64, HReg dst, HReg src ) { PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); i->tag = Pin_FpCftI; i->Pin.FpCftI.fromI = fromI; i->Pin.FpCftI.int32 = int32; + i->Pin.FpCftI.syned = syned; + i->Pin.FpCftI.dst64 = dst64; i->Pin.FpCftI.dst = dst; i->Pin.FpCftI.src = src; vassert(!(int32 && fromI)); /* no such insn ("fcfiw"). */ @@ -1433,15 +1435,21 @@ void ppPPCInstr ( PPCInstr* i, Bool mode ppHRegPPC(i->Pin.FpRSP.src); return; case Pin_FpCftI: { - HChar* str = "fc???"; + HChar* str = "fc?????"; if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == False) str = "fctid"; else if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == True) str = "fctiw"; else - if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False) - str = "fcfid"; + if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False) { + if (i->Pin.FpCftI.syned == True) + str = "fcfid"; + else if (i->Pin.FpCftI.dst64 == True) + str = "fcfidu"; + else + str = "fcfidus"; + } vex_printf("%s ", str); ppHRegPPC(i->Pin.FpCftI.dst); vex_printf(","); @@ -3385,9 +3393,19 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf goto done; } if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False) { - // fcfid (conv i64 to f64), PPC64 p434 - p = mkFormX(p, 63, fr_dst, 0, fr_src, 846, 0); - goto done; + if (i->Pin.FpCftI.syned == True) { + // fcfid (conv i64 to f64), PPC64 p434 + p = mkFormX(p, 63, fr_dst, 0, fr_src, 846, 0); + goto done; + } else if (i->Pin.FpCftI.dst64 == True) { + // fcfidu (conv u64 to f64) + p = mkFormX(p, 63, fr_dst, 0, fr_src, 974, 0); + goto done; + } else { + // fcfidus (conv u64 to f32) + p = mkFormX(p, 59, fr_dst, 0, fr_src, 974, 0); + goto done; + } } goto bad; } Index: VEX/priv/host_ppc_defs.h =================================================================== --- VEX/priv/host_ppc_defs.h.orig +++ VEX/priv/host_ppc_defs.h @@ -667,6 +667,8 @@ typedef struct { Bool fromI; /* False==F->I, True==I->F */ Bool int32; /* True== I is 32, False==I is 64 */ + Bool syned; + Bool dst64; /* True==dest is 64bit; False==dest is 32bit */ HReg src; HReg dst; } FpCftI; @@ -811,8 +813,8 @@ extern PPCInstr* PPCInstr_FpMulAcc ( P extern PPCInstr* PPCInstr_FpLdSt ( Bool isLoad, UChar sz, HReg, PPCAMode* ); extern PPCInstr* 
PPCInstr_FpSTFIW ( HReg addr, HReg data );
 extern PPCInstr* PPCInstr_FpRSP     ( HReg dst, HReg src );
-extern PPCInstr* PPCInstr_FpCftI    ( Bool fromI, Bool int32,
-                                      HReg dst, HReg src );
+extern PPCInstr* PPCInstr_FpCftI    ( Bool fromI, Bool int32, Bool syned,
+                                      Bool dst64, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_FpCMov    ( PPCCondCode, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_FpLdFPSCR ( HReg src );
 extern PPCInstr* PPCInstr_FpCmp     ( HReg dst, HReg srcL, HReg srcR );
Index: VEX/priv/host_ppc_isel.c
===================================================================
--- VEX/priv/host_ppc_isel.c.orig
+++ VEX/priv/host_ppc_isel.c
@@ -126,7 +126,10 @@
 fnabs[.]     if .
 fadd[.]      if .       y y
 fadds[.]     if .       y y
-fcfid[.] (i64->dbl)      if .       y y
+fcfid[.]   (Si64->dbl)   if .       y y
+fcfidu[.]  (Ui64->dbl)   if .       y y
+fcfids[.]  (Si64->sngl)  if .       y y
+fcfidus[.] (Ui64->sngl)  if .       y y
 fcmpo (cmp,   result     n n n
 fcmpu  to crfD)          n n n
 fctid[.]  (dbl->i64)     if .       ->undef y
@@ -559,7 +562,7 @@ PPCAMode* genGuestArrayOffset ( ISelEnv*
    if (bias < -100 || bias > 100) /* somewhat arbitrarily */
       vpanic("genGuestArrayOffset(ppc host)(3)");
-   if (descr->base < 0 || descr->base > 4000) /* somewhat arbitrarily */
+   if (descr->base < 0 || descr->base > 5000) /* somewhat arbitrarily */
      vpanic("genGuestArrayOffset(ppc host)(4)");
 
    /* Compute off into a reg, %off.  Then return:
@@ -1468,8 +1471,8 @@ static HReg iselWordExpr_R_wrk ( ISelEnv
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
 
         sub_from_sp( env, 16 );
-        addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
-                                      ftmp, fsrc));
+        addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/, True,
+                                      False, ftmp, fsrc));
         addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
         addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
 
@@ -1496,8 +1499,8 @@ static HReg iselWordExpr_R_wrk ( ISelEnv
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
 
         sub_from_sp( env, 16 );
-        addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
-                                      ftmp, fsrc));
+        addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/, True,
+                                      True, ftmp, fsrc));
         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
         addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
         add_to_sp( env, 16 );
@@ -2400,8 +2403,10 @@ static PPCCondCode iselCondCode_wrk ( IS
       switch (e->Iex.Binop.op) {
       case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
       case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
-      case Iop_CmpLT32U: return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
-      case Iop_CmpLE32U: return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
+      case Iop_CmpLT32U: case Iop_CmpLT32S:
+         return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
+      case Iop_CmpLE32U: case Iop_CmpLE32S:
+         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
       default: vpanic("iselCondCode(ppc): CmpXX32");
       }
    }
@@ -2746,8 +2751,8 @@ static void iselInt64Expr_wrk ( HReg* rH
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
 
         sub_from_sp( env, 16 );
-        addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
-                                      ftmp, fsrc));
+        addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/, True,
+                                      True, ftmp, fsrc));
         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
         addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
         addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
@@ -3185,7 +3190,8 @@ static HReg iselDblExpr_wrk ( ISelEnv* e
       return r_dst;
    }
 
-   if (e->Iex.Binop.op == Iop_I64StoF64) {
+   if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64
+       || e->Iex.Binop.op == Iop_I64UtoF32) {
      if (mode64) {
        HReg fdst = 
newVRegF(env); HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2); @@ -3200,6 +3206,8 @@ static HReg iselDblExpr_wrk ( ISelEnv* e addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/)); addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1)); addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/, + e->Iex.Binop.op == Iop_I64StoF64 ? True : False, + e->Iex.Binop.op == Iop_I64UtoF32 ? False : True, fdst, fdst)); add_to_sp( env, 16 ); @@ -3226,6 +3234,8 @@ static HReg iselDblExpr_wrk ( ISelEnv* e addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/)); addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1)); addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/, + e->Iex.Binop.op == Iop_I64StoF64 ? True : False, + e->Iex.Binop.op == Iop_I64UtoF32 ? False : True, fdst, fdst)); add_to_sp( env, 16 ); @@ -4129,10 +4139,10 @@ HInstrArray* iselSB_PPC ( IRSB* bb, VexA /* do some sanity checks */ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V - | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX; + | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX; - mask64 = VEX_HWCAPS_PPC64_V - | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX; + mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX + | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX; if (mode64) { vassert((hwcaps_host & mask32) == 0); Index: VEX/priv/ir_defs.c =================================================================== --- VEX/priv/ir_defs.c.orig +++ VEX/priv/ir_defs.c @@ -362,6 +362,8 @@ void ppIROp ( IROp op ) case Iop_I16StoF64: vex_printf("I16StoF64"); return; case Iop_I32StoF64: vex_printf("I32StoF64"); return; case Iop_I64StoF64: vex_printf("I64StoF64"); return; + case Iop_I64UtoF64: vex_printf("I64UtoF64"); return; + case Iop_I64UtoF32: vex_printf("I64UtoF32"); return; case Iop_I32UtoF64: vex_printf("I32UtoF64"); return; @@ -2262,6 +2264,8 @@ void typeOfPrimop ( IROp op, case Iop_I16StoF64: UNARY(Ity_I16, Ity_F64); case Iop_I32StoF64: UNARY(Ity_I32, Ity_F64); case Iop_I64StoF64: BINARY(ity_RMode,Ity_I64, Ity_F64); + case Iop_I64UtoF64: BINARY(ity_RMode,Ity_I64, Ity_F64); + case Iop_I64UtoF32: BINARY(ity_RMode,Ity_I64, Ity_F64); case Iop_I32UtoF64: UNARY(Ity_I32, Ity_F64); Index: VEX/priv/main_main.c =================================================================== --- VEX/priv/main_main.c.orig +++ VEX/priv/main_main.c @@ -855,6 +855,7 @@ static HChar* show_hwcaps_ppc32 ( UInt h const UInt V = VEX_HWCAPS_PPC32_V; const UInt FX = VEX_HWCAPS_PPC32_FX; const UInt GX = VEX_HWCAPS_PPC32_GX; + const UInt VX = VEX_HWCAPS_PPC32_VX; UInt c = hwcaps; if (c == 0) return "ppc32-int"; if (c == F) return "ppc32-int-flt"; @@ -865,6 +866,7 @@ static HChar* show_hwcaps_ppc32 ( UInt h if (c == (F|V|FX)) return "ppc32-int-flt-vmx-FX"; if (c == (F|V|GX)) return "ppc32-int-flt-vmx-GX"; if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX"; + if (c == (F|V|FX|GX|VX)) return "ppc32-int-flt-vmx-FX-GX-VX"; return NULL; } @@ -875,6 +877,7 @@ static HChar* show_hwcaps_ppc64 ( UInt h const UInt V = VEX_HWCAPS_PPC64_V; const UInt FX = VEX_HWCAPS_PPC64_FX; const UInt GX = VEX_HWCAPS_PPC64_GX; + const UInt VX = VEX_HWCAPS_PPC64_VX; UInt c = hwcaps; if (c == 0) return "ppc64-int-flt"; if (c == FX) return "ppc64-int-flt-FX"; @@ -884,6 +887,7 @@ static HChar* show_hwcaps_ppc64 ( UInt h if (c == (V|FX)) return "ppc64-int-flt-vmx-FX"; if (c == (V|GX)) return "ppc64-int-flt-vmx-GX"; if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX"; + if (c == (V|FX|GX|VX)) return "ppc64-int-flt-vmx-FX-GX-VX"; return NULL; } Index: 
VEX/pub/libvex_guest_ppc32.h
===================================================================
--- VEX/pub/libvex_guest_ppc32.h.orig
+++ VEX/pub/libvex_guest_ppc32.h
@@ -82,120 +82,128 @@ typedef
       /* 120 */ UInt guest_GPR30;
       /* 124 */ UInt guest_GPR31;
 
-      // Floating Point Registers
-      /* 128 */ ULong guest_FPR0;
-      /* 136 */ ULong guest_FPR1;
-      /* 144 */ ULong guest_FPR2;
-      /* 152 */ ULong guest_FPR3;
-      /* 160 */ ULong guest_FPR4;
-      /* 168 */ ULong guest_FPR5;
-      /* 176 */ ULong guest_FPR6;
-      /* 184 */ ULong guest_FPR7;
-      /* 192 */ ULong guest_FPR8;
-      /* 200 */ ULong guest_FPR9;
-      /* 208 */ ULong guest_FPR10;
-      /* 216 */ ULong guest_FPR11;
-      /* 224 */ ULong guest_FPR12;
-      /* 232 */ ULong guest_FPR13;
-      /* 240 */ ULong guest_FPR14;
-      /* 248 */ ULong guest_FPR15;
-      /* 256 */ ULong guest_FPR16;
-      /* 264 */ ULong guest_FPR17;
-      /* 272 */ ULong guest_FPR18;
-      /* 280 */ ULong guest_FPR19;
-      /* 288 */ ULong guest_FPR20;
-      /* 296 */ ULong guest_FPR21;
-      /* 304 */ ULong guest_FPR22;
-      /* 312 */ ULong guest_FPR23;
-      /* 320 */ ULong guest_FPR24;
-      /* 328 */ ULong guest_FPR25;
-      /* 336 */ ULong guest_FPR26;
-      /* 344 */ ULong guest_FPR27;
-      /* 352 */ ULong guest_FPR28;
-      /* 360 */ ULong guest_FPR29;
-      /* 368 */ ULong guest_FPR30;
-      /* 376 */ ULong guest_FPR31;
+      // Vector Registers, Floating Point Registers, and VSX Registers
+      // With ISA 2.06, the "Vector-Scalar Floating-point" category
+      // provides facilities to support vector and scalar binary
+      // floating-point operations.  A unified register file is an
+      // integral part of this new facility, combining the floating
+      // point and vector registers into a single file of 64 x 128-bit
+      // registers, referred to as VSR[0..63].  The floating point
+      // registers are now mapped into doubleword element 0 of
+      // VSR[0..31].  The 32 x 128-bit vector registers defined by the
+      // "Vector Facility [Category: Vector]" are now mapped to
+      // VSR[32..63].
 
-      // Vector Registers
       // IMPORTANT: the user of libvex must place the guest state so as
-      // to ensure that guest_VR{0..31}, and any shadows thereof, are
+      // to ensure that guest_VSR{0..63}, and any shadows thereof, are
      // 16-aligned.
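As a quick illustration of the FPR-to-VSR aliasing described above, a sketch rather than VEX code: it assumes VEX's U128 is UInt[4] with lane 0 holding the most significant word, i.e. big-endian lane order, so FPR n occupies bits [127:64] of VSR n.

   #include <stdio.h>

   typedef unsigned int       UInt;
   typedef unsigned long long ULong;
   typedef UInt U128[4];

   /* FPR n is doubleword element 0 of VSR n, i.e. bits [127:64]. */
   static ULong FPR_view_of_VSR ( const U128 vsr )
   {
      return ((ULong)vsr[0] << 32) | vsr[1];
   }

   int main ( void )
   {
      /* High doubleword 0x3FF0000000000000 is 1.0 as an IEEE double. */
      U128 vsr0 = { 0x3FF00000, 0x00000000, 0xDEADBEEF, 0xCAFEBABE };
      printf( "FPR0 bits = 0x%llx\n", FPR_view_of_VSR( vsr0 ) );
      return 0;
   }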
- /* 384 */ U128 guest_VR0; - /* 400 */ U128 guest_VR1; - /* 416 */ U128 guest_VR2; - /* 432 */ U128 guest_VR3; - /* 448 */ U128 guest_VR4; - /* 464 */ U128 guest_VR5; - /* 480 */ U128 guest_VR6; - /* 496 */ U128 guest_VR7; - /* 512 */ U128 guest_VR8; - /* 528 */ U128 guest_VR9; - /* 544 */ U128 guest_VR10; - /* 560 */ U128 guest_VR11; - /* 576 */ U128 guest_VR12; - /* 592 */ U128 guest_VR13; - /* 608 */ U128 guest_VR14; - /* 624 */ U128 guest_VR15; - /* 640 */ U128 guest_VR16; - /* 656 */ U128 guest_VR17; - /* 672 */ U128 guest_VR18; - /* 688 */ U128 guest_VR19; - /* 704 */ U128 guest_VR20; - /* 720 */ U128 guest_VR21; - /* 736 */ U128 guest_VR22; - /* 752 */ U128 guest_VR23; - /* 768 */ U128 guest_VR24; - /* 784 */ U128 guest_VR25; - /* 800 */ U128 guest_VR26; - /* 816 */ U128 guest_VR27; - /* 832 */ U128 guest_VR28; - /* 848 */ U128 guest_VR29; - /* 864 */ U128 guest_VR30; - /* 880 */ U128 guest_VR31; - - /* 896 */ UInt guest_CIA; // IP (no arch visible register) - /* 900 */ UInt guest_LR; // Link Register - /* 904 */ UInt guest_CTR; // Count Register + + /* 128 */ U128 guest_VSR0; + /* 144 */ U128 guest_VSR1; + /* 160 */ U128 guest_VSR2; + /* 176 */ U128 guest_VSR3; + /* 192 */ U128 guest_VSR4; + /* 208 */ U128 guest_VSR5; + /* 224 */ U128 guest_VSR6; + /* 240 */ U128 guest_VSR7; + /* 256 */ U128 guest_VSR8; + /* 272 */ U128 guest_VSR9; + /* 288 */ U128 guest_VSR10; + /* 304 */ U128 guest_VSR11; + /* 320 */ U128 guest_VSR12; + /* 336 */ U128 guest_VSR13; + /* 352 */ U128 guest_VSR14; + /* 368 */ U128 guest_VSR15; + /* 384 */ U128 guest_VSR16; + /* 400 */ U128 guest_VSR17; + /* 416 */ U128 guest_VSR18; + /* 432 */ U128 guest_VSR19; + /* 448 */ U128 guest_VSR20; + /* 464 */ U128 guest_VSR21; + /* 480 */ U128 guest_VSR22; + /* 496 */ U128 guest_VSR23; + /* 512 */ U128 guest_VSR24; + /* 528 */ U128 guest_VSR25; + /* 544 */ U128 guest_VSR26; + /* 560 */ U128 guest_VSR27; + /* 576 */ U128 guest_VSR28; + /* 592 */ U128 guest_VSR29; + /* 608 */ U128 guest_VSR30; + /* 624 */ U128 guest_VSR31; + /* 640 */ U128 guest_VSR32; + /* 656 */ U128 guest_VSR33; + /* 672 */ U128 guest_VSR34; + /* 688 */ U128 guest_VSR35; + /* 704 */ U128 guest_VSR36; + /* 720 */ U128 guest_VSR37; + /* 736 */ U128 guest_VSR38; + /* 752 */ U128 guest_VSR39; + /* 768 */ U128 guest_VSR40; + /* 784 */ U128 guest_VSR41; + /* 800 */ U128 guest_VSR42; + /* 816 */ U128 guest_VSR43; + /* 832 */ U128 guest_VSR44; + /* 848 */ U128 guest_VSR45; + /* 864 */ U128 guest_VSR46; + /* 880 */ U128 guest_VSR47; + /* 896 */ U128 guest_VSR48; + /* 912 */ U128 guest_VSR49; + /* 928 */ U128 guest_VSR50; + /* 944 */ U128 guest_VSR51; + /* 960 */ U128 guest_VSR52; + /* 976 */ U128 guest_VSR53; + /* 992 */ U128 guest_VSR54; + /* 1008 */ U128 guest_VSR55; + /* 1024 */ U128 guest_VSR56; + /* 1040 */ U128 guest_VSR57; + /* 1056 */ U128 guest_VSR58; + /* 1072 */ U128 guest_VSR59; + /* 1088 */ U128 guest_VSR60; + /* 1104 */ U128 guest_VSR61; + /* 1120 */ U128 guest_VSR62; + /* 1136 */ U128 guest_VSR63; + + /* 1152 */ UInt guest_CIA; // IP (no arch visible register) + /* 1156 */ UInt guest_LR; // Link Register + /* 1160 */ UInt guest_CTR; // Count Register /* XER pieces */ - /* 908 */ UChar guest_XER_SO; /* in lsb */ - /* 909 */ UChar guest_XER_OV; /* in lsb */ - /* 910 */ UChar guest_XER_CA; /* in lsb */ - /* 911 */ UChar guest_XER_BC; /* all bits */ + /* 1164 */ UChar guest_XER_SO; /* in lsb */ + /* 1165 */ UChar guest_XER_OV; /* in lsb */ + /* 1166 */ UChar guest_XER_CA; /* in lsb */ + /* 1167 */ UChar guest_XER_BC; /* all bits */ /* CR pieces */ - /* 
912 */ UChar guest_CR0_321; /* in [3:1] */ - /* 913 */ UChar guest_CR0_0; /* in lsb */ - /* 914 */ UChar guest_CR1_321; /* in [3:1] */ - /* 915 */ UChar guest_CR1_0; /* in lsb */ - /* 916 */ UChar guest_CR2_321; /* in [3:1] */ - /* 917 */ UChar guest_CR2_0; /* in lsb */ - /* 918 */ UChar guest_CR3_321; /* in [3:1] */ - /* 919 */ UChar guest_CR3_0; /* in lsb */ - /* 920 */ UChar guest_CR4_321; /* in [3:1] */ - /* 921 */ UChar guest_CR4_0; /* in lsb */ - /* 922 */ UChar guest_CR5_321; /* in [3:1] */ - /* 923 */ UChar guest_CR5_0; /* in lsb */ - /* 924 */ UChar guest_CR6_321; /* in [3:1] */ - /* 925 */ UChar guest_CR6_0; /* in lsb */ - /* 926 */ UChar guest_CR7_321; /* in [3:1] */ - /* 927 */ UChar guest_CR7_0; /* in lsb */ + /* 1168 */ UChar guest_CR0_321; /* in [3:1] */ + /* 1169 */ UChar guest_CR0_0; /* in lsb */ + /* 1170 */ UChar guest_CR1_321; /* in [3:1] */ + /* 1171 */ UChar guest_CR1_0; /* in lsb */ + /* 1172 */ UChar guest_CR2_321; /* in [3:1] */ + /* 1173 */ UChar guest_CR2_0; /* in lsb */ + /* 1174 */ UChar guest_CR3_321; /* in [3:1] */ + /* 1175 */ UChar guest_CR3_0; /* in lsb */ + /* 1176 */ UChar guest_CR4_321; /* in [3:1] */ + /* 1177 */ UChar guest_CR4_0; /* in lsb */ + /* 1178 */ UChar guest_CR5_321; /* in [3:1] */ + /* 1179 */ UChar guest_CR5_0; /* in lsb */ + /* 1180 */ UChar guest_CR6_321; /* in [3:1] */ + /* 1181 */ UChar guest_CR6_0; /* in lsb */ + /* 1182 */ UChar guest_CR7_321; /* in [3:1] */ + /* 1183 */ UChar guest_CR7_0; /* in lsb */ /* FP Status & Control Register fields */ - /* 928 */ UInt guest_FPROUND; // FP Rounding Mode + /* 1184 */ UInt guest_FPROUND; // FP Rounding Mode /* Vector Save/Restore Register */ - /* 932 */ UInt guest_VRSAVE; + /* 1188 */ UInt guest_VRSAVE; /* Vector Status and Control Register */ - /* 936 */ UInt guest_VSCR; + /* 1192 */ UInt guest_VSCR; /* Emulation warnings */ - /* 940 */ UInt guest_EMWARN; + /* 1196 */ UInt guest_EMWARN; /* For icbi: record start and length of area to invalidate */ - /* 944 */ UInt guest_TISTART; - /* 948 */ UInt guest_TILEN; + /* 1200 */ UInt guest_TISTART; + /* 1204 */ UInt guest_TILEN; /* Used to record the unredirected guest address at the start of a translation whose start has been redirected. By reading @@ -203,27 +211,27 @@ typedef find out what the corresponding no-redirection address was. Note, this is only set for wrap-style redirects, not for replace-style ones. */ - /* 952 */ UInt guest_NRADDR; - /* 956 */ UInt guest_NRADDR_GPR2; /* needed by aix */ + /* 1208 */ UInt guest_NRADDR; + /* 1212 */ UInt guest_NRADDR_GPR2; /* needed by aix */ /* A grows-upwards stack for hidden saves/restores of LR and R2 needed for function interception and wrapping on ppc32-aix5. A horrible hack. REDIR_SP points to the highest live entry, and so starts at -1. */ - /* 960 */ UInt guest_REDIR_SP; - /* 964 */ UInt guest_REDIR_STACK[VEX_GUEST_PPC32_REDIR_STACK_SIZE]; + /* 1216 */ UInt guest_REDIR_SP; + /* 1220 */ UInt guest_REDIR_STACK[VEX_GUEST_PPC32_REDIR_STACK_SIZE]; /* Needed for AIX (but mandated for all guest architectures): CIA at the last SC insn. Used when backing up to restart a syscall that has been interrupted by a signal. */ - /* ??? */ UInt guest_IP_AT_SYSCALL; + /* 1348 */ UInt guest_IP_AT_SYSCALL; /* SPRG3, which AIUI is readonly in user space. Needed for threading on AIX. */ - /* ??? 
*/ UInt guest_SPRG3_RO;
+      /* 1352 */ UInt guest_SPRG3_RO;
 
       /* Padding to make it have an 8-aligned size */
-      /* UInt  padding; */
+      /* 1356 */ UInt  padding;
 
    }
    VexGuestPPC32State;
 
Index: VEX/pub/libvex_guest_ppc64.h
===================================================================
--- VEX/pub/libvex_guest_ppc64.h.orig
+++ VEX/pub/libvex_guest_ppc64.h
@@ -120,123 +120,131 @@ typedef
       /* 240 */ ULong guest_GPR30;
       /* 248 */ ULong guest_GPR31;
 
-      // Floating Point Registers
-      /* 256 */ ULong guest_FPR0;
-      /* 264 */ ULong guest_FPR1;
-      /* 272 */ ULong guest_FPR2;
-      /* 280 */ ULong guest_FPR3;
-      /* 288 */ ULong guest_FPR4;
-      /* 296 */ ULong guest_FPR5;
-      /* 304 */ ULong guest_FPR6;
-      /* 312 */ ULong guest_FPR7;
-      /* 320 */ ULong guest_FPR8;
-      /* 328 */ ULong guest_FPR9;
-      /* 336 */ ULong guest_FPR10;
-      /* 344 */ ULong guest_FPR11;
-      /* 352 */ ULong guest_FPR12;
-      /* 360 */ ULong guest_FPR13;
-      /* 368 */ ULong guest_FPR14;
-      /* 376 */ ULong guest_FPR15;
-      /* 384 */ ULong guest_FPR16;
-      /* 392 */ ULong guest_FPR17;
-      /* 400 */ ULong guest_FPR18;
-      /* 408 */ ULong guest_FPR19;
-      /* 416 */ ULong guest_FPR20;
-      /* 424 */ ULong guest_FPR21;
-      /* 432 */ ULong guest_FPR22;
-      /* 440 */ ULong guest_FPR23;
-      /* 448 */ ULong guest_FPR24;
-      /* 456 */ ULong guest_FPR25;
-      /* 464 */ ULong guest_FPR26;
-      /* 472 */ ULong guest_FPR27;
-      /* 480 */ ULong guest_FPR28;
-      /* 488 */ ULong guest_FPR29;
-      /* 496 */ ULong guest_FPR30;
-      /* 504 */ ULong guest_FPR31;
+      // Vector Registers, Floating Point Registers, and VSX Registers
+      // With ISA 2.06, the "Vector-Scalar Floating-point" category
+      // provides facilities to support vector and scalar binary
+      // floating-point operations.  A unified register file is an
+      // integral part of this new facility, combining the floating
+      // point and vector registers into a single file of 64 x 128-bit
+      // registers, referred to as VSR[0..63].  The floating point
+      // registers are now mapped into doubleword element 0 of
+      // VSR[0..31].  The 32 x 128-bit vector registers defined by the
+      // "Vector Facility [Category: Vector]" are now mapped to
+      // VSR[32..63].
 
-      // Vector Registers
       // IMPORTANT: the user of libvex must place the guest state so as
-      // to ensure that guest_VR{0..31}, and any shadows thereof, are
+      // to ensure that guest_VSR{0..63}, and any shadows thereof, are
      // 16-aligned.
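The renumbered ppc64 offsets follow directly from sizeof(U128) == 16: VSR slot n sits at 256 + 16*n. A compile-time sanity check of that arithmetic, illustration only and assuming a C11 compiler for _Static_assert:

   /* Offsets below: VSR slot n is at 256 + 16*n in VexGuestPPC64State. */
   #define PPC64_VSR_OFF(n)  (256 + 16 * (n))

   _Static_assert( PPC64_VSR_OFF(32) == 768,  "VR0 now lives at guest_VSR32" );
   _Static_assert( PPC64_VSR_OFF(63) == 1264, "last VSR slot" );
   _Static_assert( PPC64_VSR_OFF(64) == 1280, "guest_CIA follows the file" );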
- /* 512 */ U128 guest_VR0; - /* 528 */ U128 guest_VR1; - /* 544 */ U128 guest_VR2; - /* 560 */ U128 guest_VR3; - /* 576 */ U128 guest_VR4; - /* 592 */ U128 guest_VR5; - /* 608 */ U128 guest_VR6; - /* 624 */ U128 guest_VR7; - /* 640 */ U128 guest_VR8; - /* 656 */ U128 guest_VR9; - /* 672 */ U128 guest_VR10; - /* 688 */ U128 guest_VR11; - /* 704 */ U128 guest_VR12; - /* 720 */ U128 guest_VR13; - /* 736 */ U128 guest_VR14; - /* 752 */ U128 guest_VR15; - /* 768 */ U128 guest_VR16; - /* 784 */ U128 guest_VR17; - /* 800 */ U128 guest_VR18; - /* 816 */ U128 guest_VR19; - /* 832 */ U128 guest_VR20; - /* 848 */ U128 guest_VR21; - /* 864 */ U128 guest_VR22; - /* 880 */ U128 guest_VR23; - /* 896 */ U128 guest_VR24; - /* 912 */ U128 guest_VR25; - /* 928 */ U128 guest_VR26; - /* 944 */ U128 guest_VR27; - /* 960 */ U128 guest_VR28; - /* 976 */ U128 guest_VR29; - /* 992 */ U128 guest_VR30; - /* 1008 */ U128 guest_VR31; - - /* 1024 */ ULong guest_CIA; // IP (no arch visible register) - /* 1032 */ ULong guest_LR; // Link Register - /* 1040 */ ULong guest_CTR; // Count Register + + /* 256 */ U128 guest_VSR0; + /* 272 */ U128 guest_VSR1; + /* 288 */ U128 guest_VSR2; + /* 304 */ U128 guest_VSR3; + /* 320 */ U128 guest_VSR4; + /* 336 */ U128 guest_VSR5; + /* 352 */ U128 guest_VSR6; + /* 368 */ U128 guest_VSR7; + /* 384 */ U128 guest_VSR8; + /* 400 */ U128 guest_VSR9; + /* 416 */ U128 guest_VSR10; + /* 432 */ U128 guest_VSR11; + /* 448 */ U128 guest_VSR12; + /* 464 */ U128 guest_VSR13; + /* 480 */ U128 guest_VSR14; + /* 496 */ U128 guest_VSR15; + /* 512 */ U128 guest_VSR16; + /* 528 */ U128 guest_VSR17; + /* 544 */ U128 guest_VSR18; + /* 560 */ U128 guest_VSR19; + /* 576 */ U128 guest_VSR20; + /* 592 */ U128 guest_VSR21; + /* 608 */ U128 guest_VSR22; + /* 624 */ U128 guest_VSR23; + /* 640 */ U128 guest_VSR24; + /* 656 */ U128 guest_VSR25; + /* 672 */ U128 guest_VSR26; + /* 688 */ U128 guest_VSR27; + /* 704 */ U128 guest_VSR28; + /* 720 */ U128 guest_VSR29; + /* 736 */ U128 guest_VSR30; + /* 752 */ U128 guest_VSR31; + /* 768 */ U128 guest_VSR32; + /* 784 */ U128 guest_VSR33; + /* 800 */ U128 guest_VSR34; + /* 816 */ U128 guest_VSR35; + /* 832 */ U128 guest_VSR36; + /* 848 */ U128 guest_VSR37; + /* 864 */ U128 guest_VSR38; + /* 880 */ U128 guest_VSR39; + /* 896 */ U128 guest_VSR40; + /* 912 */ U128 guest_VSR41; + /* 928 */ U128 guest_VSR42; + /* 944 */ U128 guest_VSR43; + /* 960 */ U128 guest_VSR44; + /* 976 */ U128 guest_VSR45; + /* 992 */ U128 guest_VSR46; + /* 1008 */ U128 guest_VSR47; + /* 1024 */ U128 guest_VSR48; + /* 1040 */ U128 guest_VSR49; + /* 1056 */ U128 guest_VSR50; + /* 1072 */ U128 guest_VSR51; + /* 1088 */ U128 guest_VSR52; + /* 1104 */ U128 guest_VSR53; + /* 1120 */ U128 guest_VSR54; + /* 1136 */ U128 guest_VSR55; + /* 1152 */ U128 guest_VSR56; + /* 1168 */ U128 guest_VSR57; + /* 1184 */ U128 guest_VSR58; + /* 1200 */ U128 guest_VSR59; + /* 1216 */ U128 guest_VSR60; + /* 1232 */ U128 guest_VSR61; + /* 1248 */ U128 guest_VSR62; + /* 1264 */ U128 guest_VSR63; + + /* 1280 */ ULong guest_CIA; // IP (no arch visible register) + /* 1288 */ ULong guest_LR; // Link Register + /* 1296 */ ULong guest_CTR; // Count Register /* XER pieces */ - /* 1048 */ UChar guest_XER_SO; /* in lsb */ - /* 1049 */ UChar guest_XER_OV; /* in lsb */ - /* 1050 */ UChar guest_XER_CA; /* in lsb */ - /* 1051 */ UChar guest_XER_BC; /* all bits */ + /* 1304 */ UChar guest_XER_SO; /* in lsb */ + /* 1305 */ UChar guest_XER_OV; /* in lsb */ + /* 1306 */ UChar guest_XER_CA; /* in lsb */ + /* 1307 */ UChar guest_XER_BC; /* all bits */ 
/* CR pieces */ - /* 1052 */ UChar guest_CR0_321; /* in [3:1] */ - /* 1053 */ UChar guest_CR0_0; /* in lsb */ - /* 1054 */ UChar guest_CR1_321; /* in [3:1] */ - /* 1055 */ UChar guest_CR1_0; /* in lsb */ - /* 1056 */ UChar guest_CR2_321; /* in [3:1] */ - /* 1057 */ UChar guest_CR2_0; /* in lsb */ - /* 1058 */ UChar guest_CR3_321; /* in [3:1] */ - /* 1059 */ UChar guest_CR3_0; /* in lsb */ - /* 1060 */ UChar guest_CR4_321; /* in [3:1] */ - /* 1061 */ UChar guest_CR4_0; /* in lsb */ - /* 1062 */ UChar guest_CR5_321; /* in [3:1] */ - /* 1063 */ UChar guest_CR5_0; /* in lsb */ - /* 1064 */ UChar guest_CR6_321; /* in [3:1] */ - /* 1065 */ UChar guest_CR6_0; /* in lsb */ - /* 1066 */ UChar guest_CR7_321; /* in [3:1] */ - /* 1067 */ UChar guest_CR7_0; /* in lsb */ + /* 1308 */ UChar guest_CR0_321; /* in [3:1] */ + /* 1309 */ UChar guest_CR0_0; /* in lsb */ + /* 1310 */ UChar guest_CR1_321; /* in [3:1] */ + /* 1311 */ UChar guest_CR1_0; /* in lsb */ + /* 1312 */ UChar guest_CR2_321; /* in [3:1] */ + /* 1313 */ UChar guest_CR2_0; /* in lsb */ + /* 1314 */ UChar guest_CR3_321; /* in [3:1] */ + /* 1315 */ UChar guest_CR3_0; /* in lsb */ + /* 1316 */ UChar guest_CR4_321; /* in [3:1] */ + /* 1317 */ UChar guest_CR4_0; /* in lsb */ + /* 1318 */ UChar guest_CR5_321; /* in [3:1] */ + /* 1319 */ UChar guest_CR5_0; /* in lsb */ + /* 1320 */ UChar guest_CR6_321; /* in [3:1] */ + /* 1321 */ UChar guest_CR6_0; /* in lsb */ + /* 1322 */ UChar guest_CR7_321; /* in [3:1] */ + /* 1323 */ UChar guest_CR7_0; /* in lsb */ /* FP Status & Control Register fields */ - /* 1068 */ UInt guest_FPROUND; // FP Rounding Mode + /* 1324 */ UInt guest_FPROUND; // FP Rounding Mode /* Vector Save/Restore Register */ - /* 1072 */ UInt guest_VRSAVE; + /* 1328 */ UInt guest_VRSAVE; /* Vector Status and Control Register */ - /* 1076 */ UInt guest_VSCR; + /* 1332 */ UInt guest_VSCR; /* Emulation warnings */ - /* 1080 */ UInt guest_EMWARN; + /* 1336 */ UInt guest_EMWARN; /* gcc adds 4 bytes padding here: pre-empt it. */ - /* 1084 */ UInt padding; + /* 1340 */ UInt padding; /* For icbi: record start and length of area to invalidate */ - /* 1088 */ ULong guest_TISTART; - /* 1096 */ ULong guest_TILEN; + /* 1344 */ ULong guest_TISTART; + /* 1352 */ ULong guest_TILEN; /* Used to record the unredirected guest address at the start of a translation whose start has been redirected. By reading @@ -244,26 +252,26 @@ typedef find out what the corresponding no-redirection address was. Note, this is only set for wrap-style redirects, not for replace-style ones. */ - /* 1104 */ ULong guest_NRADDR; - /* 1112 */ ULong guest_NRADDR_GPR2; + /* 1360 */ ULong guest_NRADDR; + /* 1368 */ ULong guest_NRADDR_GPR2; /* A grows-upwards stack for hidden saves/restores of LR and R2 needed for function interception and wrapping on ppc64-linux. A horrible hack. REDIR_SP points to the highest live entry, and so starts at -1. */ - /* 1120 */ ULong guest_REDIR_SP; - /* 1128 */ ULong guest_REDIR_STACK[VEX_GUEST_PPC64_REDIR_STACK_SIZE]; + /* 1376 */ ULong guest_REDIR_SP; + /* 1384 */ ULong guest_REDIR_STACK[VEX_GUEST_PPC64_REDIR_STACK_SIZE]; /* Needed for AIX: CIA at the last SC insn. Used when backing up to restart a syscall that has been interrupted by a signal. */ - /* 1384 */ ULong guest_IP_AT_SYSCALL; + /* 1640 */ ULong guest_IP_AT_SYSCALL; /* SPRG3, which AIUI is readonly in user space. Needed for threading on AIX. */ - /* ???? */ ULong guest_SPRG3_RO; + /* 1648 */ ULong guest_SPRG3_RO; - /* Padding to make it have an 8-aligned size */ - /* ???? 
*/ ULong padding2;
+   /* Padding to make it have a 16-aligned size */
+   /* 1656 */ ULong  padding2;
 
    }
    VexGuestPPC64State;
 
Index: VEX/pub/libvex.h
===================================================================
--- VEX/pub/libvex.h.orig
+++ VEX/pub/libvex.h
@@ -87,12 +87,14 @@ typedef
 #define VEX_HWCAPS_PPC32_FX    (1<<10) /* FP extns (fsqrt, fsqrts) */
 #define VEX_HWCAPS_PPC32_GX    (1<<11) /* Graphics extns
                                           (fres,frsqrte,fsel,stfiwx) */
+#define VEX_HWCAPS_PPC32_VX    (1<<12) /* Vector-scalar floating-point (VSX); implies ISA 2.06 or higher */
 
 /* ppc64: baseline capability is integer and basic FP insns */
-#define VEX_HWCAPS_PPC64_V     (1<<12) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC64_FX    (1<<13) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC64_GX    (1<<14) /* Graphics extns
+#define VEX_HWCAPS_PPC64_V     (1<<13) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC64_FX    (1<<14) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC64_GX    (1<<15) /* Graphics extns
                                           (fres,frsqrte,fsel,stfiwx) */
+#define VEX_HWCAPS_PPC64_VX    (1<<16) /* Vector-scalar floating-point (VSX); implies ISA 2.06 or higher */
 
 /* s390x: baseline capability is z/Architecture with long displacement */
 #define VEX_HWCAPS_S390X_LDISP (1<<13) /* Long-displacement facility */
Index: VEX/pub/libvex_ir.h
===================================================================
--- VEX/pub/libvex_ir.h.orig
+++ VEX/pub/libvex_ir.h
@@ -616,6 +616,8 @@ typedef
       Iop_I16StoF64, /* signed I16 -> F64 */
       Iop_I32StoF64, /* signed I32 -> F64 */
       Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
+      Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
+      Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
 
       Iop_I32UtoF64, /* unsigned I32 -> F64 */
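Finally, a self-contained sketch of how the reshuffled ppc64 hwcaps bits compose; the constants are copied from the libvex.h hunk above, and the printed name mirrors the string returned by show_hwcaps_ppc64() for this combination.

   #include <stdio.h>

   #define VEX_HWCAPS_PPC64_V  (1<<13)   /* Altivec (VMX)          */
   #define VEX_HWCAPS_PPC64_FX (1<<14)   /* fsqrt, fsqrts          */
   #define VEX_HWCAPS_PPC64_GX (1<<15)   /* fres, frsqrte, fsel    */
   #define VEX_HWCAPS_PPC64_VX (1<<16)   /* VSX; ISA 2.06 or later */

   int main ( void )
   {
      unsigned int hwcaps = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
                            | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX;

      if (hwcaps & VEX_HWCAPS_PPC64_VX)
         printf( "ppc64-int-flt-vmx-FX-GX-VX\n" );  /* VSX decoding enabled */
      return 0;
   }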