tcg/ppc: Reorg goto_tb implementation
The old ppc64 implementation replaces 2 or 4 insns, which leaves a race condition: a thread could be stopped at a PC in the middle of the sequence, and when restarted it would not see the complete address computation and would branch to nowhere. The new implementation replaces only one insn, swapping between b <dest> and mtctr r31, falling through to a general-case indirect branch. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
		| @@ -1854,104 +1854,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) | ||||
|     tcg_out32(s, insn); | ||||
| } | ||||
| 
 | ||||
| static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2) | ||||
| { | ||||
|     if (HOST_BIG_ENDIAN) { | ||||
|         return (uint64_t)i1 << 32 | i2; | ||||
|     } | ||||
|     return (uint64_t)i2 << 32 | i1; | ||||
| } | ||||
| 
 | ||||
| static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw, | ||||
|                                   tcg_insn_unit i0, tcg_insn_unit i1) | ||||
| { | ||||
| #if TCG_TARGET_REG_BITS == 64
 | ||||
|     qatomic_set((uint64_t *)rw, make_pair(i0, i1)); | ||||
|     flush_idcache_range(rx, rw, 8); | ||||
| #else
 | ||||
|     qemu_build_not_reached(); | ||||
| #endif
 | ||||
| } | ||||
| 
 | ||||
/*
 * Replace four consecutive insns at rx (executable view) / rw (writable
 * view) with i0..i3 using a single 16-byte stq store (ISA 2.07 — see the
 * have_isa_2_07 gate in the caller), then flush the icache range.
 */
static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
                                  tcg_insn_unit i0, tcg_insn_unit i1,
                                  tcg_insn_unit i2, tcg_insn_unit i3)
{
    uint64_t p[2];

    /* Order the two doublewords so i0..i3 land in ascending addresses. */
    p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
    p[HOST_BIG_ENDIAN] = make_pair(i2, i3);

    /*
     * There's no convenient way to get the compiler to allocate a pair
     * of registers at an even index, so copy into r6/r7 and clobber.
     */
    asm("mr  %%r6, %1\n\t"
        "mr  %%r7, %2\n\t"
        "stq %%r6, %0"
        : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
    flush_idcache_range(rx, rw, 16);
}
| 
 | ||||
/*
 * Retarget the goto_tb jump of TB 'tb', slot 'n', to its current
 * jmp_target_addr.  jmp_rx is the executable view of the patch site,
 * jmp_rw the writable view.
 *
 * NOTE(review): per the commit message, patching 2 or 4 insns here is
 * racy — a thread stopped mid-sequence can resume with an incomplete
 * address computation; this is why the implementation was replaced.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    tcg_insn_unit i0, i1, i2, i3;
    uintptr_t addr = tb->jmp_target_addr[n];
    intptr_t tb_diff = addr - (uintptr_t)tb->tc.ptr;  /* offset from TB start */
    intptr_t br_diff = addr - (jmp_rx + 4);           /* branch displacement */
    intptr_t lo, hi;

    /* 32-bit host: the sequence is a single direct branch insn. */
    if (TCG_TARGET_REG_BITS == 32) {
        intptr_t diff = addr - jmp_rx;
        tcg_debug_assert(in_range_b(diff));
        qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
        flush_idcache_range(jmp_rx, jmp_rw, 4);
        return;
    }

    /*
     * For 16-bit displacements, we can use a single add + branch.
     * This happens quite often.
     */
    if (tb_diff == (int16_t)tb_diff) {
        i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
        i1 = B | (br_diff & 0x3fffffc);
        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
        return;
    }

    /* Split tb_diff into sign-adjusted high and low halves for addis+addi. */
    lo = (int16_t)tb_diff;
    hi = (int32_t)(tb_diff - lo);
    assert(tb_diff == hi + lo);
    i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
    i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);

    /*
     * Without stq from 2.07, we can only update two insns,
     * and those must be the ones that load the target address.
     */
    if (!have_isa_2_07) {
        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
        return;
    }

    /*
     * For 26-bit displacements, we can use a direct branch.
     * Otherwise we still need the indirect branch, which we
     * must restore after a potential direct branch write.
     */
    br_diff -= 4;
    if (in_range_b(br_diff)) {
        i2 = B | (br_diff & 0x3fffffc);
        i3 = NOP;
    } else {
        i2 = MTSPR | RS(TCG_REG_TB) | CTR;
        i3 = BCCTR | BO_ALWAYS;
    }
    ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
}
| 
 | ||||
| static void tcg_out_call_int(TCGContext *s, int lk, | ||||
|                              const tcg_insn_unit *target) | ||||
| { | ||||
| @@ -2625,30 +2527,56 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) | ||||
| 
 | ||||
| static void tcg_out_goto_tb(TCGContext *s, int which) | ||||
| { | ||||
|     /* Direct jump. */ | ||||
|     if (TCG_TARGET_REG_BITS == 64) { | ||||
|         /* Ensure the next insns are 8 or 16-byte aligned. */ | ||||
|         while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { | ||||
|             tcg_out32(s, NOP); | ||||
|         } | ||||
|     uintptr_t ptr = get_jmp_target_addr(s, which); | ||||
| 
 | ||||
|     if (USE_REG_TB) { | ||||
|         ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); | ||||
|         tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); | ||||
|      | ||||
|         /* Direct branch will be patched by tb_target_set_jmp_target. */ | ||||
|         set_jmp_insn_offset(s, which); | ||||
|         tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); | ||||
|         tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); | ||||
|         tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); | ||||
| 
 | ||||
|         /* When branch is out of range, fall through to indirect. */ | ||||
|         tcg_out32(s, BCCTR | BO_ALWAYS); | ||||
| 
 | ||||
|         /* For the unlinked case, need to reset TCG_REG_TB.  */ | ||||
|         set_jmp_reset_offset(s, which); | ||||
|         tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, | ||||
|                          -tcg_current_code_size(s)); | ||||
|     } else { | ||||
|         /* Direct branch will be patched by tb_target_set_jmp_target. */ | ||||
|         set_jmp_insn_offset(s, which); | ||||
|         tcg_out32(s, NOP); | ||||
| 
 | ||||
|         /* When branch is out of range, fall through to indirect. */ | ||||
|         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr); | ||||
|         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr); | ||||
|         tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); | ||||
|         tcg_out32(s, BCCTR | BO_ALWAYS); | ||||
|         set_jmp_reset_offset(s, which); | ||||
|         if (USE_REG_TB) { | ||||
|             /* For the unlinked case, need to reset TCG_REG_TB.  */ | ||||
|             tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, | ||||
|                              -tcg_current_code_size(s)); | ||||
|         } | ||||
|     } else { | ||||
|         set_jmp_insn_offset(s, which); | ||||
|         tcg_out32(s, B); | ||||
|         set_jmp_reset_offset(s, which); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void tb_target_set_jmp_target(const TranslationBlock *tb, int n, | ||||
|                               uintptr_t jmp_rx, uintptr_t jmp_rw) | ||||
| { | ||||
|     uintptr_t addr = tb->jmp_target_addr[n]; | ||||
|     intptr_t diff = addr - jmp_rx; | ||||
|     tcg_insn_unit insn; | ||||
| 
 | ||||
|     if (in_range_b(diff)) { | ||||
|         insn = B | (diff & 0x3fffffc); | ||||
|     } else if (USE_REG_TB) { | ||||
|         insn = MTSPR | RS(TCG_REG_TB) | CTR; | ||||
|     } else { | ||||
|         insn = NOP; | ||||
|     } | ||||
| 
 | ||||
|     qatomic_set((uint32_t *)jmp_rw, insn); | ||||
|     flush_idcache_range(jmp_rx, jmp_rw, 4); | ||||
| } | ||||
| 
 | ||||
| static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||||
|                        const TCGArg args[TCG_MAX_OP_ARGS], | ||||
|                        const int const_args[TCG_MAX_OP_ARGS]) | ||||
|   | ||||
| @@ -27,11 +27,10 @@ | ||||
|  | ||||
/*
 * The scraped diff lost its +/- markers here, leaving both the removed
 * per-arch MAX_CODE_GEN_BUFFER_SIZE definitions and the added unlimited
 * one (a macro redefinition).  Reconstructed post-commit state: with
 * the indirect-branch fallback there is no displacement-range limit on
 * the code buffer, so the cap is lifted on both 32- and 64-bit hosts.
 */
#ifdef _ARCH_PPC64
# define TCG_TARGET_REG_BITS  64
#else
# define TCG_TARGET_REG_BITS  32
#endif
#define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)

#define TCG_TARGET_NB_REGS 64
#define TCG_TARGET_INSN_UNIT_SIZE 4
|   | ||||
		Reference in New Issue
	
	Block a user