Files
libffi/810.patch

425 lines
8.4 KiB
Diff

From ad3b42caa238330e872ae32adea55defe7a14286 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 27 Nov 2023 00:39:21 +0200
Subject: [PATCH] aarch64: Write the BTI instructions as "hint" instructions
GNU binutils refuses to assemble the direct BTI instructions unless
the target architecture explicitly supports BTI, ending up with errors
such as:
../src/aarch64/sysv.S: Assembler messages:
../src/aarch64/sysv.S:87: Error: selected processor does not support `bti c'
../src/aarch64/sysv.S:156: Error: selected processor does not support `bti j'
Building with -march=armv8.5-a avoids these errors.
However, the BTI instructions are encoded as hint instructions, which
are ignored by processors that don't implement them. Therefore it is
possible to assemble them for the baseline armv8.0-a target as well,
by replacing "bti j" with "hint #36", "bti c" with "hint #34" and
"bti jc" with "hint #38"; these assemble into the same instruction
bits.
---
src/aarch64/ffi.c | 25 ++++++------
src/aarch64/sysv.S | 97 ++++++++++++++++++++++++----------------------
2 files changed, 63 insertions(+), 59 deletions(-)
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index 67b18fbf..8661a352 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -386,55 +386,56 @@ extend_hfa_type (void *dest, void *src, int h)
ssize_t f = h - AARCH64_RET_S4;
void *x0;
+#define BTI_J "hint #36"
asm volatile (
"adr %0, 0f\n"
" add %0, %0, %1\n"
" br %0\n"
-"0: bti j\n" /* S4 */
+"0: "BTI_J"\n" /* S4 */
" ldp s16, s17, [%3]\n"
" ldp s18, s19, [%3, #8]\n"
" b 4f\n"
-" bti j\n" /* S3 */
+" "BTI_J"\n" /* S3 */
" ldp s16, s17, [%3]\n"
" ldr s18, [%3, #8]\n"
" b 3f\n"
-" bti j\n" /* S2 */
+" "BTI_J"\n" /* S2 */
" ldp s16, s17, [%3]\n"
" b 2f\n"
" nop\n"
-" bti j\n" /* S1 */
+" "BTI_J"\n" /* S1 */
" ldr s16, [%3]\n"
" b 1f\n"
" nop\n"
-" bti j\n" /* D4 */
+" "BTI_J"\n" /* D4 */
" ldp d16, d17, [%3]\n"
" ldp d18, d19, [%3, #16]\n"
" b 4f\n"
-" bti j\n" /* D3 */
+" "BTI_J"\n" /* D3 */
" ldp d16, d17, [%3]\n"
" ldr d18, [%3, #16]\n"
" b 3f\n"
-" bti j\n" /* D2 */
+" "BTI_J"\n" /* D2 */
" ldp d16, d17, [%3]\n"
" b 2f\n"
" nop\n"
-" bti j\n" /* D1 */
+" "BTI_J"\n" /* D1 */
" ldr d16, [%3]\n"
" b 1f\n"
" nop\n"
-" bti j\n" /* Q4 */
+" "BTI_J"\n" /* Q4 */
" ldp q16, q17, [%3]\n"
" ldp q18, q19, [%3, #32]\n"
" b 4f\n"
-" bti j\n" /* Q3 */
+" "BTI_J"\n" /* Q3 */
" ldp q16, q17, [%3]\n"
" ldr q18, [%3, #32]\n"
" b 3f\n"
-" bti j\n" /* Q2 */
+" "BTI_J"\n" /* Q2 */
" ldp q16, q17, [%3]\n"
" b 2f\n"
" nop\n"
-" bti j\n" /* Q1 */
+" "BTI_J"\n" /* Q1 */
" ldr q16, [%3]\n"
" b 1f\n"
"4: str q19, [%2, #48]\n"
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index 9f4188e0..90816752 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -64,6 +64,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define PTR_SIZE 8
#endif
+#define BTI_C hint #34
+#define BTI_J hint #36
+
.text
.align 4
@@ -84,7 +87,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
cfi_startproc
CNAME(ffi_call_SYSV):
- bti c
+ BTI_C
/* Sign the lr with x1 since that is where it will be stored */
SIGN_LR_WITH_REG(x1)
@@ -153,15 +156,15 @@ CNAME(ffi_call_SYSV):
and therefore we want to extend to 64 bits; these types
have two consecutive entries allocated for them. */
.align 4
-0: bti j /* VOID */
+0: BTI_J /* VOID */
b 99f
nop
nop
-1: bti j /* INT64 */
+1: BTI_J /* INT64 */
str x0, [x3]
b 99f
nop
-2: bti j /* INT128 */
+2: BTI_J /* INT128 */
stp x0, x1, [x3]
b 99f
nop
@@ -185,55 +188,55 @@ CNAME(ffi_call_SYSV):
b 99f
nop
nop
-8: bti j /* S4 */
+8: BTI_J /* S4 */
st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3]
b 99f
nop
-9: bti j /* S3 */
+9: BTI_J /* S3 */
st3 { v0.s, v1.s, v2.s }[0], [x3]
b 99f
nop
-10: bti j /* S2 */
+10: BTI_J /* S2 */
stp s0, s1, [x3]
b 99f
nop
-11: bti j
+11: BTI_J
str s0, [x3] /* S1 */
b 99f
nop
-12: bti j /* D4 */
+12: BTI_J /* D4 */
st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3]
b 99f
nop
-13: bti j /* D3 */
+13: BTI_J /* D3 */
st3 { v0.d, v1.d, v2.d }[0], [x3]
b 99f
nop
-14: bti j /* D2 */
+14: BTI_J /* D2 */
stp d0, d1, [x3]
b 99f
nop
-15: bti j /* D1 */
+15: BTI_J /* D1 */
str d0, [x3]
b 99f
nop
-16: bti j /* Q4 */
+16: BTI_J /* Q4 */
str q3, [x3, #48]
nop
nop
-17: bti j /* Q3 */
+17: BTI_J /* Q3 */
str q2, [x3, #32]
nop
nop
-18: bti j /* Q2 */
+18: BTI_J /* Q2 */
stp q0, q1, [x3]
b 99f
nop
-19: bti j /* Q1 */
+19: BTI_J /* Q1 */
str q0, [x3]
b 99f
nop
-20: bti j /* UINT8 */
+20: BTI_J /* UINT8 */
uxtb w0, w0
str x0, [x3]
nop
@@ -241,7 +244,7 @@ CNAME(ffi_call_SYSV):
nop
nop
nop
-22: bti j /* UINT16 */
+22: BTI_J /* UINT16 */
uxth w0, w0
str x0, [x3]
nop
@@ -249,7 +252,7 @@ CNAME(ffi_call_SYSV):
nop
nop
nop
-24: bti j /* UINT32 */
+24: BTI_J /* UINT32 */
mov w0, w0
str x0, [x3]
nop
@@ -257,7 +260,7 @@ CNAME(ffi_call_SYSV):
nop
nop
nop
-26: bti j /* SINT8 */
+26: BTI_J /* SINT8 */
sxtb x0, w0
str x0, [x3]
nop
@@ -265,7 +268,7 @@ CNAME(ffi_call_SYSV):
nop
nop
nop
-28: bti j /* SINT16 */
+28: BTI_J /* SINT16 */
sxth x0, w0
str x0, [x3]
nop
@@ -273,7 +276,7 @@ CNAME(ffi_call_SYSV):
nop
nop
nop
-30: bti j /* SINT32 */
+30: BTI_J /* SINT32 */
sxtw x0, w0
str x0, [x3]
nop
@@ -317,7 +320,7 @@ CNAME(ffi_call_SYSV):
.align 4
CNAME(ffi_closure_SYSV_V):
cfi_startproc
- bti c
+ BTI_C
SIGN_LR
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
@@ -342,7 +345,7 @@ CNAME(ffi_closure_SYSV_V):
.align 4
cfi_startproc
CNAME(ffi_closure_SYSV):
- bti c
+ BTI_C
SIGN_LR
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
@@ -378,15 +381,15 @@ L(do_closure):
/* Note that each table entry is 4 insns, and thus 16 bytes. */
.align 4
-0: bti j /* VOID */
+0: BTI_J /* VOID */
b 99f
nop
nop
-1: bti j /* INT64 */
+1: BTI_J /* INT64 */
ldr x0, [x3]
b 99f
nop
-2: bti j /* INT128 */
+2: BTI_J /* INT128 */
ldp x0, x1, [x3]
b 99f
nop
@@ -410,55 +413,55 @@ L(do_closure):
nop
nop
nop
-8: bti j /* S4 */
+8: BTI_J /* S4 */
ldr s3, [x3, #12]
nop
nop
-9: bti j /* S3 */
+9: BTI_J /* S3 */
ldr s2, [x3, #8]
nop
nop
-10: bti j /* S2 */
+10: BTI_J /* S2 */
ldp s0, s1, [x3]
b 99f
nop
-11: bti j /* S1 */
+11: BTI_J /* S1 */
ldr s0, [x3]
b 99f
nop
-12: bti j /* D4 */
+12: BTI_J /* D4 */
ldr d3, [x3, #24]
nop
nop
-13: bti j /* D3 */
+13: BTI_J /* D3 */
ldr d2, [x3, #16]
nop
nop
-14: bti j /* D2 */
+14: BTI_J /* D2 */
ldp d0, d1, [x3]
b 99f
nop
-15: bti j /* D1 */
+15: BTI_J /* D1 */
ldr d0, [x3]
b 99f
nop
-16: bti j /* Q4 */
+16: BTI_J /* Q4 */
ldr q3, [x3, #48]
nop
nop
-17: bti j /* Q3 */
+17: BTI_J /* Q3 */
ldr q2, [x3, #32]
nop
nop
-18: bti j /* Q2 */
+18: BTI_J /* Q2 */
ldp q0, q1, [x3]
b 99f
nop
-19: bti j /* Q1 */
+19: BTI_J /* Q1 */
ldr q0, [x3]
b 99f
nop
-20: bti j /* UINT8 */
+20: BTI_J /* UINT8 */
ldrb w0, [x3, #BE(7)]
b 99f
nop
@@ -466,7 +469,7 @@ L(do_closure):
nop
nop
nop
-22: bti j /* UINT16 */
+22: BTI_J /* UINT16 */
ldrh w0, [x3, #BE(6)]
b 99f
nop
@@ -474,7 +477,7 @@ L(do_closure):
nop
nop
nop
-24: bti j /* UINT32 */
+24: BTI_J /* UINT32 */
ldr w0, [x3, #BE(4)]
b 99f
nop
@@ -482,7 +485,7 @@ L(do_closure):
nop
nop
nop
-26: bti j /* SINT8 */
+26: BTI_J /* SINT8 */
ldrsb x0, [x3, #BE(7)]
b 99f
nop
@@ -490,7 +493,7 @@ L(do_closure):
nop
nop
nop
-28: bti j /* SINT16 */
+28: BTI_J /* SINT16 */
ldrsh x0, [x3, #BE(6)]
b 99f
nop
@@ -498,7 +501,7 @@ L(do_closure):
nop
nop
nop
-30: bti j /* SINT32 */
+30: BTI_J /* SINT32 */
ldrsw x0, [x3, #BE(4)]
nop
nop
@@ -614,7 +617,7 @@ CNAME(ffi_closure_trampoline_table_page):
.align 4
CNAME(ffi_go_closure_SYSV_V):
cfi_startproc
- bti c
+ BTI_C
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
cfi_rel_offset (x29, 0)
@@ -638,7 +641,7 @@ CNAME(ffi_go_closure_SYSV_V):
.align 4
cfi_startproc
CNAME(ffi_go_closure_SYSV):
- bti c
+ BTI_C
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
cfi_rel_offset (x29, 0)