violation with getauxval. [bsc#1167939] OBS-URL: https://build.opensuse.org/package/show/devel:gcc/gcc7?expand=0&rev=204
4299 lines
144 KiB
Diff
4299 lines
144 KiB
Diff
From 56c60ff0b1ee5d15e9e1673eddda6cb450e4253c Mon Sep 17 00:00:00 2001
|
|
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
Date: Tue, 6 Jun 2017 13:26:46 +0000
|
|
Subject: [PATCH 01/24] Allow const0_rtx operand for atomic compare-exchange
|
|
patterns
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2017-06-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
|
|
gcc/
|
|
* config/aarch64/atomics.md (atomic_compare_and_swap<mode> expander):
|
|
Use aarch64_reg_or_zero predicate for operand 4.
|
|
(aarch64_compare_and_swap<mode> define_insn_and_split):
|
|
Use aarch64_reg_or_zero predicate for operand 3. Add 'Z' constraint.
|
|
(aarch64_store_exclusive<mode>): Likewise for operand 2.
|
|
|
|
gcc/testsuite/
|
|
* gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c: New test.
|
|
|
|
(cherry picked from commit 4ebcc903bf03705099cd4b50231dc8fe444d70b9)
|
|
---
|
|
gcc/config/aarch64/atomics.md | 8 ++++----
|
|
.../aarch64/atomic_cmp_exchange_zero_reg_1.c | 12 ++++++++++++
|
|
2 files changed, 16 insertions(+), 4 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 09d441075f0..27fc1933ce3 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -25,7 +25,7 @@
|
|
(match_operand:ALLI 1 "register_operand" "") ;; val out
|
|
(match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
|
|
(match_operand:ALLI 3 "general_operand" "") ;; expected
|
|
- (match_operand:ALLI 4 "register_operand" "") ;; desired
|
|
+ (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
|
|
(match_operand:SI 5 "const_int_operand") ;; is_weak
|
|
(match_operand:SI 6 "const_int_operand") ;; mod_s
|
|
(match_operand:SI 7 "const_int_operand")] ;; mod_f
|
|
@@ -45,7 +45,7 @@
|
|
(set (match_dup 1)
|
|
(unspec_volatile:SHORT
|
|
[(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected
|
|
- (match_operand:SHORT 3 "register_operand" "r") ;; desired
|
|
+ (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
(match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
(match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
(match_operand:SI 6 "const_int_operand")] ;; mod_f
|
|
@@ -69,7 +69,7 @@
|
|
(set (match_dup 1)
|
|
(unspec_volatile:GPI
|
|
[(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect
|
|
- (match_operand:GPI 3 "register_operand" "r") ;; desired
|
|
+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
(match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
(match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
(match_operand:SI 6 "const_int_operand")] ;; mod_f
|
|
@@ -534,7 +534,7 @@
|
|
(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
|
|
(set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
|
|
(unspec_volatile:ALLI
|
|
- [(match_operand:ALLI 2 "register_operand" "r")
|
|
+ [(match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ")
|
|
(match_operand:SI 3 "const_int_operand")]
|
|
UNSPECV_SX))]
|
|
""
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
new file mode 100644
|
|
index 00000000000..15606b68990
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
@@ -0,0 +1,12 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2" } */
|
|
+
|
|
+int
|
|
+foo (int *a)
|
|
+{
|
|
+ int x = 3;
|
|
+ return __atomic_compare_exchange_n (a, &x, 0, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler "stxr\\tw\[0-9\]+, wzr,.*" } } */
|
|
+/* { dg-final { scan-assembler-not "mov\\tw\[0-9\]+, 0" } } */
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From b8e2b779d1815147073e9dcb04a1f8f9d96b1a62 Mon Sep 17 00:00:00 2001
|
|
From: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
|
Date: Mon, 16 Jul 2018 09:03:48 +0000
|
|
Subject: [PATCH 02/24] Add early clobber for aarch64_store_exclusive.
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2018-07-16 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
|
|
|
gcc/
|
|
* config/aarch64/atomics.md (aarch64_store_exclusive<mode>): Add
|
|
early clobber.
|
|
|
|
(cherry picked from commit 1d896f48fa2f796ba13773ab735ef40fa3afb257)
|
|
---
|
|
gcc/config/aarch64/atomics.md | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 27fc1933ce3..eb4b95c2453 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -530,7 +530,7 @@
|
|
)
|
|
|
|
(define_insn "aarch64_store_exclusive<mode>"
|
|
- [(set (match_operand:SI 0 "register_operand" "=r")
|
|
+ [(set (match_operand:SI 0 "register_operand" "=&r")
|
|
(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
|
|
(set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
|
|
(unspec_volatile:ALLI
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 03ffcedc78870eaf89e0f45b3d7a2e3af003e45a Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 31 Oct 2018 09:29:29 +0000
|
|
Subject: [PATCH 03/24] aarch64: Simplify LSE cas generation
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
The cas insn is a single insn, and if expanded properly need not
|
|
be split after reload. Use the proper inputs for the insn.
|
|
|
|
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.c (aarch64_expand_compare_and_swap):
|
|
Force oldval into the rval register for TARGET_LSE; emit the compare
|
|
during initial expansion so that it may be deleted if unused.
|
|
(aarch64_gen_atomic_cas): Remove.
|
|
* config/aarch64/atomics.md (@aarch64_compare_and_swap<SHORT>_lse):
|
|
Change =&r to +r for operand 0; use match_dup for operand 2;
|
|
remove is_weak and mod_f operands as unused. Drop the split
|
|
and merge with...
|
|
(@aarch64_atomic_cas<SHORT>): ... this pattern's output; remove.
|
|
(@aarch64_compare_and_swap<GPI>_lse): Similarly.
|
|
(@aarch64_atomic_cas<GPI>): Similarly.
|
|
|
|
(cherry picked from commit 77f33f44baf24c22848197aa80962c003dd7b3e2)
|
|
---
|
|
gcc/config/aarch64/aarch64-protos.h | 1 -
|
|
gcc/config/aarch64/aarch64.c | 62 ++++--------
|
|
gcc/config/aarch64/atomics.md | 143 +++++++++-------------------
|
|
3 files changed, 63 insertions(+), 143 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
|
index 9543f8c9f29..e25f9833af4 100644
|
|
--- a/gcc/config/aarch64/aarch64-protos.h
|
|
+++ b/gcc/config/aarch64/aarch64-protos.h
|
|
@@ -425,7 +425,6 @@ rtx aarch64_load_tp (rtx);
|
|
|
|
void aarch64_expand_compare_and_swap (rtx op[]);
|
|
void aarch64_split_compare_and_swap (rtx op[]);
|
|
-void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
|
|
|
|
bool aarch64_atomic_ldop_supported_p (enum rtx_code);
|
|
void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index 05eaef3d2e6..c03d0ff9b40 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -11865,7 +11865,6 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
machine_mode mode, cmp_mode;
|
|
typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
|
|
int idx;
|
|
- gen_cas_fn gen;
|
|
const gen_cas_fn split_cas[] =
|
|
{
|
|
gen_aarch64_compare_and_swapqi,
|
|
@@ -11873,7 +11872,8 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
gen_aarch64_compare_and_swapsi,
|
|
gen_aarch64_compare_and_swapdi
|
|
};
|
|
- const gen_cas_fn atomic_cas[] =
|
|
+ typedef rtx (*gen_lse_fn) (rtx, rtx, rtx, rtx);
|
|
+ const gen_lse_fn atomic_cas[] =
|
|
{
|
|
gen_aarch64_compare_and_swapqi_lse,
|
|
gen_aarch64_compare_and_swaphi_lse,
|
|
@@ -11932,14 +11932,26 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
gcc_unreachable ();
|
|
}
|
|
if (TARGET_LSE)
|
|
- gen = atomic_cas[idx];
|
|
+ {
|
|
+ /* The CAS insn requires oldval and rval overlap, but we need to
|
|
+ have a copy of oldval saved across the operation to tell if
|
|
+ the operation is successful. */
|
|
+ if (mode == QImode || mode == HImode)
|
|
+ rval = copy_to_mode_reg (SImode, gen_lowpart (SImode, oldval));
|
|
+ else if (reg_overlap_mentioned_p (rval, oldval))
|
|
+ rval = copy_to_mode_reg (mode, oldval);
|
|
+ else
|
|
+ emit_move_insn (rval, oldval);
|
|
+ emit_insn (atomic_cas[idx] (rval, mem, newval, mod_s));
|
|
+ aarch64_gen_compare_reg (EQ, rval, oldval);
|
|
+ }
|
|
else
|
|
- gen = split_cas[idx];
|
|
-
|
|
- emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
|
|
+ emit_insn (split_cas[idx] (rval, mem, oldval, newval, is_weak, mod_s,
|
|
+ mod_f));
|
|
|
|
if (mode == QImode || mode == HImode)
|
|
- emit_move_insn (operands[1], gen_lowpart (mode, rval));
|
|
+ rval = gen_lowpart (mode, rval);
|
|
+ emit_move_insn (operands[1], rval);
|
|
|
|
x = gen_rtx_REG (CCmode, CC_REGNUM);
|
|
x = gen_rtx_EQ (SImode, x, const0_rtx);
|
|
@@ -11989,42 +12001,6 @@ aarch64_emit_post_barrier (enum memmodel model)
|
|
}
|
|
}
|
|
|
|
-/* Emit an atomic compare-and-swap operation. RVAL is the destination register
|
|
- for the data in memory. EXPECTED is the value expected to be in memory.
|
|
- DESIRED is the value to store to memory. MEM is the memory location. MODEL
|
|
- is the memory ordering to use. */
|
|
-
|
|
-void
|
|
-aarch64_gen_atomic_cas (rtx rval, rtx mem,
|
|
- rtx expected, rtx desired,
|
|
- rtx model)
|
|
-{
|
|
- rtx (*gen) (rtx, rtx, rtx, rtx);
|
|
- machine_mode mode;
|
|
-
|
|
- mode = GET_MODE (mem);
|
|
-
|
|
- switch (mode)
|
|
- {
|
|
- case QImode: gen = gen_aarch64_atomic_casqi; break;
|
|
- case HImode: gen = gen_aarch64_atomic_cashi; break;
|
|
- case SImode: gen = gen_aarch64_atomic_cassi; break;
|
|
- case DImode: gen = gen_aarch64_atomic_casdi; break;
|
|
- default:
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- /* Move the expected value into the CAS destination register. */
|
|
- emit_insn (gen_rtx_SET (rval, expected));
|
|
-
|
|
- /* Emit the CAS. */
|
|
- emit_insn (gen (rval, mem, desired, model));
|
|
-
|
|
- /* Compare the expected value with the value loaded by the CAS, to establish
|
|
- whether the swap was made. */
|
|
- aarch64_gen_compare_reg (EQ, rval, expected);
|
|
-}
|
|
-
|
|
/* Split a compare and swap pattern. */
|
|
|
|
void
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index eb4b95c2453..713aec618a2 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -85,56 +85,50 @@
|
|
}
|
|
)
|
|
|
|
-(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
|
|
- [(set (reg:CC CC_REGNUM) ;; bool out
|
|
- (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
|
|
- (set (match_operand:SI 0 "register_operand" "=&r") ;; val out
|
|
- (zero_extend:SI
|
|
- (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
|
|
- (set (match_dup 1)
|
|
- (unspec_volatile:SHORT
|
|
- [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected
|
|
- (match_operand:SHORT 3 "register_operand" "r") ;; desired
|
|
- (match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
- (match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
- (match_operand:SI 6 "const_int_operand")] ;; mod_f
|
|
- UNSPECV_ATOMIC_CMPSW))]
|
|
- "TARGET_LSE"
|
|
- "#"
|
|
- "&& reload_completed"
|
|
- [(const_int 0)]
|
|
- {
|
|
- aarch64_gen_atomic_cas (operands[0], operands[1],
|
|
- operands[2], operands[3],
|
|
- operands[5]);
|
|
- DONE;
|
|
- }
|
|
-)
|
|
-
|
|
-(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
|
|
- [(set (reg:CC CC_REGNUM) ;; bool out
|
|
- (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
|
|
- (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out
|
|
- (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
|
|
- (set (match_dup 1)
|
|
- (unspec_volatile:GPI
|
|
- [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect
|
|
- (match_operand:GPI 3 "register_operand" "r") ;; desired
|
|
- (match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
- (match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
- (match_operand:SI 6 "const_int_operand")] ;; mod_f
|
|
- UNSPECV_ATOMIC_CMPSW))]
|
|
- "TARGET_LSE"
|
|
- "#"
|
|
- "&& reload_completed"
|
|
- [(const_int 0)]
|
|
- {
|
|
- aarch64_gen_atomic_cas (operands[0], operands[1],
|
|
- operands[2], operands[3],
|
|
- operands[5]);
|
|
- DONE;
|
|
- }
|
|
-)
|
|
+(define_insn "aarch64_compare_and_swap<mode>_lse"
|
|
+ [(set (match_operand:SI 0 "register_operand" "+r") ;; val out
|
|
+ (zero_extend:SI
|
|
+ (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
|
|
+ (set (match_dup 1)
|
|
+ (unspec_volatile:SHORT
|
|
+ [(match_dup 0) ;; expected
|
|
+ (match_operand:SHORT 2 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
+ (match_operand:SI 3 "const_int_operand")] ;; mod_s
|
|
+ UNSPECV_ATOMIC_CMPSW))]
|
|
+ "TARGET_LSE"
|
|
+{
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ if (is_mm_relaxed (model))
|
|
+ return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+ else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
+ return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+ else if (is_mm_release (model))
|
|
+ return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+ else
|
|
+ return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+})
|
|
+
|
|
+(define_insn "aarch64_compare_and_swap<mode>_lse"
|
|
+ [(set (match_operand:GPI 0 "register_operand" "+r") ;; val out
|
|
+ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
|
|
+ (set (match_dup 1)
|
|
+ (unspec_volatile:GPI
|
|
+ [(match_dup 0) ;; expected
|
|
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
+ (match_operand:SI 3 "const_int_operand")] ;; mod_s
|
|
+ UNSPECV_ATOMIC_CMPSW))]
|
|
+ "TARGET_LSE"
|
|
+{
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ if (is_mm_relaxed (model))
|
|
+ return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+ else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
+ return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+ else if (is_mm_release (model))
|
|
+ return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+ else
|
|
+ return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
+})
|
|
|
|
(define_expand "atomic_exchange<mode>"
|
|
[(match_operand:ALLI 0 "register_operand" "")
|
|
@@ -607,55 +601,6 @@
|
|
return "swpal<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
})
|
|
|
|
-;; Atomic compare-and-swap: HI and smaller modes.
|
|
-
|
|
-(define_insn "aarch64_atomic_cas<mode>"
|
|
- [(set (match_operand:SI 0 "register_operand" "+&r") ;; out
|
|
- (zero_extend:SI
|
|
- (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory.
|
|
- (set (match_dup 1)
|
|
- (unspec_volatile:SHORT
|
|
- [(match_dup 0)
|
|
- (match_operand:SHORT 2 "register_operand" "r") ;; value.
|
|
- (match_operand:SI 3 "const_int_operand" "")] ;; model.
|
|
- UNSPECV_ATOMIC_CAS))]
|
|
- "TARGET_LSE && reload_completed"
|
|
-{
|
|
- enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
- if (is_mm_relaxed (model))
|
|
- return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
- else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
- return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
- else if (is_mm_release (model))
|
|
- return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
- else
|
|
- return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
-})
|
|
-
|
|
-;; Atomic compare-and-swap: SI and larger modes.
|
|
-
|
|
-(define_insn "aarch64_atomic_cas<mode>"
|
|
- [(set (match_operand:GPI 0 "register_operand" "+&r") ;; out
|
|
- (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory.
|
|
- (set (match_dup 1)
|
|
- (unspec_volatile:GPI
|
|
- [(match_dup 0)
|
|
- (match_operand:GPI 2 "register_operand" "r") ;; value.
|
|
- (match_operand:SI 3 "const_int_operand" "")] ;; model.
|
|
- UNSPECV_ATOMIC_CAS))]
|
|
- "TARGET_LSE && reload_completed"
|
|
-{
|
|
- enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
- if (is_mm_relaxed (model))
|
|
- return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
- else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
- return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
- else if (is_mm_release (model))
|
|
- return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
- else
|
|
- return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
-})
|
|
-
|
|
;; Atomic load-op: Load data, operate, store result, keep data.
|
|
|
|
(define_insn "aarch64_atomic_load<atomic_ldop><mode>"
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 19c478bafa944772683d4fdf6a4be962b9fb5d7e Mon Sep 17 00:00:00 2001
|
|
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
Date: Fri, 2 Jun 2017 14:41:52 +0000
|
|
Subject: [PATCH 04/24] Emit tighter strong atomic compare-exchange loop when
|
|
comparing against zero
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2017-06-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.c (aarch64_split_compare_and_swap):
|
|
Emit CBNZ inside loop when doing a strong exchange and comparing
|
|
against zero. Generate the CC flags after the loop.
|
|
|
|
gcc/testsuite/
|
|
* gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c: New test.
|
|
|
|
(cherry picked from commit 17f47f8621099fdc4b0fc6681924f97230c8a114)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 41 ++++++++++++++++---
|
|
.../atomic_cmp_exchange_zero_strong_1.c | 12 ++++++
|
|
2 files changed, 47 insertions(+), 6 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index c03d0ff9b40..c2789665728 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -12024,6 +12024,17 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
mode = GET_MODE (mem);
|
|
model = memmodel_from_int (INTVAL (model_rtx));
|
|
|
|
+ /* When OLDVAL is zero and we want the strong version we can emit a tighter
|
|
+ loop:
|
|
+ .label1:
|
|
+ LD[A]XR rval, [mem]
|
|
+ CBNZ rval, .label2
|
|
+ ST[L]XR scratch, newval, [mem]
|
|
+ CBNZ scratch, .label1
|
|
+ .label2:
|
|
+ CMP rval, 0. */
|
|
+ bool strong_zero_p = !is_weak && oldval == const0_rtx;
|
|
+
|
|
label1 = NULL;
|
|
if (!is_weak)
|
|
{
|
|
@@ -12040,11 +12051,21 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
else
|
|
aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
|
|
|
|
- cond = aarch64_gen_compare_reg (NE, rval, oldval);
|
|
- x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
|
- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
|
|
- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
+ if (strong_zero_p)
|
|
+ {
|
|
+ x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
|
|
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
|
|
+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ cond = aarch64_gen_compare_reg (NE, rval, oldval);
|
|
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
|
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
|
|
+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
+ }
|
|
|
|
aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
|
|
|
|
@@ -12063,7 +12084,15 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
}
|
|
|
|
emit_label (label2);
|
|
-
|
|
+ /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
|
|
+ to set the condition flags. If this is not used it will be removed by
|
|
+ later passes. */
|
|
+ if (strong_zero_p)
|
|
+ {
|
|
+ cond = gen_rtx_REG (CCmode, CC_REGNUM);
|
|
+ x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
|
|
+ emit_insn (gen_rtx_SET (cond, x));
|
|
+ }
|
|
/* Emit any final barrier needed for a __sync operation. */
|
|
if (is_mm_sync (model))
|
|
aarch64_emit_post_barrier (model);
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
new file mode 100644
|
|
index 00000000000..b14a7c29437
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
@@ -0,0 +1,12 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2" } */
|
|
+
|
|
+int
|
|
+foo (int *a)
|
|
+{
|
|
+ int x = 0;
|
|
+ return __atomic_compare_exchange_n (a, &x, 4, 0,
|
|
+ __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times "cbnz\\tw\[0-9\]+" 2 } } */
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 1b90e5f91c930b124f1d4940b515a7ea64809904 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 31 Oct 2018 09:42:39 +0000
|
|
Subject: [PATCH 05/24] aarch64: Improve cas generation
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
Do not zero-extend the input to the cas for subword operations;
|
|
instead, use the appropriate zero-extending compare insns.
|
|
Correct the predicates and constraints for immediate expected operand.
|
|
|
|
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): New.
|
|
(aarch64_split_compare_and_swap): Use it.
|
|
(aarch64_expand_compare_and_swap): Likewise. Remove convert_modes;
|
|
test oldval against the proper predicate.
|
|
* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI>):
|
|
Use nonmemory_operand for expected.
|
|
(cas_short_expected_pred): New.
|
|
(@aarch64_compare_and_swap<SHORT>): Use it; use "rn" not "rI" to match.
|
|
(@aarch64_compare_and_swap<GPI>): Use "rn" not "rI" for expected.
|
|
* config/aarch64/predicates.md (aarch64_plushi_immediate): New.
|
|
(aarch64_plushi_operand): New.
|
|
|
|
(cherry picked from commit d400fda3a8c3330f77eb9d51874f5482d3819a9f)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 97 ++++++++++++++++++++------------
|
|
gcc/config/aarch64/atomics.md | 19 ++++---
|
|
gcc/config/aarch64/predicates.md | 12 ++++
|
|
3 files changed, 84 insertions(+), 44 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index c2789665728..afde9e291f9 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -1198,6 +1198,33 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
|
|
return cc_reg;
|
|
}
|
|
|
|
+/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
|
|
+
|
|
+static rtx
|
|
+aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
|
|
+ machine_mode y_mode)
|
|
+{
|
|
+ if (y_mode == QImode || y_mode == HImode)
|
|
+ {
|
|
+ if (CONST_INT_P (y))
|
|
+ y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
|
|
+ else
|
|
+ {
|
|
+ rtx t, cc_reg;
|
|
+ machine_mode cc_mode;
|
|
+
|
|
+ t = gen_rtx_ZERO_EXTEND (SImode, y);
|
|
+ t = gen_rtx_COMPARE (CC_SWPmode, t, x);
|
|
+ cc_mode = CC_SWPmode;
|
|
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
|
|
+ emit_set_insn (cc_reg, t);
|
|
+ return cc_reg;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return aarch64_gen_compare_reg (code, x, y);
|
|
+}
|
|
+
|
|
/* Build the SYMBOL_REF for __tls_get_addr. */
|
|
|
|
static GTY(()) rtx tls_get_addr_libfunc;
|
|
@@ -11861,8 +11888,8 @@ aarch64_emit_unlikely_jump (rtx insn)
|
|
void
|
|
aarch64_expand_compare_and_swap (rtx operands[])
|
|
{
|
|
- rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
|
|
- machine_mode mode, cmp_mode;
|
|
+ rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
|
|
+ machine_mode mode, r_mode;
|
|
typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
|
|
int idx;
|
|
const gen_cas_fn split_cas[] =
|
|
@@ -11890,36 +11917,19 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
mod_s = operands[6];
|
|
mod_f = operands[7];
|
|
mode = GET_MODE (mem);
|
|
- cmp_mode = mode;
|
|
|
|
/* Normally the succ memory model must be stronger than fail, but in the
|
|
unlikely event of fail being ACQUIRE and succ being RELEASE we need to
|
|
promote succ to ACQ_REL so that we don't lose the acquire semantics. */
|
|
-
|
|
if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
|
|
&& is_mm_release (memmodel_from_int (INTVAL (mod_s))))
|
|
mod_s = GEN_INT (MEMMODEL_ACQ_REL);
|
|
|
|
- switch (mode)
|
|
+ r_mode = mode;
|
|
+ if (mode == QImode || mode == HImode)
|
|
{
|
|
- case QImode:
|
|
- case HImode:
|
|
- /* For short modes, we're going to perform the comparison in SImode,
|
|
- so do the zero-extension now. */
|
|
- cmp_mode = SImode;
|
|
- rval = gen_reg_rtx (SImode);
|
|
- oldval = convert_modes (SImode, mode, oldval, true);
|
|
- /* Fall through. */
|
|
-
|
|
- case SImode:
|
|
- case DImode:
|
|
- /* Force the value into a register if needed. */
|
|
- if (!aarch64_plus_operand (oldval, mode))
|
|
- oldval = force_reg (cmp_mode, oldval);
|
|
- break;
|
|
-
|
|
- default:
|
|
- gcc_unreachable ();
|
|
+ r_mode = SImode;
|
|
+ rval = gen_reg_rtx (r_mode);
|
|
}
|
|
|
|
switch (mode)
|
|
@@ -11936,25 +11946,40 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
/* The CAS insn requires oldval and rval overlap, but we need to
|
|
have a copy of oldval saved across the operation to tell if
|
|
the operation is successful. */
|
|
- if (mode == QImode || mode == HImode)
|
|
- rval = copy_to_mode_reg (SImode, gen_lowpart (SImode, oldval));
|
|
- else if (reg_overlap_mentioned_p (rval, oldval))
|
|
- rval = copy_to_mode_reg (mode, oldval);
|
|
+ if (reg_overlap_mentioned_p (rval, oldval))
|
|
+ rval = copy_to_mode_reg (r_mode, oldval);
|
|
else
|
|
- emit_move_insn (rval, oldval);
|
|
+ emit_move_insn (rval, gen_lowpart (r_mode, oldval));
|
|
+
|
|
emit_insn (atomic_cas[idx] (rval, mem, newval, mod_s));
|
|
- aarch64_gen_compare_reg (EQ, rval, oldval);
|
|
+ cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
|
|
}
|
|
else
|
|
- emit_insn (split_cas[idx] (rval, mem, oldval, newval, is_weak, mod_s,
|
|
- mod_f));
|
|
+ {
|
|
+ /* The oldval predicate varies by mode. Test it and force to reg. */
|
|
+ insn_code code;
|
|
+ switch (mode)
|
|
+ {
|
|
+ case QImode: code = CODE_FOR_aarch64_compare_and_swapqi; break;
|
|
+ case HImode: code = CODE_FOR_aarch64_compare_and_swaphi; break;
|
|
+ case SImode: code = CODE_FOR_aarch64_compare_and_swapsi; break;
|
|
+ case DImode: code = CODE_FOR_aarch64_compare_and_swapdi; break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+ if (!insn_data[code].operand[2].predicate (oldval, mode))
|
|
+ oldval = force_reg (mode, oldval);
|
|
|
|
- if (mode == QImode || mode == HImode)
|
|
+ emit_insn (split_cas[idx] (rval, mem, oldval, newval, is_weak, mod_s,
|
|
+ mod_f));
|
|
+ cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
|
|
+ }
|
|
+
|
|
+ if (r_mode != mode)
|
|
rval = gen_lowpart (mode, rval);
|
|
emit_move_insn (operands[1], rval);
|
|
|
|
- x = gen_rtx_REG (CCmode, CC_REGNUM);
|
|
- x = gen_rtx_EQ (SImode, x, const0_rtx);
|
|
+ x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
|
|
emit_insn (gen_rtx_SET (bval, x));
|
|
}
|
|
|
|
@@ -12060,10 +12085,10 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
}
|
|
else
|
|
{
|
|
- cond = aarch64_gen_compare_reg (NE, rval, oldval);
|
|
+ cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
|
|
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
|
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
|
|
- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
}
|
|
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 713aec618a2..577000fa6a4 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -24,8 +24,8 @@
|
|
[(match_operand:SI 0 "register_operand" "") ;; bool out
|
|
(match_operand:ALLI 1 "register_operand" "") ;; val out
|
|
(match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
|
|
- (match_operand:ALLI 3 "general_operand" "") ;; expected
|
|
- (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
|
|
+ (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected
|
|
+ (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
|
|
(match_operand:SI 5 "const_int_operand") ;; is_weak
|
|
(match_operand:SI 6 "const_int_operand") ;; mod_s
|
|
(match_operand:SI 7 "const_int_operand")] ;; mod_f
|
|
@@ -36,19 +36,22 @@
|
|
}
|
|
)
|
|
|
|
+(define_mode_attr cas_short_expected_pred
|
|
+ [(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")])
|
|
+
|
|
(define_insn_and_split "aarch64_compare_and_swap<mode>"
|
|
[(set (reg:CC CC_REGNUM) ;; bool out
|
|
(unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
|
|
- (set (match_operand:SI 0 "register_operand" "=&r") ;; val out
|
|
+ (set (match_operand:SI 0 "register_operand" "=&r") ;; val out
|
|
(zero_extend:SI
|
|
(match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
|
|
(set (match_dup 1)
|
|
(unspec_volatile:SHORT
|
|
- [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected
|
|
+ [(match_operand:SHORT 2 "<cas_short_expected_pred>" "rn") ;; expected
|
|
(match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
- (match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
- (match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
- (match_operand:SI 6 "const_int_operand")] ;; mod_f
|
|
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
|
|
UNSPECV_ATOMIC_CMPSW))
|
|
(clobber (match_scratch:SI 7 "=&r"))]
|
|
""
|
|
@@ -68,7 +71,7 @@
|
|
(match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
|
|
(set (match_dup 1)
|
|
(unspec_volatile:GPI
|
|
- [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect
|
|
+ [(match_operand:GPI 2 "aarch64_plus_operand" "rn") ;; expect
|
|
(match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
(match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
(match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
|
index e83d45b3945..20429e5d04c 100644
|
|
--- a/gcc/config/aarch64/predicates.md
|
|
+++ b/gcc/config/aarch64/predicates.md
|
|
@@ -86,6 +86,18 @@
|
|
(ior (match_operand 0 "register_operand")
|
|
(match_operand 0 "aarch64_plus_immediate")))
|
|
|
|
+(define_predicate "aarch64_plushi_immediate"
|
|
+ (match_code "const_int")
|
|
+{
|
|
+ HOST_WIDE_INT val = INTVAL (op);
|
|
+ /* The HImode value must be zero-extendable to an SImode plus_operand. */
|
|
+ return ((val & 0xfff) == val || sext_hwi (val & 0xf000, 16) == val);
|
|
+})
|
|
+
|
|
+(define_predicate "aarch64_plushi_operand"
|
|
+ (ior (match_operand 0 "register_operand")
|
|
+ (match_operand 0 "aarch64_plushi_immediate")))
|
|
+
|
|
(define_predicate "aarch64_pluslong_immediate"
|
|
(and (match_code "const_int")
|
|
(match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)")))
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 585818b9697910d3c136db9805f129f4d735e28d Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 31 Oct 2018 09:47:21 +0000
|
|
Subject: [PATCH 06/24] aarch64: Improve swp generation
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
Allow zero as an input; fix constraints; avoid unnecessary split.
|
|
|
|
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.c (aarch64_emit_atomic_swap): Remove.
|
|
(aarch64_gen_atomic_ldop): Don't call it.
|
|
* config/aarch64/atomics.md (atomic_exchange<ALLI>):
|
|
Use aarch64_reg_or_zero.
|
|
(aarch64_atomic_exchange<ALLI>): Likewise.
|
|
(aarch64_atomic_exchange<ALLI>_lse): Remove split; remove & from
|
|
operand 0; use aarch64_reg_or_zero for input; merge ...
|
|
(@aarch64_atomic_swp<ALLI>): ... this and remove.
|
|
|
|
(cherry picked from commit 8f5603d363a4e0453d2c38c7103aeb0bdca85c4e)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 25 ------------------
|
|
gcc/config/aarch64/atomics.md | 49 +++++++++++------------------------
|
|
2 files changed, 15 insertions(+), 59 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index afde9e291f9..d08af9d63ca 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -12142,27 +12142,6 @@ aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
|
|
emit_insn (gen (dst, s2, shift_rtx, s1));
|
|
}
|
|
|
|
-/* Emit an atomic swap. */
|
|
-
|
|
-static void
|
|
-aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
|
|
- rtx mem, rtx model)
|
|
-{
|
|
- rtx (*gen) (rtx, rtx, rtx, rtx);
|
|
-
|
|
- switch (mode)
|
|
- {
|
|
- case QImode: gen = gen_aarch64_atomic_swpqi; break;
|
|
- case HImode: gen = gen_aarch64_atomic_swphi; break;
|
|
- case SImode: gen = gen_aarch64_atomic_swpsi; break;
|
|
- case DImode: gen = gen_aarch64_atomic_swpdi; break;
|
|
- default:
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- emit_insn (gen (dst, mem, value, model));
|
|
-}
|
|
-
|
|
/* Operations supported by aarch64_emit_atomic_load_op. */
|
|
|
|
enum aarch64_atomic_load_op_code
|
|
@@ -12275,10 +12254,6 @@ aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
|
|
a SET then emit a swap instruction and finish. */
|
|
switch (code)
|
|
{
|
|
- case SET:
|
|
- aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
|
|
- return;
|
|
-
|
|
case MINUS:
|
|
/* Negate the value and treat it as a PLUS. */
|
|
{
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 577000fa6a4..f1cc972bae4 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -136,7 +136,7 @@
|
|
(define_expand "atomic_exchange<mode>"
|
|
[(match_operand:ALLI 0 "register_operand" "")
|
|
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
|
|
- (match_operand:ALLI 2 "register_operand" "")
|
|
+ (match_operand:ALLI 2 "aarch64_reg_or_zero" "")
|
|
(match_operand:SI 3 "const_int_operand" "")]
|
|
""
|
|
{
|
|
@@ -156,10 +156,10 @@
|
|
|
|
(define_insn_and_split "aarch64_atomic_exchange<mode>"
|
|
[(set (match_operand:ALLI 0 "register_operand" "=&r") ;; output
|
|
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
|
|
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
|
|
(set (match_dup 1)
|
|
(unspec_volatile:ALLI
|
|
- [(match_operand:ALLI 2 "register_operand" "r") ;; input
|
|
+ [(match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ") ;; input
|
|
(match_operand:SI 3 "const_int_operand" "")] ;; model
|
|
UNSPECV_ATOMIC_EXCHG))
|
|
(clobber (reg:CC CC_REGNUM))
|
|
@@ -175,22 +175,25 @@
|
|
}
|
|
)
|
|
|
|
-(define_insn_and_split "aarch64_atomic_exchange<mode>_lse"
|
|
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
|
|
+(define_insn "aarch64_atomic_exchange<mode>_lse"
|
|
+ [(set (match_operand:ALLI 0 "register_operand" "=r")
|
|
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
|
|
(set (match_dup 1)
|
|
(unspec_volatile:ALLI
|
|
- [(match_operand:ALLI 2 "register_operand" "r")
|
|
+ [(match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ")
|
|
(match_operand:SI 3 "const_int_operand" "")]
|
|
UNSPECV_ATOMIC_EXCHG))]
|
|
"TARGET_LSE"
|
|
- "#"
|
|
- "&& reload_completed"
|
|
- [(const_int 0)]
|
|
{
|
|
- aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1],
|
|
- operands[2], operands[3]);
|
|
- DONE;
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ if (is_mm_relaxed (model))
|
|
+ return "swp<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
+ else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
+ return "swpa<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
+ else if (is_mm_release (model))
|
|
+ return "swpl<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
+ else
|
|
+ return "swpal<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
}
|
|
)
|
|
|
|
@@ -582,28 +585,6 @@
|
|
|
|
;; ARMv8.1-A LSE instructions.
|
|
|
|
-;; Atomic swap with memory.
|
|
-(define_insn "aarch64_atomic_swp<mode>"
|
|
- [(set (match_operand:ALLI 0 "register_operand" "+&r")
|
|
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
|
|
- (set (match_dup 1)
|
|
- (unspec_volatile:ALLI
|
|
- [(match_operand:ALLI 2 "register_operand" "r")
|
|
- (match_operand:SI 3 "const_int_operand" "")]
|
|
- UNSPECV_ATOMIC_SWP))]
|
|
- "TARGET_LSE && reload_completed"
|
|
- {
|
|
- enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
- if (is_mm_relaxed (model))
|
|
- return "swp<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
- return "swpa<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- else if (is_mm_release (model))
|
|
- return "swpl<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- else
|
|
- return "swpal<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- })
|
|
-
|
|
;; Atomic load-op: Load data, operate, store result, keep data.
|
|
|
|
(define_insn "aarch64_atomic_load<atomic_ldop><mode>"
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 2d9e1f5e434f9511ca82149bbf9b0d64acacac64 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 31 Oct 2018 09:58:48 +0000
|
|
Subject: [PATCH 07/24] aarch64: Improve atomic-op lse generation
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
Fix constraints; avoid unnecessary split. Drop the use of the atomic_op
|
|
iterator in favor of the ATOMIC_LDOP iterator; this is simpler and more
|
|
logical for ldclr aka bic.
|
|
|
|
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.c (aarch64_emit_bic): Remove.
|
|
(aarch64_atomic_ldop_supported_p): Remove.
|
|
(aarch64_gen_atomic_ldop): Remove.
|
|
* config/aarch64/atomics.md (atomic_<atomic_optab><ALLI>):
|
|
Fully expand LSE operations here.
|
|
(atomic_fetch_<atomic_optab><ALLI>): Likewise.
|
|
(atomic_<atomic_optab>_fetch<ALLI>): Likewise.
|
|
(aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator
|
|
and use ATOMIC_LDOP instead; use register_operand for the input;
|
|
drop the split and emit insns directly.
|
|
(aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.
|
|
(aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.
|
|
(@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.
|
|
|
|
(cherry picked from commit 7803ec5ee2a547043fb6708a08ddb1361ba91202)
|
|
---
|
|
gcc/config/aarch64/aarch64-protos.h | 2 -
|
|
gcc/config/aarch64/aarch64.c | 247 ----------------------------
|
|
gcc/config/aarch64/atomics.md | 197 +++++++++++-----------
|
|
gcc/config/aarch64/iterators.md | 5 +-
|
|
4 files changed, 108 insertions(+), 343 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
|
index e25f9833af4..e47f2174479 100644
|
|
--- a/gcc/config/aarch64/aarch64-protos.h
|
|
+++ b/gcc/config/aarch64/aarch64-protos.h
|
|
@@ -426,8 +426,6 @@ rtx aarch64_load_tp (rtx);
|
|
void aarch64_expand_compare_and_swap (rtx op[]);
|
|
void aarch64_split_compare_and_swap (rtx op[]);
|
|
|
|
-bool aarch64_atomic_ldop_supported_p (enum rtx_code);
|
|
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
|
|
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
|
|
|
|
bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index d08af9d63ca..ed3cec30859 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -11983,32 +11983,6 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
emit_insn (gen_rtx_SET (bval, x));
|
|
}
|
|
|
|
-/* Test whether the target supports using a atomic load-operate instruction.
|
|
- CODE is the operation and AFTER is TRUE if the data in memory after the
|
|
- operation should be returned and FALSE if the data before the operation
|
|
- should be returned. Returns FALSE if the operation isn't supported by the
|
|
- architecture. */
|
|
-
|
|
-bool
|
|
-aarch64_atomic_ldop_supported_p (enum rtx_code code)
|
|
-{
|
|
- if (!TARGET_LSE)
|
|
- return false;
|
|
-
|
|
- switch (code)
|
|
- {
|
|
- case SET:
|
|
- case AND:
|
|
- case IOR:
|
|
- case XOR:
|
|
- case MINUS:
|
|
- case PLUS:
|
|
- return true;
|
|
- default:
|
|
- return false;
|
|
- }
|
|
-}
|
|
-
|
|
/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
|
|
sequence implementing an atomic operation. */
|
|
|
|
@@ -12123,227 +12097,6 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
aarch64_emit_post_barrier (model);
|
|
}
|
|
|
|
-/* Emit a BIC instruction. */
|
|
-
|
|
-static void
|
|
-aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
|
|
-{
|
|
- rtx shift_rtx = GEN_INT (shift);
|
|
- rtx (*gen) (rtx, rtx, rtx, rtx);
|
|
-
|
|
- switch (mode)
|
|
- {
|
|
- case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
|
|
- case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
|
|
- default:
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- emit_insn (gen (dst, s2, shift_rtx, s1));
|
|
-}
|
|
-
|
|
-/* Operations supported by aarch64_emit_atomic_load_op. */
|
|
-
|
|
-enum aarch64_atomic_load_op_code
|
|
-{
|
|
- AARCH64_LDOP_PLUS, /* A + B */
|
|
- AARCH64_LDOP_XOR, /* A ^ B */
|
|
- AARCH64_LDOP_OR, /* A | B */
|
|
- AARCH64_LDOP_BIC /* A & ~B */
|
|
-};
|
|
-
|
|
-/* Emit an atomic load-operate. */
|
|
-
|
|
-static void
|
|
-aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
|
|
- machine_mode mode, rtx dst, rtx src,
|
|
- rtx mem, rtx model)
|
|
-{
|
|
- typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
|
|
- const aarch64_atomic_load_op_fn plus[] =
|
|
- {
|
|
- gen_aarch64_atomic_loadaddqi,
|
|
- gen_aarch64_atomic_loadaddhi,
|
|
- gen_aarch64_atomic_loadaddsi,
|
|
- gen_aarch64_atomic_loadadddi
|
|
- };
|
|
- const aarch64_atomic_load_op_fn eor[] =
|
|
- {
|
|
- gen_aarch64_atomic_loadeorqi,
|
|
- gen_aarch64_atomic_loadeorhi,
|
|
- gen_aarch64_atomic_loadeorsi,
|
|
- gen_aarch64_atomic_loadeordi
|
|
- };
|
|
- const aarch64_atomic_load_op_fn ior[] =
|
|
- {
|
|
- gen_aarch64_atomic_loadsetqi,
|
|
- gen_aarch64_atomic_loadsethi,
|
|
- gen_aarch64_atomic_loadsetsi,
|
|
- gen_aarch64_atomic_loadsetdi
|
|
- };
|
|
- const aarch64_atomic_load_op_fn bic[] =
|
|
- {
|
|
- gen_aarch64_atomic_loadclrqi,
|
|
- gen_aarch64_atomic_loadclrhi,
|
|
- gen_aarch64_atomic_loadclrsi,
|
|
- gen_aarch64_atomic_loadclrdi
|
|
- };
|
|
- aarch64_atomic_load_op_fn gen;
|
|
- int idx = 0;
|
|
-
|
|
- switch (mode)
|
|
- {
|
|
- case QImode: idx = 0; break;
|
|
- case HImode: idx = 1; break;
|
|
- case SImode: idx = 2; break;
|
|
- case DImode: idx = 3; break;
|
|
- default:
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- switch (code)
|
|
- {
|
|
- case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
|
|
- case AARCH64_LDOP_XOR: gen = eor[idx]; break;
|
|
- case AARCH64_LDOP_OR: gen = ior[idx]; break;
|
|
- case AARCH64_LDOP_BIC: gen = bic[idx]; break;
|
|
- default:
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- emit_insn (gen (dst, mem, src, model));
|
|
-}
|
|
-
|
|
-/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
|
|
- location to store the data read from memory. OUT_RESULT is the location to
|
|
- store the result of the operation. MEM is the memory location to read and
|
|
- modify. MODEL_RTX is the memory ordering to use. VALUE is the second
|
|
- operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
|
|
- be NULL. */
|
|
-
|
|
-void
|
|
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
|
|
- rtx mem, rtx value, rtx model_rtx)
|
|
-{
|
|
- machine_mode mode = GET_MODE (mem);
|
|
- machine_mode wmode = (mode == DImode ? DImode : SImode);
|
|
- const bool short_mode = (mode < SImode);
|
|
- aarch64_atomic_load_op_code ldop_code;
|
|
- rtx src;
|
|
- rtx x;
|
|
-
|
|
- if (out_data)
|
|
- out_data = gen_lowpart (mode, out_data);
|
|
-
|
|
- if (out_result)
|
|
- out_result = gen_lowpart (mode, out_result);
|
|
-
|
|
- /* Make sure the value is in a register, putting it into a destination
|
|
- register if it needs to be manipulated. */
|
|
- if (!register_operand (value, mode)
|
|
- || code == AND || code == MINUS)
|
|
- {
|
|
- src = out_result ? out_result : out_data;
|
|
- emit_move_insn (src, gen_lowpart (mode, value));
|
|
- }
|
|
- else
|
|
- src = value;
|
|
- gcc_assert (register_operand (src, mode));
|
|
-
|
|
- /* Preprocess the data for the operation as necessary. If the operation is
|
|
- a SET then emit a swap instruction and finish. */
|
|
- switch (code)
|
|
- {
|
|
- case MINUS:
|
|
- /* Negate the value and treat it as a PLUS. */
|
|
- {
|
|
- rtx neg_src;
|
|
-
|
|
- /* Resize the value if necessary. */
|
|
- if (short_mode)
|
|
- src = gen_lowpart (wmode, src);
|
|
-
|
|
- neg_src = gen_rtx_NEG (wmode, src);
|
|
- emit_insn (gen_rtx_SET (src, neg_src));
|
|
-
|
|
- if (short_mode)
|
|
- src = gen_lowpart (mode, src);
|
|
- }
|
|
- /* Fall-through. */
|
|
- case PLUS:
|
|
- ldop_code = AARCH64_LDOP_PLUS;
|
|
- break;
|
|
-
|
|
- case IOR:
|
|
- ldop_code = AARCH64_LDOP_OR;
|
|
- break;
|
|
-
|
|
- case XOR:
|
|
- ldop_code = AARCH64_LDOP_XOR;
|
|
- break;
|
|
-
|
|
- case AND:
|
|
- {
|
|
- rtx not_src;
|
|
-
|
|
- /* Resize the value if necessary. */
|
|
- if (short_mode)
|
|
- src = gen_lowpart (wmode, src);
|
|
-
|
|
- not_src = gen_rtx_NOT (wmode, src);
|
|
- emit_insn (gen_rtx_SET (src, not_src));
|
|
-
|
|
- if (short_mode)
|
|
- src = gen_lowpart (mode, src);
|
|
- }
|
|
- ldop_code = AARCH64_LDOP_BIC;
|
|
- break;
|
|
-
|
|
- default:
|
|
- /* The operation can't be done with atomic instructions. */
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
|
|
-
|
|
- /* If necessary, calculate the data in memory after the update by redoing the
|
|
- operation from values in registers. */
|
|
- if (!out_result)
|
|
- return;
|
|
-
|
|
- if (short_mode)
|
|
- {
|
|
- src = gen_lowpart (wmode, src);
|
|
- out_data = gen_lowpart (wmode, out_data);
|
|
- out_result = gen_lowpart (wmode, out_result);
|
|
- }
|
|
-
|
|
- x = NULL_RTX;
|
|
-
|
|
- switch (code)
|
|
- {
|
|
- case MINUS:
|
|
- case PLUS:
|
|
- x = gen_rtx_PLUS (wmode, out_data, src);
|
|
- break;
|
|
- case IOR:
|
|
- x = gen_rtx_IOR (wmode, out_data, src);
|
|
- break;
|
|
- case XOR:
|
|
- x = gen_rtx_XOR (wmode, out_data, src);
|
|
- break;
|
|
- case AND:
|
|
- aarch64_emit_bic (wmode, out_result, out_data, src, 0);
|
|
- return;
|
|
- default:
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- emit_set_insn (out_result, x);
|
|
-
|
|
- return;
|
|
-}
|
|
-
|
|
/* Split an atomic operation. */
|
|
|
|
void
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index f1cc972bae4..735407c9fd7 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -207,13 +207,37 @@
|
|
rtx (*gen) (rtx, rtx, rtx);
|
|
|
|
/* Use an atomic load-operate instruction when possible. */
|
|
- if (aarch64_atomic_ldop_supported_p (<CODE>))
|
|
- gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
|
|
+ if (TARGET_LSE)
|
|
+ {
|
|
+ switch (<CODE>)
|
|
+ {
|
|
+ case MINUS:
|
|
+ operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
|
|
+ NULL, 1);
|
|
+ /* fallthru */
|
|
+ case PLUS:
|
|
+ gen = gen_aarch64_atomic_add<mode>_lse;
|
|
+ break;
|
|
+ case IOR:
|
|
+ gen = gen_aarch64_atomic_ior<mode>_lse;
|
|
+ break;
|
|
+ case XOR:
|
|
+ gen = gen_aarch64_atomic_xor<mode>_lse;
|
|
+ break;
|
|
+ case AND:
|
|
+ operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
|
|
+ NULL, 1);
|
|
+ gen = gen_aarch64_atomic_bic<mode>_lse;
|
|
+ break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+ operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
+ }
|
|
else
|
|
gen = gen_aarch64_atomic_<atomic_optab><mode>;
|
|
|
|
emit_insn (gen (operands[0], operands[1], operands[2]));
|
|
-
|
|
DONE;
|
|
}
|
|
)
|
|
@@ -239,22 +263,25 @@
|
|
}
|
|
)
|
|
|
|
-(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
|
|
+(define_insn "aarch64_atomic_<atomic_ldoptab><mode>_lse"
|
|
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
|
|
- (unspec_volatile:ALLI
|
|
- [(atomic_op:ALLI (match_dup 0)
|
|
- (match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
|
|
- (match_operand:SI 2 "const_int_operand")]
|
|
- UNSPECV_ATOMIC_OP))
|
|
+ (unspec_volatile:ALLI
|
|
+ [(match_dup 0)
|
|
+ (match_operand:ALLI 1 "register_operand" "r")
|
|
+ (match_operand:SI 2 "const_int_operand")]
|
|
+ ATOMIC_LDOP))
|
|
(clobber (match_scratch:ALLI 3 "=&r"))]
|
|
"TARGET_LSE"
|
|
- "#"
|
|
- "&& reload_completed"
|
|
- [(const_int 0)]
|
|
{
|
|
- aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
|
|
- operands[1], operands[2]);
|
|
- DONE;
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
|
|
+ if (is_mm_relaxed (model))
|
|
+ return "ld<atomic_ldop><atomic_sfx>\t%<w>1, %<w>3, %0";
|
|
+ else if (is_mm_release (model))
|
|
+ return "ld<atomic_ldop>l<atomic_sfx>\t%<w>1, %<w>3, %0";
|
|
+ else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
+ return "ld<atomic_ldop>a<atomic_sfx>\t%<w>1, %<w>3, %0";
|
|
+ else
|
|
+ return "ld<atomic_ldop>al<atomic_sfx>\t%<w>1, %<w>3, %0";
|
|
}
|
|
)
|
|
|
|
@@ -280,7 +307,7 @@
|
|
}
|
|
)
|
|
|
|
-;; Load-operate-store, returning the updated memory data.
|
|
+;; Load-operate-store, returning the original memory data.
|
|
|
|
(define_expand "atomic_fetch_<atomic_optab><mode>"
|
|
[(match_operand:ALLI 0 "register_operand" "")
|
|
@@ -293,13 +320,37 @@
|
|
rtx (*gen) (rtx, rtx, rtx, rtx);
|
|
|
|
/* Use an atomic load-operate instruction when possible. */
|
|
- if (aarch64_atomic_ldop_supported_p (<CODE>))
|
|
- gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
|
|
+ if (TARGET_LSE)
|
|
+ {
|
|
+ switch (<CODE>)
|
|
+ {
|
|
+ case MINUS:
|
|
+ operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
|
|
+ NULL, 1);
|
|
+ /* fallthru */
|
|
+ case PLUS:
|
|
+ gen = gen_aarch64_atomic_fetch_add<mode>_lse;
|
|
+ break;
|
|
+ case IOR:
|
|
+ gen = gen_aarch64_atomic_fetch_ior<mode>_lse;
|
|
+ break;
|
|
+ case XOR:
|
|
+ gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
|
|
+ break;
|
|
+ case AND:
|
|
+ operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
|
|
+ NULL, 1);
|
|
+ gen = gen_aarch64_atomic_fetch_bic<mode>_lse;
|
|
+ break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+ operands[2] = force_reg (<MODE>mode, operands[2]);
|
|
+ }
|
|
else
|
|
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
|
|
|
|
emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
|
|
-
|
|
DONE;
|
|
})
|
|
|
|
@@ -326,23 +377,26 @@
|
|
}
|
|
)
|
|
|
|
-(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
|
|
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
|
|
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
|
|
+(define_insn "aarch64_atomic_fetch_<atomic_ldoptab><mode>_lse"
|
|
+ [(set (match_operand:ALLI 0 "register_operand" "=r")
|
|
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
|
|
(set (match_dup 1)
|
|
- (unspec_volatile:ALLI
|
|
- [(atomic_op:ALLI (match_dup 1)
|
|
- (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
|
|
- (match_operand:SI 3 "const_int_operand")]
|
|
- UNSPECV_ATOMIC_LDOP))]
|
|
+ (unspec_volatile:ALLI
|
|
+ [(match_dup 1)
|
|
+ (match_operand:ALLI 2 "register_operand" "r")
|
|
+ (match_operand:SI 3 "const_int_operand")]
|
|
+ ATOMIC_LDOP))]
|
|
"TARGET_LSE"
|
|
- "#"
|
|
- "&& reload_completed"
|
|
- [(const_int 0)]
|
|
{
|
|
- aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
|
|
- operands[2], operands[3]);
|
|
- DONE;
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ if (is_mm_relaxed (model))
|
|
+ return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
+ else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
+ return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
+ else if (is_mm_release (model))
|
|
+ return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
+ else
|
|
+ return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
}
|
|
)
|
|
|
|
@@ -370,7 +424,7 @@
|
|
}
|
|
)
|
|
|
|
-;; Load-operate-store, returning the original memory data.
|
|
+;; Load-operate-store, returning the updated memory data.
|
|
|
|
(define_expand "atomic_<atomic_optab>_fetch<mode>"
|
|
[(match_operand:ALLI 0 "register_operand" "")
|
|
@@ -380,17 +434,23 @@
|
|
(match_operand:SI 3 "const_int_operand")]
|
|
""
|
|
{
|
|
- rtx (*gen) (rtx, rtx, rtx, rtx);
|
|
- rtx value = operands[2];
|
|
-
|
|
- /* Use an atomic load-operate instruction when possible. */
|
|
- if (aarch64_atomic_ldop_supported_p (<CODE>))
|
|
- gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
|
|
+ /* Use an atomic load-operate instruction when possible. In this case
|
|
+ we will re-compute the result from the original mem value. */
|
|
+ if (TARGET_LSE)
|
|
+ {
|
|
+ rtx tmp = gen_reg_rtx (<MODE>mode);
|
|
+ operands[2] = force_reg (<MODE>mode, operands[2]);
|
|
+ emit_insn (gen_atomic_fetch_<atomic_optab><mode>
|
|
+ (tmp, operands[1], operands[2], operands[3]));
|
|
+ tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
|
|
+ operands[0], 1, OPTAB_WIDEN);
|
|
+ emit_move_insn (operands[0], tmp);
|
|
+ }
|
|
else
|
|
- gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
|
|
-
|
|
- emit_insn (gen (operands[0], operands[1], value, operands[3]));
|
|
-
|
|
+ {
|
|
+ emit_insn (gen_aarch64_atomic_<atomic_optab>_fetch<mode>
|
|
+ (operands[0], operands[1], operands[2], operands[3]));
|
|
+ }
|
|
DONE;
|
|
})
|
|
|
|
@@ -417,29 +477,6 @@
|
|
}
|
|
)
|
|
|
|
-(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
|
|
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
|
|
- (atomic_op:ALLI
|
|
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
|
|
- (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
|
|
- (set (match_dup 1)
|
|
- (unspec_volatile:ALLI
|
|
- [(match_dup 1)
|
|
- (match_dup 2)
|
|
- (match_operand:SI 3 "const_int_operand")]
|
|
- UNSPECV_ATOMIC_LDOP))
|
|
- (clobber (match_scratch:ALLI 4 "=&r"))]
|
|
- "TARGET_LSE"
|
|
- "#"
|
|
- "&& reload_completed"
|
|
- [(const_int 0)]
|
|
- {
|
|
- aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
|
|
- operands[2], operands[3]);
|
|
- DONE;
|
|
- }
|
|
-)
|
|
-
|
|
(define_insn_and_split "atomic_nand_fetch<mode>"
|
|
[(set (match_operand:ALLI 0 "register_operand" "=&r")
|
|
(not:ALLI
|
|
@@ -582,29 +619,3 @@
|
|
return "dmb\\tish";
|
|
}
|
|
)
|
|
-
|
|
-;; ARMv8.1-A LSE instructions.
|
|
-
|
|
-;; Atomic load-op: Load data, operate, store result, keep data.
|
|
-
|
|
-(define_insn "aarch64_atomic_load<atomic_ldop><mode>"
|
|
- [(set (match_operand:ALLI 0 "register_operand" "=r")
|
|
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
|
|
- (set (match_dup 1)
|
|
- (unspec_volatile:ALLI
|
|
- [(match_dup 1)
|
|
- (match_operand:ALLI 2 "register_operand")
|
|
- (match_operand:SI 3 "const_int_operand")]
|
|
- ATOMIC_LDOP))]
|
|
- "TARGET_LSE && reload_completed"
|
|
- {
|
|
- enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
- if (is_mm_relaxed (model))
|
|
- return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
- return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- else if (is_mm_release (model))
|
|
- return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- else
|
|
- return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
|
|
- })
|
|
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
|
index 4a39e30da13..fc87fc902a6 100644
|
|
--- a/gcc/config/aarch64/iterators.md
|
|
+++ b/gcc/config/aarch64/iterators.md
|
|
@@ -369,7 +369,6 @@
|
|
UNSPECV_ATOMIC_CAS ; Represent an atomic CAS.
|
|
UNSPECV_ATOMIC_SWP ; Represent an atomic SWP.
|
|
UNSPECV_ATOMIC_OP ; Represent an atomic operation.
|
|
- UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation
|
|
UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or
|
|
UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic
|
|
UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor
|
|
@@ -1113,6 +1112,10 @@
|
|
[(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
|
|
(UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
|
|
|
|
+(define_int_attr atomic_ldoptab
|
|
+ [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
|
|
+ (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
|
|
+
|
|
;; -------------------------------------------------------------------
|
|
;; Int Iterators Attributes.
|
|
;; -------------------------------------------------------------------
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 7ab1ff93727b1c32ea3a8dcfc5f068e0c4e1acf8 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 31 Oct 2018 23:11:22 +0000
|
|
Subject: [PATCH 08/24] aarch64: Remove early clobber from ATOMIC_LDOP scratch
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/atomics.md (aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse):
|
|
scratch register need not be early-clobber. Document the reason
|
|
why we cannot use ST<OP>.
|
|
|
|
(cherry picked from commit 53de1ea800db54b47290d578c43892799b66c8dc)
|
|
---
|
|
gcc/config/aarch64/atomics.md | 14 +++++++++++++-
|
|
1 file changed, 13 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 735407c9fd7..1ef7c20db21 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -263,6 +263,18 @@
|
|
}
|
|
)
|
|
|
|
+;; It is tempting to want to use ST<OP> for relaxed and release
|
|
+;; memory models here. However, that is incompatible with the
|
|
+;; C++ memory model for the following case:
|
|
+;;
|
|
+;; atomic_fetch_add(ptr, 1, memory_order_relaxed);
|
|
+;; atomic_thread_fence(memory_order_acquire);
|
|
+;;
|
|
+;; The problem is that the architecture says that ST<OP> (and LD<OP>
|
|
+;; insns where the destination is XZR) are not regarded as a read.
|
|
+;; However we also implement the acquire memory barrier with DMB LD,
|
|
+;; and so the ST<OP> is not blocked by the barrier.
|
|
+
|
|
(define_insn "aarch64_atomic_<atomic_ldoptab><mode>_lse"
|
|
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
|
|
(unspec_volatile:ALLI
|
|
@@ -270,7 +282,7 @@
|
|
(match_operand:ALLI 1 "register_operand" "r")
|
|
(match_operand:SI 2 "const_int_operand")]
|
|
ATOMIC_LDOP))
|
|
- (clobber (match_scratch:ALLI 3 "=&r"))]
|
|
+ (clobber (match_scratch:ALLI 3 "=r"))]
|
|
"TARGET_LSE"
|
|
{
|
|
enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 3d05e960c18653ce1e4f19fba701645b1a030da7 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Thu, 19 Sep 2019 14:36:24 +0000
|
|
Subject: [PATCH 09/24] aarch64: Extend %R for integer registers
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2019-09-19 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.c (aarch64_print_operand): Allow integer
|
|
registers with %R.
|
|
|
|
(cherry picked from commit e3f15286d1129de2cceee6acd5d5584cb5422db6)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 13 +++++++------
|
|
1 file changed, 7 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index ed3cec30859..ec325813f5a 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -5082,12 +5082,13 @@ aarch64_print_operand (FILE *f, rtx x, int code)
|
|
case 'd':
|
|
case 'q':
|
|
/* Print a scalar FP/SIMD register name. */
|
|
- if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
|
|
- {
|
|
- output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
|
|
- return;
|
|
- }
|
|
- asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
|
|
+ if (REG_P (x) && FP_REGNUM_P (REGNO (x)))
|
|
+ asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
|
|
+ else if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
|
|
+ asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
|
|
+ else
|
|
+ output_operand_lossage ("incompatible register operand for '%%%c'",
|
|
+ code);
|
|
break;
|
|
|
|
case 'S':
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From ed63b93e1aefcea56305ef13e4b4726af0d4f27f Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Thu, 19 Sep 2019 14:36:29 +0000
|
|
Subject: [PATCH 10/24] aarch64: Implement TImode compare-and-swap
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
This pattern will only be used with the __sync functions, because
|
|
we do not yet have a bare TImode atomic load.
|
|
|
|
2019-09-19 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Add support
|
|
for NE comparison of TImode values.
|
|
(aarch64_emit_load_exclusive): Add support for TImode.
|
|
(aarch64_emit_store_exclusive): Likewise.
|
|
(aarch64_split_compare_and_swap): Disable strong_zero_p for TImode.
|
|
* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI_TI>):
|
|
Change iterator from ALLI to ALLI_TI.
|
|
(@atomic_compare_and_swap<JUST_TI>): New.
|
|
(@atomic_compare_and_swap<JUST_TI>_lse): New.
|
|
(aarch64_load_exclusive_pair): New.
|
|
(aarch64_store_exclusive_pair): New.
|
|
* config/aarch64/iterators.md (JUST_TI): New.
|
|
|
|
(cherry picked from commit 4a2095ebace8534038ce2adf4ae94bfc854066c4)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 59 ++++++++++++++++++---
|
|
gcc/config/aarch64/atomics.md | 93 +++++++++++++++++++++++++++++++--
|
|
gcc/config/aarch64/iterators.md | 6 +++
|
|
3 files changed, 145 insertions(+), 13 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index ec325813f5a..e86f34edcc6 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -1191,10 +1191,33 @@ emit_set_insn (rtx x, rtx y)
|
|
rtx
|
|
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
|
|
{
|
|
- machine_mode mode = SELECT_CC_MODE (code, x, y);
|
|
- rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
|
|
+ machine_mode cmp_mode = GET_MODE (x);
|
|
+ machine_mode cc_mode;
|
|
+ rtx cc_reg;
|
|
|
|
- emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
|
|
+ if (cmp_mode == TImode)
|
|
+ {
|
|
+ gcc_assert (code == NE);
|
|
+
|
|
+ cc_mode = CCmode;
|
|
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
|
|
+
|
|
+ rtx x_lo = operand_subword (x, 0, 0, TImode);
|
|
+ rtx y_lo = operand_subword (y, 0, 0, TImode);
|
|
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
|
|
+
|
|
+ rtx x_hi = operand_subword (x, 1, 0, TImode);
|
|
+ rtx y_hi = operand_subword (y, 1, 0, TImode);
|
|
+ emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi,
|
|
+ gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
|
|
+ GEN_INT (AARCH64_EQ)));
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ cc_mode = SELECT_CC_MODE (code, x, y);
|
|
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
|
|
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
|
|
+ }
|
|
return cc_reg;
|
|
}
|
|
|
|
@@ -11839,6 +11862,14 @@ aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
|
|
{
|
|
rtx (*gen) (rtx, rtx, rtx);
|
|
|
|
+ if (mode == TImode)
|
|
+ {
|
|
+ emit_insn (gen_aarch64_load_exclusive_pair
|
|
+ (gen_lowpart (DImode, rval), gen_highpart (DImode, rval),
|
|
+ mem, model_rtx));
|
|
+ return;
|
|
+ }
|
|
+
|
|
switch (mode)
|
|
{
|
|
case QImode: gen = gen_aarch64_load_exclusiveqi; break;
|
|
@@ -11856,10 +11887,18 @@ aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
|
|
|
|
static void
|
|
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
|
|
- rtx rval, rtx mem, rtx model_rtx)
|
|
+ rtx mem, rtx rval, rtx model_rtx)
|
|
{
|
|
rtx (*gen) (rtx, rtx, rtx, rtx);
|
|
|
|
+ if (mode == TImode)
|
|
+ {
|
|
+ emit_insn (gen_aarch64_store_exclusive_pair
|
|
+ (bval, mem, operand_subword (rval, 0, 0, TImode),
|
|
+ operand_subword (rval, 1, 0, TImode), model_rtx));
|
|
+ return;
|
|
+ }
|
|
+
|
|
switch (mode)
|
|
{
|
|
case QImode: gen = gen_aarch64_store_exclusiveqi; break;
|
|
@@ -11870,7 +11909,7 @@ aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
|
|
gcc_unreachable ();
|
|
}
|
|
|
|
- emit_insn (gen (bval, rval, mem, model_rtx));
|
|
+ emit_insn (gen (bval, mem, rval, model_rtx));
|
|
}
|
|
|
|
/* Mark the previous jump instruction as unlikely. */
|
|
@@ -11898,7 +11937,8 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
gen_aarch64_compare_and_swapqi,
|
|
gen_aarch64_compare_and_swaphi,
|
|
gen_aarch64_compare_and_swapsi,
|
|
- gen_aarch64_compare_and_swapdi
|
|
+ gen_aarch64_compare_and_swapdi,
|
|
+ gen_aarch64_compare_and_swapti
|
|
};
|
|
typedef rtx (*gen_lse_fn) (rtx, rtx, rtx, rtx);
|
|
const gen_lse_fn atomic_cas[] =
|
|
@@ -11906,7 +11946,8 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
gen_aarch64_compare_and_swapqi_lse,
|
|
gen_aarch64_compare_and_swaphi_lse,
|
|
gen_aarch64_compare_and_swapsi_lse,
|
|
- gen_aarch64_compare_and_swapdi_lse
|
|
+ gen_aarch64_compare_and_swapdi_lse,
|
|
+ gen_aarch64_compare_and_swapti_lse
|
|
};
|
|
|
|
bval = operands[0];
|
|
@@ -11939,6 +11980,7 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
case HImode: idx = 1; break;
|
|
case SImode: idx = 2; break;
|
|
case DImode: idx = 3; break;
|
|
+ case TImode: idx = 4; break;
|
|
default:
|
|
gcc_unreachable ();
|
|
}
|
|
@@ -11965,6 +12007,7 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
case HImode: code = CODE_FOR_aarch64_compare_and_swaphi; break;
|
|
case SImode: code = CODE_FOR_aarch64_compare_and_swapsi; break;
|
|
case DImode: code = CODE_FOR_aarch64_compare_and_swapdi; break;
|
|
+ case TImode: code = CODE_FOR_aarch64_compare_and_swapti; break;
|
|
default:
|
|
gcc_unreachable ();
|
|
}
|
|
@@ -12033,7 +12076,7 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
CBNZ scratch, .label1
|
|
.label2:
|
|
CMP rval, 0. */
|
|
- bool strong_zero_p = !is_weak && oldval == const0_rtx;
|
|
+ bool strong_zero_p = !is_weak && oldval == const0_rtx && mode != TImode;
|
|
|
|
label1 = NULL;
|
|
if (!is_weak)
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 1ef7c20db21..316c84699d0 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -22,10 +22,10 @@
|
|
|
|
(define_expand "atomic_compare_and_swap<mode>"
|
|
[(match_operand:SI 0 "register_operand" "") ;; bool out
|
|
- (match_operand:ALLI 1 "register_operand" "") ;; val out
|
|
- (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
|
|
- (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected
|
|
- (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
|
|
+ (match_operand:ALLI_TI 1 "register_operand" "") ;; val out
|
|
+ (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory
|
|
+ (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected
|
|
+ (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired
|
|
(match_operand:SI 5 "const_int_operand") ;; is_weak
|
|
(match_operand:SI 6 "const_int_operand") ;; mod_s
|
|
(match_operand:SI 7 "const_int_operand")] ;; mod_f
|
|
@@ -88,9 +88,33 @@
|
|
}
|
|
)
|
|
|
|
+(define_insn_and_split "aarch64_compare_and_swap<mode>"
|
|
+ [(set (reg:CC CC_REGNUM) ;; bool out
|
|
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
|
|
+ (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out
|
|
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
|
|
+ (set (match_dup 1)
|
|
+ (unspec_volatile:JUST_TI
|
|
+ [(match_operand:JUST_TI 2 "aarch64_reg_or_zero" "rZ") ;; expect
|
|
+ (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
|
|
+ UNSPECV_ATOMIC_CMPSW))
|
|
+ (clobber (match_scratch:SI 7 "=&r"))]
|
|
+ ""
|
|
+ "#"
|
|
+ "&& reload_completed"
|
|
+ [(const_int 0)]
|
|
+ {
|
|
+ aarch64_split_compare_and_swap (operands);
|
|
+ DONE;
|
|
+ }
|
|
+)
|
|
+
|
|
(define_insn "aarch64_compare_and_swap<mode>_lse"
|
|
[(set (match_operand:SI 0 "register_operand" "+r") ;; val out
|
|
- (zero_extend:SI
|
|
+ (zero_extend:SI
|
|
(match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
|
|
(set (match_dup 1)
|
|
(unspec_volatile:SHORT
|
|
@@ -133,6 +157,28 @@
|
|
return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
|
|
})
|
|
|
|
+(define_insn "aarch64_compare_and_swap<mode>_lse"
|
|
+ [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out
|
|
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
|
|
+ (set (match_dup 1)
|
|
+ (unspec_volatile:JUST_TI
|
|
+ [(match_dup 0) ;; expect
|
|
+ (match_operand:JUST_TI 2 "register_operand" "r") ;; desired
|
|
+ (match_operand:SI 3 "const_int_operand")] ;; mod_s
|
|
+ UNSPECV_ATOMIC_CMPSW))]
|
|
+ "TARGET_LSE"
|
|
+{
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ if (is_mm_relaxed (model))
|
|
+ return "casp\t%0, %R0, %2, %R2, %1";
|
|
+ else if (is_mm_acquire (model) || is_mm_consume (model))
|
|
+ return "caspa\t%0, %R0, %2, %R2, %1";
|
|
+ else if (is_mm_release (model))
|
|
+ return "caspl\t%0, %R0, %2, %R2, %1";
|
|
+ else
|
|
+ return "caspal\t%0, %R0, %2, %R2, %1";
|
|
+})
|
|
+
|
|
(define_expand "atomic_exchange<mode>"
|
|
[(match_operand:ALLI 0 "register_operand" "")
|
|
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
|
|
@@ -578,6 +624,24 @@
|
|
}
|
|
)
|
|
|
|
+(define_insn "aarch64_load_exclusive_pair"
|
|
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
|
+ (unspec_volatile:DI
|
|
+ [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q")
|
|
+ (match_operand:SI 3 "const_int_operand")]
|
|
+ UNSPECV_LX))
|
|
+ (set (match_operand:DI 1 "register_operand" "=r")
|
|
+ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))]
|
|
+ ""
|
|
+ {
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model))
|
|
+ return "ldxp\t%0, %1, %2";
|
|
+ else
|
|
+ return "ldaxp\t%0, %1, %2";
|
|
+ }
|
|
+)
|
|
+
|
|
(define_insn "aarch64_store_exclusive<mode>"
|
|
[(set (match_operand:SI 0 "register_operand" "=&r")
|
|
(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
|
|
@@ -596,6 +660,25 @@
|
|
}
|
|
)
|
|
|
|
+(define_insn "aarch64_store_exclusive_pair"
|
|
+ [(set (match_operand:SI 0 "register_operand" "=&r")
|
|
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
|
|
+ (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q")
|
|
+ (unspec_volatile:TI
|
|
+ [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
|
|
+ (match_operand:DI 3 "aarch64_reg_or_zero" "rZ")
|
|
+ (match_operand:SI 4 "const_int_operand")]
|
|
+ UNSPECV_SX))]
|
|
+ ""
|
|
+ {
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
|
|
+ return "stxp\t%w0, %x2, %x3, %1";
|
|
+ else
|
|
+ return "stlxp\t%w0, %x2, %x3, %1";
|
|
+ }
|
|
+)
|
|
+
|
|
(define_expand "mem_thread_fence"
|
|
[(match_operand:SI 0 "const_int_operand" "")]
|
|
""
|
|
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
|
index fc87fc902a6..813f53690b7 100644
|
|
--- a/gcc/config/aarch64/iterators.md
|
|
+++ b/gcc/config/aarch64/iterators.md
|
|
@@ -29,12 +29,18 @@
|
|
;; Iterator for HI, SI, DI, some instructions can only work on these modes.
|
|
(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
|
|
|
|
+;; "Iterator" for just TI -- features like @pattern only work with iterators.
|
|
+(define_mode_iterator JUST_TI [TI])
|
|
+
|
|
;; Iterator for QI and HI modes
|
|
(define_mode_iterator SHORT [QI HI])
|
|
|
|
;; Iterator for all integer modes (up to 64-bit)
|
|
(define_mode_iterator ALLI [QI HI SI DI])
|
|
|
|
+;; Iterator for all integer modes (up to 128-bit)
|
|
+(define_mode_iterator ALLI_TI [QI HI SI DI TI])
|
|
+
|
|
;; Iterator for all integer modes that can be extended (up to 64-bit)
|
|
(define_mode_iterator ALLX [QI HI SI])
|
|
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From a10b63386c2e87e7712ee2d1705a6af74ced8fec Mon Sep 17 00:00:00 2001
|
|
From: Wilco Dijkstra <wdijkstr@arm.com>
|
|
Date: Fri, 17 Jan 2020 13:17:21 +0000
|
|
Subject: [PATCH 11/24] Fix shrinkwrapping interactions with atomics (PR92692)
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
The separate shrinkwrapping pass may insert stores in the middle
|
|
of atomics loops which can cause issues on some implementations.
|
|
Avoid this by delaying splitting atomics patterns until after
|
|
prolog/epilog generation.
|
|
|
|
2020-01-17 Wilco Dijkstra <wdijkstr@arm.com>
|
|
|
|
gcc/
|
|
PR target/92692
|
|
* config/aarch64/aarch64.c (aarch64_split_compare_and_swap):
|
|
Add assert to ensure prolog has been emitted.
|
|
(aarch64_split_atomic_op): Likewise.
|
|
* config/aarch64/atomics.md (aarch64_compare_and_swap<mode>):
|
|
Use epilogue_completed rather than reload_completed.
|
|
(aarch64_atomic_exchange<mode>): Likewise.
|
|
(aarch64_atomic_<atomic_optab><mode>): Likewise.
|
|
(atomic_nand<mode>): Likewise.
|
|
(aarch64_atomic_fetch_<atomic_optab><mode>): Likewise.
|
|
(atomic_fetch_nand<mode>): Likewise.
|
|
(aarch64_atomic_<atomic_optab>_fetch<mode>): Likewise.
|
|
(atomic_nand_fetch<mode>): Likewise.
|
|
|
|
(cherry picked from commit e5e07b68187b9aa334519746c45b8cffc5eb7e5c)
|
|
---
|
|
gcc/config/aarch64/atomics.md | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 316c84699d0..f32f5528f1a 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -104,7 +104,7 @@
|
|
(clobber (match_scratch:SI 7 "=&r"))]
|
|
""
|
|
"#"
|
|
- "&& reload_completed"
|
|
+ "&& epilogue_completed"
|
|
[(const_int 0)]
|
|
{
|
|
aarch64_split_compare_and_swap (operands);
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From fd8684c86bebd765649e7e4b70f151c87a1f13da Mon Sep 17 00:00:00 2001
|
|
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
Date: Wed, 1 Apr 2020 11:54:14 +0100
|
|
Subject: [PATCH 12/24] aarch64: Tidy aarch64_split_compare_and_swap
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2019-09-19 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
* config/aarch64/aarch64.c (aarch64_split_compare_and_swap): Disable
|
|
strong_zero_p for aarch64_track_speculation; unify some code paths;
|
|
use aarch64_gen_compare_reg instead of open-coding.
|
|
|
|
(cherry picked from commit b7e560deb37e38fb224a0cf108e15df4a717167a)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 40 ++++++++++++------------------------
|
|
1 file changed, 13 insertions(+), 27 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index e86f34edcc6..53de92e65fc 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -12049,13 +12049,11 @@ aarch64_emit_post_barrier (enum memmodel model)
|
|
void
|
|
aarch64_split_compare_and_swap (rtx operands[])
|
|
{
|
|
- rtx rval, mem, oldval, newval, scratch;
|
|
+ rtx rval, mem, oldval, newval, scratch, x, model_rtx;
|
|
machine_mode mode;
|
|
bool is_weak;
|
|
rtx_code_label *label1, *label2;
|
|
- rtx x, cond;
|
|
enum memmodel model;
|
|
- rtx model_rtx;
|
|
|
|
rval = operands[0];
|
|
mem = operands[1];
|
|
@@ -12076,7 +12074,7 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
CBNZ scratch, .label1
|
|
.label2:
|
|
CMP rval, 0. */
|
|
- bool strong_zero_p = !is_weak && oldval == const0_rtx && mode != TImode;
|
|
+ bool strong_zero_p = (!is_weak && oldval == const0_rtx && mode != TImode);
|
|
|
|
label1 = NULL;
|
|
if (!is_weak)
|
|
@@ -12089,26 +12087,20 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
/* The initial load can be relaxed for a __sync operation since a final
|
|
barrier will be emitted to stop code hoisting. */
|
|
if (is_mm_sync (model))
|
|
- aarch64_emit_load_exclusive (mode, rval, mem,
|
|
- GEN_INT (MEMMODEL_RELAXED));
|
|
+ aarch64_emit_load_exclusive (mode, rval, mem, GEN_INT (MEMMODEL_RELAXED));
|
|
else
|
|
aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
|
|
|
|
if (strong_zero_p)
|
|
- {
|
|
- x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
|
|
- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
|
|
- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
- }
|
|
+ x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
|
|
else
|
|
{
|
|
- cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
|
|
- x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
|
- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
|
|
- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
+ rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
|
|
+ x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
|
|
}
|
|
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
|
|
+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
|
|
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
|
|
aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
|
|
|
|
@@ -12120,22 +12112,16 @@ aarch64_split_compare_and_swap (rtx operands[])
|
|
aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
|
|
}
|
|
else
|
|
- {
|
|
- cond = gen_rtx_REG (CCmode, CC_REGNUM);
|
|
- x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
|
|
- emit_insn (gen_rtx_SET (cond, x));
|
|
- }
|
|
+ aarch64_gen_compare_reg (NE, scratch, const0_rtx);
|
|
|
|
emit_label (label2);
|
|
+
|
|
/* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
|
|
to set the condition flags. If this is not used it will be removed by
|
|
later passes. */
|
|
if (strong_zero_p)
|
|
- {
|
|
- cond = gen_rtx_REG (CCmode, CC_REGNUM);
|
|
- x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
|
|
- emit_insn (gen_rtx_SET (cond, x));
|
|
- }
|
|
+ aarch64_gen_compare_reg (NE, rval, const0_rtx);
|
|
+
|
|
/* Emit any final barrier needed for a __sync operation. */
|
|
if (is_mm_sync (model))
|
|
aarch64_emit_post_barrier (model);
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From a77275f87d9e4c33a00610c5ccfba48c6eeffe55 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Thu, 19 Sep 2019 14:36:38 +0000
|
|
Subject: [PATCH 13/24] aarch64: Add out-of-line functions for LSE atomics
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
This is the libgcc part of the interface -- providing the functions.
|
|
Rationale is provided at the top of libgcc/config/aarch64/lse.S.
|
|
|
|
2019-09-19 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/lse-init.c: New file.
|
|
* config/aarch64/lse.S: New file.
|
|
* config/aarch64/t-lse: New file.
|
|
* config.host: Add t-lse to all aarch64 tuples.
|
|
|
|
(cherry picked from commit 33befddcb849235353dc263db1c7d07dc15c9faa)
|
|
---
|
|
libgcc/config.host | 3 +
|
|
libgcc/config/aarch64/lse-init.c | 45 ++++++
|
|
libgcc/config/aarch64/lse.S | 235 +++++++++++++++++++++++++++++++
|
|
libgcc/config/aarch64/t-lse | 44 ++++++
|
|
4 files changed, 327 insertions(+)
|
|
create mode 100644 libgcc/config/aarch64/lse-init.c
|
|
create mode 100644 libgcc/config/aarch64/lse.S
|
|
create mode 100644 libgcc/config/aarch64/t-lse
|
|
|
|
diff --git a/libgcc/config.host b/libgcc/config.host
|
|
index b8e23766695..b937cfb1763 100644
|
|
--- a/libgcc/config.host
|
|
+++ b/libgcc/config.host
|
|
@@ -334,12 +334,14 @@ aarch64*-*-elf | aarch64*-*-rtems*)
|
|
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
|
|
extra_parts="$extra_parts crtfastmath.o"
|
|
tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
|
|
+ tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
|
|
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
|
|
md_unwind_header=aarch64/aarch64-unwind.h
|
|
;;
|
|
aarch64*-*-freebsd*)
|
|
extra_parts="$extra_parts crtfastmath.o"
|
|
tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
|
|
+ tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
|
|
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
|
|
md_unwind_header=aarch64/freebsd-unwind.h
|
|
;;
|
|
@@ -347,6 +349,7 @@ aarch64*-*-linux*)
|
|
extra_parts="$extra_parts crtfastmath.o"
|
|
md_unwind_header=aarch64/linux-unwind.h
|
|
tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
|
|
+ tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
|
|
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
|
|
;;
|
|
alpha*-*-linux*)
|
|
diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c
|
|
new file mode 100644
|
|
index 00000000000..33d29147479
|
|
--- /dev/null
|
|
+++ b/libgcc/config/aarch64/lse-init.c
|
|
@@ -0,0 +1,45 @@
|
|
+/* Out-of-line LSE atomics for AArch64 architecture, Init.
|
|
+ Copyright (C) 2019 Free Software Foundation, Inc.
|
|
+ Contributed by Linaro Ltd.
|
|
+
|
|
+This file is part of GCC.
|
|
+
|
|
+GCC is free software; you can redistribute it and/or modify it under
|
|
+the terms of the GNU General Public License as published by the Free
|
|
+Software Foundation; either version 3, or (at your option) any later
|
|
+version.
|
|
+
|
|
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+for more details.
|
|
+
|
|
+Under Section 7 of GPL version 3, you are granted additional
|
|
+permissions described in the GCC Runtime Library Exception, version
|
|
+3.1, as published by the Free Software Foundation.
|
|
+
|
|
+You should have received a copy of the GNU General Public License and
|
|
+a copy of the GCC Runtime Library Exception along with this program;
|
|
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
+<http://www.gnu.org/licenses/>. */
|
|
+
|
|
+/* Define the symbol gating the LSE implementations. */
|
|
+_Bool __aarch64_have_lse_atomics
|
|
+ __attribute__((visibility("hidden"), nocommon));
|
|
+
|
|
+/* Disable initialization of __aarch64_have_lse_atomics during bootstrap. */
|
|
+#ifndef inhibit_libc
|
|
+# include <sys/auxv.h>
|
|
+
|
|
+/* Disable initialization if the system headers are too old. */
|
|
+# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)
|
|
+
|
|
+static void __attribute__((constructor))
|
|
+init_have_lse_atomics (void)
|
|
+{
|
|
+ unsigned long hwcap = getauxval (AT_HWCAP);
|
|
+ __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
|
|
+}
|
|
+
|
|
+# endif /* HWCAP */
|
|
+#endif /* inhibit_libc */
|
|
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
|
|
new file mode 100644
|
|
index 00000000000..a5f6673596c
|
|
--- /dev/null
|
|
+++ b/libgcc/config/aarch64/lse.S
|
|
@@ -0,0 +1,235 @@
|
|
+/* Out-of-line LSE atomics for AArch64 architecture.
|
|
+ Copyright (C) 2019 Free Software Foundation, Inc.
|
|
+ Contributed by Linaro Ltd.
|
|
+
|
|
+This file is part of GCC.
|
|
+
|
|
+GCC is free software; you can redistribute it and/or modify it under
|
|
+the terms of the GNU General Public License as published by the Free
|
|
+Software Foundation; either version 3, or (at your option) any later
|
|
+version.
|
|
+
|
|
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+for more details.
|
|
+
|
|
+Under Section 7 of GPL version 3, you are granted additional
|
|
+permissions described in the GCC Runtime Library Exception, version
|
|
+3.1, as published by the Free Software Foundation.
|
|
+
|
|
+You should have received a copy of the GNU General Public License and
|
|
+a copy of the GCC Runtime Library Exception along with this program;
|
|
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
+<http://www.gnu.org/licenses/>. */
|
|
+
|
|
+/*
|
|
+ * The problem that we are trying to solve is operating system deployment
|
|
+ * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
|
|
+ *
|
|
+ * There are a number of potential solutions for this problem which have
|
|
+ * been proposed and rejected for various reasons. To recap:
|
|
+ *
|
|
+ * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/
|
|
+ * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
|
|
+ * However, not all Linux distributions are happy with multiple builds,
|
|
+ * and anyway it has no effect on main applications.
|
|
+ *
|
|
+ * (2) IFUNC. We could put these functions into libgcc_s.so, and have
|
|
+ * a single copy of each function for all DSOs. However, ARM is concerned
|
|
+ * that the branch-to-indirect-branch that is implied by using a PLT,
|
|
+ * as required by IFUNC, is too much overhead for smaller cpus.
|
|
+ *
|
|
+ * (3) Statically predicted direct branches. This is the approach that
|
|
+ * is taken here. These functions are linked into every DSO that uses them.
|
|
+ * All of the symbols are hidden, so that the functions are called via a
|
|
+ * direct branch. The choice of LSE vs non-LSE is done via one byte load
|
|
+ * followed by a well-predicted direct branch. The functions are compiled
|
|
+ * separately to minimize code size.
|
|
+ */
|
|
+
|
|
+/* Tell the assembler to accept LSE instructions. */
|
|
+ .arch armv8-a+lse
|
|
+
|
|
+/* Declare the symbol gating the LSE implementations. */
|
|
+ .hidden __aarch64_have_lse_atomics
|
|
+
|
|
+/* Turn size and memory model defines into mnemonic fragments. */
|
|
+#if SIZE == 1
|
|
+# define S b
|
|
+# define UXT uxtb
|
|
+#elif SIZE == 2
|
|
+# define S h
|
|
+# define UXT uxth
|
|
+#elif SIZE == 4 || SIZE == 8 || SIZE == 16
|
|
+# define S
|
|
+# define UXT mov
|
|
+#else
|
|
+# error
|
|
+#endif
|
|
+
|
|
+#if MODEL == 1
|
|
+# define SUFF _relax
|
|
+# define A
|
|
+# define L
|
|
+#elif MODEL == 2
|
|
+# define SUFF _acq
|
|
+# define A a
|
|
+# define L
|
|
+#elif MODEL == 3
|
|
+# define SUFF _rel
|
|
+# define A
|
|
+# define L l
|
|
+#elif MODEL == 4
|
|
+# define SUFF _acq_rel
|
|
+# define A a
|
|
+# define L l
|
|
+#else
|
|
+# error
|
|
+#endif
|
|
+
|
|
+/* Concatenate symbols. */
|
|
+#define glue2_(A, B) A ## B
|
|
+#define glue2(A, B) glue2_(A, B)
|
|
+#define glue3_(A, B, C) A ## B ## C
|
|
+#define glue3(A, B, C) glue3_(A, B, C)
|
|
+#define glue4_(A, B, C, D) A ## B ## C ## D
|
|
+#define glue4(A, B, C, D) glue4_(A, B, C, D)
|
|
+
|
|
+/* Select the size of a register, given a regno. */
|
|
+#define x(N) glue2(x, N)
|
|
+#define w(N) glue2(w, N)
|
|
+#if SIZE < 8
|
|
+# define s(N) w(N)
|
|
+#else
|
|
+# define s(N) x(N)
|
|
+#endif
|
|
+
|
|
+#define NAME(BASE) glue4(__aarch64_, BASE, SIZE, SUFF)
|
|
+#define LDXR glue4(ld, A, xr, S)
|
|
+#define STXR glue4(st, L, xr, S)
|
|
+
|
|
+/* Temporary registers used. Other than these, only the return value
|
|
+ register (x0) and the flags are modified. */
|
|
+#define tmp0 16
|
|
+#define tmp1 17
|
|
+#define tmp2 15
|
|
+
|
|
+/* Start and end a function. */
|
|
+.macro STARTFN name
|
|
+ .text
|
|
+ .balign 16
|
|
+ .globl \name
|
|
+ .hidden \name
|
|
+ .type \name, %function
|
|
+ .cfi_startproc
|
|
+\name:
|
|
+.endm
|
|
+
|
|
+.macro ENDFN name
|
|
+ .cfi_endproc
|
|
+ .size \name, . - \name
|
|
+.endm
|
|
+
|
|
+/* Branch to LABEL if LSE is disabled. */
|
|
+.macro JUMP_IF_NOT_LSE label
|
|
+ adrp x(tmp0), __aarch64_have_lse_atomics
|
|
+ ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
|
|
+ cbz w(tmp0), \label
|
|
+.endm
|
|
+
|
|
+#ifdef L_cas
|
|
+
|
|
+STARTFN NAME(cas)
|
|
+ JUMP_IF_NOT_LSE 8f
|
|
+
|
|
+#if SIZE < 16
|
|
+#define CAS glue4(cas, A, L, S)
|
|
+
|
|
+ CAS s(0), s(1), [x2]
|
|
+ ret
|
|
+
|
|
+8: UXT s(tmp0), s(0)
|
|
+0: LDXR s(0), [x2]
|
|
+ cmp s(0), s(tmp0)
|
|
+ bne 1f
|
|
+ STXR w(tmp1), s(1), [x2]
|
|
+ cbnz w(tmp1), 0b
|
|
+1: ret
|
|
+
|
|
+#else
|
|
+#define LDXP glue3(ld, A, xp)
|
|
+#define STXP glue3(st, L, xp)
|
|
+#define CASP glue3(casp, A, L)
|
|
+
|
|
+ CASP x0, x1, x2, x3, [x4]
|
|
+ ret
|
|
+
|
|
+8: mov x(tmp0), x0
|
|
+ mov x(tmp1), x1
|
|
+0: LDXP x0, x1, [x4]
|
|
+ cmp x0, x(tmp0)
|
|
+ ccmp x1, x(tmp1), #0, eq
|
|
+ bne 1f
|
|
+ STXP w(tmp2), x(tmp0), x(tmp1), [x4]
|
|
+ cbnz w(tmp2), 0b
|
|
+1: ret
|
|
+
|
|
+#endif
|
|
+
|
|
+ENDFN NAME(cas)
|
|
+#endif
|
|
+
|
|
+#ifdef L_swp
|
|
+#define SWP glue4(swp, A, L, S)
|
|
+
|
|
+STARTFN NAME(swp)
|
|
+ JUMP_IF_NOT_LSE 8f
|
|
+
|
|
+ SWP s(0), s(0), [x1]
|
|
+ ret
|
|
+
|
|
+8: mov s(tmp0), s(0)
|
|
+0: LDXR s(0), [x1]
|
|
+ STXR w(tmp1), s(tmp0), [x1]
|
|
+ cbnz w(tmp1), 0b
|
|
+ ret
|
|
+
|
|
+ENDFN NAME(swp)
|
|
+#endif
|
|
+
|
|
+#if defined(L_ldadd) || defined(L_ldclr) \
|
|
+ || defined(L_ldeor) || defined(L_ldset)
|
|
+
|
|
+#ifdef L_ldadd
|
|
+#define LDNM ldadd
|
|
+#define OP add
|
|
+#elif defined(L_ldclr)
|
|
+#define LDNM ldclr
|
|
+#define OP bic
|
|
+#elif defined(L_ldeor)
|
|
+#define LDNM ldeor
|
|
+#define OP eor
|
|
+#elif defined(L_ldset)
|
|
+#define LDNM ldset
|
|
+#define OP orr
|
|
+#else
|
|
+#error
|
|
+#endif
|
|
+#define LDOP glue4(LDNM, A, L, S)
|
|
+
|
|
+STARTFN NAME(LDNM)
|
|
+ JUMP_IF_NOT_LSE 8f
|
|
+
|
|
+ LDOP s(0), s(0), [x1]
|
|
+ ret
|
|
+
|
|
+8: mov s(tmp0), s(0)
|
|
+0: LDXR s(0), [x1]
|
|
+ OP s(tmp1), s(0), s(tmp0)
|
|
+ STXR w(tmp1), s(tmp1), [x1]
|
|
+ cbnz w(tmp1), 0b
|
|
+ ret
|
|
+
|
|
+ENDFN NAME(LDNM)
|
|
+#endif
|
|
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
|
|
new file mode 100644
|
|
index 00000000000..fe3868dacbf
|
|
--- /dev/null
|
|
+++ b/libgcc/config/aarch64/t-lse
|
|
@@ -0,0 +1,44 @@
|
|
+# Out-of-line LSE atomics for AArch64 architecture.
|
|
+# Copyright (C) 2019 Free Software Foundation, Inc.
|
|
+# Contributed by Linaro Ltd.
|
|
+#
|
|
+# This file is part of GCC.
|
|
+#
|
|
+# GCC is free software; you can redistribute it and/or modify it
|
|
+# under the terms of the GNU General Public License as published by
|
|
+# the Free Software Foundation; either version 3, or (at your option)
|
|
+# any later version.
|
|
+#
|
|
+# GCC is distributed in the hope that it will be useful, but
|
|
+# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+# General Public License for more details.
|
|
+#
|
|
+# You should have received a copy of the GNU General Public License
|
|
+# along with GCC; see the file COPYING3. If not see
|
|
+# <http://www.gnu.org/licenses/>.
|
|
+
|
|
+# Compare-and-swap has 5 sizes and 4 memory models.
|
|
+S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
|
|
+O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
|
|
+
|
|
+# Swap, Load-and-operate have 4 sizes and 4 memory models
|
|
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
|
|
+O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
|
|
+
|
|
+LSE_OBJS := $(O0) $(O1)
|
|
+
|
|
+libgcc-objects += $(LSE_OBJS) lse-init$(objext)
|
|
+
|
|
+empty =
|
|
+space = $(empty) $(empty)
|
|
+PAT_SPLIT = $(subst _,$(space),$(*F))
|
|
+PAT_BASE = $(word 1,$(PAT_SPLIT))
|
|
+PAT_N = $(word 2,$(PAT_SPLIT))
|
|
+PAT_M = $(word 3,$(PAT_SPLIT))
|
|
+
|
|
+lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c
|
|
+ $(gcc_compile) -c $<
|
|
+
|
|
+$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S
|
|
+ $(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 96202f3214509cdc52a10d032d392d797ab93330 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 31 Oct 2018 23:12:14 +0000
|
|
Subject: [PATCH 14/24] Add visibility to libfunc constructors
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* optabs-libfuncs.c (build_libfunc_function_visibility):
|
|
New, split out from...
|
|
(build_libfunc_function): ... here.
|
|
(init_one_libfunc_visibility): New, split out from ...
|
|
(init_one_libfunc): ... here.
|
|
|
|
(cherry picked from commit a3ace685830da611697d0b9721ca675f3ae13766)
|
|
---
|
|
gcc/optabs-libfuncs.c | 26 ++++++++++++++++++++------
|
|
gcc/optabs-libfuncs.h | 2 ++
|
|
2 files changed, 22 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/gcc/optabs-libfuncs.c b/gcc/optabs-libfuncs.c
|
|
index a3a32b7652c..1cbda947fb7 100644
|
|
--- a/gcc/optabs-libfuncs.c
|
|
+++ b/gcc/optabs-libfuncs.c
|
|
@@ -726,10 +726,10 @@ struct libfunc_decl_hasher : ggc_ptr_hash<tree_node>
|
|
/* A table of previously-created libfuncs, hashed by name. */
|
|
static GTY (()) hash_table<libfunc_decl_hasher> *libfunc_decls;
|
|
|
|
-/* Build a decl for a libfunc named NAME. */
|
|
+/* Build a decl for a libfunc named NAME with visibility VIS. */
|
|
|
|
tree
|
|
-build_libfunc_function (const char *name)
|
|
+build_libfunc_function_visibility (const char *name, symbol_visibility vis)
|
|
{
|
|
/* ??? We don't have any type information; pretend this is "int foo ()". */
|
|
tree decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
|
|
@@ -738,7 +738,7 @@ build_libfunc_function (const char *name)
|
|
DECL_EXTERNAL (decl) = 1;
|
|
TREE_PUBLIC (decl) = 1;
|
|
DECL_ARTIFICIAL (decl) = 1;
|
|
- DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
|
|
+ DECL_VISIBILITY (decl) = vis;
|
|
DECL_VISIBILITY_SPECIFIED (decl) = 1;
|
|
gcc_assert (DECL_ASSEMBLER_NAME (decl));
|
|
|
|
@@ -749,11 +749,19 @@ build_libfunc_function (const char *name)
|
|
return decl;
|
|
}
|
|
|
|
+/* Build a decl for a libfunc named NAME. */
|
|
+
|
|
+tree
|
|
+build_libfunc_function (const char *name)
|
|
+{
|
|
+ return build_libfunc_function_visibility (name, VISIBILITY_DEFAULT);
|
|
+}
|
|
+
|
|
/* Return a libfunc for NAME, creating one if we don't already have one.
|
|
- The returned rtx is a SYMBOL_REF. */
|
|
+ The decl is given visibility VIS. The returned rtx is a SYMBOL_REF. */
|
|
|
|
rtx
|
|
-init_one_libfunc (const char *name)
|
|
+init_one_libfunc_visibility (const char *name, symbol_visibility vis)
|
|
{
|
|
tree id, decl;
|
|
hashval_t hash;
|
|
@@ -770,12 +778,18 @@ init_one_libfunc (const char *name)
|
|
{
|
|
/* Create a new decl, so that it can be passed to
|
|
targetm.encode_section_info. */
|
|
- decl = build_libfunc_function (name);
|
|
+ decl = build_libfunc_function_visibility (name, vis);
|
|
*slot = decl;
|
|
}
|
|
return XEXP (DECL_RTL (decl), 0);
|
|
}
|
|
|
|
+rtx
|
|
+init_one_libfunc (const char *name)
|
|
+{
|
|
+ return init_one_libfunc_visibility (name, VISIBILITY_DEFAULT);
|
|
+}
|
|
+
|
|
/* Adjust the assembler name of libfunc NAME to ASMSPEC. */
|
|
|
|
rtx
|
|
diff --git a/gcc/optabs-libfuncs.h b/gcc/optabs-libfuncs.h
|
|
index a271d2dee5b..af06d6100a8 100644
|
|
--- a/gcc/optabs-libfuncs.h
|
|
+++ b/gcc/optabs-libfuncs.h
|
|
@@ -63,7 +63,9 @@ void gen_satfract_conv_libfunc (convert_optab, const char *,
|
|
void gen_satfractuns_conv_libfunc (convert_optab, const char *,
|
|
machine_mode, machine_mode);
|
|
|
|
+tree build_libfunc_function_visibility (const char *, symbol_visibility);
|
|
tree build_libfunc_function (const char *);
|
|
+rtx init_one_libfunc_visibility (const char *, symbol_visibility);
|
|
rtx init_one_libfunc (const char *);
|
|
rtx set_user_assembler_libfunc (const char *, const char *);
|
|
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From ab5bf64b93983d41d5619e0f2024b76913b812db Mon Sep 17 00:00:00 2001
|
|
From: Andrew Pinski <apinski@cavium.com>
|
|
Date: Sun, 6 Aug 2017 18:10:58 +0000
|
|
Subject: [PATCH 15/24] atomic_cmp_exchange_zero_reg_1.c: Pass
|
|
-march=armv8-a+nolse, skip if -mcpu= is passed.
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2017-08-06 Andrew Pinski <apinski@cavium.com>
|
|
|
|
gcc/testsuite/
|
|
* gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c: Pass
|
|
-march=armv8-a+nolse, skip if -mcpu= is passed.
|
|
* gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c: Likewise.
|
|
|
|
(cherry picked from commit 905964740f674a784224620d1339676448aaada6)
|
|
---
|
|
.../gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c | 3 ++-
|
|
.../gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c | 3 ++-
|
|
2 files changed, 4 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
index 15606b68990..f2a21ddf2e1 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
@@ -1,5 +1,6 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O2" } */
|
|
+/* { dg-options "-O2 -march=armv8-a+nolse" } */
|
|
+/* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
|
|
|
|
int
|
|
foo (int *a)
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
index b14a7c29437..8d2ae67dfbe 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
@@ -1,5 +1,6 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O2" } */
|
|
+/* { dg-options "-O2 -march=armv8-a+nolse" } */
|
|
+/* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
|
|
|
|
int
|
|
foo (int *a)
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 4547522bebc79ece2ebc505116a81f47c423743a Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Thu, 19 Sep 2019 14:36:43 +0000
|
|
Subject: [PATCH 16/24] aarch64: Implement -moutline-atomics
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2019-09-19 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.opt (-moutline-atomics): New.
|
|
* config/aarch64/aarch64.c (aarch64_atomic_ool_func): New.
|
|
(aarch64_ool_cas_names, aarch64_ool_swp_names): New.
|
|
(aarch64_ool_ldadd_names, aarch64_ool_ldset_names): New.
|
|
(aarch64_ool_ldclr_names, aarch64_ool_ldeor_names): New.
|
|
(aarch64_expand_compare_and_swap): Honor TARGET_OUTLINE_ATOMICS.
|
|
* config/aarch64/atomics.md (atomic_exchange<ALLI>): Likewise.
|
|
(atomic_<atomic_op><ALLI>): Likewise.
|
|
(atomic_fetch_<atomic_op><ALLI>): Likewise.
|
|
(atomic_<atomic_op>_fetch<ALLI>): Likewise.
|
|
* doc/invoke.texi: Document -moutline-atomics.
|
|
|
|
gcc/testsuite/
|
|
* gcc.target/aarch64/atomic-op-acq_rel.c: Use -mno-outline-atomics.
|
|
* gcc.target/aarch64/atomic-comp-swap-release-acquire.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-acquire.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-char.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-consume.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-imm.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-int.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-long.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-relaxed.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-release.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-seq_cst.c: Likewise.
|
|
* gcc.target/aarch64/atomic-op-short.c: Likewise.
|
|
* gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c: Likewise.
|
|
* gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c: Likewise.
|
|
* gcc.target/aarch64/sync-comp-swap.c: Likewise.
|
|
* gcc.target/aarch64/sync-op-acquire.c: Likewise.
|
|
* gcc.target/aarch64/sync-op-full.c: Likewise.
|
|
|
|
(cherry picked from commit 3950b229a5ed6710f30241c2ddc3c74909bf4740)
|
|
---
|
|
gcc/config/aarch64/aarch64-protos.h | 13 +++
|
|
gcc/config/aarch64/aarch64.c | 87 +++++++++++++++++
|
|
gcc/config/aarch64/aarch64.opt | 4 +
|
|
gcc/config/aarch64/atomics.md | 94 +++++++++++++++++--
|
|
gcc/doc/invoke.texi | 15 ++-
|
|
.../atomic-comp-swap-release-acquire.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-acq_rel.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-acquire.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-char.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-consume.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-imm.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-int.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-long.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-relaxed.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-release.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-seq_cst.c | 2 +-
|
|
.../gcc.target/aarch64/atomic-op-short.c | 2 +-
|
|
.../aarch64/atomic_cmp_exchange_zero_reg_1.c | 2 +-
|
|
.../atomic_cmp_exchange_zero_strong_1.c | 2 +-
|
|
.../gcc.target/aarch64/sync-comp-swap.c | 2 +-
|
|
.../gcc.target/aarch64/sync-op-acquire.c | 2 +-
|
|
.../gcc.target/aarch64/sync-op-full.c | 2 +-
|
|
22 files changed, 221 insertions(+), 26 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
|
index e47f2174479..e5ab894ddc7 100644
|
|
--- a/gcc/config/aarch64/aarch64-protos.h
|
|
+++ b/gcc/config/aarch64/aarch64-protos.h
|
|
@@ -472,4 +472,17 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long,
|
|
|
|
rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
|
|
|
|
+struct atomic_ool_names
|
|
+{
|
|
+ const char *str[5][4];
|
|
+};
|
|
+
|
|
+rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
|
|
+ const atomic_ool_names *names);
|
|
+extern const atomic_ool_names aarch64_ool_swp_names;
|
|
+extern const atomic_ool_names aarch64_ool_ldadd_names;
|
|
+extern const atomic_ool_names aarch64_ool_ldset_names;
|
|
+extern const atomic_ool_names aarch64_ool_ldclr_names;
|
|
+extern const atomic_ool_names aarch64_ool_ldeor_names;
|
|
+
|
|
#endif /* GCC_AARCH64_PROTOS_H */
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index 53de92e65fc..5ccb13b46fe 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -11923,6 +11923,82 @@ aarch64_emit_unlikely_jump (rtx insn)
|
|
add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
|
|
}
|
|
|
|
+/* We store the names of the various atomic helpers in a 5x4 array.
|
|
+ Return the libcall function given MODE, MODEL and NAMES. */
|
|
+
|
|
+rtx
|
|
+aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
|
|
+ const atomic_ool_names *names)
|
|
+{
|
|
+ memmodel model = memmodel_base (INTVAL (model_rtx));
|
|
+ int mode_idx, model_idx;
|
|
+
|
|
+ switch (mode)
|
|
+ {
|
|
+ case QImode:
|
|
+ mode_idx = 0;
|
|
+ break;
|
|
+ case HImode:
|
|
+ mode_idx = 1;
|
|
+ break;
|
|
+ case SImode:
|
|
+ mode_idx = 2;
|
|
+ break;
|
|
+ case DImode:
|
|
+ mode_idx = 3;
|
|
+ break;
|
|
+ case TImode:
|
|
+ mode_idx = 4;
|
|
+ break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+
|
|
+ switch (model)
|
|
+ {
|
|
+ case MEMMODEL_RELAXED:
|
|
+ model_idx = 0;
|
|
+ break;
|
|
+ case MEMMODEL_CONSUME:
|
|
+ case MEMMODEL_ACQUIRE:
|
|
+ model_idx = 1;
|
|
+ break;
|
|
+ case MEMMODEL_RELEASE:
|
|
+ model_idx = 2;
|
|
+ break;
|
|
+ case MEMMODEL_ACQ_REL:
|
|
+ case MEMMODEL_SEQ_CST:
|
|
+ model_idx = 3;
|
|
+ break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+
|
|
+ return init_one_libfunc_visibility (names->str[mode_idx][model_idx],
|
|
+ VISIBILITY_HIDDEN);
|
|
+}
|
|
+
|
|
+#define DEF0(B, N) \
|
|
+ { "__aarch64_" #B #N "_relax", \
|
|
+ "__aarch64_" #B #N "_acq", \
|
|
+ "__aarch64_" #B #N "_rel", \
|
|
+ "__aarch64_" #B #N "_acq_rel" }
|
|
+
|
|
+#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
|
|
+ { NULL, NULL, NULL, NULL }
|
|
+#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
|
|
+
|
|
+static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
|
|
+const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
|
|
+const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
|
|
+const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
|
|
+const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
|
|
+const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
|
|
+
|
|
+#undef DEF0
|
|
+#undef DEF4
|
|
+#undef DEF5
|
|
+
|
|
/* Expand a compare and swap pattern. */
|
|
|
|
void
|
|
@@ -11997,6 +12073,17 @@ aarch64_expand_compare_and_swap (rtx operands[])
|
|
emit_insn (atomic_cas[idx] (rval, mem, newval, mod_s));
|
|
cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
|
|
}
|
|
+ else if (TARGET_OUTLINE_ATOMICS)
|
|
+ {
|
|
+ /* Oldval must satisfy compare afterward. */
|
|
+ if (!aarch64_plus_operand (oldval, mode))
|
|
+ oldval = force_reg (mode, oldval);
|
|
+ rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names);
|
|
+ rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
|
|
+ 3, oldval, mode, newval, mode,
|
|
+ XEXP (mem, 0), Pmode);
|
|
+ cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
|
|
+ }
|
|
else
|
|
{
|
|
/* The oldval predicate varies by mode. Test it and force to reg. */
|
|
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
|
index 942a7d558f2..edfb1b92be1 100644
|
|
--- a/gcc/config/aarch64/aarch64.opt
|
|
+++ b/gcc/config/aarch64/aarch64.opt
|
|
@@ -188,3 +188,7 @@ single precision and to 32 bits for double precision.
|
|
mverbose-cost-dump
|
|
Common Undocumented Var(flag_aarch64_verbose_cost)
|
|
Enables verbose cost model dumping in the debug dump files.
|
|
+
|
|
+moutline-atomics
|
|
+Target Report Mask(OUTLINE_ATOMICS) Save
|
|
+Generate local calls to out-of-line atomic operations.
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index f32f5528f1a..23333a803ec 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -186,16 +186,27 @@
|
|
(match_operand:SI 3 "const_int_operand" "")]
|
|
""
|
|
{
|
|
- rtx (*gen) (rtx, rtx, rtx, rtx);
|
|
-
|
|
/* Use an atomic SWP when available. */
|
|
if (TARGET_LSE)
|
|
- gen = gen_aarch64_atomic_exchange<mode>_lse;
|
|
+ {
|
|
+ emit_insn (gen_aarch64_atomic_exchange<mode>_lse
|
|
+ (operands[0], operands[1], operands[2], operands[3]));
|
|
+ }
|
|
+ else if (TARGET_OUTLINE_ATOMICS)
|
|
+ {
|
|
+ machine_mode mode = <MODE>mode;
|
|
+ rtx func = aarch64_atomic_ool_func (mode, operands[3],
|
|
+ &aarch64_ool_swp_names);
|
|
+ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL,
|
|
+ mode, 2, operands[2], mode,
|
|
+ XEXP (operands[1], 0), Pmode);
|
|
+ emit_move_insn (operands[0], rval);
|
|
+ }
|
|
else
|
|
- gen = gen_aarch64_atomic_exchange<mode>;
|
|
-
|
|
- emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
|
|
-
|
|
+ {
|
|
+ emit_insn (gen_aarch64_atomic_exchange<mode>
|
|
+ (operands[0], operands[1], operands[2], operands[3]));
|
|
+ }
|
|
DONE;
|
|
}
|
|
)
|
|
@@ -280,6 +291,39 @@
|
|
}
|
|
operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
}
|
|
+ else if (TARGET_OUTLINE_ATOMICS)
|
|
+ {
|
|
+ const atomic_ool_names *names;
|
|
+ switch (<CODE>)
|
|
+ {
|
|
+ case MINUS:
|
|
+ operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
|
|
+ NULL, 1);
|
|
+ /* fallthru */
|
|
+ case PLUS:
|
|
+ names = &aarch64_ool_ldadd_names;
|
|
+ break;
|
|
+ case IOR:
|
|
+ names = &aarch64_ool_ldset_names;
|
|
+ break;
|
|
+ case XOR:
|
|
+ names = &aarch64_ool_ldeor_names;
|
|
+ break;
|
|
+ case AND:
|
|
+ operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
|
|
+ NULL, 1);
|
|
+ names = &aarch64_ool_ldclr_names;
|
|
+ break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+ machine_mode mode = <MODE>mode;
|
|
+ rtx func = aarch64_atomic_ool_func (mode, operands[2], names);
|
|
+ emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode, 2,
|
|
+ operands[1], mode,
|
|
+ XEXP (operands[0], 0), Pmode);
|
|
+ DONE;
|
|
+ }
|
|
else
|
|
gen = gen_aarch64_atomic_<atomic_optab><mode>;
|
|
|
|
@@ -405,6 +449,40 @@
|
|
}
|
|
operands[2] = force_reg (<MODE>mode, operands[2]);
|
|
}
|
|
+ else if (TARGET_OUTLINE_ATOMICS)
|
|
+ {
|
|
+ const atomic_ool_names *names;
|
|
+ switch (<CODE>)
|
|
+ {
|
|
+ case MINUS:
|
|
+ operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
|
|
+ NULL, 1);
|
|
+ /* fallthru */
|
|
+ case PLUS:
|
|
+ names = &aarch64_ool_ldadd_names;
|
|
+ break;
|
|
+ case IOR:
|
|
+ names = &aarch64_ool_ldset_names;
|
|
+ break;
|
|
+ case XOR:
|
|
+ names = &aarch64_ool_ldeor_names;
|
|
+ break;
|
|
+ case AND:
|
|
+ operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
|
|
+ NULL, 1);
|
|
+ names = &aarch64_ool_ldclr_names;
|
|
+ break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+ machine_mode mode = <MODE>mode;
|
|
+ rtx func = aarch64_atomic_ool_func (mode, operands[3], names);
|
|
+ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode,
|
|
+ 2, operands[2], mode,
|
|
+ XEXP (operands[1], 0), Pmode);
|
|
+ emit_move_insn (operands[0], rval);
|
|
+ DONE;
|
|
+ }
|
|
else
|
|
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
|
|
|
|
@@ -494,7 +572,7 @@
|
|
{
|
|
/* Use an atomic load-operate instruction when possible. In this case
|
|
we will re-compute the result from the original mem value. */
|
|
- if (TARGET_LSE)
|
|
+ if (TARGET_LSE || TARGET_OUTLINE_ATOMICS)
|
|
{
|
|
rtx tmp = gen_reg_rtx (<MODE>mode);
|
|
operands[2] = force_reg (<MODE>mode, operands[2]);
|
|
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
|
index d941c7b1ed4..71b8e92cf90 100644
|
|
--- a/gcc/doc/invoke.texi
|
|
+++ b/gcc/doc/invoke.texi
|
|
@@ -1042,7 +1042,7 @@ See RS/6000 and PowerPC Options.
|
|
-mfloat128 -mno-float128 -mfloat128-hardware -mno-float128-hardware @gol
|
|
-mgnu-attribute -mno-gnu-attribute @gol
|
|
-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol
|
|
--mstack-protector-guard-offset=@var{offset} @gol
|
|
+-mstack-protector-guard-offset=@var{offset} -moutline-atomics @gol
|
|
-mlra -mno-lra}
|
|
|
|
@emph{RX Options}
|
|
@@ -13955,6 +13955,19 @@ This option only has an effect if @option{-ffast-math} or
|
|
precision of division results to about 16 bits for
|
|
single precision and to 32 bits for double precision.
|
|
|
|
+@item -moutline-atomics
|
|
+@itemx -mno-outline-atomics
|
|
+Enable or disable calls to out-of-line helpers to implement atomic operations.
|
|
+These helpers will, at runtime, determine if the LSE instructions from
|
|
+ARMv8.1-A can be used; if not, they will use the load/store-exclusive
|
|
+instructions that are present in the base ARMv8.0 ISA.
|
|
+
|
|
+This option is only applicable when compiling for the base ARMv8.0
|
|
+instruction set. If using a later revision, e.g. @option{-march=armv8.1-a}
|
|
+or @option{-march=armv8-a+lse}, the ARMv8.1-Atomics instructions will be
|
|
+used directly. The same applies when using @option{-mcpu=} when the
|
|
+selected cpu supports the @samp{lse} feature.
|
|
+
|
|
@item -march=@var{name}
|
|
@opindex march
|
|
Specify the name of the target architecture and, optionally, one or
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
|
|
index 49ca5d0d09c..a828a72aa75 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
|
|
|
|
#include "atomic-comp-swap-release-acquire.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
|
|
index 74f26348e42..6823ce381b2 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-acq_rel.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
|
|
index 66c1b1efe20..87937de378a 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-acquire.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
|
|
index c09d0434ecf..60955e57da3 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-char.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
|
|
index 5783ab84f5c..16cb11aeeaf 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-consume.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
|
|
index 18b8f0b04e9..bcab4e481e3 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
int v = 0;
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
|
|
index 8520f0839ba..040e4a8d168 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-int.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
|
|
index d011f8c5ce2..fc88b92cd3e 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
long v = 0;
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
|
|
index ed96bfdb978..503d62b0280 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-relaxed.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
|
|
index fc4be17de89..efe14aea7e4 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-release.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
|
|
index 613000fe490..09973bf82ba 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-seq_cst.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
|
|
index e82c8118ece..e1dcebb0f89 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "atomic-op-short.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
index f2a21ddf2e1..29246979bfb 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O2 -march=armv8-a+nolse" } */
|
|
+/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
|
|
/* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
|
|
|
|
int
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
index 8d2ae67dfbe..6daf9b08f5a 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O2 -march=armv8-a+nolse" } */
|
|
+/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
|
|
/* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
|
|
|
|
int
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
|
|
index e571b2f13b3..f56415f3354 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
|
|
|
|
#include "sync-comp-swap.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
|
|
index 357bf1be3b2..39b3144aa36 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "sync-op-acquire.x"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
|
|
index c6ba1629965..6b8b2043f40 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-march=armv8-a+nolse -O2" } */
|
|
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
|
|
|
|
#include "sync-op-full.x"
|
|
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 6da4552680e8269b677c08770797042f70ab8fdc Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 25 Sep 2019 21:48:41 +0000
|
|
Subject: [PATCH 17/24] aarch64: Fix store-exclusive in load-operate LSE
|
|
helpers
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2019-09-25 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
libgcc/
|
|
PR target/91834
|
|
* config/aarch64/lse.S (LDNM): Ensure STXR output does not
|
|
overlap the inputs.
|
|
|
|
(cherry picked from commit 88a51d68c4aaa61adb36a9cad6f25ef214bde853)
|
|
---
|
|
libgcc/config/aarch64/lse.S | 4 ++--
|
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
|
|
index a5f6673596c..c7979382ad7 100644
|
|
--- a/libgcc/config/aarch64/lse.S
|
|
+++ b/libgcc/config/aarch64/lse.S
|
|
@@ -227,8 +227,8 @@ STARTFN NAME(LDNM)
|
|
8: mov s(tmp0), s(0)
|
|
0: LDXR s(0), [x1]
|
|
OP s(tmp1), s(0), s(tmp0)
|
|
- STXR w(tmp1), s(tmp1), [x1]
|
|
- cbnz w(tmp1), 0b
|
|
+ STXR w(tmp2), s(tmp1), [x1]
|
|
+ cbnz w(tmp2), 0b
|
|
ret
|
|
|
|
ENDFN NAME(LDNM)
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From a5f0d552ba6d5ce02b1c2f056116a0f8532b9667 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <richard.henderson@linaro.org>
|
|
Date: Wed, 25 Sep 2019 22:51:55 +0000
|
|
Subject: [PATCH 18/24] aarch64: Configure for sys/auxv.h in libgcc for
|
|
lse-init.c
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2019-09-25 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
libgcc/
|
|
PR target/91833
|
|
* config/aarch64/lse-init.c: Include auto-target.h. Disable
|
|
initialization if !HAVE_SYS_AUXV_H.
|
|
* configure.ac (AC_CHECK_HEADERS): Add sys/auxv.h.
|
|
* config.in, configure: Rebuild.
|
|
|
|
(cherry picked from commit 58d169ba9ffca04d77314f525af9efd93881a86b)
|
|
---
|
|
libgcc/config.in | 8 ++++++++
|
|
libgcc/config/aarch64/lse-init.c | 4 +++-
|
|
libgcc/configure | 26 +++++++++++++++++++-------
|
|
libgcc/configure.ac | 2 +-
|
|
4 files changed, 31 insertions(+), 9 deletions(-)
|
|
mode change 100644 => 100755 libgcc/configure
|
|
|
|
diff --git a/libgcc/config.in b/libgcc/config.in
|
|
index 7de22ee0a72..89d4aba3133 100644
|
|
--- a/libgcc/config.in
|
|
+++ b/libgcc/config.in
|
|
@@ -33,6 +33,9 @@
|
|
/* Define to 1 if you have the <string.h> header file. */
|
|
#undef HAVE_STRING_H
|
|
|
|
+/* Define to 1 if you have the <sys/auxv.h> header file. */
|
|
+#undef HAVE_SYS_AUXV_H
|
|
+
|
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
|
#undef HAVE_SYS_STAT_H
|
|
|
|
@@ -72,6 +75,11 @@
|
|
/* Define to 1 if the target use emutls for thread-local storage. */
|
|
#undef USE_EMUTLS
|
|
|
|
+/* Enable large inode numbers on Mac OS X 10.5. */
|
|
+#ifndef _DARWIN_USE_64_BIT_INODE
|
|
+# define _DARWIN_USE_64_BIT_INODE 1
|
|
+#endif
|
|
+
|
|
/* Number of bits in a file offset, on hosts where this is settable. */
|
|
#undef _FILE_OFFSET_BITS
|
|
|
|
diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c
|
|
index 33d29147479..1a8f4c55213 100644
|
|
--- a/libgcc/config/aarch64/lse-init.c
|
|
+++ b/libgcc/config/aarch64/lse-init.c
|
|
@@ -23,12 +23,14 @@ a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
+#include "auto-target.h"
|
|
+
|
|
/* Define the symbol gating the LSE implementations. */
|
|
_Bool __aarch64_have_lse_atomics
|
|
__attribute__((visibility("hidden"), nocommon));
|
|
|
|
/* Disable initialization of __aarch64_have_lse_atomics during bootstrap. */
|
|
-#ifndef inhibit_libc
|
|
+#if !defined(inhibit_libc) && defined(HAVE_SYS_AUXV_H)
|
|
# include <sys/auxv.h>
|
|
|
|
/* Disable initialization if the system headers are too old. */
|
|
diff --git a/libgcc/configure b/libgcc/configure
|
|
old mode 100644
|
|
new mode 100755
|
|
index 441601a1f76..59e15a7f33f
|
|
--- a/libgcc/configure
|
|
+++ b/libgcc/configure
|
|
@@ -640,6 +640,7 @@ infodir
|
|
docdir
|
|
oldincludedir
|
|
includedir
|
|
+runstatedir
|
|
localstatedir
|
|
sharedstatedir
|
|
sysconfdir
|
|
@@ -727,6 +728,7 @@ datadir='${datarootdir}'
|
|
sysconfdir='${prefix}/etc'
|
|
sharedstatedir='${prefix}/com'
|
|
localstatedir='${prefix}/var'
|
|
+runstatedir='${localstatedir}/run'
|
|
includedir='${prefix}/include'
|
|
oldincludedir='/usr/include'
|
|
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
|
|
@@ -978,6 +980,15 @@ do
|
|
| -silent | --silent | --silen | --sile | --sil)
|
|
silent=yes ;;
|
|
|
|
+ -runstatedir | --runstatedir | --runstatedi | --runstated \
|
|
+ | --runstate | --runstat | --runsta | --runst | --runs \
|
|
+ | --run | --ru | --r)
|
|
+ ac_prev=runstatedir ;;
|
|
+ -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
|
|
+ | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
|
|
+ | --run=* | --ru=* | --r=*)
|
|
+ runstatedir=$ac_optarg ;;
|
|
+
|
|
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
|
|
ac_prev=sbindir ;;
|
|
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
|
|
@@ -1115,7 +1126,7 @@ fi
|
|
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
|
|
datadir sysconfdir sharedstatedir localstatedir includedir \
|
|
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
|
|
- libdir localedir mandir
|
|
+ libdir localedir mandir runstatedir
|
|
do
|
|
eval ac_val=\$$ac_var
|
|
# Remove trailing slashes.
|
|
@@ -1270,6 +1281,7 @@ Fine tuning of the installation directories:
|
|
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
|
|
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
|
|
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
|
|
+ --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
|
|
--libdir=DIR object code libraries [EPREFIX/lib]
|
|
--includedir=DIR C header files [PREFIX/include]
|
|
--oldincludedir=DIR C header files for non-gcc [/usr/include]
|
|
@@ -4088,7 +4100,7 @@ else
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4134,7 +4146,7 @@ else
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4158,7 +4170,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4203,7 +4215,7 @@ else
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4227,7 +4239,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4339,7 +4351,7 @@ as_fn_arith $ac_cv_sizeof_long_double \* 8 && long_double_type_size=$as_val
|
|
|
|
for ac_header in inttypes.h stdint.h stdlib.h ftw.h \
|
|
unistd.h sys/stat.h sys/types.h \
|
|
- string.h strings.h memory.h
|
|
+ string.h strings.h memory.h sys/auxv.h
|
|
do :
|
|
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
|
|
ac_fn_c_check_header_preproc "$LINENO" "$ac_header" "$as_ac_Header"
|
|
diff --git a/libgcc/configure.ac b/libgcc/configure.ac
|
|
index 99b8e15562f..fdd25f295b1 100644
|
|
--- a/libgcc/configure.ac
|
|
+++ b/libgcc/configure.ac
|
|
@@ -202,7 +202,7 @@ AC_SUBST(long_double_type_size)
|
|
|
|
AC_CHECK_HEADERS(inttypes.h stdint.h stdlib.h ftw.h \
|
|
unistd.h sys/stat.h sys/types.h \
|
|
- string.h strings.h memory.h)
|
|
+ string.h strings.h memory.h sys/auxv.h)
|
|
AC_HEADER_STDC
|
|
|
|
# Check for decimal float support.
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From b5727f45769817cb8b7628abf5898ef840983425 Mon Sep 17 00:00:00 2001
|
|
From: Jakub Jelinek <jakub@redhat.com>
|
|
Date: Tue, 31 Mar 2020 11:08:22 +0200
|
|
Subject: [PATCH 19/24] aarch64: Fix up aarch64_compare_and_swaphi pattern
|
|
[PR94368]
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
The following testcase ICEs in final_scan_insn_1. The problem is in the
|
|
@aarch64_compare_and_swaphi define_insn_and_split, since GCC 9 it uses
|
|
aarch64_plushi_operand predicate for the "expected value" operand, which
|
|
allows either 0..0xfff constants or 0x1000..0xf000 constants (i.e. HImode
|
|
values which when zero extended are either 0..0xfff or (0..0xfff) << 12).
|
|
The problem is that RA doesn't care about predicates, it honors just
|
|
constraints and the used constraint on the operand is n, which means any
|
|
HImode CONST_SCALAR_INT. In the testcase LRA thus propagates the -1
|
|
value into the insn.
|
|
This is a define_insn_and_split which requires mandatory split.
|
|
But during split2 pass, we check the predicate (and don't check
|
|
constraints), which fails and thus we don't split it, and during final we ICE
|
|
because the mandatory splitting didn't happen.
|
|
|
|
The following patch fixes it by adding a matching constraint to the
|
|
predicate and using it.
|
|
|
|
2020-03-31 Jakub Jelinek <jakub@redhat.com>
|
|
|
|
gcc/
|
|
PR target/94368
|
|
* config/aarch64/constraints.md (Uph): New constraint.
|
|
* config/aarch64/atomics.md (cas_short_expected_imm): New mode attr.
|
|
(@aarch64_compare_and_swap<mode>): Use it instead of n in operand 2's
|
|
constraint.
|
|
|
|
gcc/testsuite/
|
|
* gcc.dg/pr94368.c: New test.
|
|
|
|
(cherry picked from commit a27c534794dbe3530acae3427d2c58f937f1b050)
|
|
---
|
|
gcc/config/aarch64/atomics.md | 5 ++++-
|
|
gcc/config/aarch64/constraints.md | 7 +++++++
|
|
gcc/testsuite/gcc.dg/pr94368.c | 25 +++++++++++++++++++++++++
|
|
3 files changed, 36 insertions(+), 1 deletion(-)
|
|
create mode 100644 gcc/testsuite/gcc.dg/pr94368.c
|
|
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 23333a803ec..05ed4cee75b 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -38,6 +38,8 @@
|
|
|
|
(define_mode_attr cas_short_expected_pred
|
|
[(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")])
|
|
+(define_mode_attr cas_short_expected_imm
|
|
+ [(QI "n") (HI "Uph")])
|
|
|
|
(define_insn_and_split "aarch64_compare_and_swap<mode>"
|
|
[(set (reg:CC CC_REGNUM) ;; bool out
|
|
@@ -47,7 +49,8 @@
|
|
(match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
|
|
(set (match_dup 1)
|
|
(unspec_volatile:SHORT
|
|
- [(match_operand:SHORT 2 "<cas_short_expected_pred>" "rn") ;; expected
|
|
+ [(match_operand:SHORT 2 "<cas_short_expected_pred>"
|
|
+ "r<cas_short_expected_imm>") ;; expected
|
|
(match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired
|
|
(match_operand:SI 4 "const_int_operand") ;; is_weak
|
|
(match_operand:SI 5 "const_int_operand") ;; mod_s
|
|
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
|
|
index 9b3c7339c54..8dfd9026752 100644
|
|
--- a/gcc/config/aarch64/constraints.md
|
|
+++ b/gcc/config/aarch64/constraints.md
|
|
@@ -142,6 +142,13 @@
|
|
(and (match_code "const_int")
|
|
(match_test "(unsigned) exact_log2 (ival) <= 4")))
|
|
|
|
+(define_constraint "Uph"
|
|
+ "@internal
|
|
+ A constraint that matches HImode integers zero extendable to
|
|
+ SImode plus_operand."
|
|
+ (and (match_code "const_int")
|
|
+ (match_test "aarch64_plushi_immediate (op, VOIDmode)")))
|
|
+
|
|
(define_memory_constraint "Q"
|
|
"A memory address which uses a single base register with no offset."
|
|
(and (match_code "mem")
|
|
diff --git a/gcc/testsuite/gcc.dg/pr94368.c b/gcc/testsuite/gcc.dg/pr94368.c
|
|
new file mode 100644
|
|
index 00000000000..1267b822098
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/pr94368.c
|
|
@@ -0,0 +1,25 @@
|
|
+/* PR target/94368 */
|
|
+/* { dg-do compile { target fpic } } */
|
|
+/* { dg-options "-fpic -O1 -fcommon" } */
|
|
+
|
|
+int b, c, d, e, f, h;
|
|
+short g;
|
|
+int foo (int) __attribute__ ((__const__));
|
|
+
|
|
+void
|
|
+bar (void)
|
|
+{
|
|
+ while (1)
|
|
+ {
|
|
+ while (1)
|
|
+ {
|
|
+ __atomic_load_n (&e, 0);
|
|
+ if (foo (2))
|
|
+ __sync_val_compare_and_swap (&c, 0, f);
|
|
+ b = 1;
|
|
+ if (h == e)
|
|
+ break;
|
|
+ }
|
|
+ __sync_val_compare_and_swap (&g, -1, f);
|
|
+ }
|
|
+}
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From e98ee125c439b20329010b8a2386a252e9ca20cf Mon Sep 17 00:00:00 2001
|
|
From: Jakub Jelinek <jakub@redhat.com>
|
|
Date: Wed, 15 Apr 2020 11:01:19 +0200
|
|
Subject: [PATCH 20/24] aarch64: Fix bootstrap with old binutils [PR93053]
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
As reported in the PR, GCC 10 (and also 9.3.1 but not 9.3.0) fails to build
|
|
when using older binutils which lack LSE support, because those instructions
|
|
are used in libgcc.
|
|
Thanks to Kyrylo's hint, the following patches (hopefully) allow it to build
|
|
even with older binutils by using .inst directive if LSE support isn't
|
|
available in the assembler.
|
|
|
|
2020-04-15 Jakub Jelinek <jakub@redhat.com>
|
|
|
|
libgcc/
|
|
PR target/93053
|
|
* configure.ac (LIBGCC_CHECK_AS_LSE): Add HAVE_AS_LSE checking.
|
|
* config/aarch64/lse.S: Include auto-target.h, if HAVE_AS_LSE
|
|
is not defined, use just .arch armv8-a.
|
|
(B, M, N, OPN): Define.
|
|
(COMMENT): New .macro.
|
|
(CAS, CASP, SWP, LDOP): Use .inst directive if HAVE_AS_LSE is not
|
|
defined. Otherwise, move the operands right after the glue? and
|
|
comment out operands where the macros are used.
|
|
* configure: Regenerated.
|
|
* config.in: Regenerated.
|
|
|
|
(cherry picked from commit 5b2f76e36d861c881c6770b4f47c1fae6c0c8965)
|
|
---
|
|
libgcc/config.in | 8 ++---
|
|
libgcc/config/aarch64/lse.S | 57 ++++++++++++++++++++++++++++-----
|
|
libgcc/configure | 64 ++++++++++++++++++++++++++-----------
|
|
libgcc/configure.ac | 19 +++++++++++
|
|
4 files changed, 117 insertions(+), 31 deletions(-)
|
|
|
|
diff --git a/libgcc/config.in b/libgcc/config.in
|
|
index 89d4aba3133..9009790625f 100644
|
|
--- a/libgcc/config.in
|
|
+++ b/libgcc/config.in
|
|
@@ -1,5 +1,8 @@
|
|
/* config.in. Generated from configure.ac by autoheader. */
|
|
|
|
+/* Define to 1 if the assembler supports LSE. */
|
|
+#undef HAVE_AS_LSE
|
|
+
|
|
/* Define to 1 if the target assembler supports thread-local storage. */
|
|
#undef HAVE_CC_TLS
|
|
|
|
@@ -75,11 +78,6 @@
|
|
/* Define to 1 if the target use emutls for thread-local storage. */
|
|
#undef USE_EMUTLS
|
|
|
|
-/* Enable large inode numbers on Mac OS X 10.5. */
|
|
-#ifndef _DARWIN_USE_64_BIT_INODE
|
|
-# define _DARWIN_USE_64_BIT_INODE 1
|
|
-#endif
|
|
-
|
|
/* Number of bits in a file offset, on hosts where this is settable. */
|
|
#undef _FILE_OFFSET_BITS
|
|
|
|
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
|
|
index c7979382ad7..f7f1c19587b 100644
|
|
--- a/libgcc/config/aarch64/lse.S
|
|
+++ b/libgcc/config/aarch64/lse.S
|
|
@@ -48,8 +48,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
* separately to minimize code size.
|
|
*/
|
|
|
|
+#include "auto-target.h"
|
|
+
|
|
/* Tell the assembler to accept LSE instructions. */
|
|
+#ifdef HAVE_AS_LSE
|
|
.arch armv8-a+lse
|
|
+#else
|
|
+ .arch armv8-a
|
|
+#endif
|
|
|
|
/* Declare the symbol gating the LSE implementations. */
|
|
.hidden __aarch64_have_lse_atomics
|
|
@@ -58,12 +64,19 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
#if SIZE == 1
|
|
# define S b
|
|
# define UXT uxtb
|
|
+# define B 0x00000000
|
|
#elif SIZE == 2
|
|
# define S h
|
|
# define UXT uxth
|
|
+# define B 0x40000000
|
|
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
|
|
# define S
|
|
# define UXT mov
|
|
+# if SIZE == 4
|
|
+# define B 0x80000000
|
|
+# elif SIZE == 8
|
|
+# define B 0xc0000000
|
|
+# endif
|
|
#else
|
|
# error
|
|
#endif
|
|
@@ -72,18 +85,26 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
# define SUFF _relax
|
|
# define A
|
|
# define L
|
|
+# define M 0x000000
|
|
+# define N 0x000000
|
|
#elif MODEL == 2
|
|
# define SUFF _acq
|
|
# define A a
|
|
# define L
|
|
+# define M 0x400000
|
|
+# define N 0x800000
|
|
#elif MODEL == 3
|
|
# define SUFF _rel
|
|
# define A
|
|
# define L l
|
|
+# define M 0x008000
|
|
+# define N 0x400000
|
|
#elif MODEL == 4
|
|
# define SUFF _acq_rel
|
|
# define A a
|
|
# define L l
|
|
+# define M 0x408000
|
|
+# define N 0xc00000
|
|
#else
|
|
# error
|
|
#endif
|
|
@@ -144,9 +165,13 @@ STARTFN NAME(cas)
|
|
JUMP_IF_NOT_LSE 8f
|
|
|
|
#if SIZE < 16
|
|
-#define CAS glue4(cas, A, L, S)
|
|
+#ifdef HAVE_AS_LSE
|
|
+# define CAS glue4(cas, A, L, S) s(0), s(1), [x2]
|
|
+#else
|
|
+# define CAS .inst 0x08a07c41 + B + M
|
|
+#endif
|
|
|
|
- CAS s(0), s(1), [x2]
|
|
+ CAS /* s(0), s(1), [x2] */
|
|
ret
|
|
|
|
8: UXT s(tmp0), s(0)
|
|
@@ -160,9 +185,13 @@ STARTFN NAME(cas)
|
|
#else
|
|
#define LDXP glue3(ld, A, xp)
|
|
#define STXP glue3(st, L, xp)
|
|
-#define CASP glue3(casp, A, L)
|
|
+#ifdef HAVE_AS_LSE
|
|
+# define CASP glue3(casp, A, L) x0, x1, x2, x3, [x4]
|
|
+#else
|
|
+# define CASP .inst 0x48207c82 + M
|
|
+#endif
|
|
|
|
- CASP x0, x1, x2, x3, [x4]
|
|
+ CASP /* x0, x1, x2, x3, [x4] */
|
|
ret
|
|
|
|
8: mov x(tmp0), x0
|
|
@@ -181,12 +210,16 @@ ENDFN NAME(cas)
|
|
#endif
|
|
|
|
#ifdef L_swp
|
|
-#define SWP glue4(swp, A, L, S)
|
|
+#ifdef HAVE_AS_LSE
|
|
+# define SWP glue4(swp, A, L, S) s(0), s(0), [x1]
|
|
+#else
|
|
+# define SWP .inst 0x38208020 + B + N
|
|
+#endif
|
|
|
|
STARTFN NAME(swp)
|
|
JUMP_IF_NOT_LSE 8f
|
|
|
|
- SWP s(0), s(0), [x1]
|
|
+ SWP /* s(0), s(0), [x1] */
|
|
ret
|
|
|
|
8: mov s(tmp0), s(0)
|
|
@@ -204,24 +237,32 @@ ENDFN NAME(swp)
|
|
#ifdef L_ldadd
|
|
#define LDNM ldadd
|
|
#define OP add
|
|
+#define OPN 0x0000
|
|
#elif defined(L_ldclr)
|
|
#define LDNM ldclr
|
|
#define OP bic
|
|
+#define OPN 0x1000
|
|
#elif defined(L_ldeor)
|
|
#define LDNM ldeor
|
|
#define OP eor
|
|
+#define OPN 0x2000
|
|
#elif defined(L_ldset)
|
|
#define LDNM ldset
|
|
#define OP orr
|
|
+#define OPN 0x3000
|
|
#else
|
|
#error
|
|
#endif
|
|
-#define LDOP glue4(LDNM, A, L, S)
|
|
+#ifdef HAVE_AS_LSE
|
|
+# define LDOP glue4(LDNM, A, L, S) s(0), s(0), [x1]
|
|
+#else
|
|
+# define LDOP .inst 0x38200020 + OPN + B + N
|
|
+#endif
|
|
|
|
STARTFN NAME(LDNM)
|
|
JUMP_IF_NOT_LSE 8f
|
|
|
|
- LDOP s(0), s(0), [x1]
|
|
+ LDOP /* s(0), s(0), [x1] */
|
|
ret
|
|
|
|
8: mov s(tmp0), s(0)
|
|
diff --git a/libgcc/configure b/libgcc/configure
|
|
index 59e15a7f33f..8b6f38eeddd 100755
|
|
--- a/libgcc/configure
|
|
+++ b/libgcc/configure
|
|
@@ -640,7 +640,6 @@ infodir
|
|
docdir
|
|
oldincludedir
|
|
includedir
|
|
-runstatedir
|
|
localstatedir
|
|
sharedstatedir
|
|
sysconfdir
|
|
@@ -728,7 +727,6 @@ datadir='${datarootdir}'
|
|
sysconfdir='${prefix}/etc'
|
|
sharedstatedir='${prefix}/com'
|
|
localstatedir='${prefix}/var'
|
|
-runstatedir='${localstatedir}/run'
|
|
includedir='${prefix}/include'
|
|
oldincludedir='/usr/include'
|
|
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
|
|
@@ -980,15 +978,6 @@ do
|
|
| -silent | --silent | --silen | --sile | --sil)
|
|
silent=yes ;;
|
|
|
|
- -runstatedir | --runstatedir | --runstatedi | --runstated \
|
|
- | --runstate | --runstat | --runsta | --runst | --runs \
|
|
- | --run | --ru | --r)
|
|
- ac_prev=runstatedir ;;
|
|
- -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
|
|
- | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
|
|
- | --run=* | --ru=* | --r=*)
|
|
- runstatedir=$ac_optarg ;;
|
|
-
|
|
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
|
|
ac_prev=sbindir ;;
|
|
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
|
|
@@ -1126,7 +1115,7 @@ fi
|
|
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
|
|
datadir sysconfdir sharedstatedir localstatedir includedir \
|
|
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
|
|
- libdir localedir mandir runstatedir
|
|
+ libdir localedir mandir
|
|
do
|
|
eval ac_val=\$$ac_var
|
|
# Remove trailing slashes.
|
|
@@ -1281,7 +1270,6 @@ Fine tuning of the installation directories:
|
|
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
|
|
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
|
|
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
|
|
- --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
|
|
--libdir=DIR object code libraries [EPREFIX/lib]
|
|
--includedir=DIR C header files [PREFIX/include]
|
|
--oldincludedir=DIR C header files for non-gcc [/usr/include]
|
|
@@ -4100,7 +4088,7 @@ else
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4146,7 +4134,7 @@ else
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4170,7 +4158,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4215,7 +4203,7 @@ else
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -4239,7 +4227,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
|
We can't simply define LARGE_OFF_T to be 9223372036854775807,
|
|
since some C++ compilers masquerading as C compilers
|
|
incorrectly reject 9223372036854775807. */
|
|
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
|
|
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
|
|
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
|
|
&& LARGE_OFF_T % 2147483647 == 1)
|
|
? 1 : -1];
|
|
@@ -5221,6 +5209,46 @@ if test "$enable_tls $gcc_cv_use_emutls" = "yes yes"; then
|
|
fi
|
|
|
|
|
|
+
|
|
+
|
|
+case "${target}" in
|
|
+aarch64*-*-*)
|
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports LSE" >&5
|
|
+$as_echo_n "checking if the assembler supports LSE... " >&6; }
|
|
+if test "${libgcc_cv_as_lse+set}" = set; then :
|
|
+ $as_echo_n "(cached) " >&6
|
|
+else
|
|
+
|
|
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
|
+/* end confdefs.h. */
|
|
+
|
|
+int
|
|
+main ()
|
|
+{
|
|
+ asm(".arch armv8-a+lse\n\tcas w0, w1, [x2]");
|
|
+
|
|
+ ;
|
|
+ return 0;
|
|
+}
|
|
+_ACEOF
|
|
+if ac_fn_c_try_compile "$LINENO"; then :
|
|
+ libgcc_cv_as_lse=yes
|
|
+else
|
|
+ libgcc_cv_as_lse=no
|
|
+fi
|
|
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
|
+
|
|
+fi
|
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_as_lse" >&5
|
|
+$as_echo "$libgcc_cv_as_lse" >&6; }
|
|
+ if test x$libgcc_cv_as_lse = xyes; then
|
|
+
|
|
+$as_echo "#define HAVE_AS_LSE 1" >>confdefs.h
|
|
+
|
|
+ fi
|
|
+ ;;
|
|
+esac
|
|
+
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5
|
|
$as_echo_n "checking for init priority support... " >&6; }
|
|
if test "${libgcc_cv_init_priority+set}" = set; then :
|
|
diff --git a/libgcc/configure.ac b/libgcc/configure.ac
|
|
index fdd25f295b1..708ed19f8e4 100644
|
|
--- a/libgcc/configure.ac
|
|
+++ b/libgcc/configure.ac
|
|
@@ -503,6 +503,25 @@ if test "$enable_tls $gcc_cv_use_emutls" = "yes yes"; then
|
|
fi
|
|
AC_SUBST(set_use_emutls)
|
|
|
|
+dnl Check if as supports LSE instructions.
|
|
+AC_DEFUN([LIBGCC_CHECK_AS_LSE], [
|
|
+case "${target}" in
|
|
+aarch64*-*-*)
|
|
+ AC_CACHE_CHECK([if the assembler supports LSE], libgcc_cv_as_lse, [
|
|
+ AC_TRY_COMPILE([],
|
|
+changequote(,)dnl
|
|
+ asm(".arch armv8-a+lse\n\tcas w0, w1, [x2]");
|
|
+changequote([,])dnl
|
|
+ ,
|
|
+ [libgcc_cv_as_lse=yes], [libgcc_cv_as_lse=no])
|
|
+ ])
|
|
+ if test x$libgcc_cv_as_lse = xyes; then
|
|
+ AC_DEFINE(HAVE_AS_LSE, 1, [Define to 1 if the assembler supports LSE.])
|
|
+ fi
|
|
+ ;;
|
|
+esac])
|
|
+LIBGCC_CHECK_AS_LSE
|
|
+
|
|
AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, [
|
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
|
[[void ip (void) __attribute__ ((constructor (1)));]])],
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 37c58b17785af416b1d78ddd40ec3b1d394a584c Mon Sep 17 00:00:00 2001
|
|
From: Jakub Jelinek <jakub@redhat.com>
|
|
Date: Thu, 2 Apr 2020 12:57:11 +0200
|
|
Subject: [PATCH 21/24] aarch64: Fix ICE due to
|
|
aarch64_gen_compare_reg_maybe_ze [PR94435]
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
The following testcase ICEs, because aarch64_gen_compare_reg_maybe_ze emits
|
|
invalid RTL.
|
|
For y_mode [QH]Imode it expects y to be of that mode (or CONST_INT that fits
|
|
into that mode) and x being SImode; for non-CONST_INT y it zero extends y
|
|
into SImode and compares that against x, for CONST_INT y it zero extends y
|
|
into SImode. The problem is that when the zero extended constant isn't
|
|
usable directly, it forces it into a REG, but with y_mode mode, and then
|
|
compares against y. That is wrong, because it should force it into a SImode
|
|
REG and compare that way.
|
|
|
|
2020-04-02 Jakub Jelinek <jakub@redhat.com>
|
|
|
|
gcc/
|
|
PR target/94435
|
|
* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): For
|
|
y_mode E_[QH]Imode and y being a CONST_INT, change y_mode to SImode.
|
|
|
|
gcc/testsuite/
|
|
* gcc.target/aarch64/pr94435.c: New test.
|
|
|
|
(cherry picked from commit df562b12d90699c20923f91df48eed08ebcb572e)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 5 ++++-
|
|
gcc/testsuite/gcc.target/aarch64/pr94435.c | 25 ++++++++++++++++++++++
|
|
2 files changed, 29 insertions(+), 1 deletion(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr94435.c
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index 5ccb13b46fe..a0685a5ad41 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -1230,7 +1230,10 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
|
|
if (y_mode == QImode || y_mode == HImode)
|
|
{
|
|
if (CONST_INT_P (y))
|
|
- y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
|
|
+ {
|
|
+ y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
|
|
+ y_mode = SImode;
|
|
+ }
|
|
else
|
|
{
|
|
rtx t, cc_reg;
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/pr94435.c b/gcc/testsuite/gcc.target/aarch64/pr94435.c
|
|
new file mode 100644
|
|
index 00000000000..5713c14d5f9
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/pr94435.c
|
|
@@ -0,0 +1,25 @@
|
|
+/* PR target/94435 */
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-march=armv8-a+nolse -moutline-atomics" } */
|
|
+
|
|
+int b, c, d, e, f, h;
|
|
+short g;
|
|
+int foo (int) __attribute__ ((__const__));
|
|
+
|
|
+void
|
|
+bar (void)
|
|
+{
|
|
+ while (1)
|
|
+ {
|
|
+ while (1)
|
|
+ {
|
|
+ __atomic_load_n (&e, 0);
|
|
+ if (foo (2))
|
|
+ __sync_val_compare_and_swap (&c, 0, f);
|
|
+ b = 1;
|
|
+ if (h == e)
|
|
+ break;
|
|
+ }
|
|
+ __sync_val_compare_and_swap (&g, -1, f);
|
|
+ }
|
|
+}
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 57927b2e7e2aa9c7ae703a110cc86dfc40e7659c Mon Sep 17 00:00:00 2001
|
|
From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
|
|
Date: Wed, 21 Aug 2019 18:34:43 +0000
|
|
Subject: [PATCH 22/24] re PR target/90724 (ICE with
|
|
__sync_bool_compare_and_swap with -march=armv8.2-a+sve)
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
2019-08-21 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
|
|
|
|
gcc/
|
|
PR target/90724
|
|
* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): Force y
|
|
in reg if it fails aarch64_plus_operand predicate.
|
|
|
|
(cherry picked from commit 846f78d414101dbd33ff9c370d379bae73ae0efa)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 3 +++
|
|
1 file changed, 3 insertions(+)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index a0685a5ad41..9535d688ee5 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -1248,6 +1248,9 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
|
|
}
|
|
}
|
|
|
|
+ if (!aarch64_plus_operand (y, y_mode))
|
|
+ y = force_reg (y_mode, y);
|
|
+
|
|
return aarch64_gen_compare_reg (code, x, y);
|
|
}
|
|
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From d019077cc8d3c18bc510aeaf34c6c4532913f378 Mon Sep 17 00:00:00 2001
|
|
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
|
|
Date: Tue, 28 Apr 2020 13:25:43 +0100
|
|
Subject: [PATCH 23/24] aarch64: Fix for PR target/94814
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
Backport of PR target/94518: Fix memmodel index in aarch64_store_exclusive_pair
|
|
|
|
2020-04-28 Andre Vieira <andre.simoesdiasvieira@arm.com>
|
|
|
|
PR target/94814
|
|
Backport from gcc-9.
|
|
2020-04-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
|
|
PR target/94518
|
|
2019-09-23 Richard Sandiford <richard.sandiford@arm.com>
|
|
|
|
gcc/
|
|
* config/aarch64/atomics.md (aarch64_store_exclusive_pair): Fix
|
|
memmodel index.
|
|
|
|
(cherry picked from commit 3a30d2558b3a199fe346479e6140cddae7fba5ed)
|
|
---
|
|
gcc/config/aarch64/atomics.md | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
|
|
index 05ed4cee75b..1fdc3092a33 100644
|
|
--- a/gcc/config/aarch64/atomics.md
|
|
+++ b/gcc/config/aarch64/atomics.md
|
|
@@ -752,7 +752,7 @@
|
|
UNSPECV_SX))]
|
|
""
|
|
{
|
|
- enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
|
|
+ enum memmodel model = memmodel_from_int (INTVAL (operands[4]));
|
|
if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
|
|
return "stxp\t%w0, %x2, %x3, %1";
|
|
else
|
|
--
|
|
2.26.2
|
|
|
|
|
|
From 7c216ba945cb92bd79fbe01b35e16bd1e3cd854d Mon Sep 17 00:00:00 2001
|
|
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
|
|
Date: Wed, 29 Apr 2020 15:42:27 +0100
|
|
Subject: [PATCH 24/24] aarch64: Force TImode values into even registers
|
|
To: gcc-patches@gcc.gnu.org
|
|
|
|
The LSE CASP instruction requires values to be placed in even
|
|
register pairs. A solution involving two additional register
|
|
classes was rejected in favor of the much simpler solution of
|
|
simply requiring all TImode values to be aligned.
|
|
|
|
gcc/ChangeLog:
|
|
2020-04-29 Andre Vieira <andre.simoesdiasvieira@arm.com>
|
|
|
|
Backport from mainline.
|
|
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
|
|
|
|
* config/aarch64/aarch64.c (aarch64_hard_regno_mode_ok): Force
|
|
16-byte modes held in GP registers to use an even regno.
|
|
|
|
(cherry picked from commit 563cc649beaf11d707c422e5f4e9e5cdacb818c3)
|
|
---
|
|
gcc/config/aarch64/aarch64.c | 12 ++++++++----
|
|
1 file changed, 8 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index 9535d688ee5..643ba7e8153 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -1084,10 +1084,14 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
|
|
if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
|
|
return mode == Pmode;
|
|
|
|
- if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
|
|
- return 1;
|
|
-
|
|
- if (FP_REGNUM_P (regno))
|
|
+ if (GP_REGNUM_P (regno))
|
|
+ {
|
|
+ if (GET_MODE_SIZE (mode) <= 8)
|
|
+ return true;
|
|
+ if (GET_MODE_SIZE (mode) <= 16)
|
|
+ return (regno & 1) == 0;
|
|
+ }
|
|
+ else if (FP_REGNUM_P (regno))
|
|
{
|
|
if (aarch64_vect_struct_mode_p (mode))
|
|
return
|
|
--
|
|
2.26.2
|
|
|
|
commit 1266778548e20de82983b6446f3cb685068cfb1e
|
|
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
Date: Wed May 6 16:20:38 2020 +0100
|
|
|
|
[AArch64] Use __getauxval instead of getauxval in LSE detection code in libgcc
|
|
|
|
This version of the fix uses __getauxval instead of getauxval.
|
|
The whole thing is guarded simply on __gnu_linux__.
|
|
__getauxval was introduced in glibc 2.16 but the aarch64 port was added in glibc 2.17, so in practice I expect all aarch64 glibcs to support __getauxval.
|
|
|
|
Bootstrapped and tested on aarch64-none-linux-gnu.
|
|
Also tested on aarch64-none-elf.
|
|
|
|
2020-05-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
|
|
* config/aarch64/lse-init.c (init_have_lse_atomics): Use __getauxval
|
|
instead of getauxval.
|
|
(AT_HWCAP): Define.
|
|
(HWCAP_ATOMICS): Define.
|
|
Guard detection on __gnu_linux__.
|
|
|
|
diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c
|
|
index 74acef25cce..00e9ab8cd1c 100644
|
|
--- a/libgcc/config/aarch64/lse-init.c
|
|
+++ b/libgcc/config/aarch64/lse-init.c
|
|
@@ -29,19 +29,20 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
_Bool __aarch64_have_lse_atomics
|
|
__attribute__((visibility("hidden"), nocommon));
|
|
|
|
-/* Disable initialization of __aarch64_have_lse_atomics during bootstrap. */
|
|
-#if !defined(inhibit_libc) && defined(HAVE_SYS_AUXV_H)
|
|
-# include <sys/auxv.h>
|
|
+/* Gate availability of __getauxval on glibc. All AArch64-supporting glibc
|
|
+ versions support it. */
|
|
+#ifdef __gnu_linux__
|
|
|
|
-/* Disable initialization if the system headers are too old. */
|
|
-# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)
|
|
+# define AT_HWCAP 16
|
|
+# define HWCAP_ATOMICS (1 << 8)
|
|
+
|
|
+unsigned long int __getauxval (unsigned long int);
|
|
|
|
static void __attribute__((constructor))
|
|
init_have_lse_atomics (void)
|
|
{
|
|
- unsigned long hwcap = getauxval (AT_HWCAP);
|
|
+ unsigned long hwcap = __getauxval (AT_HWCAP);
|
|
__aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
|
|
}
|
|
|
|
-# endif /* HWCAP */
|
|
-#endif /* inhibit_libc */
|
|
+#endif /* __gnu_linux__ */
|