147 lines
5.4 KiB
Diff
147 lines
5.4 KiB
Diff
|
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
|
||
|
===================================================================
|
||
|
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
|
||
|
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
|
||
|
@@ -6833,25 +6833,25 @@
|
||
|
|
||
|
// If one of the operands is a constant vector zero, attempt to fold the
|
||
|
// comparison to a specialized compare-against-zero form.
|
||
|
- SDValue SingleOp;
|
||
|
- if (ISD::isBuildVectorAllZeros(Op1.getNode()))
|
||
|
- SingleOp = Op0;
|
||
|
- else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
|
||
|
+ if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
|
||
|
+ (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
|
||
|
+ Opc == ARMCC::NE)) {
|
||
|
if (Opc == ARMCC::GE)
|
||
|
Opc = ARMCC::LE;
|
||
|
else if (Opc == ARMCC::GT)
|
||
|
Opc = ARMCC::LT;
|
||
|
- SingleOp = Op1;
|
||
|
+ std::swap(Op0, Op1);
|
||
|
}
|
||
|
|
||
|
SDValue Result;
|
||
|
- if (SingleOp.getNode()) {
|
||
|
- Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
|
||
|
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
|
||
|
+ (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
|
||
|
+ Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
|
||
|
+ Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
|
||
|
DAG.getConstant(Opc, dl, MVT::i32));
|
||
|
- } else {
|
||
|
+ else
|
||
|
Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
|
||
|
DAG.getConstant(Opc, dl, MVT::i32));
|
||
|
- }
|
||
|
|
||
|
Result = DAG.getSExtOrTrunc(Result, dl, VT);
|
||
|
|
||
|
Index: llvm/test/CodeGen/Thumb2/mve-pred-and.ll
|
||
|
===================================================================
|
||
|
--- llvm/test/CodeGen/Thumb2/mve-pred-and.ll
|
||
|
+++ llvm/test/CodeGen/Thumb2/mve-pred-and.ll
|
||
|
@@ -122,8 +122,9 @@
|
||
|
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
||
|
; CHECK-LABEL: cmpulez_v4i1:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
+; CHECK-NEXT: vmov.i32 q2, #0x0
|
||
|
; CHECK-NEXT: vpt.i32 eq, q0, zr
|
||
|
-; CHECK-NEXT: vcmpt.u32 cs, q1, zr
|
||
|
+; CHECK-NEXT: vcmpt.u32 cs, q2, q1
|
||
|
; CHECK-NEXT: vpsel q0, q0, q1
|
||
|
; CHECK-NEXT: bx lr
|
||
|
entry:
|
||
|
Index: llvm/test/CodeGen/Thumb2/mve-pred-or.ll
|
||
|
===================================================================
|
||
|
--- llvm/test/CodeGen/Thumb2/mve-pred-or.ll
|
||
|
+++ llvm/test/CodeGen/Thumb2/mve-pred-or.ll
|
||
|
@@ -123,7 +123,8 @@
|
||
|
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
||
|
; CHECK-LABEL: cmpulez_v4i1:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u32 cs, q1, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q2, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u32 cs, q2, q1
|
||
|
; CHECK-NEXT: vpnot
|
||
|
; CHECK-NEXT: vpst
|
||
|
; CHECK-NEXT: vcmpt.i32 ne, q0, zr
|
||
|
Index: llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
|
||
|
===================================================================
|
||
|
--- llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
|
||
|
+++ llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
|
||
|
@@ -151,7 +151,8 @@
|
||
|
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
||
|
; CHECK-LABEL: cmpulez_v4i1:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u32 cs, q1, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q2, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u32 cs, q2, q1
|
||
|
; CHECK-NEXT: vmrs r0, p0
|
||
|
; CHECK-NEXT: vcmp.i32 eq, q0, zr
|
||
|
; CHECK-NEXT: vmrs r1, p0
|
||
|
Index: llvm/test/CodeGen/Thumb2/mve-vcmpz.ll
|
||
|
===================================================================
|
||
|
--- llvm/test/CodeGen/Thumb2/mve-vcmpz.ll
|
||
|
+++ llvm/test/CodeGen/Thumb2/mve-vcmpz.ll
|
||
|
@@ -110,7 +110,8 @@
|
||
|
define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
|
||
|
; CHECK-LABEL: vcmp_ulez_v4i32:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u32 cs, q0, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q3, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u32 cs, q3, q0
|
||
|
; CHECK-NEXT: vpsel q0, q1, q2
|
||
|
; CHECK-NEXT: bx lr
|
||
|
entry:
|
||
|
@@ -229,7 +230,8 @@
|
||
|
define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
|
||
|
; CHECK-LABEL: vcmp_ulez_v8i16:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u16 cs, q0, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q3, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u16 cs, q3, q0
|
||
|
; CHECK-NEXT: vpsel q0, q1, q2
|
||
|
; CHECK-NEXT: bx lr
|
||
|
entry:
|
||
|
@@ -348,7 +350,8 @@
|
||
|
define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
|
||
|
; CHECK-LABEL: vcmp_ulez_v16i8:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u8 cs, q0, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q3, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u8 cs, q3, q0
|
||
|
; CHECK-NEXT: vpsel q0, q1, q2
|
||
|
; CHECK-NEXT: bx lr
|
||
|
entry:
|
||
|
@@ -489,7 +492,8 @@
|
||
|
define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
|
||
|
; CHECK-LABEL: vcmp_r_ugez_v4i32:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u32 cs, q0, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q3, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u32 cs, q3, q0
|
||
|
; CHECK-NEXT: vpsel q0, q1, q2
|
||
|
; CHECK-NEXT: bx lr
|
||
|
entry:
|
||
|
@@ -608,7 +612,8 @@
|
||
|
define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
|
||
|
; CHECK-LABEL: vcmp_r_ugez_v8i16:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u16 cs, q0, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q3, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u16 cs, q3, q0
|
||
|
; CHECK-NEXT: vpsel q0, q1, q2
|
||
|
; CHECK-NEXT: bx lr
|
||
|
entry:
|
||
|
@@ -727,7 +732,8 @@
|
||
|
define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
|
||
|
; CHECK-LABEL: vcmp_r_ugez_v16i8:
|
||
|
; CHECK: @ %bb.0: @ %entry
|
||
|
-; CHECK-NEXT: vcmp.u8 cs, q0, zr
|
||
|
+; CHECK-NEXT: vmov.i32 q3, #0x0
|
||
|
+; CHECK-NEXT: vcmp.u8 cs, q3, q0
|
||
|
; CHECK-NEXT: vpsel q0, q1, q2
|
||
|
; CHECK-NEXT: bx lr
|
||
|
entry:
|