diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll
index d07fcbc29806..290031ac50ea 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll
@@ -2,13 +2,6 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-declare i4 @llvm.sadd.sat.i4(i4, i4)
-declare i8 @llvm.sadd.sat.i8(i8, i8)
-declare i16 @llvm.sadd.sat.i16(i16, i16)
-declare i32 @llvm.sadd.sat.i32(i32, i32)
-declare i64 @llvm.sadd.sat.i64(i64, i64)
-declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
-
 define i32 @func(i32 %x, i32 %y) nounwind {
 ; CHECK-SD-LABEL: func:
 ; CHECK-SD: // %bb.0:
@@ -135,12 +128,5 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y);
   ret i4 %tmp;
 }
-
-define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ret
-  %tmp = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
-  ret <4 x i32> %tmp;
-}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index e1018bbee789..5f0d4c7bffe5 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -5,37 +5,6 @@
 ; CHECK-GI: warning: Instruction selection used fallback path for v16i4
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
 
-declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
-declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
-declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>)
-declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>)
-declare <12 x i8> @llvm.sadd.sat.v12i8(<12 x i8>, <12 x i8>)
-declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
-declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
-
-declare <1 x i16> @llvm.sadd.sat.v1i16(<1 x i16>, <1 x i16>)
-declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)
-declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
-declare <12 x i16> @llvm.sadd.sat.v12i16(<12 x i16>, <12 x i16>)
-declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16>, <32 x i16>)
-
-declare <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1>, <16 x i1>)
-declare <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4>, <16 x i4>)
-
-declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>)
-declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>)
-declare <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>)
-declare <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64>, <8 x i64>)
-
-declare <4 x i24> @llvm.sadd.sat.v4i24(<4 x i24>, <4 x i24>)
-declare <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128>, <2 x i128>)
-
 define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; CHECK-LABEL: v16i8:
 ; CHECK: // %bb.0:
@@ -393,6 +362,34 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
   ret <16 x i1> %z
 }
 
+define void @v1i32(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ldr s1, [x1]
+; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: adds w8, w8, w9
+; CHECK-GI-NEXT: mov w9, #-2147483648 // =0x80000000
+; CHECK-GI-NEXT: cset w10, vs
+; CHECK-GI-NEXT: add w9, w9, w8, asr #31
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: csel w8, w9, w8, ne
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i32>, ptr %px
+  %y = load <1 x i32>, ptr %py
+  %z = call <1 x i32> @llvm.sadd.sat.v1i32(<1 x i32> %x, <1 x i32> %y)
+  store <1 x i32> %z, ptr %pz
+  ret void
+}
+
 define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: v2i32:
 ; CHECK: // %bb.0:
@@ -447,6 +444,38 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
   ret <16 x i32> %z
 }
 
+define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr x8, [x1]
+; CHECK-SD-NEXT: ldr x9, [x0]
+; CHECK-SD-NEXT: adds x8, x9, x8
+; CHECK-SD-NEXT: asr x9, x8, #63
+; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000
+; CHECK-SD-NEXT: csel x8, x9, x8, vs
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: str d0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr x8, [x0]
+; CHECK-GI-NEXT: ldr x9, [x1]
+; CHECK-GI-NEXT: adds x8, x8, x9
+; CHECK-GI-NEXT: mov x9, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: cset w10, vs
+; CHECK-GI-NEXT: add x9, x9, x8, asr #63
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: csel x8, x9, x8, ne
+; CHECK-GI-NEXT: str x8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i64>, ptr %px
+  %y = load <1 x i64>, ptr %py
+  %z = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %x, <1 x i64> %y)
+  store <1 x i64> %z, ptr %pz
+  ret void
+}
+
 define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: v2i64:
 ; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll
index 23550d3c41cc..4740803fc762 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll
@@ -2,13 +2,6 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-declare i4 @llvm.ssub.sat.i4(i4, i4)
-declare i8 @llvm.ssub.sat.i8(i8, i8)
-declare i16 @llvm.ssub.sat.i16(i16, i16)
-declare i32 @llvm.ssub.sat.i32(i32, i32)
-declare i64 @llvm.ssub.sat.i64(i64, i64)
-declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
-
 define i32 @func(i32 %x, i32 %y) nounwind {
 ; CHECK-SD-LABEL: func:
 ; CHECK-SD: // %bb.0:
@@ -135,12 +128,5 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y);
   ret i4 %tmp;
 }
-
-define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ret
-  %tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
-  ret <4 x i32> %tmp;
-}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index 085857c0c542..ed79d0158651 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -5,38 +5,6 @@
 ; CHECK-GI: warning: Instruction selection used fallback path for v16i4
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
 
-declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
-declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
-declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>)
-declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>)
-declare <12 x i8> @llvm.ssub.sat.v12i8(<12 x i8>, <12 x i8>)
-declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
-declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
-
-declare <1 x i16> @llvm.ssub.sat.v1i16(<1 x i16>, <1 x i16>)
-declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>)
-declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
-declare <12 x i16> @llvm.ssub.sat.v12i16(<12 x i16>, <12 x i16>)
-declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16>, <32 x i16>)
-
-declare <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1>, <16 x i1>)
-declare <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4>, <16 x i4>)
-
-declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>)
-declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
-declare <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
-declare <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64>, <8 x i64>)
-
-declare <4 x i24> @llvm.ssub.sat.v4i24(<4 x i24>, <4 x i24>)
-declare <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128>, <2 x i128>)
-
-
 define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; CHECK-LABEL: v16i8:
 ; CHECK: // %bb.0:
@@ -396,6 +364,34 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
   ret <16 x i1> %z
 }
 
+define void @v1i32(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ldr s1, [x1]
+; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: subs w8, w8, w9
+; CHECK-GI-NEXT: mov w9, #-2147483648 // =0x80000000
+; CHECK-GI-NEXT: cset w10, vs
+; CHECK-GI-NEXT: add w9, w9, w8, asr #31
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: csel w8, w9, w8, ne
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i32>, ptr %px
+  %y = load <1 x i32>, ptr %py
+  %z = call <1 x i32> @llvm.ssub.sat.v1i32(<1 x i32> %x, <1 x i32> %y)
+  store <1 x i32> %z, ptr %pz
+  ret void
+}
+
 define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: v2i32:
 ; CHECK: // %bb.0:
@@ -450,6 +446,38 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
   ret <16 x i32> %z
 }
 
+define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr x8, [x1]
+; CHECK-SD-NEXT: ldr x9, [x0]
+; CHECK-SD-NEXT: subs x8, x9, x8
+; CHECK-SD-NEXT: asr x9, x8, #63
+; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000
+; CHECK-SD-NEXT: csel x8, x9, x8, vs
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: str d0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr x8, [x0]
+; CHECK-GI-NEXT: ldr x9, [x1]
+; CHECK-GI-NEXT: subs x8, x8, x9
+; CHECK-GI-NEXT: mov x9, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: cset w10, vs
+; CHECK-GI-NEXT: add x9, x9, x8, asr #63
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: csel x8, x9, x8, ne
+; CHECK-GI-NEXT: str x8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i64>, ptr %px
+  %y = load <1 x i64>, ptr %py
+  %z = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %x, <1 x i64> %y)
+  store <1 x i64> %z, ptr %pz
+  ret void
+}
+
 define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: v2i64:
 ; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll
index e9d22c7be52e..7a7906622fb9 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll
@@ -2,12 +2,6 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-declare i4 @llvm.uadd.sat.i4(i4, i4)
-declare i8 @llvm.uadd.sat.i8(i8, i8)
-declare i16 @llvm.uadd.sat.i16(i16, i16)
-declare i32 @llvm.uadd.sat.i32(i32, i32)
-declare i64 @llvm.uadd.sat.i64(i64, i64)
-
 define i32 @func(i32 %x, i32 %y) nounwind {
 ; CHECK-SD-LABEL: func:
 ; CHECK-SD: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index b0b3198fda0e..dcfb5176db12 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -5,37 +5,6 @@
 ; CHECK-GI: warning: Instruction selection used fallback path for v16i4
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
 
-declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
-declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
-declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>)
-declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>)
-declare <12 x i8> @llvm.uadd.sat.v12i8(<12 x i8>, <12 x i8>)
-declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)
-declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>)
-
-declare <1 x i16> @llvm.uadd.sat.v1i16(<1 x i16>, <1 x i16>)
-declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>)
-declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
-declare <12 x i16> @llvm.uadd.sat.v12i16(<12 x i16>, <12 x i16>)
-declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16>, <32 x i16>)
-
-declare <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1>, <16 x i1>)
-declare <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4>, <16 x i4>)
-
-declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>)
-declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>)
-declare <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64>, <4 x i64>)
-declare <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64>, <8 x i64>)
-
-declare <4 x i24> @llvm.uadd.sat.v4i24(<4 x i24>, <4 x i24>)
-declare <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128>, <2 x i128>)
-
 define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; CHECK-LABEL: v16i8:
 ; CHECK: // %bb.0:
@@ -387,6 +356,32 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
   ret <16 x i1> %z
 }
 
+define void @v1i32(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ldr s1, [x1]
+; CHECK-SD-NEXT: uqadd v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: adds w8, w8, w9
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: tst w9, #0x1
+; CHECK-GI-NEXT: csinv w8, w8, wzr, eq
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i32>, ptr %px
+  %y = load <1 x i32>, ptr %py
+  %z = call <1 x i32> @llvm.uadd.sat.v1i32(<1 x i32> %x, <1 x i32> %y)
+  store <1 x i32> %z, ptr %pz
+  ret void
+}
+
 define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: v2i32:
 ; CHECK: // %bb.0:
@@ -441,6 +436,34 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
   ret <16 x i32> %z
 }
 
+define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr x8, [x1]
+; CHECK-SD-NEXT: ldr x9, [x0]
+; CHECK-SD-NEXT: adds x8, x9, x8
+; CHECK-SD-NEXT: csinv x8, x8, xzr, lo
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: str d0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr x8, [x0]
+; CHECK-GI-NEXT: ldr x9, [x1]
+; CHECK-GI-NEXT: adds x8, x8, x9
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: tst w9, #0x1
+; CHECK-GI-NEXT: csinv x8, x8, xzr, eq
+; CHECK-GI-NEXT: str x8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i64>, ptr %px
+  %y = load <1 x i64>, ptr %py
+  %z = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %x, <1 x i64> %y)
+  store <1 x i64> %z, ptr %pz
+  ret void
+}
+
 define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: v2i64:
 ; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/usub_sat.ll b/llvm/test/CodeGen/AArch64/usub_sat.ll
index 54d7fc5a63b1..045bb762ef79 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat.ll
@@ -2,12 +2,6 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-declare i4 @llvm.usub.sat.i4(i4, i4)
-declare i8 @llvm.usub.sat.i8(i8, i8)
-declare i16 @llvm.usub.sat.i16(i16, i16)
-declare i32 @llvm.usub.sat.i32(i32, i32)
-declare i64 @llvm.usub.sat.i64(i64, i64)
-
 define i32 @func(i32 %x, i32 %y) nounwind {
 ; CHECK-SD-LABEL: func:
 ; CHECK-SD: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 54754e7fbaed..0049aba62d27 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -5,38 +5,6 @@
 ; CHECK-GI: warning: Instruction selection used fallback path for v16i4
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
 
-declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
-declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
-declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>)
-declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>)
-declare <12 x i8> @llvm.usub.sat.v12i8(<12 x i8>, <12 x i8>)
-declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)
-declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)
-
-declare <1 x i16> @llvm.usub.sat.v1i16(<1 x i16>, <1 x i16>)
-declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>)
-declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
-declare <12 x i16> @llvm.usub.sat.v12i16(<12 x i16>, <12 x i16>)
-declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <32 x i16> @llvm.usub.sat.v32i16(<32 x i16>, <32 x i16>)
-
-declare <16 x i1> @llvm.usub.sat.v16i1(<16 x i1>, <16 x i1>)
-declare <16 x i4> @llvm.usub.sat.v16i4(<16 x i4>, <16 x i4>)
-
-declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>)
-declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>)
-declare <4 x i64> @llvm.usub.sat.v4i64(<4 x i64>, <4 x i64>)
-declare <8 x i64> @llvm.usub.sat.v8i64(<8 x i64>, <8 x i64>)
-
-declare <4 x i24> @llvm.usub.sat.v4i24(<4 x i24>, <4 x i24>)
-declare <2 x i128> @llvm.usub.sat.v2i128(<2 x i128>, <2 x i128>)
-
-
 define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; CHECK-LABEL: v16i8:
 ; CHECK: // %bb.0:
@@ -385,6 +353,32 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
   ret <16 x i1> %z
 }
 
+define void @v1i32(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ldr s1, [x1]
+; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: subs w8, w8, w9
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: tst w9, #0x1
+; CHECK-GI-NEXT: csel w8, wzr, w8, ne
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i32>, ptr %px
+  %y = load <1 x i32>, ptr %py
+  %z = call <1 x i32> @llvm.usub.sat.v1i32(<1 x i32> %x, <1 x i32> %y)
+  store <1 x i32> %z, ptr %pz
+  ret void
+}
+
 define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: v2i32:
 ; CHECK: // %bb.0:
@@ -439,6 +433,34 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
   ret <16 x i32> %z
 }
 
+define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr x8, [x1]
+; CHECK-SD-NEXT: ldr x9, [x0]
+; CHECK-SD-NEXT: subs x8, x9, x8
+; CHECK-SD-NEXT: csel x8, xzr, x8, lo
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: str d0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr x8, [x0]
+; CHECK-GI-NEXT: ldr x9, [x1]
+; CHECK-GI-NEXT: subs x8, x8, x9
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: tst w9, #0x1
+; CHECK-GI-NEXT: csel x8, xzr, x8, ne
+; CHECK-GI-NEXT: str x8, [x2]
+; CHECK-GI-NEXT: ret
+  %x = load <1 x i64>, ptr %px
+  %y = load <1 x i64>, ptr %py
+  %z = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %x, <1 x i64> %y)
+  store <1 x i64> %z, ptr %pz
+  ret void
+}
+
 define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: v2i64:
 ; CHECK: // %bb.0: