[AArch64] Additional saddlv store tests. NFC
The select-intrinsic-uaddlv.mir test now lowers via G_SADDLV / G_UADDLV, so it is no longer needed.
This commit is contained in:
parent fa9f6b5828
commit 277ca48f2f
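For context, a minimal sketch of the pattern the new tests exercise (illustrative only, not part of this commit; the function name @sum_and_store is hypothetical). The saddlv intrinsic performs a signed add-long reduction: each lane is sign-extended, all lanes are summed, and the widened scalar is returned; the tests then store that result through a computed address.

; saddlv reduces <8 x i8> to a widened i32 sum of the sign-extended lanes.
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

define void @sum_and_store(ptr %dst, <8 x i8> %v) {
entry:
  %sum = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %v)
  store i32 %sum, ptr %dst, align 4
  ret void
}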
@@ -1,109 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s

...
---
name: uaddlv_v8s8
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $d0
    ; CHECK-LABEL: name: uaddlv_v8s8
    ; CHECK: %copy:fpr64 = COPY $d0
    ; CHECK: [[UADDLVv8i8v:%[0-9]+]]:fpr16 = UADDLVv8i8v %copy
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i8v]], %subreg.hsub
    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
    ; CHECK: $w0 = COPY %intrin
    ; CHECK: RET_ReallyLR implicit $w0
    %copy:fpr(<8 x s8>) = COPY $d0
    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s8>)
    $w0 = COPY %intrin(s32)
    RET_ReallyLR implicit $w0

...
---
name: uaddlv_v16s8
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $q0

    ; CHECK-LABEL: name: uaddlv_v16s8
    ; CHECK: %copy:fpr128 = COPY $q0
    ; CHECK: [[UADDLVv16i8v:%[0-9]+]]:fpr16 = UADDLVv16i8v %copy
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv16i8v]], %subreg.hsub
    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
    ; CHECK: $w0 = COPY %intrin
    ; CHECK: RET_ReallyLR implicit $w0
    %copy:fpr(<16 x s8>) = COPY $q0
    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
    $w0 = COPY %intrin(s32)
    RET_ReallyLR implicit $w0
...
---
name: uaddlv_v4s16
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $d0
    ; CHECK-LABEL: name: uaddlv_v4s16
    ; CHECK: %copy:fpr64 = COPY $d0
    ; CHECK: [[UADDLVv4i16v:%[0-9]+]]:fpr32 = UADDLVv4i16v %copy
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i16v]], %subreg.ssub
    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
    ; CHECK: $w0 = COPY %intrin
    ; CHECK: RET_ReallyLR implicit $w0
    %copy:fpr(<4 x s16>) = COPY $d0
    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s16>)
    $w0 = COPY %intrin(s32)
    RET_ReallyLR implicit $w0

...
---
name: uaddlv_v8s16
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $q0

    ; CHECK-LABEL: name: uaddlv_v8s16
    ; CHECK: %copy:fpr128 = COPY $q0
    ; CHECK: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v %copy
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i16v]], %subreg.ssub
    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
    ; CHECK: $w0 = COPY %intrin
    ; CHECK: RET_ReallyLR implicit $w0
    %copy:fpr(<8 x s16>) = COPY $q0
    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s16>)
    $w0 = COPY %intrin(s32)
    RET_ReallyLR implicit $w0

...
---
name: uaddlv_v4s32
legalized: true
regBankSelected: true
body: |
  bb.0:
    liveins: $q0

    ; CHECK-LABEL: name: uaddlv_v4s32
    ; CHECK: %copy:fpr128 = COPY $q0
    ; CHECK: [[UADDLVv4i32v:%[0-9]+]]:fpr64 = UADDLVv4i32v %copy
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i32v]], %subreg.dsub
    ; CHECK: %intrin:fpr64 = COPY [[INSERT_SUBREG]].dsub
    ; CHECK: $x0 = COPY %intrin
    ; CHECK: RET_ReallyLR implicit $x0
    %copy:fpr(<4 x s32>) = COPY $q0
    %intrin:fpr(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s32>)
    $x0 = COPY %intrin(s64)
    RET_ReallyLR implicit $x0
@@ -485,3 +485,101 @@ entry:
  store <4 x float> %2, ptr %0, align 8
  ret void
}

define void @store_saddlv_v8i8(ptr %H, <8 x i8> %sum_h, i32 %idx) {
; CHECK-LABEL: store_saddlv_v8i8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    saddlv.8b h0, v0
; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-NEXT:    sbfiz x9, x1, #3, #32
; CHECK-NEXT:    smov.h w8, v0[0]
; CHECK-NEXT:    str w8, [x0, x9]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %sum_h)
  %idxprom = sext i32 %idx to i64
  %arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
  store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
  ret void
}

define void @store_saddlv_v16i8(ptr %H, <16 x i8> %sum_h, i32 %idx) {
; CHECK-LABEL: store_saddlv_v16i8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    saddlv.16b h0, v0
; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-NEXT:    sbfiz x9, x1, #3, #32
; CHECK-NEXT:    smov.h w8, v0[0]
; CHECK-NEXT:    str w8, [x0, x9]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %sum_h)
  %idxprom = sext i32 %idx to i64
  %arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
  store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
  ret void
}

define void @store_saddlv_v4i16(ptr %H, <4 x i16> %sum_h, i32 %idx) {
; CHECK-LABEL: store_saddlv_v4i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    saddlv.4h s0, v0
; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-NEXT:    sbfiz x8, x1, #3, #32
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    str w9, [x0, x8]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %sum_h)
  %idxprom = sext i32 %idx to i64
  %arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
  store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
  ret void
}

define void @store_saddlv_v8i16(ptr %H, <8 x i16> %sum_h, i32 %idx) {
; CHECK-LABEL: store_saddlv_v8i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    saddlv.8h s0, v0
; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-NEXT:    sbfiz x8, x1, #3, #32
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    str w9, [x0, x8]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %sum_h)
  %idxprom = sext i32 %idx to i64
  %arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
  store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
  ret void
}

define void @store_saddlv_v2i32(ptr %H, <2 x i32> %sum_h, i32 %idx) {
; CHECK-LABEL: store_saddlv_v2i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    saddlp.1d v0, v0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    str x8, [x0, w1, sxtw #3]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %sum_h)
  %idxprom = sext i32 %idx to i64
  %arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
  store i64 %vaddlvq_s32.i, ptr %arrayidx, align 8
  ret void
}

define void @store_saddlv_v4i32(ptr %H, <4 x i32> %sum_h, i32 %idx) {
; CHECK-LABEL: store_saddlv_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    saddlv.4s d0, v0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    str x8, [x0, w1, sxtw #3]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %sum_h)
  %idxprom = sext i32 %idx to i64
  %arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
  store i64 %vaddlvq_s32.i, ptr %arrayidx, align 8
  ret void
}
@@ -1,83 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -global-isel=1 -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon -global-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI

declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)

declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)

declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

define i16 @test_vaddlv_s8(<8 x i8> %a) {