
Add a new combine that replaces `(store ch (vselect cond truevec (load ch ptr offset)) ptr offset)` with `(mstore ch truevec ptr offset cond)`. This saves a blend operation on targets that support masked (conditional) stores.
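At the IR level, the effect of this DAG combine corresponds to folding a load/select/store triple over the same address into a single `llvm.masked.store`: disabled lanes are simply left untouched in memory, which is exactly what storing the blended value produced. The sketch below is only an illustration of that intent (the function names are made up, and the actual combine operates on SelectionDAG nodes, not IR); it also only holds when neither access is volatile and nothing else writes the location in between, cases the tests below exercise.

```llvm
declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)

; Before: blend the loaded lanes with %x, then store all lanes back.
define void @blend_then_store(<4 x i32> %x, ptr %p, <4 x i1> %m) {
  %old = load <4 x i32>, ptr %p, align 16
  %blend = select <4 x i1> %m, <4 x i32> %x, <4 x i32> %old
  store <4 x i32> %blend, ptr %p, align 16
  ret void
}

; After: only the lanes enabled by %m are written; the load and blend
; disappear, so no vector merge/blend instruction is needed.
define void @masked_store_only(<4 x i32> %x, ptr %p, <4 x i1> %m) {
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %p, i32 16, <4 x i1> %m)
  ret void
}
```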
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64-- -mattr=+m,+v,+f | FileCheck %s -check-prefix=RISCV

define void @test_masked_store_success_v4i8(<4 x i8> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i8:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <4 x i8>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i8> %x, <4 x i8> %load
  store <4 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4i16(<4 x i16> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <4 x i16>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i16> %x, <4 x i16> %load
  store <4 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <4 x i32>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %load
  store <4 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4i64(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <4 x i64>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %load
  store <4 x i64> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmv1r.v v9, v0
; RISCV-NEXT: vfirst.m a3, v0
; RISCV-NEXT: mv a2, a0
; RISCV-NEXT: beqz a3, .LBB4_2
; RISCV-NEXT: # %bb.1:
; RISCV-NEXT: mv a2, a1
; RISCV-NEXT: .LBB4_2:
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmv.v.i v8, 0
; RISCV-NEXT: vmv1r.v v0, v9
; RISCV-NEXT: vmerge.vim v8, v8, 1, v0
; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT: vslidedown.vi v8, v8, 2
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmsne.vi v8, v8, 0
; RISCV-NEXT: vmv.v.i v10, 0
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: vmerge.vim v11, v10, 1, v0
; RISCV-NEXT: vslidedown.vi v11, v11, 1
; RISCV-NEXT: vmv.x.s a3, v11
; RISCV-NEXT: andi a3, a3, 1
; RISCV-NEXT: bnez a3, .LBB4_4
; RISCV-NEXT: # %bb.3:
; RISCV-NEXT: addi a3, a1, 6
; RISCV-NEXT: j .LBB4_5
; RISCV-NEXT: .LBB4_4:
; RISCV-NEXT: addi a3, a0, 24
; RISCV-NEXT: .LBB4_5:
; RISCV-NEXT: vmv1r.v v0, v9
; RISCV-NEXT: vmerge.vim v9, v10, 1, v0
; RISCV-NEXT: vslidedown.vi v9, v9, 1
; RISCV-NEXT: vmv.x.s a4, v9
; RISCV-NEXT: andi a4, a4, 1
; RISCV-NEXT: bnez a4, .LBB4_7
; RISCV-NEXT: # %bb.6:
; RISCV-NEXT: addi a5, a1, 2
; RISCV-NEXT: j .LBB4_8
; RISCV-NEXT: .LBB4_7:
; RISCV-NEXT: addi a5, a0, 8
; RISCV-NEXT: .LBB4_8:
; RISCV-NEXT: lh a4, 0(a2)
; RISCV-NEXT: lh a2, 0(a3)
; RISCV-NEXT: lh a3, 0(a5)
; RISCV-NEXT: vfirst.m a5, v8
; RISCV-NEXT: beqz a5, .LBB4_10
; RISCV-NEXT: # %bb.9:
; RISCV-NEXT: addi a0, a1, 4
; RISCV-NEXT: j .LBB4_11
; RISCV-NEXT: .LBB4_10:
; RISCV-NEXT: addi a0, a0, 16
; RISCV-NEXT: .LBB4_11:
; RISCV-NEXT: lh a0, 0(a0)
; RISCV-NEXT: sh a4, 0(a1)
; RISCV-NEXT: sh a3, 2(a1)
; RISCV-NEXT: sh a0, 4(a1)
; RISCV-NEXT: sh a2, 6(a1)
; RISCV-NEXT: ret
  %load = load <4 x half>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x half> %x, <4 x half> %load
  store <4 x half> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4f32(<4 x float> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <4 x float>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x float> %x, <4 x float> %load
  store <4 x float> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4f64(<4 x double> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <4 x double>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x double> %x, <4 x double> %load
  store <4 x double> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i8(<8 x i8> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i8:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <8 x i8>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i8> %x, <8 x i8> %load
  store <8 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i16(<8 x i16> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <8 x i16>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %load
  store <8 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i64(<8 x i64> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <8 x i64>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load
  store <8 x i64> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmv1r.v v8, v0
; RISCV-NEXT: vfirst.m a3, v0
; RISCV-NEXT: mv a2, a0
; RISCV-NEXT: beqz a3, .LBB11_2
; RISCV-NEXT: # %bb.1:
; RISCV-NEXT: mv a2, a1
; RISCV-NEXT: .LBB11_2:
; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT: vmv.v.i v9, 0
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: vmerge.vim v9, v9, 1, v0
; RISCV-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RISCV-NEXT: vslidedown.vi v9, v9, 4
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmsne.vi v11, v9, 0
; RISCV-NEXT: vmv.v.i v10, 0
; RISCV-NEXT: vmv1r.v v0, v11
; RISCV-NEXT: vmerge.vim v9, v10, 1, v0
; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT: vslidedown.vi v9, v9, 2
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmsne.vi v9, v9, 0
; RISCV-NEXT: vmv.v.i v12, 0
; RISCV-NEXT: vmv1r.v v0, v9
; RISCV-NEXT: vmerge.vim v13, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v13, v13, 1
; RISCV-NEXT: vmv.x.s a3, v13
; RISCV-NEXT: andi a3, a3, 1
; RISCV-NEXT: bnez a3, .LBB11_4
; RISCV-NEXT: # %bb.3:
; RISCV-NEXT: addi a3, a1, 14
; RISCV-NEXT: j .LBB11_5
; RISCV-NEXT: .LBB11_4:
; RISCV-NEXT: addi a3, a0, 56
; RISCV-NEXT: .LBB11_5:
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmerge.vim v10, v10, 1, v0
; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT: vslidedown.vi v10, v10, 2
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmsne.vi v10, v10, 0
; RISCV-NEXT: vmv1r.v v0, v10
; RISCV-NEXT: vmerge.vim v13, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v13, v13, 1
; RISCV-NEXT: vmv.x.s a4, v13
; RISCV-NEXT: andi a4, a4, 1
; RISCV-NEXT: bnez a4, .LBB11_8
; RISCV-NEXT: # %bb.6:
; RISCV-NEXT: addi a4, a1, 6
; RISCV-NEXT: vfirst.m a5, v11
; RISCV-NEXT: bnez a5, .LBB11_9
; RISCV-NEXT: .LBB11_7:
; RISCV-NEXT: addi a5, a0, 32
; RISCV-NEXT: j .LBB11_10
; RISCV-NEXT: .LBB11_8:
; RISCV-NEXT: addi a4, a0, 24
; RISCV-NEXT: vfirst.m a5, v11
; RISCV-NEXT: beqz a5, .LBB11_7
; RISCV-NEXT: .LBB11_9:
; RISCV-NEXT: addi a5, a1, 8
; RISCV-NEXT: .LBB11_10:
; RISCV-NEXT: vmv1r.v v0, v11
; RISCV-NEXT: vmerge.vim v11, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v11, v11, 1
; RISCV-NEXT: vmv.x.s a6, v11
; RISCV-NEXT: andi a6, a6, 1
; RISCV-NEXT: bnez a6, .LBB11_14
; RISCV-NEXT: # %bb.11:
; RISCV-NEXT: addi a6, a1, 10
; RISCV-NEXT: vfirst.m a7, v9
; RISCV-NEXT: bnez a7, .LBB11_15
; RISCV-NEXT: .LBB11_12:
; RISCV-NEXT: addi a7, a0, 48
; RISCV-NEXT: vfirst.m t0, v10
; RISCV-NEXT: bnez t0, .LBB11_16
; RISCV-NEXT: .LBB11_13:
; RISCV-NEXT: addi t1, a0, 16
; RISCV-NEXT: j .LBB11_17
; RISCV-NEXT: .LBB11_14:
; RISCV-NEXT: addi a6, a0, 40
; RISCV-NEXT: vfirst.m a7, v9
; RISCV-NEXT: beqz a7, .LBB11_12
; RISCV-NEXT: .LBB11_15:
; RISCV-NEXT: addi a7, a1, 12
; RISCV-NEXT: vfirst.m t0, v10
; RISCV-NEXT: beqz t0, .LBB11_13
; RISCV-NEXT: .LBB11_16:
; RISCV-NEXT: addi t1, a1, 4
; RISCV-NEXT: .LBB11_17:
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: lh t0, 0(a2)
; RISCV-NEXT: lh a2, 0(a3)
; RISCV-NEXT: lh a3, 0(a4)
; RISCV-NEXT: lh a4, 0(a5)
; RISCV-NEXT: lh a5, 0(a6)
; RISCV-NEXT: lh a6, 0(a7)
; RISCV-NEXT: lh a7, 0(t1)
; RISCV-NEXT: vmerge.vim v8, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v8, v8, 1
; RISCV-NEXT: vmv.x.s t1, v8
; RISCV-NEXT: andi t1, t1, 1
; RISCV-NEXT: bnez t1, .LBB11_19
; RISCV-NEXT: # %bb.18:
; RISCV-NEXT: addi a0, a1, 2
; RISCV-NEXT: j .LBB11_20
; RISCV-NEXT: .LBB11_19:
; RISCV-NEXT: addi a0, a0, 8
; RISCV-NEXT: .LBB11_20:
; RISCV-NEXT: lh a0, 0(a0)
; RISCV-NEXT: sh t0, 0(a1)
; RISCV-NEXT: sh a0, 2(a1)
; RISCV-NEXT: sh a7, 4(a1)
; RISCV-NEXT: sh a3, 6(a1)
; RISCV-NEXT: sh a4, 8(a1)
; RISCV-NEXT: sh a5, 10(a1)
; RISCV-NEXT: sh a6, 12(a1)
; RISCV-NEXT: sh a2, 14(a1)
; RISCV-NEXT: ret
  %load = load <8 x half>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x half> %x, <8 x half> %load
  store <8 x half> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8f32(<8 x float> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <8 x float>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x float> %x, <8 x float> %load
  store <8 x float> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8f64(<8 x double> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <8 x double>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x double> %x, <8 x double> %load
  store <8 x double> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v16i8(<16 x i8> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i8:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <16 x i8>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %load
  store <16 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v16i16(<16 x i16> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <16 x i16>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %load
  store <16 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <16 x i32>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %load
  store <16 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v32i8(<32 x i8> %x, ptr %ptr, <32 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v32i8:
; RISCV: # %bb.0:
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <32 x i8>, ptr %ptr, align 32
  %sel = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %load
  store <32 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v32i16(<32 x i16> %x, ptr %ptr, <32 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v32i16:
; RISCV: # %bb.0:
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <32 x i16>, ptr %ptr, align 32
  %sel = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %load
  store <32 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v64i8(<64 x i8> %x, ptr %ptr, <64 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v64i8:
; RISCV: # %bb.0:
; RISCV-NEXT: li a1, 64
; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <64 x i8>, ptr %ptr, align 32
  %sel = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %load
  store <64 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_invert_mask_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v4i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmnot.m v0, v0
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <4 x i32>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i32> %load, <4 x i32> %x
  store <4 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_invert_mask_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT: vmnot.m v0, v0
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %load, <8 x i32> %x
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_invert_mask_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v16i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vmnot.m v0, v0
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
  %load = load <16 x i32>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i32> %load, <16 x i32> %x
  store <16 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_zextload(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_zextload:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vle32.v v12, (a0)
; RISCV-NEXT: vzext.vf2 v10, v12
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse64.v v8, (a0)
; RISCV-NEXT: ret
  %load = load <4 x i32>, ptr %ptr, align 32
  %zext = zext <4 x i32> %load to <4 x i64>
  %masked = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %zext
  store <4 x i64> %masked, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_volatile_load(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_volatile_load:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vle32.v v10, (a0)
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse32.v v8, (a0)
; RISCV-NEXT: ret
  %load = load volatile <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_volatile_store(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_volatile_store:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vle32.v v10, (a0)
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse32.v v8, (a0)
; RISCV-NEXT: ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store volatile <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

declare void @use_vec(<8 x i32>)

define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) nounwind {
; RISCV-LABEL: test_masked_store_intervening:
; RISCV: # %bb.0:
; RISCV-NEXT: addi sp, sp, -32
; RISCV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RISCV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RISCV-NEXT: csrr a1, vlenb
; RISCV-NEXT: slli a2, a1, 2
; RISCV-NEXT: add a1, a2, a1
; RISCV-NEXT: sub sp, sp, a1
; RISCV-NEXT: csrr a1, vlenb
; RISCV-NEXT: slli a1, a1, 2
; RISCV-NEXT: add a1, sp, a1
; RISCV-NEXT: addi a1, a1, 16
; RISCV-NEXT: vs1r.v v0, (a1) # vscale x 8-byte Folded Spill
; RISCV-NEXT: mv s0, a0
; RISCV-NEXT: csrr a1, vlenb
; RISCV-NEXT: slli a1, a1, 1
; RISCV-NEXT: add a1, sp, a1
; RISCV-NEXT: addi a1, a1, 16
; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vle32.v v8, (a0)
; RISCV-NEXT: addi a1, sp, 16
; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill
; RISCV-NEXT: vmv.v.i v8, 0
; RISCV-NEXT: vse32.v v8, (a0)
; RISCV-NEXT: call use_vec
; RISCV-NEXT: csrr a0, vlenb
; RISCV-NEXT: slli a0, a0, 2
; RISCV-NEXT: add a0, sp, a0
; RISCV-NEXT: addi a0, a0, 16
; RISCV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; RISCV-NEXT: csrr a0, vlenb
; RISCV-NEXT: slli a0, a0, 1
; RISCV-NEXT: add a0, sp, a0
; RISCV-NEXT: addi a0, a0, 16
; RISCV-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; RISCV-NEXT: addi a0, sp, 16
; RISCV-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse32.v v8, (s0)
; RISCV-NEXT: csrr a0, vlenb
; RISCV-NEXT: slli a1, a0, 2
; RISCV-NEXT: add a0, a1, a0
; RISCV-NEXT: add sp, sp, a0
; RISCV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RISCV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RISCV-NEXT: addi sp, sp, 32
; RISCV-NEXT: ret
  %load = load <8 x i32>, ptr %ptr, align 32
  store <8 x i32> zeroinitializer, ptr %ptr, align 32
  %tmp = load <8 x i32>, ptr %ptr
  call void @use_vec(<8 x i32> %tmp)
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) {
; RISCV-LABEL: test_masked_store_multiple_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vmv1r.v v13, v0
; RISCV-NEXT: vle32.v v14, (a1)
; RISCV-NEXT: vmv1r.v v0, v12
; RISCV-NEXT: vmerge.vvm v10, v14, v10, v0
; RISCV-NEXT: vmv1r.v v0, v13
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: vse32.v v10, (a1)
; RISCV-NEXT: ret
  %load = load <8 x i32>, ptr %ptr1, align 32
  %load2 = load <8 x i32>, ptr %ptr2, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  %sel2 = select <8 x i1> %mask2, <8 x i32> %y, <8 x i32> %load2
  store <8 x i32> %sel, ptr %ptr1, align 32
  store <8 x i32> %sel2, ptr %ptr2, align 32
  ret void
}

define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) {
; RISCV-LABEL: test_masked_store_multiple_v8i64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vmv1r.v v17, v0
; RISCV-NEXT: vle64.v v20, (a1)
; RISCV-NEXT: vmv1r.v v0, v16
; RISCV-NEXT: vmerge.vvm v12, v20, v12, v0
; RISCV-NEXT: vmv1r.v v0, v17
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: vse64.v v12, (a1)
; RISCV-NEXT: ret
  %load = load <8 x i64>, ptr %ptr1, align 32
  %load2 = load <8 x i64>, ptr %ptr2, align 32
  %sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load
  %sel2 = select <8 x i1> %mask2, <8 x i64> %y, <8 x i64> %load2
  store <8 x i64> %sel, ptr %ptr1, align 32
  store <8 x i64> %sel2, ptr %ptr2, align 32
  ret void
}

define void @test_masked_store_unaligned_v4i32(<4 x i32> %data, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v4i32:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vle8.v v9, (a0)
; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v9, v8, v0
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i32 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <4 x i32>, ptr %ptr_vec, align 1
  %sel = select <4 x i1> %mask, <4 x i32> %data, <4 x i32> %load
  store <4 x i32> %sel, ptr %ptr_vec, align 1
  ret void
}

define void @test_masked_store_unaligned_v4i64(<4 x i64> %data, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v4i64:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vle8.v v10, (a0)
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i64 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <4 x i64>, ptr %ptr_vec, align 1
  %sel = select <4 x i1> %mask, <4 x i64> %data, <4 x i64> %load
  store <4 x i64> %sel, ptr %ptr_vec, align 1
  ret void
}

define void @test_masked_store_unaligned_v8i32(<8 x i32> %data, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vle8.v v10, (a0)
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i32 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <8 x i32>, ptr %ptr_vec, align 1
  %sel = select <8 x i1> %mask, <8 x i32> %data, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr_vec, align 1
  ret void
}

define void @test_masked_store_unaligned_v8i64(<8 x i64> %data, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v8i64:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: li a1, 64
; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT: vle8.v v12, (a0)
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v12, v8, v0
; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i64 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <8 x i64>, ptr %ptr_vec, align 1
  %sel = select <8 x i1> %mask, <8 x i64> %data, <8 x i64> %load
  store <8 x i64> %sel, ptr %ptr_vec, align 1
  ret void
}