; [DAG] Combine store + vselect to masked_store (#145176)
;
; Tests for the DAG combine that replaces
;   (store ch (vselect cond truevec (load ch ptr offset)) ptr offset)
; with
;   (mstore ch truevec ptr offset cond)
; which saves a blend operation on targets that support conditional stores.
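;
; As a rough IR-level analogy (the combine itself operates on SelectionDAG
; nodes, not on IR), the pattern corresponds to rewriting
;
;   %load = load <8 x i32>, ptr %p, align 32
;   %sel  = select <8 x i1> %m, <8 x i32> %x, <8 x i32> %load
;   store <8 x i32> %sel, ptr %p, align 32
;
; into a call to the masked-store intrinsic:
;
;   call void @llvm.masked.store.v8i32.p0(<8 x i32> %x, ptr %p, i32 32, <8 x i1> %m)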
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64-- -mattr=+m,+v,+f | FileCheck %s -check-prefix=RISCV
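; Positive tests: a blend of a loaded vector with new data, stored back to the
; same address, folds to a masked store (vse*.v ... v0.t) for each legal
; element type and vector length below.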
define void @test_masked_store_success_v4i8(<4 x i8> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i8:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <4 x i8>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i8> %x, <4 x i8> %load
store <4 x i8> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v4i16(<4 x i16> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <4 x i16>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i16> %x, <4 x i16> %load
store <4 x i16> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <4 x i32>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %load
store <4 x i32> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v4i64(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <4 x i64>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %load
store <4 x i64> %sel, ptr %ptr, align 32
ret void
}
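; f16 vectors are not legal with just +f and +v (no +zvfh/+zvfhmin), so this
; case is scalarized and no masked store can be formed.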
define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmv1r.v v9, v0
; RISCV-NEXT: vfirst.m a3, v0
; RISCV-NEXT: mv a2, a0
; RISCV-NEXT: beqz a3, .LBB4_2
; RISCV-NEXT: # %bb.1:
; RISCV-NEXT: mv a2, a1
; RISCV-NEXT: .LBB4_2:
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmv.v.i v8, 0
; RISCV-NEXT: vmv1r.v v0, v9
; RISCV-NEXT: vmerge.vim v8, v8, 1, v0
; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT: vslidedown.vi v8, v8, 2
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmsne.vi v8, v8, 0
; RISCV-NEXT: vmv.v.i v10, 0
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: vmerge.vim v11, v10, 1, v0
; RISCV-NEXT: vslidedown.vi v11, v11, 1
; RISCV-NEXT: vmv.x.s a3, v11
; RISCV-NEXT: andi a3, a3, 1
; RISCV-NEXT: bnez a3, .LBB4_4
; RISCV-NEXT: # %bb.3:
; RISCV-NEXT: addi a3, a1, 6
; RISCV-NEXT: j .LBB4_5
; RISCV-NEXT: .LBB4_4:
; RISCV-NEXT: addi a3, a0, 24
; RISCV-NEXT: .LBB4_5:
; RISCV-NEXT: vmv1r.v v0, v9
; RISCV-NEXT: vmerge.vim v9, v10, 1, v0
; RISCV-NEXT: vslidedown.vi v9, v9, 1
; RISCV-NEXT: vmv.x.s a4, v9
; RISCV-NEXT: andi a4, a4, 1
; RISCV-NEXT: bnez a4, .LBB4_7
; RISCV-NEXT: # %bb.6:
; RISCV-NEXT: addi a5, a1, 2
; RISCV-NEXT: j .LBB4_8
; RISCV-NEXT: .LBB4_7:
; RISCV-NEXT: addi a5, a0, 8
; RISCV-NEXT: .LBB4_8:
; RISCV-NEXT: lh a4, 0(a2)
; RISCV-NEXT: lh a2, 0(a3)
; RISCV-NEXT: lh a3, 0(a5)
; RISCV-NEXT: vfirst.m a5, v8
; RISCV-NEXT: beqz a5, .LBB4_10
; RISCV-NEXT: # %bb.9:
; RISCV-NEXT: addi a0, a1, 4
; RISCV-NEXT: j .LBB4_11
; RISCV-NEXT: .LBB4_10:
; RISCV-NEXT: addi a0, a0, 16
; RISCV-NEXT: .LBB4_11:
; RISCV-NEXT: lh a0, 0(a0)
; RISCV-NEXT: sh a4, 0(a1)
; RISCV-NEXT: sh a3, 2(a1)
; RISCV-NEXT: sh a0, 4(a1)
; RISCV-NEXT: sh a2, 6(a1)
; RISCV-NEXT: ret
%load = load <4 x half>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x half> %x, <4 x half> %load
store <4 x half> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v4f32(<4 x float> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <4 x float>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x float> %x, <4 x float> %load
store <4 x float> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v4f64(<4 x double> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <4 x double>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x double> %x, <4 x double> %load
store <4 x double> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v8i8(<8 x i8> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i8:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <8 x i8>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i8> %x, <8 x i8> %load
store <8 x i8> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v8i16(<8 x i16> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <8 x i16>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %load
store <8 x i16> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <8 x i32>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
store <8 x i32> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v8i64(<8 x i64> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <8 x i64>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load
store <8 x i64> %sel, ptr %ptr, align 32
ret void
}
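; As with v4f16 above, <8 x half> is scalarized and no masked store is formed.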
define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmv1r.v v8, v0
; RISCV-NEXT: vfirst.m a3, v0
; RISCV-NEXT: mv a2, a0
; RISCV-NEXT: beqz a3, .LBB11_2
; RISCV-NEXT: # %bb.1:
; RISCV-NEXT: mv a2, a1
; RISCV-NEXT: .LBB11_2:
; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT: vmv.v.i v9, 0
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: vmerge.vim v9, v9, 1, v0
; RISCV-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RISCV-NEXT: vslidedown.vi v9, v9, 4
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmsne.vi v11, v9, 0
; RISCV-NEXT: vmv.v.i v10, 0
; RISCV-NEXT: vmv1r.v v0, v11
; RISCV-NEXT: vmerge.vim v9, v10, 1, v0
; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT: vslidedown.vi v9, v9, 2
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmsne.vi v9, v9, 0
; RISCV-NEXT: vmv.v.i v12, 0
; RISCV-NEXT: vmv1r.v v0, v9
; RISCV-NEXT: vmerge.vim v13, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v13, v13, 1
; RISCV-NEXT: vmv.x.s a3, v13
; RISCV-NEXT: andi a3, a3, 1
; RISCV-NEXT: bnez a3, .LBB11_4
; RISCV-NEXT: # %bb.3:
; RISCV-NEXT: addi a3, a1, 14
; RISCV-NEXT: j .LBB11_5
; RISCV-NEXT: .LBB11_4:
; RISCV-NEXT: addi a3, a0, 56
; RISCV-NEXT: .LBB11_5:
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmerge.vim v10, v10, 1, v0
; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT: vslidedown.vi v10, v10, 2
; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT: vmsne.vi v10, v10, 0
; RISCV-NEXT: vmv1r.v v0, v10
; RISCV-NEXT: vmerge.vim v13, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v13, v13, 1
; RISCV-NEXT: vmv.x.s a4, v13
; RISCV-NEXT: andi a4, a4, 1
; RISCV-NEXT: bnez a4, .LBB11_8
; RISCV-NEXT: # %bb.6:
; RISCV-NEXT: addi a4, a1, 6
; RISCV-NEXT: vfirst.m a5, v11
; RISCV-NEXT: bnez a5, .LBB11_9
; RISCV-NEXT: .LBB11_7:
; RISCV-NEXT: addi a5, a0, 32
; RISCV-NEXT: j .LBB11_10
; RISCV-NEXT: .LBB11_8:
; RISCV-NEXT: addi a4, a0, 24
; RISCV-NEXT: vfirst.m a5, v11
; RISCV-NEXT: beqz a5, .LBB11_7
; RISCV-NEXT: .LBB11_9:
; RISCV-NEXT: addi a5, a1, 8
; RISCV-NEXT: .LBB11_10:
; RISCV-NEXT: vmv1r.v v0, v11
; RISCV-NEXT: vmerge.vim v11, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v11, v11, 1
; RISCV-NEXT: vmv.x.s a6, v11
; RISCV-NEXT: andi a6, a6, 1
; RISCV-NEXT: bnez a6, .LBB11_14
; RISCV-NEXT: # %bb.11:
; RISCV-NEXT: addi a6, a1, 10
; RISCV-NEXT: vfirst.m a7, v9
; RISCV-NEXT: bnez a7, .LBB11_15
; RISCV-NEXT: .LBB11_12:
; RISCV-NEXT: addi a7, a0, 48
; RISCV-NEXT: vfirst.m t0, v10
; RISCV-NEXT: bnez t0, .LBB11_16
; RISCV-NEXT: .LBB11_13:
; RISCV-NEXT: addi t1, a0, 16
; RISCV-NEXT: j .LBB11_17
; RISCV-NEXT: .LBB11_14:
; RISCV-NEXT: addi a6, a0, 40
; RISCV-NEXT: vfirst.m a7, v9
; RISCV-NEXT: beqz a7, .LBB11_12
; RISCV-NEXT: .LBB11_15:
; RISCV-NEXT: addi a7, a1, 12
; RISCV-NEXT: vfirst.m t0, v10
; RISCV-NEXT: beqz t0, .LBB11_13
; RISCV-NEXT: .LBB11_16:
; RISCV-NEXT: addi t1, a1, 4
; RISCV-NEXT: .LBB11_17:
; RISCV-NEXT: vmv1r.v v0, v8
; RISCV-NEXT: lh t0, 0(a2)
; RISCV-NEXT: lh a2, 0(a3)
; RISCV-NEXT: lh a3, 0(a4)
; RISCV-NEXT: lh a4, 0(a5)
; RISCV-NEXT: lh a5, 0(a6)
; RISCV-NEXT: lh a6, 0(a7)
; RISCV-NEXT: lh a7, 0(t1)
; RISCV-NEXT: vmerge.vim v8, v12, 1, v0
; RISCV-NEXT: vslidedown.vi v8, v8, 1
; RISCV-NEXT: vmv.x.s t1, v8
; RISCV-NEXT: andi t1, t1, 1
; RISCV-NEXT: bnez t1, .LBB11_19
; RISCV-NEXT: # %bb.18:
; RISCV-NEXT: addi a0, a1, 2
; RISCV-NEXT: j .LBB11_20
; RISCV-NEXT: .LBB11_19:
; RISCV-NEXT: addi a0, a0, 8
; RISCV-NEXT: .LBB11_20:
; RISCV-NEXT: lh a0, 0(a0)
; RISCV-NEXT: sh t0, 0(a1)
; RISCV-NEXT: sh a0, 2(a1)
; RISCV-NEXT: sh a7, 4(a1)
; RISCV-NEXT: sh a3, 6(a1)
; RISCV-NEXT: sh a4, 8(a1)
; RISCV-NEXT: sh a5, 10(a1)
; RISCV-NEXT: sh a6, 12(a1)
; RISCV-NEXT: sh a2, 14(a1)
; RISCV-NEXT: ret
%load = load <8 x half>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x half> %x, <8 x half> %load
store <8 x half> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v8f32(<8 x float> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <8 x float>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x float> %x, <8 x float> %load
store <8 x float> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v8f64(<8 x double> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <8 x double>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x double> %x, <8 x double> %load
store <8 x double> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v16i8(<16 x i8> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i8:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <16 x i8>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %load
store <16 x i8> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v16i16(<16 x i16> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i16:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <16 x i16>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %load
store <16 x i16> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <16 x i32>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %load
store <16 x i32> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v32i8(<32 x i8> %x, ptr %ptr, <32 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v32i8:
; RISCV: # %bb.0:
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <32 x i8>, ptr %ptr, align 32
%sel = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %load
store <32 x i8> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v32i16(<32 x i16> %x, ptr %ptr, <32 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v32i16:
; RISCV: # %bb.0:
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RISCV-NEXT: vse16.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <32 x i16>, ptr %ptr, align 32
%sel = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %load
store <32 x i16> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_v64i8(<64 x i8> %x, ptr %ptr, <64 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v64i8:
; RISCV: # %bb.0:
; RISCV-NEXT: li a1, 64
; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT: vse8.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <64 x i8>, ptr %ptr, align 32
%sel = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %load
store <64 x i8> %sel, ptr %ptr, align 32
ret void
}
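; With the select operands swapped, the combine still applies; the mask is
; inverted (vmnot.m) before the masked store.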
define void @test_masked_store_success_invert_mask_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v4i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT: vmnot.m v0, v0
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <4 x i32>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i32> %load, <4 x i32> %x
store <4 x i32> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_invert_mask_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT: vmnot.m v0, v0
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <8 x i32>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i32> %load, <8 x i32> %x
store <8 x i32> %sel, ptr %ptr, align 32
ret void
}
define void @test_masked_store_success_invert_mask_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v16i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vmnot.m v0, v0
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: ret
%load = load <16 x i32>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i32> %load, <16 x i32> %x
store <16 x i32> %sel, ptr %ptr, align 32
ret void
}
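; Negative test: the select's false operand is a zext of the load rather than
; the load itself, so the pattern does not match and the blend remains.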
define void @test_masked_store_zextload(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_zextload:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vle32.v v12, (a0)
; RISCV-NEXT: vzext.vf2 v10, v12
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse64.v v8, (a0)
; RISCV-NEXT: ret
%load = load <4 x i32>, ptr %ptr, align 32
%zext = zext <4 x i32> %load to <4 x i64>
%masked = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %zext
store <4 x i64> %masked, ptr %ptr, align 32
ret void
}
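; Negative test: the volatile load must not be elided, so the blend and the
; plain store are kept.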
define void @test_masked_store_volatile_load(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_volatile_load:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vle32.v v10, (a0)
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse32.v v8, (a0)
; RISCV-NEXT: ret
%load = load volatile <8 x i32>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
store <8 x i32> %sel, ptr %ptr, align 32
ret void
}
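; Negative test: the volatile store must not be turned into a masked store.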
define void @test_masked_store_volatile_store(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_volatile_store:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vle32.v v10, (a0)
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse32.v v8, (a0)
; RISCV-NEXT: ret
%load = load <8 x i32>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
store volatile <8 x i32> %sel, ptr %ptr, align 32
ret void
}
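; Negative test: the intervening store and the call clobber %ptr between the
; load and the final store, so folding to a masked store would be unsafe.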
declare void @use_vec(<8 x i32>)
define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) nounwind {
; RISCV-LABEL: test_masked_store_intervening:
; RISCV: # %bb.0:
; RISCV-NEXT: addi sp, sp, -32
; RISCV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RISCV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RISCV-NEXT: csrr a1, vlenb
; RISCV-NEXT: slli a2, a1, 2
; RISCV-NEXT: add a1, a2, a1
; RISCV-NEXT: sub sp, sp, a1
; RISCV-NEXT: csrr a1, vlenb
; RISCV-NEXT: slli a1, a1, 2
; RISCV-NEXT: add a1, sp, a1
; RISCV-NEXT: addi a1, a1, 16
; RISCV-NEXT: vs1r.v v0, (a1) # vscale x 8-byte Folded Spill
; RISCV-NEXT: mv s0, a0
; RISCV-NEXT: csrr a1, vlenb
; RISCV-NEXT: slli a1, a1, 1
; RISCV-NEXT: add a1, sp, a1
; RISCV-NEXT: addi a1, a1, 16
; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vle32.v v8, (a0)
; RISCV-NEXT: addi a1, sp, 16
; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill
; RISCV-NEXT: vmv.v.i v8, 0
; RISCV-NEXT: vse32.v v8, (a0)
; RISCV-NEXT: call use_vec
; RISCV-NEXT: csrr a0, vlenb
; RISCV-NEXT: slli a0, a0, 2
; RISCV-NEXT: add a0, sp, a0
; RISCV-NEXT: addi a0, a0, 16
; RISCV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; RISCV-NEXT: csrr a0, vlenb
; RISCV-NEXT: slli a0, a0, 1
; RISCV-NEXT: add a0, sp, a0
; RISCV-NEXT: addi a0, a0, 16
; RISCV-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; RISCV-NEXT: addi a0, sp, 16
; RISCV-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vse32.v v8, (s0)
; RISCV-NEXT: csrr a0, vlenb
; RISCV-NEXT: slli a1, a0, 2
; RISCV-NEXT: add a0, a1, a0
; RISCV-NEXT: add sp, sp, a0
; RISCV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RISCV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RISCV-NEXT: addi sp, sp, 32
; RISCV-NEXT: ret
%load = load <8 x i32>, ptr %ptr, align 32
store <8 x i32> zeroinitializer, ptr %ptr, align 32
%tmp = load <8 x i32>, ptr %ptr
call void @use_vec(<8 x i32> %tmp)
%sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
store <8 x i32> %sel, ptr %ptr, align 32
ret void
}
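; Two independent select/store pairs: the first is converted to a masked
; store, while the second keeps the blend.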
define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) {
; RISCV-LABEL: test_masked_store_multiple_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vmv1r.v v13, v0
; RISCV-NEXT: vle32.v v14, (a1)
; RISCV-NEXT: vmv1r.v v0, v12
; RISCV-NEXT: vmerge.vvm v10, v14, v10, v0
; RISCV-NEXT: vmv1r.v v0, v13
; RISCV-NEXT: vse32.v v8, (a0), v0.t
; RISCV-NEXT: vse32.v v10, (a1)
; RISCV-NEXT: ret
%load = load <8 x i32>, ptr %ptr1, align 32
%load2 = load <8 x i32>, ptr %ptr2, align 32
%sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
%sel2 = select <8 x i1> %mask2, <8 x i32> %y, <8 x i32> %load2
store <8 x i32> %sel, ptr %ptr1, align 32
store <8 x i32> %sel2, ptr %ptr2, align 32
ret void
}
define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) {
; RISCV-LABEL: test_masked_store_multiple_v8i64:
; RISCV: # %bb.0:
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vmv1r.v v17, v0
; RISCV-NEXT: vle64.v v20, (a1)
; RISCV-NEXT: vmv1r.v v0, v16
; RISCV-NEXT: vmerge.vvm v12, v20, v12, v0
; RISCV-NEXT: vmv1r.v v0, v17
; RISCV-NEXT: vse64.v v8, (a0), v0.t
; RISCV-NEXT: vse64.v v12, (a1)
; RISCV-NEXT: ret
%load = load <8 x i64>, ptr %ptr1, align 32
%load2 = load <8 x i64>, ptr %ptr2, align 32
%sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load
%sel2 = select <8 x i1> %mask2, <8 x i64> %y, <8 x i64> %load2
store <8 x i64> %sel, ptr %ptr1, align 32
store <8 x i64> %sel2, ptr %ptr2, align 32
ret void
}
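; Underaligned (align 1) accesses: no masked store is formed here; the access
; is lowered with byte loads/stores and the blend remains.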
define void @test_masked_store_unaligned_v4i32(<4 x i32> %data, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v4i32:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vle8.v v9, (a0)
; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v9, v8, v0
; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i32 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
%load = load <4 x i32>, ptr %ptr_vec, align 1
%sel = select <4 x i1> %mask, <4 x i32> %data, <4 x i32> %load
store <4 x i32> %sel, ptr %ptr_vec, align 1
ret void
}
define void @test_masked_store_unaligned_v4i64(<4 x i64> %data, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v4i64:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vle8.v v10, (a0)
; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i64 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
%load = load <4 x i64>, ptr %ptr_vec, align 1
%sel = select <4 x i1> %mask, <4 x i64> %data, <4 x i64> %load
store <4 x i64> %sel, ptr %ptr_vec, align 1
ret void
}
define void @test_masked_store_unaligned_v8i32(<8 x i32> %data, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v8i32:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: li a1, 32
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vle8.v v10, (a0)
; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i32 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
%load = load <8 x i32>, ptr %ptr_vec, align 1
%sel = select <8 x i1> %mask, <8 x i32> %data, <8 x i32> %load
store <8 x i32> %sel, ptr %ptr_vec, align 1
ret void
}
define void @test_masked_store_unaligned_v8i64(<8 x i64> %data, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v8i64:
; RISCV: # %bb.0:
; RISCV-NEXT: addi a0, a0, 1
; RISCV-NEXT: li a1, 64
; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT: vle8.v v12, (a0)
; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT: vmerge.vvm v8, v12, v8, v0
; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT: vse8.v v8, (a0)
; RISCV-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i64 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
%load = load <8 x i64>, ptr %ptr_vec, align 1
%sel = select <8 x i1> %mask, <8 x i64> %data, <8 x i64> %load
store <8 x i64> %sel, ptr %ptr_vec, align 1
ret void
}