llvm-project/llvm/test/CodeGen/X86/vmaskmov-offset.ll
David Green 32b6c17b29 [SDAG] Use UnknownSize for masked load/store MMO size
A masked load or store will load a potentially unknown number of bytes
from a memory location - that is not generally known at compile time.
They do not necessarily load/store the entire vector width, and treating
them as such can lead to incorrect aliasing information (for example, if
the underlying object is smaller than the size of the vector).

This makes sure that the MMO is given an unknown size to represent this.
which is less accurate that "may load/store from up to 16 bytes", but
less incorrect that "will load/store from 16 bytes".

Differential Revision: https://reviews.llvm.org/D113888
2021-11-23 09:47:56 +00:00

87 lines
5.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 -stop-after finalize-isel -o - %s | FileCheck %s
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
define void @test_v16f(<16 x i32> %x) {
; CHECK-LABEL: name: test_v16f
; CHECK: bb.0.bb:
; CHECK-NEXT: liveins: $ymm0, $ymm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr256 = COPY $ymm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0
; CHECK-NEXT: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
; CHECK-NEXT: [[VPCMPEQDYrr:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY]], [[AVX_SET0_]]
; CHECK-NEXT: [[VPCMPEQDYrr1:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY1]], [[AVX_SET0_]]
; CHECK-NEXT: [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4)
; CHECK-NEXT: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4)
; CHECK-NEXT: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4)
; CHECK-NEXT: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4)
; CHECK-NEXT: RET 0
bb:
%stack_input_vec = alloca <16 x float>, align 64
%stack_output_vec = alloca <16 x float>, align 64
%mask = icmp eq <16 x i32> %x, zeroinitializer
%masked_loaded_vec = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* nonnull %stack_input_vec, i32 4, <16 x i1> %mask, <16 x float> undef)
call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %masked_loaded_vec, <16 x float>* nonnull %stack_output_vec, i32 4, <16 x i1> %mask)
ret void
}
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
define void @test_v8d(<8 x i64> %x) {
; CHECK-LABEL: name: test_v8d
; CHECK: bb.0.bb:
; CHECK-NEXT: liveins: $ymm0, $ymm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr256 = COPY $ymm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0
; CHECK-NEXT: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
; CHECK-NEXT: [[VPCMPEQQYrr:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY]], [[AVX_SET0_]]
; CHECK-NEXT: [[VPCMPEQQYrr1:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY1]], [[AVX_SET0_]]
; CHECK-NEXT: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4)
; CHECK-NEXT: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4)
; CHECK-NEXT: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4)
; CHECK-NEXT: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4)
; CHECK-NEXT: RET 0
bb:
%stack_input_vec = alloca <8 x double>, align 64
%stack_output_vec = alloca <8 x double>, align 64
%mask = icmp eq <8 x i64> %x, zeroinitializer
%masked_loaded_vec = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull %stack_input_vec, i32 4, <8 x i1> %mask, <8 x double> undef)
call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %masked_loaded_vec, <8 x double>* nonnull %stack_output_vec, i32 4, <8 x i1> %mask)
ret void
}
define <2 x double> @mload_constmask_v2f64(<2 x double>* %addr, <2 x double> %dst) {
; CHECK-LABEL: name: mload_constmask_v2f64
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $rdi, $xmm0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; CHECK-NEXT: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load (s64) from %ir.addr + 8, align 4)
; CHECK-NEXT: $xmm0 = COPY [[VMOVHPDrm]]
; CHECK-NEXT: RET 0, $xmm0
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> <i1 0, i1 1>, <2 x double> %dst)
ret <2 x double> %res
}
define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
; CHECK-LABEL: name: one_mask_bit_set2
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $rdi, $xmm0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; CHECK-NEXT: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store (s32) into %ir.addr + 8)
; CHECK-NEXT: RET 0
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
ret void
}
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)