
Now that #149310 has restricted lifetime intrinsics to work only on allocas, we can also drop the explicit size argument; the size is instead implied by the alloca. This removes the ability to mark only a prefix of an alloca as alive or dead. We never used that capability, so dropping it removes the need to handle that possibility everywhere (and many key places, including stack coloring, did not actually respect partial markers anyway).
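
For illustration, here is a minimal before/after sketch of the IR change; the function name, the alloca type, and the 32-byte size shown in the old-form comment are hypothetical and not taken from this patch:

```llvm
declare void @llvm.lifetime.start.p0(ptr)
declare void @llvm.lifetime.end.p0(ptr)

define void @example() {
  %buf = alloca [32 x i8]
  ; Old form (removed by this change): call void @llvm.lifetime.start.p0(i64 32, ptr %buf)
  ; New form: the live range covers the whole alloca, so no size operand is needed.
  call void @llvm.lifetime.start.p0(ptr %buf)
  store i8 7, ptr %buf
  call void @llvm.lifetime.end.p0(ptr %buf)
  ret void
}
```

The updated tests below (for example the lifetime calls in @bigexample) already use the argument-free form.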
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -passes=newgvn,dce | FileCheck %s
; Analyze Load from clobbering Load.
define <vscale x 4 x i32> @load_store_clobber_load(ptr %p) {
|
|
; CHECK-LABEL: @load_store_clobber_load(
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr undef, align 16
|
|
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
|
|
;
|
|
%load1 = load <vscale x 4 x i32>, ptr %p
|
|
store <vscale x 4 x i32> zeroinitializer, ptr undef
|
|
%load2 = load <vscale x 4 x i32>, ptr %p ; <- load to be eliminated
|
|
%add = add <vscale x 4 x i32> %load1, %load2
|
|
ret <vscale x 4 x i32> %add
|
|
}
|
|
|
|
define <vscale x 4 x i32> @load_store_clobber_load_mayalias(ptr %p, ptr %p2) {
|
|
; CHECK-LABEL: @load_store_clobber_load_mayalias(
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P2:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
|
|
; CHECK-NEXT: [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[SUB]]
|
|
;
|
|
%load1 = load <vscale x 4 x i32>, ptr %p
|
|
store <vscale x 4 x i32> zeroinitializer, ptr %p2
|
|
%load2 = load <vscale x 4 x i32>, ptr %p
|
|
%sub = sub <vscale x 4 x i32> %load1, %load2
|
|
ret <vscale x 4 x i32> %sub
|
|
}
|
|
|
|
define <vscale x 4 x i32> @load_store_clobber_load_noalias(ptr noalias %p, ptr noalias %p2) {
|
|
; CHECK-LABEL: @load_store_clobber_load_noalias(
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P2:%.*]], align 16
|
|
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
|
|
;
|
|
%load1 = load <vscale x 4 x i32>, ptr %p
|
|
store <vscale x 4 x i32> zeroinitializer, ptr %p2
|
|
%load2 = load <vscale x 4 x i32>, ptr %p ; <- load to be eliminated
|
|
%add = add <vscale x 4 x i32> %load1, %load2
|
|
ret <vscale x 4 x i32> %add
|
|
}
|
|
|
|
; BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
|
|
define i32 @load_clobber_load_gep1(ptr %p) {
|
|
; CHECK-LABEL: @load_clobber_load_gep1(
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 1
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P]], i64 1
|
|
; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP2]], align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
|
|
; CHECK-NEXT: ret i32 [[ADD]]
|
|
;
|
|
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 1
|
|
%load1 = load i32, ptr %gep1
|
|
%gep2 = getelementptr i32, ptr %p, i64 1
|
|
%load2 = load i32, ptr %gep2 ; <- load could be eliminated
|
|
%add = add i32 %load1, %load2
|
|
ret i32 %add
|
|
}
|
|
|
|
define i32 @load_clobber_load_gep2(ptr %p) {
|
|
; CHECK-LABEL: @load_clobber_load_gep2(
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P]], i64 4
|
|
; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP2]], align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
|
|
; CHECK-NEXT: ret i32 [[ADD]]
|
|
;
|
|
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
|
|
%load1 = load i32, ptr %gep1
|
|
%gep2 = getelementptr i32, ptr %p, i64 4
|
|
%load2 = load i32, ptr %gep2 ; <- cannot determine at compile time whether %load1 and %load2 have the same address
|
|
%add = add i32 %load1, %load2
|
|
ret i32 %add
|
|
}
|
|
|
|
; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
|
|
define i32 @load_clobber_load_gep3(ptr %p) {
|
|
; CHECK-LABEL: @load_clobber_load_gep3(
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
|
|
; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
|
|
; CHECK-NEXT: [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
|
|
; CHECK-NEXT: ret i32 [[ADD]]
|
|
;
|
|
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
|
|
%load1 = load i32, ptr %gep1
|
|
%gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
|
|
%load2 = load float, ptr %gep2 ; <- load could be eliminated
|
|
%cast = bitcast float %load2 to i32
|
|
%add = add i32 %load1, %cast
|
|
ret i32 %add
|
|
}
|
|
|
|
define <vscale x 4 x i32> @load_clobber_load_fence(ptr %p) {
|
|
; CHECK-LABEL: @load_clobber_load_fence(
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: call void asm "", "~{memory}"()
|
|
; CHECK-NEXT: [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
|
|
; CHECK-NEXT: [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[SUB]]
|
|
;
|
|
%load1 = load <vscale x 4 x i32>, ptr %p
|
|
call void asm "", "~{memory}"()
|
|
%load2 = load <vscale x 4 x i32>, ptr %p
|
|
%sub = sub <vscale x 4 x i32> %load1, %load2
|
|
ret <vscale x 4 x i32> %sub
|
|
}
|
|
|
|
define <vscale x 4 x i32> @load_clobber_load_sideeffect(ptr %p) {
|
|
; CHECK-LABEL: @load_clobber_load_sideeffect(
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: call void asm sideeffect "", ""()
|
|
; CHECK-NEXT: [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
|
|
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
|
|
;
|
|
%load1 = load <vscale x 4 x i32>, ptr %p
|
|
call void asm sideeffect "", ""()
|
|
%load2 = load <vscale x 4 x i32>, ptr %p
|
|
%add = add <vscale x 4 x i32> %load1, %load2
|
|
ret <vscale x 4 x i32> %add
|
|
}
|
|
|
|
; Analyze Load from clobbering Store.
|
|
|
|
define <vscale x 4 x i32> @store_forward_to_load(ptr %p) {
|
|
; CHECK-LABEL: @store_forward_to_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> zeroinitializer
|
|
;
|
|
store <vscale x 4 x i32> zeroinitializer, ptr %p
|
|
%load = load <vscale x 4 x i32>, ptr %p
|
|
ret <vscale x 4 x i32> %load
|
|
}
|
|
|
|
define <vscale x 4 x i32> @store_forward_to_load_sideeffect(ptr %p) {
|
|
; CHECK-LABEL: @store_forward_to_load_sideeffect(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: call void asm sideeffect "", ""()
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> zeroinitializer, ptr %p
|
|
call void asm sideeffect "", ""()
|
|
%load = load <vscale x 4 x i32>, ptr %p
|
|
ret <vscale x 4 x i32> %load
|
|
}
|
|
|
|
define i32 @store_clobber_load() {
|
|
; CHECK-LABEL: @store_clobber_load(
|
|
; CHECK-NEXT: [[ALLOC:%.*]] = alloca <vscale x 4 x i32>, align 16
|
|
; CHECK-NEXT: store <vscale x 4 x i32> undef, ptr [[ALLOC]], align 16
|
|
; CHECK-NEXT: [[PTR:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[ALLOC]], i32 0, i32 1
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4
|
|
; CHECK-NEXT: ret i32 [[LOAD]]
|
|
;
|
|
%alloc = alloca <vscale x 4 x i32>
|
|
store <vscale x 4 x i32> undef, ptr %alloc
|
|
%ptr = getelementptr <vscale x 4 x i32>, ptr %alloc, i32 0, i32 1
|
|
%load = load i32, ptr %ptr
|
|
ret i32 %load
|
|
}
|
|
|
|
; Analyze Load from clobbering MemInst.
|
|
|
|
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
|
|
|
|
define i32 @memset_clobber_load(ptr %p) {
|
|
; CHECK-LABEL: @memset_clobber_load(
|
|
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
|
|
; CHECK-NEXT: ret i32 16843009
|
|
;
|
|
tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
|
|
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 5
|
|
%load = load i32, ptr %gep
|
|
ret i32 %load
|
|
}
|
|
|
|
define i32 @memset_clobber_load_vscaled_base(ptr %p) {
|
|
; CHECK-LABEL: @memset_clobber_load_vscaled_base(
|
|
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1, i64 1
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
|
|
; CHECK-NEXT: ret i32 [[LOAD]]
|
|
;
|
|
tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
|
|
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
|
|
%load = load i32, ptr %gep
|
|
ret i32 %load
|
|
}
|
|
|
|
define i32 @memset_clobber_load_nonconst_index(ptr %p, i64 %idx1, i64 %idx2) {
|
|
; CHECK-LABEL: @memset_clobber_load_nonconst_index(
|
|
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
|
|
; CHECK-NEXT: ret i32 [[LOAD]]
|
|
;
|
|
tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
|
|
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 %idx1, i64 %idx2
|
|
%load = load i32, ptr %gep
|
|
ret i32 %load
|
|
}
|
|
|
|
|
|
; Load elimination across BBs
|
|
|
|
define ptr @load_from_alloc_replaced_with_undef() {
|
|
; CHECK-LABEL: @load_from_alloc_replaced_with_undef(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[A]], i64 0, i64 1
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
|
|
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[LOAD]], 0
|
|
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[A]], align 16
|
|
; CHECK-NEXT: br label [[IF_END]]
|
|
; CHECK: if.end:
|
|
; CHECK-NEXT: ret ptr [[A]]
|
|
;
|
|
entry:
|
|
%a = alloca <vscale x 4 x i32>
|
|
%gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 0, i64 1
|
|
%load = load i32, ptr %gep ; <- load to be eliminated
|
|
%tobool = icmp eq i32 %load, 0 ; <- icmp to be eliminated
|
|
br i1 %tobool, label %if.end, label %if.then
|
|
|
|
if.then:
|
|
store <vscale x 4 x i32> zeroinitializer, ptr %a
|
|
br label %if.end
|
|
|
|
if.end:
|
|
ret ptr %a
|
|
}
|
|
|
|
define i32 @redundant_load_elimination_1(ptr %p) {
|
|
; CHECK-LABEL: @redundant_load_elimination_1(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 1
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP]], align 4
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: br label [[IF_END]]
|
|
; CHECK: if.end:
|
|
; CHECK-NEXT: ret i32 [[LOAD1]]
|
|
;
|
|
entry:
|
|
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
|
|
%load1 = load i32, ptr %gep
|
|
%cmp = icmp eq i32 %load1, 0
|
|
br i1 %cmp, label %if.then, label %if.end
|
|
|
|
if.then:
|
|
%load2 = load i32, ptr %gep ; <- load to be eliminated
|
|
%add = add i32 %load1, %load2
|
|
br label %if.end
|
|
|
|
if.end:
|
|
%result = phi i32 [ %add, %if.then ], [ %load1, %entry ]
|
|
ret i32 %result
|
|
}
|
|
|
|
; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to NoAlias.
|
|
define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
|
|
; CHECK-LABEL: @redundant_load_elimination_2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 1
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1, i64 0
|
|
; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4
|
|
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: store i32 [[T]], ptr [[Q:%.*]], align 4
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: if.else:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
|
|
store i32 0, ptr %gep1
|
|
%gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
|
|
store i32 1, ptr %gep2
|
|
br i1 %c, label %if.else, label %if.then
|
|
|
|
if.then:
|
|
%t = load i32, ptr %gep1 ; <- load could be eliminated
|
|
store i32 %t, ptr %q
|
|
ret void
|
|
|
|
if.else:
|
|
ret void
|
|
}
|
|
|
|
define void @redundant_load_elimination_zero_index(i1 %c, ptr %p, ptr %q) {
|
|
; CHECK-LABEL: @redundant_load_elimination_zero_index(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 1
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: store i32 1, ptr [[P]], align 4
|
|
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: if.else:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 1
|
|
store i32 0, ptr %gep1
|
|
store i32 1, ptr %p
|
|
br i1 %c, label %if.else, label %if.then
|
|
|
|
if.then:
|
|
%t = load i32, ptr %gep1 ; <- load could be eliminated
|
|
store i32 %t, ptr %q
|
|
ret void
|
|
|
|
if.else:
|
|
ret void
|
|
}
|
|
|
|
define void @redundant_load_elimination_zero_index_1(i1 %c, ptr %p, ptr %q, i64 %i) {
|
|
; CHECK-LABEL: @redundant_load_elimination_zero_index_1(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[J:%.*]] = add i64 [[I:%.*]], 1
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 [[J]]
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 0, i64 [[I]]
|
|
; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4
|
|
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: if.else:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%j = add i64 %i, 1
|
|
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 %j
|
|
store i32 0, ptr %gep1
|
|
%gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 %i
|
|
store i32 1, ptr %gep2
|
|
br i1 %c, label %if.else, label %if.then
|
|
|
|
if.then:
|
|
%t = load i32, ptr %gep1 ; <- load could be eliminated
|
|
store i32 %t, ptr %q
|
|
ret void
|
|
|
|
if.else:
|
|
ret void
|
|
}
|
|
; TODO: the load in if.then could have been eliminated.
|
|
define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32> %v) {
|
|
; CHECK-LABEL: @missing_load_elimination(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[P1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
|
|
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: if.else:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
store <vscale x 4 x i32> zeroinitializer, ptr %p
|
|
%p1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
|
|
store <vscale x 4 x i32> %v, ptr %p1
|
|
br i1 %c, label %if.else, label %if.then
|
|
|
|
if.then:
|
|
%t = load <vscale x 4 x i32>, ptr %p ; load could be eliminated
|
|
store <vscale x 4 x i32> %t, ptr %q
|
|
ret void
|
|
|
|
if.else:
|
|
ret void
|
|
}
|
|
|
|
; Different sizes / types
|
|
|
|
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%load = load <vscale x 16 x i8>, ptr %p
|
|
ret <vscale x 16 x i8> %load
|
|
}
|
|
|
|
define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%load = load <vscale x 4 x float>, ptr %p
|
|
ret <vscale x 4 x float> %load
|
|
}
|
|
|
|
define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
|
|
; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
|
|
; CHECK-NEXT: store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
|
|
;
|
|
store <vscale x 16 x i8> %x, ptr %p
|
|
%load = load <vscale x 4 x float>, ptr %p
|
|
ret <vscale x 4 x float> %load
|
|
}
|
|
|
|
define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
|
|
; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x float> %x, ptr %p
|
|
%load = load <vscale x 4 x i32>, ptr %p
|
|
ret <vscale x 4 x i32> %load
|
|
}
|
|
|
|
define <vscale x 4 x i32> @load_v4i32_store_v4i64_forward_load(ptr %p, <vscale x 4 x i64> %x) {
|
|
; CHECK-LABEL: @load_v4i32_store_v4i64_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i64> [[X:%.*]], ptr [[P:%.*]], align 32
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i64> %x, ptr %p
|
|
%load = load <vscale x 4 x i32>, ptr %p
|
|
ret <vscale x 4 x i32> %load
|
|
}
|
|
|
|
define <vscale x 4 x i64> @load_v4i64_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v4i64_store_v4i32_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[P]], align 32
|
|
; CHECK-NEXT: ret <vscale x 4 x i64> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%load = load <vscale x 4 x i64>, ptr %p
|
|
ret <vscale x 4 x i64> %load
|
|
}
|
|
|
|
define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[P]], align 8
|
|
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%load = load <vscale x 2 x i32>, ptr %p
|
|
ret <vscale x 2 x i32> %load
|
|
}
|
|
|
|
define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsets(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsets(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[Q:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[P]], i64 1
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
|
|
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%q = getelementptr <vscale x 2 x i32>, ptr %p, i64 1
|
|
%load = load <vscale x 2 x i32>, ptr %q
|
|
ret <vscale x 2 x i32> %load
|
|
}
|
|
|
|
define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsetc(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[Q:%.*]] = getelementptr <2 x i32>, ptr [[P]], i64 1
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
|
|
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%q = getelementptr <2 x i32>, ptr %p, i64 1
|
|
%load = load <vscale x 2 x i32>, ptr %q
|
|
ret <vscale x 2 x i32> %load
|
|
}
|
|
|
|
define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 2 x ptr> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%load = load <vscale x 2 x ptr>, ptr %p
|
|
ret <vscale x 2 x ptr> %load
|
|
}
|
|
|
|
define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
|
|
; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
|
|
; CHECK-NEXT: store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 2 x i64> [[LOAD]]
|
|
;
|
|
store <vscale x 2 x ptr> %x, ptr %p
|
|
%load = load <vscale x 2 x i64>, ptr %p
|
|
ret <vscale x 2 x i64> %load
|
|
}
|
|
|
|
define <vscale x 16 x i8> @load_nxv16i8_store_v4i32_forward_load(ptr %p, <4 x i32> %x) {
|
|
; CHECK-LABEL: @load_nxv16i8_store_v4i32_forward_load(
|
|
; CHECK-NEXT: store <4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
|
|
;
|
|
store <4 x i32> %x, ptr %p
|
|
%load = load <vscale x 16 x i8>, ptr %p
|
|
ret <vscale x 16 x i8> %load
|
|
}
|
|
|
|
define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v16i8_store_nxv4i32_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%load = load <16 x i8>, ptr %p
|
|
ret <16 x i8> %load
|
|
}
|
|
|
|
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
|
|
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> splat (i32 4), ptr %p
|
|
%load = load <vscale x 16 x i8>, ptr %p
|
|
ret <vscale x 16 x i8> %load
|
|
}
|
|
|
|
define <vscale x 16 x i8> @load_v16i8_struct_store_v4i32_forward_load(ptr %p, { <vscale x 4 x i32> } %x) {
|
|
; CHECK-LABEL: @load_v16i8_struct_store_v4i32_forward_load(
|
|
; CHECK-NEXT: store { <vscale x 4 x i32> } [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
|
|
;
|
|
store { <vscale x 4 x i32> } %x, ptr %p
|
|
%load = load <vscale x 16 x i8>, ptr %p
|
|
ret <vscale x 16 x i8> %load
|
|
}
|
|
|
|
define {<vscale x 16 x i8>} @load_v16i8_store_v4i32_struct_forward_load(ptr %p, <vscale x 4 x i32> %x) {
|
|
; CHECK-LABEL: @load_v16i8_store_v4i32_struct_forward_load(
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load { <vscale x 16 x i8> }, ptr [[P]], align 16
|
|
; CHECK-NEXT: ret { <vscale x 16 x i8> } [[LOAD]]
|
|
;
|
|
store <vscale x 4 x i32> %x, ptr %p
|
|
%load = load { <vscale x 16 x i8> }, ptr %p
|
|
ret { <vscale x 16 x i8> } %load
|
|
}
|
|
|
|
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @bigexample({ <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a) vscale_range(1,16) {
|
|
; CHECK-LABEL: @bigexample(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[REF_TMP:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP]])
|
|
; CHECK-NEXT: [[A_ELT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A:%.*]], 0
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT]], ptr [[REF_TMP]], align 16
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[A_ELT2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 1
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 5
|
|
; CHECK-NEXT: [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]]
|
|
; CHECK-NEXT: [[A_ELT4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 2
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16
|
|
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 48
|
|
; CHECK-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
|
|
; CHECK-NEXT: [[DOTUNPACK:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP]], align 16
|
|
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DOTUNPACK]], 0
|
|
; CHECK-NEXT: [[DOTUNPACK8:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK1]], align 16
|
|
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[DOTUNPACK8]], 1
|
|
; CHECK-NEXT: [[DOTUNPACK10:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK3]], align 16
|
|
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[DOTUNPACK10]], 2
|
|
; CHECK-NEXT: [[DOTUNPACK12:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK5]], align 16
|
|
; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[DOTUNPACK12]], 3
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP]])
|
|
; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
|
|
;
|
|
entry:
|
|
%ref.tmp = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
|
|
call void @llvm.lifetime.start.p0(ptr nonnull %ref.tmp)
|
|
%a.elt = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 0
|
|
store <vscale x 4 x i32> %a.elt, ptr %ref.tmp, align 16
|
|
%0 = call i64 @llvm.vscale.i64()
|
|
%1 = shl i64 %0, 4
|
|
%ref.tmp.repack1 = getelementptr inbounds i8, ptr %ref.tmp, i64 %1
|
|
%a.elt2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 1
|
|
store <vscale x 4 x i32> %a.elt2, ptr %ref.tmp.repack1, align 16
|
|
%2 = call i64 @llvm.vscale.i64()
|
|
%3 = shl i64 %2, 5
|
|
%ref.tmp.repack3 = getelementptr inbounds i8, ptr %ref.tmp, i64 %3
|
|
%a.elt4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 2
|
|
store <vscale x 4 x i32> %a.elt4, ptr %ref.tmp.repack3, align 16
|
|
%4 = call i64 @llvm.vscale.i64()
|
|
%5 = mul i64 %4, 48
|
|
%ref.tmp.repack5 = getelementptr inbounds i8, ptr %ref.tmp, i64 %5
|
|
%a.elt6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 3
|
|
store <vscale x 4 x i32> %a.elt6, ptr %ref.tmp.repack5, align 16
|
|
%.unpack = load <vscale x 16 x i8>, ptr %ref.tmp, align 16
|
|
%6 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> %.unpack, 0
|
|
%7 = call i64 @llvm.vscale.i64()
|
|
%8 = shl i64 %7, 4
|
|
%.elt7 = getelementptr inbounds i8, ptr %ref.tmp, i64 %8
|
|
%.unpack8 = load <vscale x 16 x i8>, ptr %.elt7, align 16
|
|
%9 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, <vscale x 16 x i8> %.unpack8, 1
|
|
%10 = call i64 @llvm.vscale.i64()
|
|
%11 = shl i64 %10, 5
|
|
%.elt9 = getelementptr inbounds i8, ptr %ref.tmp, i64 %11
|
|
%.unpack10 = load <vscale x 16 x i8>, ptr %.elt9, align 16
|
|
%12 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %9, <vscale x 16 x i8> %.unpack10, 2
|
|
%13 = call i64 @llvm.vscale.i64()
|
|
%14 = mul i64 %13, 48
|
|
%.elt11 = getelementptr inbounds i8, ptr %ref.tmp, i64 %14
|
|
%.unpack12 = load <vscale x 16 x i8>, ptr %.elt11, align 16
|
|
%15 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %12, <vscale x 16 x i8> %.unpack12, 3
|
|
call void @llvm.lifetime.end.p0(ptr nonnull %ref.tmp)
|
|
ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
|
|
}
|
|
|
|
define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
|
|
; CHECK-LABEL: @scalable_store_to_fixed_load(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
|
|
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
|
|
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
|
|
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
|
|
;
|
|
entry:
|
|
%retval = alloca { <16 x float> }
|
|
%0 = fadd <vscale x 4 x float> %.coerce, %.coerce
|
|
store <vscale x 4 x float> %0, ptr %retval
|
|
%1 = load <16 x float>, ptr %retval
|
|
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
|
|
ret <vscale x 4 x float> %cast.scalable
|
|
}
|
|
|
|
; Here, only the lower bound for vscale is known, but that is enough to allow forwarding the store to a load of 16 elements.
|
|
define <vscale x 4 x float> @scalable_store_to_fixed_load_only_lower_bound(<vscale x 4 x float> %a) vscale_range(4) {
|
|
; CHECK-LABEL: @scalable_store_to_fixed_load_only_lower_bound(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float> }, align 16
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[RETVAL]], align 16
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
|
|
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
|
|
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
|
|
;
|
|
entry:
|
|
%retval = alloca { <vscale x 4 x float> }
|
|
store <vscale x 4 x float> %a, ptr %retval
|
|
%1 = load <16 x float>, ptr %retval
|
|
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
|
|
ret <vscale x 4 x float> %cast.scalable
|
|
}
|
|
|
|
define <vscale x 4 x float> @scalable_store_to_fixed_load_with_offset(<vscale x 4 x float> %a) vscale_range(4,4) {
|
|
; CHECK-LABEL: @scalable_store_to_fixed_load_with_offset(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[PTR:%.*]] = alloca { <32 x float> }, align 128
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[GEP]], align 64
|
|
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
|
|
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
|
|
;
|
|
entry:
|
|
%ptr = alloca { <32 x float> }
|
|
store <vscale x 4 x float> %a, ptr %ptr
|
|
%gep = getelementptr inbounds i8, ptr %ptr, i64 8
|
|
%1 = load <16 x float>, ptr %gep
|
|
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
|
|
ret <vscale x 4 x float> %cast.scalable
|
|
}
|
|
|
|
define <vscale x 4 x float> @scalable_store_to_fixed_load_unknown_vscale(<vscale x 4 x float> %.coerce) {
|
|
; CHECK-LABEL: @scalable_store_to_fixed_load_unknown_vscale(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
|
|
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
|
|
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
|
|
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
|
|
;
|
|
entry:
|
|
%retval = alloca { <16 x float> }
|
|
%0 = fadd <vscale x 4 x float> %.coerce, %.coerce
|
|
store <vscale x 4 x float> %0, ptr %retval
|
|
%1 = load <16 x float>, ptr %retval
|
|
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
|
|
ret <vscale x 4 x float> %cast.scalable
|
|
}
|
|
|
|
define <vscale x 4 x float> @scalable_store_to_fixed_load_size_missmatch(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
|
|
; CHECK-LABEL: @scalable_store_to_fixed_load_size_missmatch(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <32 x float> }, align 128
|
|
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, ptr [[RETVAL]], align 128
|
|
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> [[TMP1]], i64 0)
|
|
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
|
|
;
|
|
entry:
|
|
%retval = alloca { <32 x float> }
|
|
%0 = fadd <vscale x 4 x float> %.coerce, %.coerce
|
|
store <vscale x 4 x float> %0, ptr %retval
|
|
%1 = load <32 x float>, ptr %retval
|
|
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> %1, i64 0)
|
|
ret <vscale x 4 x float> %cast.scalable
|
|
}
|
|
|
|
define <vscale x 4 x i32> @scalable_store_to_fixed_load_different_types(<vscale x 4 x float> %a) vscale_range(4,4) {
|
|
; CHECK-LABEL: @scalable_store_to_fixed_load_different_types(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[PTR:%.*]] = alloca { <16 x float> }, align 64
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[PTR]], align 64
|
|
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP0]], i64 0)
|
|
; CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
|
|
;
|
|
entry:
|
|
%ptr = alloca { <16 x float> }
|
|
store <vscale x 4 x float> %a, ptr %ptr
|
|
%1 = load <16 x i32>, ptr %ptr
|
|
%cast.scalable = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> %1, i64 0)
|
|
ret <vscale x 4 x i32> %cast.scalable
|
|
}
|
|
|
|
; This function does not have a fixed vscale, but the loaded vector is still known
|
|
; to be smaller than or equal in size to the stored vector.
|
|
define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a) {
|
|
; CHECK-LABEL: @scalable_store_to_small_fixed_load(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
|
|
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
|
|
; CHECK-NEXT: ret <4 x float> [[TMP0]]
|
|
;
|
|
entry:
|
|
%ptr = alloca <vscale x 4 x float>
|
|
store <vscale x 4 x float> %a, ptr %ptr
|
|
%1 = load <4 x float>, ptr %ptr
|
|
ret <4 x float> %1
|
|
}
|