; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -passes=newgvn,dce | FileCheck %s
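; This file checks NewGVN's load/store forwarding and redundant load elimination
; for scalable (vscale) vector types.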
; Analyze Load from clobbering Load.
define <vscale x 4 x i32> @load_store_clobber_load(ptr %p) {
; CHECK-LABEL: @load_store_clobber_load(
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr undef, align 16
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
%load1 = load <vscale x 4 x i32>, ptr %p
store <vscale x 4 x i32> zeroinitializer, ptr undef
%load2 = load <vscale x 4 x i32>, ptr %p ; <- load to be eliminated
%add = add <vscale x 4 x i32> %load1, %load2
ret <vscale x 4 x i32> %add
}
define <vscale x 4 x i32> @load_store_clobber_load_mayalias(ptr %p, ptr %p2) {
; CHECK-LABEL: @load_store_clobber_load_mayalias(
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P2:%.*]], align 16
; CHECK-NEXT: [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[SUB]]
;
%load1 = load <vscale x 4 x i32>, ptr %p
store <vscale x 4 x i32> zeroinitializer, ptr %p2
%load2 = load <vscale x 4 x i32>, ptr %p
%sub = sub <vscale x 4 x i32> %load1, %load2
ret <vscale x 4 x i32> %sub
}
define <vscale x 4 x i32> @load_store_clobber_load_noalias(ptr noalias %p, ptr noalias %p2) {
; CHECK-LABEL: @load_store_clobber_load_noalias(
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P2:%.*]], align 16
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
%load1 = load <vscale x 4 x i32>, ptr %p
store <vscale x 4 x i32> zeroinitializer, ptr %p2
%load2 = load <vscale x 4 x i32>, ptr %p ; <- load to be eliminated
%add = add <vscale x 4 x i32> %load1, %load2
ret <vscale x 4 x i32> %add
}
; BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
define i32 @load_clobber_load_gep1(ptr %p) {
; CHECK-LABEL: @load_clobber_load_gep1(
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P]], i64 1
; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: ret i32 [[ADD]]
;
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 1
%load1 = load i32, ptr %gep1
%gep2 = getelementptr i32, ptr %p, i64 1
%load2 = load i32, ptr %gep2 ; <- load could be eliminated
%add = add i32 %load1, %load2
ret i32 %add
}
define i32 @load_clobber_load_gep2(ptr %p) {
; CHECK-LABEL: @load_clobber_load_gep2(
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P]], i64 4
; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: ret i32 [[ADD]]
;
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
%load1 = load i32, ptr %gep1
%gep2 = getelementptr i32, ptr %p, i64 4
%load2 = load i32, ptr %gep2 ; <- cannot determine at compile time whether %load1 and %load2 have the same address
%add = add i32 %load1, %load2
ret i32 %add
}
; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
define i32 @load_clobber_load_gep3(ptr %p) {
; CHECK-LABEL: @load_clobber_load_gep3(
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT: [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
; CHECK-NEXT: ret i32 [[ADD]]
;
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
%load1 = load i32, ptr %gep1
%gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
%load2 = load float, ptr %gep2 ; <- load could be eliminated
%cast = bitcast float %load2 to i32
%add = add i32 %load1, %cast
ret i32 %add
}
define <vscale x 4 x i32> @load_clobber_load_fence(ptr %p) {
; CHECK-LABEL: @load_clobber_load_fence(
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT: call void asm "", "~{memory}"()
; CHECK-NEXT: [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[SUB]]
;
%load1 = load <vscale x 4 x i32>, ptr %p
call void asm "", "~{memory}"()
%load2 = load <vscale x 4 x i32>, ptr %p
%sub = sub <vscale x 4 x i32> %load1, %load2
ret <vscale x 4 x i32> %sub
}
define <vscale x 4 x i32> @load_clobber_load_sideeffect(ptr %p) {
; CHECK-LABEL: @load_clobber_load_sideeffect(
; CHECK-NEXT: [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT: call void asm sideeffect "", ""()
; CHECK-NEXT: [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
%load1 = load <vscale x 4 x i32>, ptr %p
call void asm sideeffect "", ""()
%load2 = load <vscale x 4 x i32>, ptr %p
%add = add <vscale x 4 x i32> %load1, %load2
ret <vscale x 4 x i32> %add
}
; Analyze Load from clobbering Store.
define <vscale x 4 x i32> @store_forward_to_load(ptr %p) {
; CHECK-LABEL: @store_forward_to_load(
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> zeroinitializer
;
store <vscale x 4 x i32> zeroinitializer, ptr %p
%load = load <vscale x 4 x i32>, ptr %p
ret <vscale x 4 x i32> %load
}
define <vscale x 4 x i32> @store_forward_to_load_sideeffect(ptr %p) {
; CHECK-LABEL: @store_forward_to_load_sideeffect(
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
; CHECK-NEXT: call void asm sideeffect "", ""()
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
;
store <vscale x 4 x i32> zeroinitializer, ptr %p
call void asm sideeffect "", ""()
%load = load <vscale x 4 x i32>, ptr %p
ret <vscale x 4 x i32> %load
}
define i32 @store_clobber_load() {
; CHECK-LABEL: @store_clobber_load(
; CHECK-NEXT: [[ALLOC:%.*]] = alloca <vscale x 4 x i32>, align 16
; CHECK-NEXT: store <vscale x 4 x i32> undef, ptr [[ALLOC]], align 16
; CHECK-NEXT: [[PTR:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[ALLOC]], i32 0, i32 1
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
%alloc = alloca <vscale x 4 x i32>
store <vscale x 4 x i32> undef, ptr %alloc
%ptr = getelementptr <vscale x 4 x i32>, ptr %alloc, i32 0, i32 1
%load = load i32, ptr %ptr
ret i32 %load
}
; Analyze Load from clobbering MemInst.
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
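; The 16843009 in the CHECK line below is 0x01010101, i.e. the memset value 1
; replicated into the four bytes of the forwarded i32.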
define i32 @memset_clobber_load(ptr %p) {
; CHECK-LABEL: @memset_clobber_load(
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
; CHECK-NEXT: ret i32 16843009
;
tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 5
%load = load i32, ptr %gep
ret i32 %load
}
define i32 @memset_clobber_load_vscaled_base(ptr %p) {
; CHECK-LABEL: @memset_clobber_load_vscaled_base(
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1, i64 1
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
%load = load i32, ptr %gep
ret i32 %load
}
define i32 @memset_clobber_load_nonconst_index(ptr %p, i64 %idx1, i64 %idx2) {
; CHECK-LABEL: @memset_clobber_load_nonconst_index(
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 %idx1, i64 %idx2
%load = load i32, ptr %gep
ret i32 %load
}
; Load elimination across BBs
define ptr @load_from_alloc_replaced_with_undef() {
; CHECK-LABEL: @load_from_alloc_replaced_with_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[A]], i64 0, i64 1
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[LOAD]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[A]], align 16
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret ptr [[A]]
;
entry:
%a = alloca <vscale x 4 x i32>
%gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 0, i64 1
%load = load i32, ptr %gep ; <- load to be eliminated
%tobool = icmp eq i32 %load, 0 ; <- icmp to be eliminated
br i1 %tobool, label %if.end, label %if.then
if.then:
store <vscale x 4 x i32> zeroinitializer, ptr %a
br label %if.end
if.end:
ret ptr %a
}
define i32 @redundant_load_elimination_1(ptr %p) {
; CHECK-LABEL: @redundant_load_elimination_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret i32 [[LOAD1]]
;
entry:
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
%load1 = load i32, ptr %gep
%cmp = icmp eq i32 %load1, 0
br i1 %cmp, label %if.then, label %if.end
if.then:
%load2 = load i32, ptr %gep ; <- load to be eliminated
%add = add i32 %load1, %load2
br label %if.end
if.end:
%result = phi i32 [ %add, %if.then ], [ %load1, %entry ]
ret i32 %result
}
; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to NoAlias.
define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
; CHECK-LABEL: @redundant_load_elimination_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 1
; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1, i64 0
; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT: store i32 [[T]], ptr [[Q:%.*]], align 4
; CHECK-NEXT: ret void
; CHECK: if.else:
; CHECK-NEXT: ret void
;
entry:
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
store i32 0, ptr %gep1
%gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
store i32 1, ptr %gep2
br i1 %c, label %if.else, label %if.then
if.then:
%t = load i32, ptr %gep1 ; <- load could be eliminated
store i32 %t, ptr %q
ret void
if.else:
ret void
}
define void @redundant_load_elimination_zero_index(i1 %c, ptr %p, ptr %q) {
; CHECK-LABEL: @redundant_load_elimination_zero_index(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4
; CHECK-NEXT: store i32 1, ptr [[P]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4
; CHECK-NEXT: ret void
; CHECK: if.else:
; CHECK-NEXT: ret void
;
entry:
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 1
store i32 0, ptr %gep1
store i32 1, ptr %p
br i1 %c, label %if.else, label %if.then
if.then:
%t = load i32, ptr %gep1 ; <- load could be eliminated
store i32 %t, ptr %q
ret void
if.else:
ret void
}
define void @redundant_load_elimination_zero_index_1(i1 %c, ptr %p, ptr %q, i64 %i) {
; CHECK-LABEL: @redundant_load_elimination_zero_index_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[J:%.*]] = add i64 [[I:%.*]], 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 [[J]]
; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 0, i64 [[I]]
; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4
; CHECK-NEXT: ret void
; CHECK: if.else:
; CHECK-NEXT: ret void
;
entry:
%j = add i64 %i, 1
%gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 %j
store i32 0, ptr %gep1
%gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 %i
store i32 1, ptr %gep2
br i1 %c, label %if.else, label %if.then
if.then:
%t = load i32, ptr %gep1 ; <- load could be eliminated
store i32 %t, ptr %q
ret void
if.else:
ret void
}
; TODO: The load in if.then could have been eliminated.
define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32> %v) {
; CHECK-LABEL: @missing_load_elimination(
; CHECK-NEXT: entry:
; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[P1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1
; CHECK-NEXT: store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
; CHECK-NEXT: ret void
; CHECK: if.else:
; CHECK-NEXT: ret void
;
entry:
store <vscale x 4 x i32> zeroinitializer, ptr %p
%p1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
store <vscale x 4 x i32> %v, ptr %p1
br i1 %c, label %if.else, label %if.then
if.then:
%t = load <vscale x 4 x i32>, ptr %p ; load could be eliminated
store <vscale x 4 x i32> %t, ptr %q
ret void
if.else:
ret void
}
; Different sizes / types
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%load = load <vscale x 16 x i8>, ptr %p
ret <vscale x 16 x i8> %load
}
define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%load = load <vscale x 4 x float>, ptr %p
ret <vscale x 4 x float> %load
}
define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
; CHECK-NEXT: store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
;
store <vscale x 16 x i8> %x, ptr %p
%load = load <vscale x 4 x float>, ptr %p
ret <vscale x 4 x float> %load
}
define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
; CHECK-NEXT: store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
;
store <vscale x 4 x float> %x, ptr %p
%load = load <vscale x 4 x i32>, ptr %p
ret <vscale x 4 x i32> %load
}
define <vscale x 4 x i32> @load_v4i32_store_v4i64_forward_load(ptr %p, <vscale x 4 x i64> %x) {
; CHECK-LABEL: @load_v4i32_store_v4i64_forward_load(
; CHECK-NEXT: store <vscale x 4 x i64> [[X:%.*]], ptr [[P:%.*]], align 32
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
;
store <vscale x 4 x i64> %x, ptr %p
%load = load <vscale x 4 x i32>, ptr %p
ret <vscale x 4 x i32> %load
}
define <vscale x 4 x i64> @load_v4i64_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v4i64_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[P]], align 32
; CHECK-NEXT: ret <vscale x 4 x i64> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%load = load <vscale x 4 x i64>, ptr %p
ret <vscale x 4 x i64> %load
}
define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[P]], align 8
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%load = load <vscale x 2 x i32>, ptr %p
ret <vscale x 2 x i32> %load
}
define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsets(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsets(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[Q:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[P]], i64 1
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%q = getelementptr <vscale x 2 x i32>, ptr %p, i64 1
%load = load <vscale x 2 x i32>, ptr %q
ret <vscale x 2 x i32> %load
}
define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsetc(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[Q:%.*]] = getelementptr <2 x i32>, ptr [[P]], i64 1
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%q = getelementptr <2 x i32>, ptr %p, i64 1
%load = load <vscale x 2 x i32>, ptr %q
ret <vscale x 2 x i32> %load
}
define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 2 x ptr> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%load = load <vscale x 2 x ptr>, ptr %p
ret <vscale x 2 x ptr> %load
}
define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
; CHECK-NEXT: store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 2 x i64> [[LOAD]]
;
store <vscale x 2 x ptr> %x, ptr %p
%load = load <vscale x 2 x i64>, ptr %p
ret <vscale x 2 x i64> %load
}
define <vscale x 16 x i8> @load_nxv16i8_store_v4i32_forward_load(ptr %p, <4 x i32> %x) {
; CHECK-LABEL: @load_nxv16i8_store_v4i32_forward_load(
; CHECK-NEXT: store <4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
store <4 x i32> %x, ptr %p
%load = load <vscale x 16 x i8>, ptr %p
ret <vscale x 16 x i8> %load
}
define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_nxv4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%load = load <16 x i8>, ptr %p
ret <16 x i8> %load
}
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
store <vscale x 4 x i32> splat (i32 4), ptr %p
%load = load <vscale x 16 x i8>, ptr %p
ret <vscale x 16 x i8> %load
}
define <vscale x 16 x i8> @load_v16i8_struct_store_v4i32_forward_load(ptr %p, { <vscale x 4 x i32> } %x) {
; CHECK-LABEL: @load_v16i8_struct_store_v4i32_forward_load(
; CHECK-NEXT: store { <vscale x 4 x i32> } [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
store { <vscale x 4 x i32> } %x, ptr %p
%load = load <vscale x 16 x i8>, ptr %p
ret <vscale x 16 x i8> %load
}
define {<vscale x 16 x i8>} @load_v16i8_store_v4i32_struct_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_v4i32_struct_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load { <vscale x 16 x i8> }, ptr [[P]], align 16
; CHECK-NEXT: ret { <vscale x 16 x i8> } [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
%load = load { <vscale x 16 x i8> }, ptr %p
ret { <vscale x 16 x i8> } %load
}
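; @bigexample below packs a struct of four scalable vectors into a stack temporary
; at vscale-scaled offsets and reloads each element as <vscale x 16 x i8>. Judging
; by the CHECK lines, NewGVN reuses the vscale-based address computations for the
; reloads but does not forward the stored values themselves.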
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @bigexample({ <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a) vscale_range(1,16) {
; CHECK-LABEL: @bigexample(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[REF_TMP:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP]])
; CHECK-NEXT: [[A_ELT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A:%.*]], 0
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT]], ptr [[REF_TMP]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
; CHECK-NEXT: [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]]
; CHECK-NEXT: [[A_ELT2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 1
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 5
; CHECK-NEXT: [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]]
; CHECK-NEXT: [[A_ELT4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 2
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 48
; CHECK-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
; CHECK-NEXT: [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT: [[DOTUNPACK:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP]], align 16
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DOTUNPACK]], 0
; CHECK-NEXT: [[DOTUNPACK8:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK1]], align 16
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[DOTUNPACK8]], 1
; CHECK-NEXT: [[DOTUNPACK10:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK3]], align 16
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[DOTUNPACK10]], 2
; CHECK-NEXT: [[DOTUNPACK12:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[DOTUNPACK12]], 3
; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP]])
; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
;
entry:
%ref.tmp = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
call void @llvm.lifetime.start.p0(ptr nonnull %ref.tmp)
%a.elt = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 0
store <vscale x 4 x i32> %a.elt, ptr %ref.tmp, align 16
%0 = call i64 @llvm.vscale.i64()
%1 = shl i64 %0, 4
%ref.tmp.repack1 = getelementptr inbounds i8, ptr %ref.tmp, i64 %1
%a.elt2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 1
store <vscale x 4 x i32> %a.elt2, ptr %ref.tmp.repack1, align 16
%2 = call i64 @llvm.vscale.i64()
%3 = shl i64 %2, 5
%ref.tmp.repack3 = getelementptr inbounds i8, ptr %ref.tmp, i64 %3
%a.elt4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 2
store <vscale x 4 x i32> %a.elt4, ptr %ref.tmp.repack3, align 16
%4 = call i64 @llvm.vscale.i64()
%5 = mul i64 %4, 48
%ref.tmp.repack5 = getelementptr inbounds i8, ptr %ref.tmp, i64 %5
%a.elt6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 3
store <vscale x 4 x i32> %a.elt6, ptr %ref.tmp.repack5, align 16
%.unpack = load <vscale x 16 x i8>, ptr %ref.tmp, align 16
%6 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> %.unpack, 0
%7 = call i64 @llvm.vscale.i64()
%8 = shl i64 %7, 4
%.elt7 = getelementptr inbounds i8, ptr %ref.tmp, i64 %8
%.unpack8 = load <vscale x 16 x i8>, ptr %.elt7, align 16
%9 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, <vscale x 16 x i8> %.unpack8, 1
%10 = call i64 @llvm.vscale.i64()
%11 = shl i64 %10, 5
%.elt9 = getelementptr inbounds i8, ptr %ref.tmp, i64 %11
%.unpack10 = load <vscale x 16 x i8>, ptr %.elt9, align 16
%12 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %9, <vscale x 16 x i8> %.unpack10, 2
%13 = call i64 @llvm.vscale.i64()
%14 = mul i64 %13, 48
%.elt11 = getelementptr inbounds i8, ptr %ref.tmp, i64 %14
%.unpack12 = load <vscale x 16 x i8>, ptr %.elt11, align 16
%15 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %12, <vscale x 16 x i8> %.unpack12, 3
call void @llvm.lifetime.end.p0(ptr nonnull %ref.tmp)
ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
}
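; The following tests store a scalable vector and reload a fixed-length vector from
; the same object. Forwarding is only legal when the fixed-length load is known not
; to read past the bytes written by the scalable store, e.g. when vscale_range
; bounds the size of the scalable type.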
define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
%retval = alloca { <16 x float> }
%0 = fadd <vscale x 4 x float> %.coerce, %.coerce
store <vscale x 4 x float> %0, ptr %retval
%1 = load <16 x float>, ptr %retval
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
ret <vscale x 4 x float> %cast.scalable
}
; Here only the lower bound for the vscale is known, but that is enough to make forwarding
; the store to a load of 16 elements legal: with vscale >= 4, the stored <vscale x 4 x float>
; spans at least 64 bytes, the size of <16 x float>.
define <vscale x 4 x float> @scalable_store_to_fixed_load_only_lower_bound(<vscale x 4 x float> %a) vscale_range(4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_only_lower_bound(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float> }, align 16
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
%retval = alloca { <vscale x 4 x float> }
store <vscale x 4 x float> %a, ptr %retval
%1 = load <16 x float>, ptr %retval
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
ret <vscale x 4 x float> %cast.scalable
}
define <vscale x 4 x float> @scalable_store_to_fixed_load_with_offset(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_with_offset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[GEP]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
%ptr = alloca { <32 x float> }
store <vscale x 4 x float> %a, ptr %ptr
%gep = getelementptr inbounds i8, ptr %ptr, i64 8
%1 = load <16 x float>, ptr %gep
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
ret <vscale x 4 x float> %cast.scalable
}
define <vscale x 4 x float> @scalable_store_to_fixed_load_unknown_vscale(<vscale x 4 x float> %.coerce) {
; CHECK-LABEL: @scalable_store_to_fixed_load_unknown_vscale(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
%retval = alloca { <16 x float> }
%0 = fadd <vscale x 4 x float> %.coerce, %.coerce
store <vscale x 4 x float> %0, ptr %retval
%1 = load <16 x float>, ptr %retval
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
ret <vscale x 4 x float> %cast.scalable
}
define <vscale x 4 x float> @scalable_store_to_fixed_load_size_missmatch(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_size_missmatch(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, ptr [[RETVAL]], align 128
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
%retval = alloca { <32 x float> }
%0 = fadd <vscale x 4 x float> %.coerce, %.coerce
store <vscale x 4 x float> %0, ptr %retval
%1 = load <32 x float>, ptr %retval
%cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> %1, i64 0)
ret <vscale x 4 x float> %cast.scalable
}
define <vscale x 4 x i32> @scalable_store_to_fixed_load_different_types(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_different_types(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[PTR]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
;
entry:
%ptr = alloca { <16 x float> }
store <vscale x 4 x float> %a, ptr %ptr
%1 = load <16 x i32>, ptr %ptr
%cast.scalable = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> %1, i64 0)
ret <vscale x 4 x i32> %cast.scalable
}
; This function does not have a fixed vscale, but the loaded <4 x float> (16 bytes) is still
; known to be no larger than the stored <vscale x 4 x float>, which occupies at least 16 bytes.
define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a) {
; CHECK-LABEL: @scalable_store_to_small_fixed_load(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
; CHECK-NEXT: ret <4 x float> [[TMP0]]
;
entry:
%ptr = alloca <vscale x 4 x float>
store <vscale x 4 x float> %a, ptr %ptr
%1 = load <4 x float>, ptr %ptr
ret <4 x float> %1
}