Nikita Popov c23b4fbdbb
[IR] Remove size argument from lifetime intrinsics (#150248)
Now that #149310 has restricted lifetime intrinsics to only work on
allocas, we can also drop the explicit size argument. Instead, the size
is implied by the alloca.

This removes the ability to only mark a prefix of an alloca alive/dead.
We never used that capability, so we should remove the need to handle
that possibility everywhere (though many key places, including stack
coloring, did not actually respect this).
2025-08-08 11:09:34 +02:00

426 lines
20 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
@C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 
1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32 ; <ptr> [#uses=1]
; The constant table @C.0.1248 is copied into a stack array that is afterwards
; only read, through four loads at variable indices. The expected output has
; no alloca and no memcpy: each getelementptr indexes @C.0.1248 directly.
define float @test1(i32 %hash, float %x, float %y, float %z, float %w) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T3:%.*]] = shl i32 [[HASH:%.*]], 2
; CHECK-NEXT: [[T5:%.*]] = and i32 [[T3]], 124
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[T5]] to i64
; CHECK-NEXT: [[T753:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[T9:%.*]] = load float, ptr [[T753]], align 4
; CHECK-NEXT: [[T11:%.*]] = fmul float [[T9]], [[X:%.*]]
; CHECK-NEXT: [[T13:%.*]] = fadd float [[T11]], 0.000000e+00
; CHECK-NEXT: [[T17_SUM52:%.*]] = or disjoint i32 [[T5]], 1
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[T17_SUM52]] to i64
; CHECK-NEXT: [[T1851:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[T19:%.*]] = load float, ptr [[T1851]], align 4
; CHECK-NEXT: [[T21:%.*]] = fmul float [[T19]], [[Y:%.*]]
; CHECK-NEXT: [[T23:%.*]] = fadd float [[T21]], [[T13]]
; CHECK-NEXT: [[T27_SUM50:%.*]] = or disjoint i32 [[T5]], 2
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[T27_SUM50]] to i64
; CHECK-NEXT: [[T2849:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP2]]
; CHECK-NEXT: [[T29:%.*]] = load float, ptr [[T2849]], align 4
; CHECK-NEXT: [[T31:%.*]] = fmul float [[T29]], [[Z:%.*]]
; CHECK-NEXT: [[T33:%.*]] = fadd float [[T31]], [[T23]]
; CHECK-NEXT: [[T37_SUM48:%.*]] = or disjoint i32 [[T5]], 3
; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[T37_SUM48]] to i64
; CHECK-NEXT: [[T3847:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP3]]
; CHECK-NEXT: [[T39:%.*]] = load float, ptr [[T3847]], align 4
; CHECK-NEXT: [[T41:%.*]] = fmul float [[T39]], [[W:%.*]]
; CHECK-NEXT: [[T43:%.*]] = fadd float [[T41]], [[T33]]
; CHECK-NEXT: ret float [[T43]]
;
entry:
; Stack copy of the whole 512-byte (128 x float) constant table.
%lookupTable = alloca [128 x float], align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 16 %lookupTable, ptr align 16 @C.0.1248, i64 512, i1 false)
; (%hash << 2) & 124 is a multiple of 4 in [0, 124]; the three 'or'
; instructions below select the following three consecutive elements.
%t3 = shl i32 %hash, 2
%t5 = and i32 %t3, 124
%t753 = getelementptr [128 x float], ptr %lookupTable, i32 0, i32 %t5
%t9 = load float, ptr %t753
%t11 = fmul float %t9, %x
%t13 = fadd float %t11, 0.000000e+00
%t17.sum52 = or i32 %t5, 1
%t1851 = getelementptr [128 x float], ptr %lookupTable, i32 0, i32 %t17.sum52
%t19 = load float, ptr %t1851
%t21 = fmul float %t19, %y
%t23 = fadd float %t21, %t13
%t27.sum50 = or i32 %t5, 2
%t2849 = getelementptr [128 x float], ptr %lookupTable, i32 0, i32 %t27.sum50
%t29 = load float, ptr %t2849
%t31 = fmul float %t29, %z
%t33 = fadd float %t31, %t23
%t37.sum48 = or i32 %t5, 3
%t3847 = getelementptr [128 x float], ptr %lookupTable, i32 0, i32 %t37.sum48
%t39 = load float, ptr %t3847
%t41 = fmul float %t39, %w
%t43 = fadd float %t41, %t33
ret float %t43
}
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) nocapture, ptr nocapture, i64, i1) nounwind
declare void @llvm.memcpy.p0.p1.i64(ptr nocapture, ptr addrspace(1) nocapture, i64, i1) nounwind
declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture, i64, i1) nounwind
; %T is 124 bytes (1 + 123 x i8) and %U is 20 bytes (5 x i32); these match the
; memcpy lengths used by the tests that copy from @G and @H.
%T = type { i8, [123 x i8] }
%U = type { i32, i32, i32, i32, i32 }
; Constant copy sources: @G holds one %T, @H holds two %U values (element 1
; starts at byte offset 20), and @I is a constant in address space 1.
@G = constant %T {i8 1, [123 x i8] zeroinitializer }
@H = constant [2 x %U] zeroinitializer, align 16
@I = internal addrspace(1) constant [4 x float] zeroinitializer , align 4
; Copy chain @G -> %A -> %B, where only %B escapes (to @bar). The intermediate
; buffer %A is expected to disappear so that %B is filled straight from @G.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[B:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(124) [[B]], ptr noundef nonnull align 16 dereferenceable(124) @G, i64 124, i1 false)
; CHECK-NEXT: call void @bar(ptr nonnull [[B]])
; CHECK-NEXT: ret void
;
%A = alloca %T
%B = alloca %T
; %A alloca is deleted
; use @G instead of %A
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @G, i64 124, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %B, ptr align 4 %A, i64 124, i1 false)
call void @bar(ptr %B)
ret void
}
; Same input as @test2, but the function carries attribute group #0
; (null_pointer_is_valid). The expected output is the same as for @test2.
define void @test2_no_null_opt() #0 {
; CHECK-LABEL: @test2_no_null_opt(
; CHECK-NEXT: [[B:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(124) [[B]], ptr noundef nonnull align 16 dereferenceable(124) @G, i64 124, i1 false)
; CHECK-NEXT: call void @bar(ptr nonnull [[B]])
; CHECK-NEXT: ret void
;
%A = alloca %T
%B = alloca %T
; %A alloca is deleted
; use @G instead of %A
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @G, i64 124, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %B, ptr align 4 %A, i64 124, i1 false)
call void @bar(ptr %B)
ret void
}
; Variant of @test2 in which both allocas are accessed through an
; addrspacecast to addrspace(1). Expected: %A is removed and the remaining
; copy into %B reads from an addrspacecast of @G.
define void @test2_addrspacecast() {
; CHECK-LABEL: @test2_addrspacecast(
; CHECK-NEXT: [[B:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT: [[B_CAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(1)
; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 4 dereferenceable(124) [[B_CAST]], ptr addrspace(1) noundef align 4 dereferenceable(124) addrspacecast (ptr @G to ptr addrspace(1)), i64 124, i1 false)
; CHECK-NEXT: call void @bar_as1(ptr addrspace(1) [[B_CAST]])
; CHECK-NEXT: ret void
;
%A = alloca %T
%B = alloca %T
%a.cast = addrspacecast ptr %A to ptr addrspace(1)
%b.cast = addrspacecast ptr %B to ptr addrspace(1)
; The %A alloca is deleted and @G is used in its place.
; The result does not exactly match what @test2 produces, because folding
; the type cast into the alloca does not work for addrspacecast yet.
call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 %a.cast, ptr align 4 @G, i64 124, i1 false)
call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %b.cast, ptr addrspace(1) align 4 %a.cast, i64 124, i1 false)
call void @bar_as1(ptr addrspace(1) %b.cast)
ret void
}
declare void @bar(ptr)
declare void @bar_as1(ptr addrspace(1))
; The alloca is initialized from constant @G and then only passed to a
; readonly call, so it should be eliminated and @G passed to @bar directly.
define void @test3() {
; CHECK-LABEL: @test3(
; CHECK-NEXT: call void @bar(ptr nonnull @G) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret void
;
%A = alloca %T
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @G, i64 124, i1 false)
call void @bar(ptr %A) readonly
ret void
}
; Like @test3, but the memcpy source is @G addrspacecast to addrspace(1).
; The readonly call is still expected to receive @G directly.
define void @test3_addrspacecast() {
; CHECK-LABEL: @test3_addrspacecast(
; CHECK-NEXT: call void @bar(ptr nonnull @G) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%A = alloca %T
call void @llvm.memcpy.p0.p1.i64(ptr align 4 %A, ptr addrspace(1) align 4 addrspacecast (ptr @G to ptr addrspace(1)), i64 124, i1 false)
call void @bar(ptr %A) readonly
ret void
}
; The stack copy of @G is only passed as a byval argument. Expected: @G is
; passed byval directly and both the alloca and the memcpy are gone.
define void @test4() {
; CHECK-LABEL: @test4(
; CHECK-NEXT: call void @baz(ptr nonnull byval(i8) @G)
; CHECK-NEXT: ret void
;
%A = alloca %T
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @G, i64 124, i1 false)
call void @baz(ptr byval(i8) %A)
ret void
}
declare void @llvm.lifetime.start.p0(ptr)
; Like @test4, with a lifetime.start marker on the alloca before the copy.
; The marker is expected to be removed together with the alloca.
define void @test5() {
; CHECK-LABEL: @test5(
; CHECK-NEXT: call void @baz(ptr nonnull byval(i8) @G)
; CHECK-NEXT: ret void
;
%A = alloca %T
call void @llvm.lifetime.start.p0(ptr %A)
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @G, i64 124, i1 false)
call void @baz(ptr byval(i8) %A)
ret void
}
declare void @baz(ptr byval(i8))
; 20-byte copy from @H (align 16) into an alloca that also has align 16,
; followed by a readonly use. Expected: @bar receives @H directly.
define void @test6() {
; CHECK-LABEL: @test6(
; CHECK-NEXT: call void @bar(ptr nonnull @H) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%A = alloca %U, align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 16 %A, ptr align 16 @H, i64 20, i1 false)
call void @bar(ptr %A) readonly
ret void
}
; Same as @test6 except that the memcpy operands are only annotated with
; align 4. The replacement of the alloca by @H is still expected.
define void @test7() {
; CHECK-LABEL: @test7(
; CHECK-NEXT: call void @bar(ptr nonnull @H) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%A = alloca %U, align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @H, i64 20, i1 false)
call void @bar(ptr %A) readonly
ret void
}
; The copy source is element 1 of @H (byte offset 20) while the alloca
; requires align 16. The alloca and the memcpy are expected to stay.
; NOTE(review): presumably the replacement is blocked because @H + 20 cannot
; provide 16-byte alignment; @test9 has the same input with an align 4 alloca
; and is replaced. Confirm against the pass implementation.
define void @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[AL:%.*]] = alloca [[U:%.*]], align 16
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(20) [[AL]], ptr noundef nonnull align 4 dereferenceable(20) getelementptr inbounds nuw (i8, ptr @H, i64 20), i64 20, i1 false)
; CHECK-NEXT: call void @bar(ptr nonnull [[AL]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%al = alloca %U, align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %al, ptr align 4 getelementptr ([2 x %U], ptr @H, i64 0, i32 1), i64 20, i1 false)
call void @bar(ptr %al) readonly
ret void
}
; Like @test8, but the source pointer (element 1 of @H) is addrspacecast to
; addrspace(1). The align 16 alloca and the memcpy are expected to stay.
; NOTE(review): presumably kept for the same alignment reason as @test8;
; confirm against the pass implementation.
define void @test8_addrspacecast() {
; CHECK-LABEL: @test8_addrspacecast(
; CHECK-NEXT: [[AL:%.*]] = alloca [[U:%.*]], align 16
; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 16 dereferenceable(20) [[AL]], ptr addrspace(1) noundef align 4 dereferenceable(20) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr @H, i64 20) to ptr addrspace(1)), i64 20, i1 false)
; CHECK-NEXT: call void @bar(ptr nonnull [[AL]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%Al = alloca %U, align 16
call void @llvm.memcpy.p0.p1.i64(ptr align 4 %Al, ptr addrspace(1) align 4 addrspacecast (ptr getelementptr ([2 x %U], ptr @H, i64 0, i32 1) to ptr addrspace(1)), i64 20, i1 false)
call void @bar(ptr %Al) readonly
ret void
}
; Same copy source as @test8 (element 1 of @H), but the alloca only requires
; align 4. Expected: the alloca is removed and @bar receives the address of
; @H at byte offset 20 directly.
define void @test9() {
; CHECK-LABEL: @test9(
; CHECK-NEXT: call void @bar(ptr nonnull getelementptr inbounds nuw (i8, ptr @H, i64 20)) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%A = alloca %U, align 4
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 getelementptr ([2 x %U], ptr @H, i64 0, i32 1), i64 20, i1 false)
call void @bar(ptr %A) readonly
ret void
}
; Like @test9, but the source pointer is addrspacecast to addrspace(1).
; Expected: the alloca is removed and @bar receives the address of @H at
; byte offset 20, without the addrspacecast.
define void @test9_addrspacecast() {
; CHECK-LABEL: @test9_addrspacecast(
; CHECK-NEXT: call void @bar(ptr nonnull getelementptr inbounds nuw (i8, ptr @H, i64 20)) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%A = alloca %U, align 4
call void @llvm.memcpy.p0.p1.i64(ptr align 4 %A, ptr addrspace(1) align 4 addrspacecast (ptr getelementptr ([2 x %U], ptr @H, i64 0, i32 1) to ptr addrspace(1)), i64 20, i1 false)
call void @bar(ptr %A) readonly
ret void
}
; @bbb is a writable 1000000-byte global used as a copy destination;
; @_ZL3KKK is a 3-byte constant used as a copy source.
@bbb = local_unnamed_addr global [1000000 x i8] zeroinitializer, align 16
@_ZL3KKK = internal unnamed_addr constant [3 x i8] c"\01\01\02", align 1
; Only the first 3 bytes of the 1000000-byte alloca are initialized from the
; 3-byte constant @_ZL3KKK, but all 1000000 bytes are then copied to @bbb.
; Should not replace alloca with global because of size mismatch.
define void @test9_small_global() {
; CHECK-LABEL: @test9_small_global(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CC:%.*]] = alloca [1000000 x i8], align 16
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(3) [[CC]], ptr noundef nonnull align 16 dereferenceable(3) @_ZL3KKK, i64 3, i1 false)
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000000) @bbb, ptr noundef nonnull align 16 dereferenceable(1000000) [[CC]], i64 1000000, i1 false)
; CHECK-NEXT: ret void
;
entry:
%cc = alloca [1000000 x i8], align 16
call void @llvm.memcpy.p0.p0.i64(ptr %cc, ptr @_ZL3KKK, i64 3, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 16 @bbb, ptr align 16 %cc, i64 1000000, i1 false)
ret void
}
; The alloca and the constant @_ZL3KKK are both [3 x i8], and every copy
; moves exactly 3 bytes.
; Should replace alloca with global as they have exactly the same size.
; Expected: a single memcpy from @_ZL3KKK to @bbb remains.
define void @test10_same_global() {
; CHECK-LABEL: @test10_same_global(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(3) @bbb, ptr noundef nonnull align 16 dereferenceable(3) @_ZL3KKK, i64 3, i1 false)
; CHECK-NEXT: ret void
;
entry:
%cc = alloca [3 x i8], align 1
call void @llvm.memcpy.p0.p0.i64(ptr %cc, ptr @_ZL3KKK, i64 3, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr @bbb, ptr %cc, i64 3, i1 false)
ret void
}
; Should replace alloca with global even when the global is in a different
; address space. @I is zeroinitializer, so the variable-index load is
; expected to fold all the way to the constant 0.0.
define float @test11(i64 %i) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret float 0.000000e+00
;
entry:
%a = alloca [4 x float], align 4
call void @llvm.lifetime.start.p0(ptr %a)
call void @llvm.memcpy.p0.p1.i64(ptr align 4 %a, ptr addrspace(1) align 4 @I, i64 16, i1 false)
%g = getelementptr inbounds [4 x float], ptr %a, i64 0, i64 %i
%r = load float, ptr %g, align 4
ret float %r
}
; Same input as @test11 except that the memcpy is volatile (last argument is
; i1 true). If the memcpy is volatile, it should not be removed: the alloca,
; the lifetime marker, the copy and the load are all expected to stay.
define float @test11_volatile(i64 %i) {
; CHECK-LABEL: @test11_volatile(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca [4 x float], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[A]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr align 4 [[A]], ptr addrspace(1) align 4 @I, i64 16, i1 true)
; CHECK-NEXT: [[G:%.*]] = getelementptr inbounds [4 x float], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT: [[R:%.*]] = load float, ptr [[G]], align 4
; CHECK-NEXT: ret float [[R]]
;
entry:
%a = alloca [4 x float], align 4
call void @llvm.lifetime.start.p0(ptr %a)
call void @llvm.memcpy.p0.p1.i64(ptr align 4 %a, ptr addrspace(1) align 4 @I, i64 16, i1 true)
%g = getelementptr inbounds [4 x float], ptr %a, i64 0, i64 %i
%r = load float, ptr %g, align 4
ret float %r
}
; Tests that we can eliminate allocas copied from readonly noalias pointers.
; Here the copy source is the function argument %arg rather than a constant
; global; the readonly call is expected to receive %arg directly.
define void @memcpy_from_readonly_noalias(ptr readonly noalias align 8 dereferenceable(124) %arg) {
; CHECK-LABEL: @memcpy_from_readonly_noalias(
; CHECK-NEXT: call void @bar(ptr nonnull [[ARG:%.*]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%alloca = alloca %T, align 8
call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr %arg, i64 124, i1 false)
call void @bar(ptr %alloca) readonly
ret void
}
; Tests that we don't eliminate allocas copied from readonly pointers without
; noalias. The input differs from @memcpy_from_readonly_noalias only in the
; missing noalias on %arg; the alloca and the memcpy are expected to stay.
define void @memcpy_from_just_readonly(ptr readonly align 8 dereferenceable(124) %arg) {
; CHECK-LABEL: @memcpy_from_just_readonly(
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(124) [[ALLOCA]], ptr noundef nonnull align 8 dereferenceable(124) [[ARG:%.*]], i64 124, i1 false)
; CHECK-NEXT: call void @bar(ptr nonnull [[ALLOCA]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%alloca = alloca %T, align 8
call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr %arg, i64 124, i1 false)
call void @bar(ptr %alloca) readonly
ret void
}
; Test that we don't elide a volatile memcpy. The input matches @test7 except
; for the volatile flag (i1 true); the alloca and the copy are expected to
; stay, with the memcpy call left unchanged.
define void @volatile_memcpy() {
; CHECK-LABEL: @volatile_memcpy(
; CHECK-NEXT: [[A:%.*]] = alloca [[U:%.*]], align 16
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 @H, i64 20, i1 true)
; CHECK-NEXT: call void @bar(ptr nonnull [[A]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
%A = alloca %U, align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @H, i64 20, i1 true)
call void @bar(ptr %A) readonly
ret void
}
; Test that we can elide a memcpy when copying a constant value onto the stack
; and then forwarding it by readonly nocapture reference. Unlike the tests
; that mark the whole call readonly, the attributes here sit on the argument.
; Expected: @bar receives @H directly.
define void @memcpy_to_nocapture_readonly() {
; CHECK-LABEL: @memcpy_to_nocapture_readonly(
; CHECK-NEXT: call void @bar(ptr nonnull readonly captures(none) @H)
; CHECK-NEXT: ret void
;
%A = alloca %U, align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @H, i64 20, i1 false)
call void @bar(ptr nocapture readonly %A)
ret void
}
; Test that we don't elide the memcpy when copying a constant value onto the
; stack and then forwarding it by readonly, but capturing, reference. The
; input differs from @memcpy_to_nocapture_readonly only in the missing
; nocapture on the argument; the alloca and the copy are expected to stay.
define void @memcpy_to_capturing_readonly() {
; CHECK-LABEL: @memcpy_to_capturing_readonly(
; CHECK-NEXT: [[A:%.*]] = alloca [[U:%.*]], align 16
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(20) [[A]], ptr noundef nonnull align 16 dereferenceable(20) @H, i64 20, i1 false)
; CHECK-NEXT: call void @bar(ptr nonnull readonly [[A]])
; CHECK-NEXT: ret void
;
%A = alloca %U, align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @H, i64 20, i1 false)
call void @bar(ptr readonly %A)
ret void
}
; Test that we don't elide the memcpy when copying a constant value onto the
; stack and then forwarding it by read-write, nocapture reference, even if it's
; also forwarded by readonly nocapture reference to the same function.
; %A is passed twice to @two_params: once readonly and once without readonly.
; Expected: the alloca and the copy stay.
define void @memcpy_to_aliased_nocapture_readonly() {
; CHECK-LABEL: @memcpy_to_aliased_nocapture_readonly(
; CHECK-NEXT: [[A:%.*]] = alloca [[U:%.*]], align 16
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(20) [[A]], ptr noundef nonnull align 16 dereferenceable(20) @H, i64 20, i1 false)
; CHECK-NEXT: call void @two_params(ptr nonnull readonly captures(none) [[A]], ptr nonnull captures(none) [[A]])
; CHECK-NEXT: ret void
;
%A = alloca %U, align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @H, i64 20, i1 false)
call void @two_params(ptr nocapture readonly %A, ptr nocapture %A)
ret void
}
declare void @two_params(ptr nocapture readonly, ptr nocapture)
attributes #0 = { null_pointer_is_valid }