llvm-project/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
Matt Arsenault c5fe075eaf
AMDGPU: Use freeze poison instead of undef in alloca promotion (#131285)
Previously the value created to represent the uninitialized memory
of the alloca was undef. Use freeze poison instead. Enables some
optimization improvements (which need defeating in the limit tests),
but also a few regressions. Seems to leave behind dead code in some
cases too.
2025-03-18 17:27:02 +07:00

142 lines
5.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
; Checks that memsets don't block PromoteAlloca.
; A non-volatile memset of zero over the whole 48-byte [6 x i64] alloca must
; not prevent promotion. The expected output replaces the alloca with a frozen
; poison <6 x i64> and uses zeroinitializer as the base vector that the two
; stores of %val (elements 0 and 1) are inserted into.
define amdgpu_kernel void @memset_all_zero(i64 %val) {
; CHECK-LABEL: @memset_all_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK:%.*]] = freeze <6 x i64> poison
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <6 x i64> zeroinitializer, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i32 1
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [6 x i64], align 4, addrspace(5)
; Fill all six elements (6 * 8 = 48 bytes) with zero.
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 48, i1 false)
store i64 %val, ptr addrspace(5) %stack
; %reload has no users; no instruction for it appears in the expected output.
%reload = load i64, ptr addrspace(5) %stack
%stack.1 = getelementptr [6 x i64], ptr addrspace(5) %stack, i64 0, i64 1
store i64 %val, ptr addrspace(5) %stack.1
ret void
}
; A non-volatile memset with a non-zero fill byte (5) over the whole 32-byte
; [4 x i64] alloca must not prevent promotion. Each i64 element becomes
; 0x0505050505050505 (decimal 361700864190383365), so the expected base vector
; is a splat of that value, with %val inserted at elements 0 and 1.
define amdgpu_kernel void @memset_all_5(i64 %val) {
; CHECK-LABEL: @memset_all_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i64> poison
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> splat (i64 361700864190383365), i64 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
; Fill all four elements (4 * 8 = 32 bytes) with the byte value 5.
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 5, i64 32, i1 false)
store i64 %val, ptr addrspace(5) %stack
; %reload has no users; no instruction for it appears in the expected output.
%reload = load i64, ptr addrspace(5) %stack
; The GEP source element type matches the [4 x i64] allocation; it addresses
; element 1 (byte offset 8).
%stack.1 = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
store i64 %val, ptr addrspace(5) %stack.1
ret void
}
; Negative test: the memset has its volatile flag set (last argument is
; i1 true). The expected output keeps the alloca, the memset and the store
; unchanged, i.e. no promotion takes place.
define amdgpu_kernel void @memset_volatile_nopromote(i64 %val) {
; CHECK-LABEL: @memset_volatile_nopromote(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 32, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
; Covers the whole 32-byte alloca, but is volatile.
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 32, i1 true)
store i64 %val, ptr addrspace(5) %stack
ret void
}
; Negative test: the memset length (31 bytes) does not cover the whole 32-byte
; [4 x i64] alloca. The expected output keeps the alloca, the memset and the
; store unchanged.
; NOTE(review): the memset is also volatile (i1 true), so this test alone does
; not show whether the size or the volatile flag is what blocks promotion.
; Confirm whether the flag was meant to be false before changing it; the
; expected output would need regenerating.
define amdgpu_kernel void @memset_badsize_nopromote(i64 %val) {
; CHECK-LABEL: @memset_badsize_nopromote(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 31, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
; One byte short of the full allocation size.
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 31, i1 true)
store i64 %val, ptr addrspace(5) %stack
ret void
}
; Negative test: the memset destination is a GEP to element 1 of the alloca
; rather than the alloca itself, and it fills the remaining 24 bytes. The
; expected output keeps the alloca, the GEP, the memset and the store
; unchanged.
; NOTE(review): the memset is also volatile (i1 true), so this test alone does
; not show whether the offset destination or the volatile flag is what blocks
; promotion. Confirm intent before changing it.
define amdgpu_kernel void @memset_offset_ptr_nopromote(i64 %val) {
; CHECK-LABEL: @memset_offset_ptr_nopromote(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i64], ptr addrspace(5) [[STACK]], i64 0, i64 1
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[GEP]], i8 0, i64 24, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
%gep = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
; Fills elements 1..3 (3 * 8 = 24 bytes), starting 8 bytes into the alloca.
call void @llvm.memset.p5.i64(ptr addrspace(5) %gep, i8 0, i64 24, i1 true)
store i64 %val, ptr addrspace(5) %stack
ret void
}
; A zero memset over a 48-byte array of six pointers, followed by an i64 load
; from the start of the alloca. The expected output replaces the alloca with a
; frozen poison <6 x ptr> and stores the constant i64 0 to %out.
define amdgpu_kernel void @memset_array_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_array_ptr_alloca(
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <6 x ptr> poison
; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT: ret void
;
%alloca = alloca [6 x ptr], align 16, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
; The load type is i64 although the alloca's element type is ptr.
%load = load i64, ptr addrspace(5) %alloca
store i64 %load, ptr %out
ret void
}
; Same scenario as the array-of-pointers case, but the alloca type is already
; a vector, <6 x ptr>. The expected output replaces the alloca with a frozen
; poison <6 x ptr> and stores the constant i64 0 to %out.
define amdgpu_kernel void @memset_vector_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_vector_ptr_alloca(
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <6 x ptr> poison
; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT: ret void
;
%alloca = alloca <6 x ptr>, align 16, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
; The load type is i64 although the alloca's element type is ptr.
%load = load i64, ptr addrspace(5) %alloca
store i64 %load, ptr %out
ret void
}
; Zero memset over a nested array alloca, [2 x [3 x ptr]] (48 bytes). The
; expected output uses a flat frozen poison <6 x ptr> in place of the alloca
; and stores the constant i64 0 to %out.
define amdgpu_kernel void @memset_array_of_array_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_array_of_array_ptr_alloca(
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <6 x ptr> poison
; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT: ret void
;
%alloca = alloca [2 x [3 x ptr]], align 16, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
; The load type is i64 although the alloca's element type is ptr.
%load = load i64, ptr addrspace(5) %alloca
store i64 %load, ptr %out
ret void
}
; Zero memset over an array of two <3 x ptr> vectors. The memset length here
; is 64 bytes and the expected promoted type is <8 x ptr>, not <6 x ptr>.
; NOTE(review): presumably each <3 x ptr> element occupies 32 bytes (padded to
; four pointers) in the alloca; verify against the target data layout.
define amdgpu_kernel void @memset_array_of_vec_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_array_of_vec_ptr_alloca(
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x ptr> poison
; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT: ret void
;
%alloca = alloca [2 x <3 x ptr>], align 16, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 64, i1 false)
; The load type is i64 although the alloca's element type is ptr.
%load = load i64, ptr addrspace(5) %alloca
store i64 %load, ptr %out
ret void
}
; memset intrinsic used by every test above: destination pointer in
; addrspace(5), i8 fill value, i64 byte length, and an immediate i1 flag
; (the tests above pass true for a volatile memset).
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)