
Previously, the value created to represent the uninitialized memory of the alloca was undef; use freeze poison instead. This enables some optimization improvements (which need defeating in the limit tests), but it also causes a few regressions, and it seems to leave dead code behind in some cases.
142 lines
5.5 KiB
LLVM
142 lines
5.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
|
|
|
|
; Checks that memsets don't block PromoteAlloca.
|
|
|
|
; Full-size, non-volatile zero memset over a [6 x i64] addrspace(5) alloca.
; The expected output contains no alloca: the array is represented as a
; <6 x i64> value, the memset shows up as zeroinitializer, and the two stores
; become insertelement instructions at indices 0 and 1. %reload is never used
; and does not appear in the expected output.
define amdgpu_kernel void @memset_all_zero(i64 %val) {
; CHECK-LABEL: @memset_all_zero(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STACK:%.*]] = freeze <6 x i64> poison
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <6 x i64> zeroinitializer, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i32 1
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [6 x i64], align 4, addrspace(5)
  ; 48 bytes = 6 * 8, i.e. the memset covers the whole alloca.
  call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 48, i1 false)
  store i64 %val, ptr addrspace(5) %stack
  %reload = load i64, ptr addrspace(5) %stack
  %stack.1 = getelementptr [6 x i64], ptr addrspace(5) %stack, i64 0, i64 1
  store i64 %val, ptr addrspace(5) %stack.1
  ret void
}
|
|
|
|
; Full-size, non-volatile memset with a non-zero byte (5) over a [4 x i64]
; addrspace(5) alloca. The expected output contains no alloca: the memset
; shows up as a <4 x i64> splat of 361700864190383365 (0x0505050505050505,
; the byte 5 repeated across an i64), and the two stores become insertelement
; instructions at indices 0 and 1. %reload is never used and does not appear
; in the expected output.
define amdgpu_kernel void @memset_all_5(i64 %val) {
; CHECK-LABEL: @memset_all_5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STACK:%.*]] = freeze <4 x i64> poison
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> splat (i64 361700864190383365), i64 [[VAL:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [4 x i64], align 4, addrspace(5)
  ; 32 bytes = 4 * 8, i.e. the memset covers the whole alloca.
  call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 5, i64 32, i1 false)
  store i64 %val, ptr addrspace(5) %stack
  %reload = load i64, ptr addrspace(5) %stack
  ; The GEP source type matches the alloca type ([4 x i64]); element 1 is at
  ; byte offset 8.
  %stack.1 = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
  store i64 %val, ptr addrspace(5) %stack.1
  ret void
}
|
|
|
|
; Negative test: the memset covers the whole [4 x i64] alloca (32 bytes) and
; starts at its base, but it is volatile (last operand is i1 true). The
; expected output keeps the alloca, the memset call and the store unchanged.
define amdgpu_kernel void @memset_volatile_nopromote(i64 %val) {
; CHECK-LABEL: @memset_volatile_nopromote(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT:    call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 32, i1 true)
; CHECK-NEXT:    store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [4 x i64], align 4, addrspace(5)
  call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 32, i1 true)
  store i64 %val, ptr addrspace(5) %stack
  ret void
}
|
|
|
|
; Negative test: the memset length (31) is one byte short of the 32-byte
; [4 x i64] alloca. The expected output keeps the alloca, the memset call and
; the store unchanged.
; NOTE(review): the memset here is also volatile (i1 true), and
; @memset_volatile_nopromote shows a volatile memset is left alone even with a
; full-size length, so this test may not isolate the size condition. Switching
; the flag to false would require regenerating the assertions - confirm
; whether that is intended before changing it.
define amdgpu_kernel void @memset_badsize_nopromote(i64 %val) {
; CHECK-LABEL: @memset_badsize_nopromote(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT:    call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 31, i1 true)
; CHECK-NEXT:    store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [4 x i64], align 4, addrspace(5)
  call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 31, i1 true)
  store i64 %val, ptr addrspace(5) %stack
  ret void
}
|
|
|
|
; Negative test: the memset destination is a GEP to element 1 of the
; [4 x i64] alloca rather than the alloca itself, and it covers the remaining
; 24 bytes (3 * 8). The expected output keeps the alloca, the GEP, the memset
; call and the store unchanged.
; NOTE(review): the memset here is also volatile (i1 true), and
; @memset_volatile_nopromote shows a volatile memset is left alone even when it
; starts at the alloca base, so this test may not isolate the offset
; condition. Switching the flag to false would require regenerating the
; assertions - confirm whether that is intended before changing it.
define amdgpu_kernel void @memset_offset_ptr_nopromote(i64 %val) {
; CHECK-LABEL: @memset_offset_ptr_nopromote(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [4 x i64], ptr addrspace(5) [[STACK]], i64 0, i64 1
; CHECK-NEXT:    call void @llvm.memset.p5.i64(ptr addrspace(5) [[GEP]], i8 0, i64 24, i1 true)
; CHECK-NEXT:    store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [4 x i64], align 4, addrspace(5)
  %gep = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
  call void @llvm.memset.p5.i64(ptr addrspace(5) %gep, i8 0, i64 24, i1 true)
  store i64 %val, ptr addrspace(5) %stack
  ret void
}
|
|
|
|
; Zero memset over a [6 x ptr] addrspace(5) alloca (48 bytes), followed by an
; i64 load from the alloca whose result is stored to %out. The expected output
; contains no alloca or memset: the array is represented as a <6 x ptr> value
; and the stored value is the constant i64 0.
define amdgpu_kernel void @memset_array_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_array_ptr_alloca(
; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <6 x ptr> poison
; CHECK-NEXT:    store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %alloca = alloca [6 x ptr], align 16, addrspace(5)
  call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
  %load = load i64, ptr addrspace(5) %alloca
  store i64 %load, ptr %out
  ret void
}
|
|
|
|
; Same shape as @memset_array_ptr_alloca, but the allocated type is already a
; vector (<6 x ptr>). A 48-byte zero memset is followed by an i64 load whose
; result is stored to %out. The expected output contains no alloca or memset
; and stores the constant i64 0.
define amdgpu_kernel void @memset_vector_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_vector_ptr_alloca(
; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <6 x ptr> poison
; CHECK-NEXT:    store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %alloca = alloca <6 x ptr>, align 16, addrspace(5)
  call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
  %load = load i64, ptr addrspace(5) %alloca
  store i64 %load, ptr %out
  ret void
}
|
|
|
|
; Nested-array variant: the allocated type is [2 x [3 x ptr]]. A 48-byte zero
; memset is followed by an i64 load whose result is stored to %out. The
; expected output represents the alloca as a flat <6 x ptr> value, contains no
; alloca or memset, and stores the constant i64 0.
define amdgpu_kernel void @memset_array_of_array_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_array_of_array_ptr_alloca(
; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <6 x ptr> poison
; CHECK-NEXT:    store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %alloca = alloca [2 x [3 x ptr]], align 16, addrspace(5)
  call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
  %load = load i64, ptr addrspace(5) %alloca
  store i64 %load, ptr %out
  ret void
}
|
|
|
|
; Array-of-vectors variant: the allocated type is [2 x <3 x ptr>]. A zero
; memset is followed by an i64 load whose result is stored to %out. The
; expected output represents the alloca as an <8 x ptr> value (not 6
; elements), contains no alloca or memset, and stores the constant i64 0.
define amdgpu_kernel void @memset_array_of_vec_ptr_alloca(ptr %out) {
; CHECK-LABEL: @memset_array_of_vec_ptr_alloca(
; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <8 x ptr> poison
; CHECK-NEXT:    store i64 0, ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %alloca = alloca [2 x <3 x ptr>], align 16, addrspace(5)
  ; NOTE(review): the length is 64 rather than 48, presumably because each
  ; <3 x ptr> element is padded to 32 bytes - confirm against the data layout.
  call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 64, i1 false)
  %load = load i64, ptr addrspace(5) %alloca
  store i64 %load, ptr %out
  ret void
}
|
|
|
|
; memset intrinsic used by every test above. Operands: destination pointer in
; addrspace(5), i8 fill byte, i64 length in bytes, i1 volatile flag (immarg).
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)
|