
Previously, the value created to represent the uninitialized memory of the alloca was undef; use freeze poison instead. This enables some optimization improvements (which need to be defeated in the limit tests), but it also causes a few regressions. It also seems to leave dead code behind in some cases.
44 lines
2.1 KiB
LLVM
44 lines
2.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca-to-vector-limit=128 -passes=amdgpu-promote-alloca-to-vector %s -o - | FileCheck %s

; Check that when we see an alloca that's too big to vectorize given the remaining budget,
; we don't give up and we keep looking for other allocas to vectorize.

; Two addrspace(5) allocas are present in this kernel:
;   %simpleuser - [4 x i64], whose only user is a single store.
;   %manyusers  - [64 x i64], accessed through two byte-offset GEPs, each of
;                 which feeds one load and one store.
; The expected output keeps %manyusers as an alloca and rewrites %simpleuser
; into a <4 x i64> value built from `freeze poison` plus an insertelement.
define amdgpu_kernel void @simple_users_scores() {
; CHECK-LABEL: define amdgpu_kernel void @simple_users_scores(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[MANYUSERS:%.*]] = alloca [64 x i64], align 4, addrspace(5)
; CHECK-NEXT:    [[SIMPLEUSER:%.*]] = freeze <4 x i64> poison
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> [[SIMPLEUSER]], i64 42, i32 0
; CHECK-NEXT:    [[MANYUSERS_1:%.*]] = getelementptr i8, ptr addrspace(5) [[MANYUSERS]], i64 2
; CHECK-NEXT:    [[V0:%.*]] = load i8, ptr addrspace(5) [[MANYUSERS_1]], align 1
; CHECK-NEXT:    [[V0_EXT:%.*]] = zext i8 [[V0]] to i64
; CHECK-NEXT:    store i64 [[V0_EXT]], ptr addrspace(5) [[MANYUSERS_1]], align 8
; CHECK-NEXT:    [[MANYUSERS_2:%.*]] = getelementptr i8, ptr addrspace(5) [[MANYUSERS]], i64 1
; CHECK-NEXT:    [[V1:%.*]] = load i8, ptr addrspace(5) [[MANYUSERS_2]], align 1
; CHECK-NEXT:    [[V1_EXT:%.*]] = zext i8 [[V1]] to i64
; CHECK-NEXT:    store i64 [[V1_EXT]], ptr addrspace(5) [[MANYUSERS_2]], align 8
; CHECK-NEXT:    ret void
;
entry:
  ; should get a score of 1
  %simpleuser = alloca [4 x i64], align 4, addrspace(5)
  ; should get a score of 4 and be visited first.
  %manyusers = alloca [64 x i64], align 4, addrspace(5)

  ; The only use of %simpleuser.
  store i64 42, ptr addrspace(5) %simpleuser

  ; First load/extend/store group on %manyusers (byte offset 2).
  %manyusers.1 = getelementptr i8, ptr addrspace(5) %manyusers, i64 2
  %v0 = load i8, ptr addrspace(5) %manyusers.1
  %v0.ext = zext i8 %v0 to i64
  store i64 %v0.ext, ptr addrspace(5) %manyusers.1

  ; Second load/extend/store group on %manyusers (byte offset 1). The extend
  ; consumes %v1, the byte loaded just above, so the load is not left dead.
  %manyusers.2 = getelementptr i8, ptr addrspace(5) %manyusers, i64 1
  %v1 = load i8, ptr addrspace(5) %manyusers.2
  %v1.ext = zext i8 %v1 to i64
  store i64 %v1.ext, ptr addrspace(5) %manyusers.2

  ret void
}