[AMDGPULowerLDS] Avoid unnecessary ptrtoint/inttoptr roundtrip (#181671)

Store pointers instead of integers in the table, and load them as
pointers.
This commit is contained in:
Nikita Popov 2026-02-17 09:28:01 +01:00 committed by GitHub
parent 6f0759d568
commit 5933294bb1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 88 additions and 128 deletions

View File

@ -316,20 +316,17 @@ public:
// Create a ConstantArray containing the address of each Variable within the
// kernel corresponding to LDSVarsToConstantGEP, or poison if that kernel
// does not allocate it
// TODO: Drop the ptrtoint conversion
Type *I32 = Type::getInt32Ty(Ctx);
ArrayType *KernelOffsetsType = ArrayType::get(I32, Variables.size());
Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS);
ArrayType *KernelOffsetsType = ArrayType::get(LocalPtrTy, Variables.size());
SmallVector<Constant *> Elements;
for (GlobalVariable *GV : Variables) {
auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);
if (ConstantGepIt != LDSVarsToConstantGEP.end()) {
auto *elt = ConstantExpr::getPtrToInt(ConstantGepIt->second, I32);
Elements.push_back(elt);
Elements.push_back(ConstantGepIt->second);
} else {
Elements.push_back(PoisonValue::get(I32));
Elements.push_back(PoisonValue::get(LocalPtrTy));
}
}
return ConstantArray::get(KernelOffsetsType, Elements);
@ -347,8 +344,8 @@ public:
const size_t NumberVariables = Variables.size();
const size_t NumberKernels = kernels.size();
ArrayType *KernelOffsetsType =
ArrayType::get(Type::getInt32Ty(Ctx), NumberVariables);
Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS);
ArrayType *KernelOffsetsType = ArrayType::get(LocalPtrTy, NumberVariables);
ArrayType *AllKernelsOffsetsType =
ArrayType::get(KernelOffsetsType, NumberKernels);
@ -401,12 +398,8 @@ public:
Value *Address = Builder.CreateInBoundsGEP(
LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());
Value *loaded = Builder.CreateLoad(I32, Address);
Value *replacement =
Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName());
U.set(replacement);
Value *Loaded = Builder.CreateLoad(GV->getType(), Address);
U.set(Loaded);
}
void replaceUsesInInstructionsWithTableLookup(
@ -868,7 +861,7 @@ public:
if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {
LLVMContext &Ctx = M.getContext();
IRBuilder<> Builder(Ctx);
Type *I32 = Type::getInt32Ty(Ctx);
Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS);
std::vector<Constant *> newDynamicLDS;
@ -888,14 +881,14 @@ public:
markUsedByKernel(func, N);
newDynamicLDS.push_back(ConstantExpr::getPtrToInt(N, I32));
newDynamicLDS.push_back(N);
} else {
newDynamicLDS.push_back(PoisonValue::get(I32));
newDynamicLDS.push_back(PoisonValue::get(LocalPtrTy));
}
}
assert(OrderedKernels.size() == newDynamicLDS.size());
ArrayType *t = ArrayType::get(I32, newDynamicLDS.size());
ArrayType *t = ArrayType::get(LocalPtrTy, newDynamicLDS.size());
Constant *init = ConstantArray::get(t, newDynamicLDS);
GlobalVariable *table = new GlobalVariable(
M, t, true, GlobalValue::InternalLinkage, init,

View File

@ -64,9 +64,8 @@ define private void @call_store_A() {
define private void @store_A() {
; CHECK-LABEL: define private void @store_A() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[A1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[A]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr
; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8
; CHECK-NEXT: ret void
@ -78,9 +77,8 @@ define private void @store_A() {
define private ptr @get_B_ptr() {
; CHECK-LABEL: define private ptr @get_B_ptr() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4
; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[B1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[B]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr
; CHECK-NEXT: ret ptr [[TMP3]]
;

View File

@ -24,7 +24,7 @@
; CHECK: @llvm.amdgcn.expect_align4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1:![0-9]+]]
; CHECK: @llvm.amdgcn.expect_align8.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.expect_max_of_2_and_4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1]]
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds to i32)]
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds]
;.
define amdgpu_kernel void @kernel_only() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_only() {
@ -41,9 +41,8 @@ define amdgpu_kernel void @kernel_only() {
define void @use_shared1() {
; CHECK-LABEL: define void @use_shared1() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4
; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr addrspace(3) [[DYNAMIC_SHARED11]], i32 0, i32 1
; CHECK-NEXT: store i8 0, ptr addrspace(3) [[ARRAYIDX]], align 1
; CHECK-NEXT: ret void
@ -57,9 +56,8 @@ define void @use_shared2() #0 {
; CHECK-LABEL: define void @use_shared2(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4
; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i16], ptr addrspace(3) [[DYNAMIC_SHARED21]], i32 0, i32 3
; CHECK-NEXT: store i16 1, ptr addrspace(3) [[ARRAYIDX]], align 2
; CHECK-NEXT: ret void
@ -76,9 +74,8 @@ define void @use_shared4() #0 {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: store i32 4, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4
; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4
; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr addrspace(3) [[DYNAMIC_SHARED41]], i32 0, i32 5
; CHECK-NEXT: store i32 2, ptr addrspace(3) [[ARRAYIDX]], align 4
; CHECK-NEXT: ret void
@ -93,9 +90,8 @@ define void @use_shared8() #0 {
; CHECK-LABEL: define void @use_shared8(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4
; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) [[DYNAMIC_SHARED81]], i32 0, i32 7
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8
; CHECK-NEXT: ret void

View File

@ -25,16 +25,14 @@
; CHECK: @llvm.amdgcn.kernel.kern_one.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_one.lds.t poison, align 4, !absolute_symbol [[META1:![0-9]+]]
; CHECK: @llvm.amdgcn.kernel.kern_two.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_two.lds.t poison, align 4, !absolute_symbol [[META1]]
; CHECK: @llvm.amdgcn.kernel.kern_block_direct_allocation.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_block_direct_allocation.lds.t poison, align 4, !absolute_symbol [[META1]]
;.
define void @func_one() {
; CHECK-LABEL: define {{[^@]+}}@func_one() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2:![0-9]+]]
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[ONE1]], align 4
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[ONE]], align 4
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TMP2]], align 4
; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11:![0-9]+]]
; CHECK-NEXT: ret void
;
@ -62,10 +60,9 @@ define void @func_two() {
; CHECK-LABEL: define {{[^@]+}}@func_two() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
; CHECK-NEXT: [[TWO1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TWO1]], align 4
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[TWO]], align 4
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TMP2]], align 4
; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]
; CHECK-NEXT: ret void
;
@ -97,14 +94,12 @@ entry:
define void @func_block_direct_allocation() {
; CHECK-LABEL: define {{[^@]+}}@func_block_direct_allocation() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[ONE1]], align 4
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
; CHECK-NEXT: [[TWO2:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TWO2]], align 4
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[ONE]], align 4
; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[TMP2]], align 4
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[TWO]], align 4
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]

View File

@ -48,14 +48,12 @@ define void @f0() {
;
; TABLE-LABEL: @f0(
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; TABLE-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; TABLE-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; TABLE-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4
; TABLE-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
; TABLE-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; TABLE-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4
; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
; TABLE-NEXT: ret void
;

View File

@ -35,14 +35,12 @@ define void @f0() {
;
; TABLE-LABEL: @f0(
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[F0_LDS2]], align 4
; TABLE-NEXT: [[F0_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; TABLE-NEXT: [[F0_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[F0_LDS2]], align 4
; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[F0_LDS3]], align 2
; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[F0_LDS]], align 4
; TABLE-NEXT: [[F0_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; TABLE-NEXT: [[F0_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[F0_LDS]], align 4
; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[F0_LDS1]], align 2
; TABLE-NEXT: ret void
;
@ -90,14 +88,12 @@ define void @f_both() {
;
; TABLE-LABEL: @f_both(
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS2]], align 4
; TABLE-NEXT: [[BOTH_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[BOTH_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[BOTH_LDS2]], align 4
; TABLE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) [[BOTH_LDS3]], align 4
; TABLE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS]], align 4
; TABLE-NEXT: [[BOTH_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[BOTH_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[BOTH_LDS]], align 4
; TABLE-NEXT: store i32 [[MUL]], ptr addrspace(3) [[BOTH_LDS1]], align 4
; TABLE-NEXT: ret void
;

View File

@ -11,7 +11,7 @@
@v3 = addrspace(3) global i8 poison
@unused = addrspace(3) global i16 poison
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]]
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x ptr addrspace(3)]] [[1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds], [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds]]
define void @f0() {
; OPT-LABEL: @f0(
@ -64,14 +64,12 @@ define void @f1() {
define void @f2() {
; OPT-LABEL: @f2(
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[V23:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[V21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
; OPT-NEXT: ret void
;

View File

@ -23,20 +23,18 @@
; The kernel naming pattern and the structs being named after the functions helps verify placement of poison
; The remainder are constant expressions into the variable instances checked above
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x i32]] [[4 x i32] [i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2) to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds to i32), i32 poison, i32 poison], [4 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3) to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1) to i32)], [4 x i32] [i32 poison, i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1) to i32)]]
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x ptr addrspace(3)]] [[4 x ptr addrspace(3)] [ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2), ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, ptr addrspace(3) poison, ptr addrspace(3) poison], [4 x ptr addrspace(3)] [ptr addrspace(3) poison, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3), ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1)], [4 x ptr addrspace(3)] [ptr addrspace(3) poison, ptr addrspace(3) poison, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1)]]
define void @f0() {
; OPT-LABEL: define void @f0() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V02:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V02]], align 4
; OPT-NEXT: [[V03:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[V03:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V1]], align 4
; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) [[V03]], align 4
; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00
; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V0]], align 4
; OPT-NEXT: [[V01:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[V01:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V0]], align 4
; OPT-NEXT: store float [[MUL]], ptr addrspace(3) [[V01]], align 4
; OPT-NEXT: ret void
;
@ -70,14 +68,12 @@ define void @f0() {
define void @f1() {
; OPT-LABEL: define void @f1() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V12]], align 4
; OPT-NEXT: [[V13:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; OPT-NEXT: [[V13:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V12]], align 4
; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[V13]], align 2
; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V1]], align 4
; OPT-NEXT: [[V11:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; OPT-NEXT: [[V11:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V1]], align 4
; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) [[V11]], align 2
; OPT-NEXT: ret void
;
@ -111,14 +107,12 @@ define void @f1() {
define void @f2() {
; OPT-LABEL: define void @f2() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[V23:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[V21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
; OPT-NEXT: ret void
;
@ -152,14 +146,12 @@ define void @f2() {
define void @f3() {
; OPT-LABEL: define void @f3() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V32]], align 4
; OPT-NEXT: [[V33:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
; OPT-NEXT: [[V33:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V32]], align 4
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) [[V33]], align 1
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5
; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V3]], align 4
; OPT-NEXT: [[V31:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
; OPT-NEXT: [[V31:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V3]], align 4
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) [[V31]], align 1
; OPT-NEXT: ret void
;
@ -217,7 +209,7 @@ define amdgpu_kernel void @kernel_no_table() {
; Access two variables, will allocate those two
define amdgpu_kernel void @k01() {
; OPT-LABEL: define amdgpu_kernel void @k01(
; OPT-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; OPT-SAME: ) #[[ATTR0]] {{.*}}.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]]
; OPT-NEXT: call void @f0()
; OPT-NEXT: call void @f1()
@ -268,7 +260,7 @@ define amdgpu_kernel void @k01() {
define amdgpu_kernel void @k23() {
; OPT-LABEL: define amdgpu_kernel void @k23(
; OPT-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META8:![0-9]+]] {
; OPT-SAME: ) #[[ATTR1:[0-9]+]] {{.*}}.amdgcn.lds.kernel.id [[META8:![0-9]+]] {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
@ -319,7 +311,7 @@ define amdgpu_kernel void @k23() {
; Access and allocate three variables
define amdgpu_kernel void @k123() {
; OPT-LABEL: define amdgpu_kernel void @k123(
; OPT-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META14:![0-9]+]] {
; OPT-SAME: ) #[[ATTR1]] {{.*}}.amdgcn.lds.kernel.id [[META14:![0-9]+]] {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META15:![0-9]+]], !noalias [[META18:![0-9]+]]
; OPT-NEXT: call void @f1()
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope [[META21:![0-9]+]], !noalias [[META22:![0-9]+]]

View File

@ -8,15 +8,14 @@
;.
; CHECK: @llvm.amdgcn.kernelA.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds to i32)]
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds]
;.
define void @fn(float %val, i32 %idx) #0 {
; CHECK-LABEL: define void @fn(
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[VAR0]], align 4
; CHECK-NEXT: [[VAR01:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[VAR01:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[VAR0]], align 4
; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr addrspace(3) [[VAR01]], i32 [[IDX]]
; CHECK-NEXT: store float [[VAL]], ptr addrspace(3) [[PTR]], align 4
; CHECK-NEXT: ret void

View File

@ -12,20 +12,18 @@
; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]]
; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x ptr addrspace(3)]]
;.
define internal void @lds_use_through_indirect() {
; CHECK-LABEL: define internal void @lds_use_through_indirect(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4
; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
; CHECK-NEXT: ret void
;
@ -71,14 +69,12 @@ define internal void @f0() {
; CHECK-LABEL: define internal void @f0(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4
; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
; CHECK-NEXT: call void @no_lds_global_use_leaf()
; CHECK-NEXT: ret void
@ -150,9 +146,8 @@ define internal i16 @mutual_recursion_0(i16 %arg) {
; CHECK-LABEL: define internal i16 @mutual_recursion_0(
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[RECURSIVE_KERNEL_LDS1]], align 2
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7
; CHECK-NEXT: [[RET:%.*]] = call i16 @mutual_recursion_1(i16 [[LD]])