diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 17260a875db7..502f77d1dba6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -316,20 +316,17 @@ public: // Create a ConstantArray containing the address of each Variable within the // kernel corresponding to LDSVarsToConstantGEP, or poison if that kernel // does not allocate it - // TODO: Drop the ptrtoint conversion - Type *I32 = Type::getInt32Ty(Ctx); - - ArrayType *KernelOffsetsType = ArrayType::get(I32, Variables.size()); + Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS); + ArrayType *KernelOffsetsType = ArrayType::get(LocalPtrTy, Variables.size()); SmallVector Elements; for (GlobalVariable *GV : Variables) { auto ConstantGepIt = LDSVarsToConstantGEP.find(GV); if (ConstantGepIt != LDSVarsToConstantGEP.end()) { - auto *elt = ConstantExpr::getPtrToInt(ConstantGepIt->second, I32); - Elements.push_back(elt); + Elements.push_back(ConstantGepIt->second); } else { - Elements.push_back(PoisonValue::get(I32)); + Elements.push_back(PoisonValue::get(LocalPtrTy)); } } return ConstantArray::get(KernelOffsetsType, Elements); @@ -347,8 +344,8 @@ public: const size_t NumberVariables = Variables.size(); const size_t NumberKernels = kernels.size(); - ArrayType *KernelOffsetsType = - ArrayType::get(Type::getInt32Ty(Ctx), NumberVariables); + Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS); + ArrayType *KernelOffsetsType = ArrayType::get(LocalPtrTy, NumberVariables); ArrayType *AllKernelsOffsetsType = ArrayType::get(KernelOffsetsType, NumberKernels); @@ -401,12 +398,8 @@ public: Value *Address = Builder.CreateInBoundsGEP( LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName()); - Value *loaded = Builder.CreateLoad(I32, Address); - - Value *replacement = - Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName()); - - U.set(replacement); + Value *Loaded = Builder.CreateLoad(GV->getType(), Address); + U.set(Loaded); } void replaceUsesInInstructionsWithTableLookup( @@ -868,7 +861,7 @@ public: if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) { LLVMContext &Ctx = M.getContext(); IRBuilder<> Builder(Ctx); - Type *I32 = Type::getInt32Ty(Ctx); + Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS); std::vector newDynamicLDS; @@ -888,14 +881,14 @@ public: markUsedByKernel(func, N); - newDynamicLDS.push_back(ConstantExpr::getPtrToInt(N, I32)); + newDynamicLDS.push_back(N); } else { - newDynamicLDS.push_back(PoisonValue::get(I32)); + newDynamicLDS.push_back(PoisonValue::get(LocalPtrTy)); } } assert(OrderedKernels.size() == newDynamicLDS.size()); - ArrayType *t = ArrayType::get(I32, newDynamicLDS.size()); + ArrayType *t = ArrayType::get(LocalPtrTy, newDynamicLDS.size()); Constant *init = ConstantArray::get(t, newDynamicLDS); GlobalVariable *table = new GlobalVariable( M, t, true, GlobalValue::InternalLinkage, init, diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll index fdb4dc8c2e5b..c95ebb591862 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll @@ -64,9 +64,8 @@ define private void @call_store_A() { define private void @store_A() { ; CHECK-LABEL: define private void @store_A() { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4 -; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[A]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr ; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8 ; CHECK-NEXT: ret void @@ -78,9 +77,8 @@ define private void @store_A() { define private ptr @get_B_ptr() { ; CHECK-LABEL: define private ptr @get_B_ptr() { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4 -; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[B1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[B]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr ; CHECK-NEXT: ret ptr [[TMP3]] ; diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll index 84490b8be597..4bd08c19a081 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll @@ -24,7 +24,7 @@ ; CHECK: @llvm.amdgcn.expect_align4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1:![0-9]+]] ; CHECK: @llvm.amdgcn.expect_align8.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol [[META0]] ; CHECK: @llvm.amdgcn.expect_max_of_2_and_4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1]] -; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds to i32)] +; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds] ;. define amdgpu_kernel void @kernel_only() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_only() { @@ -41,9 +41,8 @@ define amdgpu_kernel void @kernel_only() { define void @use_shared1() { ; CHECK-LABEL: define void @use_shared1() { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4 -; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr addrspace(3) [[DYNAMIC_SHARED11]], i32 0, i32 1 ; CHECK-NEXT: store i8 0, ptr addrspace(3) [[ARRAYIDX]], align 1 ; CHECK-NEXT: ret void @@ -57,9 +56,8 @@ define void @use_shared2() #0 { ; CHECK-LABEL: define void @use_shared2( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4 -; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i16], ptr addrspace(3) [[DYNAMIC_SHARED21]], i32 0, i32 3 ; CHECK-NEXT: store i16 1, ptr addrspace(3) [[ARRAYIDX]], align 2 ; CHECK-NEXT: ret void @@ -76,9 +74,8 @@ define void @use_shared4() #0 { ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() ; CHECK-NEXT: store i32 4, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4 -; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4 -; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr addrspace(3) [[DYNAMIC_SHARED41]], i32 0, i32 5 ; CHECK-NEXT: store i32 2, ptr addrspace(3) [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void @@ -93,9 +90,8 @@ define void @use_shared8() #0 { ; CHECK-LABEL: define void @use_shared8( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4 -; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) [[DYNAMIC_SHARED81]], i32 0, i32 7 ; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll index bd29e9e5855f..9f23e3b95345 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll @@ -25,16 +25,14 @@ ; CHECK: @llvm.amdgcn.kernel.kern_one.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_one.lds.t poison, align 4, !absolute_symbol [[META1:![0-9]+]] ; CHECK: @llvm.amdgcn.kernel.kern_two.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_two.lds.t poison, align 4, !absolute_symbol [[META1]] ; CHECK: @llvm.amdgcn.kernel.kern_block_direct_allocation.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_block_direct_allocation.lds.t poison, align 4, !absolute_symbol [[META1]] - ;. define void @func_one() { ; CHECK-LABEL: define {{[^@]+}}@func_one() { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() ; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2:![0-9]+]] -; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4 -; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) -; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[ONE1]], align 4 +; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[ONE]], align 4 +; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TMP2]], align 4 ; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11:![0-9]+]] ; CHECK-NEXT: ret void ; @@ -62,10 +60,9 @@ define void @func_two() { ; CHECK-LABEL: define {{[^@]+}}@func_two() { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() ; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]] -; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4 -; CHECK-NEXT: [[TWO1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) -; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TWO1]], align 4 +; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[TWO]], align 4 +; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TMP2]], align 4 ; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]] ; CHECK-NEXT: ret void ; @@ -97,14 +94,12 @@ entry: define void @func_block_direct_allocation() { ; CHECK-LABEL: define {{[^@]+}}@func_block_direct_allocation() { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4 -; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) -; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[ONE1]], align 4 -; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4 -; CHECK-NEXT: [[TWO2:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) -; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TWO2]], align 4 +; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[ONE]], align 4 +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[TMP2]], align 4 +; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[TWO]], align 4 +; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4 ; CHECK-NEXT: [[SUM:%.*]] = add i32 [[VAL1]], [[VAL2]] ; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]] ; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]] diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-ambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-ambiguous.ll index 3aeaa1ddbef2..9c71d212132c 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-ambiguous.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-ambiguous.ll @@ -48,14 +48,12 @@ define void @f0() { ; ; TABLE-LABEL: @f0( ; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; TABLE-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4 -; TABLE-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; TABLE-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; TABLE-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4 ; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2 ; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4 -; TABLE-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4 -; TABLE-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; TABLE-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; TABLE-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4 ; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2 ; TABLE-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll index e7f78b4c6897..d32ea069b3e5 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll @@ -35,14 +35,12 @@ define void @f0() { ; ; TABLE-LABEL: @f0( ; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 -; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[F0_LDS2]], align 4 -; TABLE-NEXT: [[F0_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; TABLE-NEXT: [[F0_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[F0_LDS2]], align 4 ; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[F0_LDS3]], align 2 ; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3 -; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 -; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[F0_LDS]], align 4 -; TABLE-NEXT: [[F0_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; TABLE-NEXT: [[F0_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[F0_LDS]], align 4 ; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[F0_LDS1]], align 2 ; TABLE-NEXT: ret void ; @@ -90,14 +88,12 @@ define void @f_both() { ; ; TABLE-LABEL: @f_both( ; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS2]], align 4 -; TABLE-NEXT: [[BOTH_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; TABLE-NEXT: [[BOTH_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[BOTH_LDS2]], align 4 ; TABLE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) [[BOTH_LDS3]], align 4 ; TABLE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4 -; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS]], align 4 -; TABLE-NEXT: [[BOTH_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; TABLE-NEXT: [[BOTH_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[BOTH_LDS]], align 4 ; TABLE-NEXT: store i32 [[MUL]], ptr addrspace(3) [[BOTH_LDS1]], align 4 ; TABLE-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll index 12212a0968c9..c969250f8009 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll @@ -11,7 +11,7 @@ @v3 = addrspace(3) global i8 poison @unused = addrspace(3) global i16 poison -; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]] +; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x ptr addrspace(3)]] [[1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds], [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds]] define void @f0() { ; OPT-LABEL: @f0( @@ -64,14 +64,12 @@ define void @f1() { define void @f2() { ; OPT-LABEL: @f2( ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4 -; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; OPT-NEXT: [[V23:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V22]], align 4 ; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8 ; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4 -; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4 -; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; OPT-NEXT: [[V21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V2]], align 4 ; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8 ; OPT-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll index b689e1e51c2a..d6fec18e81ef 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll @@ -23,20 +23,18 @@ ; The kernel naming pattern and the structs being named after the functions helps verify placement of poison ; The remainder are constant expressions into the variable instances checked above -; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x i32]] [[4 x i32] [i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2) to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds to i32), i32 poison, i32 poison], [4 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3) to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1) to i32)], [4 x i32] [i32 poison, i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1) to i32)]] +; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x ptr addrspace(3)]] [[4 x ptr addrspace(3)] [ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2), ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, ptr addrspace(3) poison, ptr addrspace(3) poison], [4 x ptr addrspace(3)] [ptr addrspace(3) poison, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3), ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1)], [4 x ptr addrspace(3)] [ptr addrspace(3) poison, ptr addrspace(3) poison, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1)]] define void @f0() { ; OPT-LABEL: define void @f0() { ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; OPT-NEXT: [[V02:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V02]], align 4 -; OPT-NEXT: [[V03:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; OPT-NEXT: [[V03:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V1]], align 4 ; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) [[V03]], align 4 ; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00 -; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V0]], align 4 -; OPT-NEXT: [[V01:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; OPT-NEXT: [[V01:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V0]], align 4 ; OPT-NEXT: store float [[MUL]], ptr addrspace(3) [[V01]], align 4 ; OPT-NEXT: ret void ; @@ -70,14 +68,12 @@ define void @f0() { define void @f1() { ; OPT-LABEL: define void @f1() { ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 -; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V12]], align 4 -; OPT-NEXT: [[V13:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; OPT-NEXT: [[V13:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V12]], align 4 ; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[V13]], align 2 ; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3 -; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 -; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V1]], align 4 -; OPT-NEXT: [[V11:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; OPT-NEXT: [[V11:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V1]], align 4 ; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) [[V11]], align 2 ; OPT-NEXT: ret void ; @@ -111,14 +107,12 @@ define void @f1() { define void @f2() { ; OPT-LABEL: define void @f2() { ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2 -; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4 -; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2 +; OPT-NEXT: [[V23:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V22]], align 4 ; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8 ; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4 -; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2 -; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4 -; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2 +; OPT-NEXT: [[V21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V2]], align 4 ; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8 ; OPT-NEXT: ret void ; @@ -152,14 +146,12 @@ define void @f2() { define void @f3() { ; OPT-LABEL: define void @f3() { ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3 -; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V32]], align 4 -; OPT-NEXT: [[V33:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3 +; OPT-NEXT: [[V33:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V32]], align 4 ; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) [[V33]], align 1 ; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5 -; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3 -; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V3]], align 4 -; OPT-NEXT: [[V31:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3 +; OPT-NEXT: [[V31:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V3]], align 4 ; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) [[V31]], align 1 ; OPT-NEXT: ret void ; @@ -217,7 +209,7 @@ define amdgpu_kernel void @kernel_no_table() { ; Access two variables, will allocate those two define amdgpu_kernel void @k01() { ; OPT-LABEL: define amdgpu_kernel void @k01( -; OPT-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; OPT-SAME: ) #[[ATTR0]] {{.*}}.amdgcn.lds.kernel.id [[META2:![0-9]+]] { ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]] ; OPT-NEXT: call void @f0() ; OPT-NEXT: call void @f1() @@ -268,7 +260,7 @@ define amdgpu_kernel void @k01() { define amdgpu_kernel void @k23() { ; OPT-LABEL: define amdgpu_kernel void @k23( -; OPT-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META8:![0-9]+]] { +; OPT-SAME: ) #[[ATTR1:[0-9]+]] {{.*}}.amdgcn.lds.kernel.id [[META8:![0-9]+]] { ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]] ; OPT-NEXT: call void @f2() ; OPT-NEXT: call void @f3() @@ -319,7 +311,7 @@ define amdgpu_kernel void @k23() { ; Access and allocate three variables define amdgpu_kernel void @k123() { ; OPT-LABEL: define amdgpu_kernel void @k123( -; OPT-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META14:![0-9]+]] { +; OPT-SAME: ) #[[ATTR1]] {{.*}}.amdgcn.lds.kernel.id [[META14:![0-9]+]] { ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META15:![0-9]+]], !noalias [[META18:![0-9]+]] ; OPT-NEXT: call void @f1() ; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope [[META21:![0-9]+]], !noalias [[META22:![0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-zero-size-arr.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-zero-size-arr.ll index 59dfe3300c29..7707b8526d08 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-zero-size-arr.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-zero-size-arr.ll @@ -8,15 +8,14 @@ ;. ; CHECK: @llvm.amdgcn.kernelA.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META0:![0-9]+]] -; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds to i32)] +; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds] ;. define void @fn(float %val, i32 %idx) #0 { ; CHECK-LABEL: define void @fn( ; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[VAR0]], align 4 -; CHECK-NEXT: [[VAR01:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[VAR01:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[VAR0]], align 4 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr addrspace(3) [[VAR01]], i32 [[IDX]] ; CHECK-NEXT: store float [[VAL]], ptr addrspace(3) [[PTR]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll index f847d669c606..78529b4eec4d 100644 --- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll @@ -12,20 +12,18 @@ ; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]] ; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]] ; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]] -; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]] +; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x ptr addrspace(3)]] ;. define internal void @lds_use_through_indirect() { ; CHECK-LABEL: define internal void @lds_use_through_indirect( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4 -; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4 ; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2 ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7 -; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4 -; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4 ; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2 ; CHECK-NEXT: ret void ; @@ -71,14 +69,12 @@ define internal void @f0() { ; CHECK-LABEL: define internal void @f0( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4 -; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4 ; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2 ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4 -; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4 -; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) +; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4 ; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2 ; CHECK-NEXT: call void @no_lds_global_use_leaf() ; CHECK-NEXT: ret void @@ -150,9 +146,8 @@ define internal i16 @mutual_recursion_0(i16 %arg) { ; CHECK-LABEL: define internal i16 @mutual_recursion_0( ; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() -; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4 -; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4 ; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[RECURSIVE_KERNEL_LDS1]], align 2 ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7 ; CHECK-NEXT: [[RET:%.*]] = call i16 @mutual_recursion_1(i16 [[LD]])