[AMDGPULowerLDS] Avoid unnecessary ptrtoint/inttoptr roundtrip (#181671)
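Store pointers instead of integers in the LDS offset tables (llvm.amdgcn.lds.offset.table and llvm.amdgcn.dynlds.offset.table), and load them as pointers, so the ptrtoint/inttoptr conversions are no longer needed.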
This commit is contained in:
parent
6f0759d568
commit
5933294bb1
@ -316,20 +316,17 @@ public:
|
||||
// Create a ConstantArray containing the address of each Variable within the
|
||||
// kernel corresponding to LDSVarsToConstantGEP, or poison if that kernel
|
||||
// does not allocate it
|
||||
// TODO: Drop the ptrtoint conversion
|
||||
|
||||
Type *I32 = Type::getInt32Ty(Ctx);
|
||||
|
||||
ArrayType *KernelOffsetsType = ArrayType::get(I32, Variables.size());
|
||||
Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS);
|
||||
ArrayType *KernelOffsetsType = ArrayType::get(LocalPtrTy, Variables.size());
|
||||
|
||||
SmallVector<Constant *> Elements;
|
||||
for (GlobalVariable *GV : Variables) {
|
||||
auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);
|
||||
if (ConstantGepIt != LDSVarsToConstantGEP.end()) {
|
||||
auto *elt = ConstantExpr::getPtrToInt(ConstantGepIt->second, I32);
|
||||
Elements.push_back(elt);
|
||||
Elements.push_back(ConstantGepIt->second);
|
||||
} else {
|
||||
Elements.push_back(PoisonValue::get(I32));
|
||||
Elements.push_back(PoisonValue::get(LocalPtrTy));
|
||||
}
|
||||
}
|
||||
return ConstantArray::get(KernelOffsetsType, Elements);
|
||||
@ -347,8 +344,8 @@ public:
|
||||
const size_t NumberVariables = Variables.size();
|
||||
const size_t NumberKernels = kernels.size();
|
||||
|
||||
ArrayType *KernelOffsetsType =
|
||||
ArrayType::get(Type::getInt32Ty(Ctx), NumberVariables);
|
||||
Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS);
|
||||
ArrayType *KernelOffsetsType = ArrayType::get(LocalPtrTy, NumberVariables);
|
||||
|
||||
ArrayType *AllKernelsOffsetsType =
|
||||
ArrayType::get(KernelOffsetsType, NumberKernels);
|
||||
@ -401,12 +398,8 @@ public:
|
||||
Value *Address = Builder.CreateInBoundsGEP(
|
||||
LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());
|
||||
|
||||
Value *loaded = Builder.CreateLoad(I32, Address);
|
||||
|
||||
Value *replacement =
|
||||
Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName());
|
||||
|
||||
U.set(replacement);
|
||||
Value *Loaded = Builder.CreateLoad(GV->getType(), Address);
|
||||
U.set(Loaded);
|
||||
}
|
||||
|
||||
void replaceUsesInInstructionsWithTableLookup(
|
||||
@ -868,7 +861,7 @@ public:
|
||||
if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {
|
||||
LLVMContext &Ctx = M.getContext();
|
||||
IRBuilder<> Builder(Ctx);
|
||||
Type *I32 = Type::getInt32Ty(Ctx);
|
||||
Type *LocalPtrTy = PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS);
|
||||
|
||||
std::vector<Constant *> newDynamicLDS;
|
||||
|
||||
@ -888,14 +881,14 @@ public:
|
||||
|
||||
markUsedByKernel(func, N);
|
||||
|
||||
newDynamicLDS.push_back(ConstantExpr::getPtrToInt(N, I32));
|
||||
newDynamicLDS.push_back(N);
|
||||
} else {
|
||||
newDynamicLDS.push_back(PoisonValue::get(I32));
|
||||
newDynamicLDS.push_back(PoisonValue::get(LocalPtrTy));
|
||||
}
|
||||
}
|
||||
assert(OrderedKernels.size() == newDynamicLDS.size());
|
||||
|
||||
ArrayType *t = ArrayType::get(I32, newDynamicLDS.size());
|
||||
ArrayType *t = ArrayType::get(LocalPtrTy, newDynamicLDS.size());
|
||||
Constant *init = ConstantArray::get(t, newDynamicLDS);
|
||||
GlobalVariable *table = new GlobalVariable(
|
||||
M, t, true, GlobalValue::InternalLinkage, init,
|
||||
|
||||
@ -64,9 +64,8 @@ define private void @call_store_A() {
|
||||
define private void @store_A() {
|
||||
; CHECK-LABEL: define private void @store_A() {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
|
||||
; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[A1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[A]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr
|
||||
; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8
|
||||
; CHECK-NEXT: ret void
|
||||
@ -78,9 +77,8 @@ define private void @store_A() {
|
||||
define private ptr @get_B_ptr() {
|
||||
; CHECK-LABEL: define private ptr @get_B_ptr() {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4
|
||||
; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[B1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[B]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr
|
||||
; CHECK-NEXT: ret ptr [[TMP3]]
|
||||
;
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
; CHECK: @llvm.amdgcn.expect_align4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1:![0-9]+]]
|
||||
; CHECK: @llvm.amdgcn.expect_align8.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol [[META0]]
|
||||
; CHECK: @llvm.amdgcn.expect_max_of_2_and_4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1]]
|
||||
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds to i32)]
|
||||
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds]
|
||||
;.
|
||||
define amdgpu_kernel void @kernel_only() {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @kernel_only() {
|
||||
@ -41,9 +41,8 @@ define amdgpu_kernel void @kernel_only() {
|
||||
define void @use_shared1() {
|
||||
; CHECK-LABEL: define void @use_shared1() {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr addrspace(3) [[DYNAMIC_SHARED11]], i32 0, i32 1
|
||||
; CHECK-NEXT: store i8 0, ptr addrspace(3) [[ARRAYIDX]], align 1
|
||||
; CHECK-NEXT: ret void
|
||||
@ -57,9 +56,8 @@ define void @use_shared2() #0 {
|
||||
; CHECK-LABEL: define void @use_shared2(
|
||||
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i16], ptr addrspace(3) [[DYNAMIC_SHARED21]], i32 0, i32 3
|
||||
; CHECK-NEXT: store i16 1, ptr addrspace(3) [[ARRAYIDX]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
@ -76,9 +74,8 @@ define void @use_shared4() #0 {
|
||||
; CHECK-SAME: ) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: store i32 4, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr addrspace(3) [[DYNAMIC_SHARED41]], i32 0, i32 5
|
||||
; CHECK-NEXT: store i32 2, ptr addrspace(3) [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
@ -93,9 +90,8 @@ define void @use_shared8() #0 {
|
||||
; CHECK-LABEL: define void @use_shared8(
|
||||
; CHECK-SAME: ) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) [[DYNAMIC_SHARED81]], i32 0, i32 7
|
||||
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
@ -25,16 +25,14 @@
|
||||
; CHECK: @llvm.amdgcn.kernel.kern_one.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_one.lds.t poison, align 4, !absolute_symbol [[META1:![0-9]+]]
|
||||
; CHECK: @llvm.amdgcn.kernel.kern_two.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_two.lds.t poison, align 4, !absolute_symbol [[META1]]
|
||||
; CHECK: @llvm.amdgcn.kernel.kern_block_direct_allocation.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_block_direct_allocation.lds.t poison, align 4, !absolute_symbol [[META1]]
|
||||
|
||||
;.
|
||||
define void @func_one() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func_one() {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2:![0-9]+]]
|
||||
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
|
||||
; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[ONE1]], align 4
|
||||
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[ONE]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TMP2]], align 4
|
||||
; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11:![0-9]+]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -62,10 +60,9 @@ define void @func_two() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func_two() {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
|
||||
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
|
||||
; CHECK-NEXT: [[TWO1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TWO1]], align 4
|
||||
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[TWO]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TMP2]], align 4
|
||||
; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -97,14 +94,12 @@ entry:
|
||||
define void @func_block_direct_allocation() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func_block_direct_allocation() {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
|
||||
; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[ONE1]], align 4
|
||||
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
|
||||
; CHECK-NEXT: [[TWO2:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TWO2]], align 4
|
||||
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[ONE]], align 4
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[TWO]], align 4
|
||||
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[VAL1]], [[VAL2]]
|
||||
; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
|
||||
; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]
|
||||
|
||||
@ -48,14 +48,12 @@ define void @f0() {
|
||||
;
|
||||
; TABLE-LABEL: @f0(
|
||||
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; TABLE-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
|
||||
; TABLE-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; TABLE-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4
|
||||
; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
|
||||
; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4
|
||||
; TABLE-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
|
||||
; TABLE-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; TABLE-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4
|
||||
; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
|
||||
; TABLE-NEXT: ret void
|
||||
;
|
||||
|
||||
@ -35,14 +35,12 @@ define void @f0() {
|
||||
;
|
||||
; TABLE-LABEL: @f0(
|
||||
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[F0_LDS2]], align 4
|
||||
; TABLE-NEXT: [[F0_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; TABLE-NEXT: [[F0_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[F0_LDS2]], align 4
|
||||
; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[F0_LDS3]], align 2
|
||||
; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
|
||||
; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[F0_LDS]], align 4
|
||||
; TABLE-NEXT: [[F0_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; TABLE-NEXT: [[F0_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[F0_LDS]], align 4
|
||||
; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[F0_LDS1]], align 2
|
||||
; TABLE-NEXT: ret void
|
||||
;
|
||||
@ -90,14 +88,12 @@ define void @f_both() {
|
||||
;
|
||||
; TABLE-LABEL: @f_both(
|
||||
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS2]], align 4
|
||||
; TABLE-NEXT: [[BOTH_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[BOTH_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[BOTH_LDS2]], align 4
|
||||
; TABLE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) [[BOTH_LDS3]], align 4
|
||||
; TABLE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
|
||||
; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS]], align 4
|
||||
; TABLE-NEXT: [[BOTH_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; TABLE-NEXT: [[BOTH_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[BOTH_LDS]], align 4
|
||||
; TABLE-NEXT: store i32 [[MUL]], ptr addrspace(3) [[BOTH_LDS1]], align 4
|
||||
; TABLE-NEXT: ret void
|
||||
;
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
@v3 = addrspace(3) global i8 poison
|
||||
@unused = addrspace(3) global i16 poison
|
||||
|
||||
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]]
|
||||
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x ptr addrspace(3)]] [[1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds], [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds]]
|
||||
|
||||
define void @f0() {
|
||||
; OPT-LABEL: @f0(
|
||||
@ -64,14 +64,12 @@ define void @f1() {
|
||||
define void @f2() {
|
||||
; OPT-LABEL: @f2(
|
||||
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
|
||||
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[V23:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V22]], align 4
|
||||
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
|
||||
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
|
||||
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
|
||||
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[V21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V2]], align 4
|
||||
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
|
||||
; OPT-NEXT: ret void
|
||||
;
|
||||
|
||||
@ -23,20 +23,18 @@
|
||||
; The kernel naming pattern and the structs being named after the functions helps verify placement of poison
|
||||
; The remainder are constant expressions into the variable instances checked above
|
||||
|
||||
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x i32]] [[4 x i32] [i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2) to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds to i32), i32 poison, i32 poison], [4 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3) to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1) to i32)], [4 x i32] [i32 poison, i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1) to i32)]]
|
||||
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x ptr addrspace(3)]] [[4 x ptr addrspace(3)] [ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2), ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, ptr addrspace(3) poison, ptr addrspace(3) poison], [4 x ptr addrspace(3)] [ptr addrspace(3) poison, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3), ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1)], [4 x ptr addrspace(3)] [ptr addrspace(3) poison, ptr addrspace(3) poison, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1)]]
|
||||
|
||||
|
||||
define void @f0() {
|
||||
; OPT-LABEL: define void @f0() {
|
||||
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; OPT-NEXT: [[V02:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V02]], align 4
|
||||
; OPT-NEXT: [[V03:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[V03:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V1]], align 4
|
||||
; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) [[V03]], align 4
|
||||
; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00
|
||||
; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V0]], align 4
|
||||
; OPT-NEXT: [[V01:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; OPT-NEXT: [[V01:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V0]], align 4
|
||||
; OPT-NEXT: store float [[MUL]], ptr addrspace(3) [[V01]], align 4
|
||||
; OPT-NEXT: ret void
|
||||
;
|
||||
@ -70,14 +68,12 @@ define void @f0() {
|
||||
define void @f1() {
|
||||
; OPT-LABEL: define void @f1() {
|
||||
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V12]], align 4
|
||||
; OPT-NEXT: [[V13:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; OPT-NEXT: [[V13:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V12]], align 4
|
||||
; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[V13]], align 2
|
||||
; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
|
||||
; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V1]], align 4
|
||||
; OPT-NEXT: [[V11:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; OPT-NEXT: [[V11:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V1]], align 4
|
||||
; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) [[V11]], align 2
|
||||
; OPT-NEXT: ret void
|
||||
;
|
||||
@ -111,14 +107,12 @@ define void @f1() {
|
||||
define void @f2() {
|
||||
; OPT-LABEL: define void @f2() {
|
||||
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
|
||||
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
|
||||
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
|
||||
; OPT-NEXT: [[V23:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V22]], align 4
|
||||
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
|
||||
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
|
||||
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
|
||||
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
|
||||
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
|
||||
; OPT-NEXT: [[V21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V2]], align 4
|
||||
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
|
||||
; OPT-NEXT: ret void
|
||||
;
|
||||
@ -152,14 +146,12 @@ define void @f2() {
|
||||
define void @f3() {
|
||||
; OPT-LABEL: define void @f3() {
|
||||
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
|
||||
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V32]], align 4
|
||||
; OPT-NEXT: [[V33:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
|
||||
; OPT-NEXT: [[V33:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V32]], align 4
|
||||
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) [[V33]], align 1
|
||||
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5
|
||||
; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
|
||||
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V3]], align 4
|
||||
; OPT-NEXT: [[V31:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
|
||||
; OPT-NEXT: [[V31:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[V3]], align 4
|
||||
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) [[V31]], align 1
|
||||
; OPT-NEXT: ret void
|
||||
;
|
||||
@ -217,7 +209,7 @@ define amdgpu_kernel void @kernel_no_table() {
|
||||
; Access two variables, will allocate those two
|
||||
define amdgpu_kernel void @k01() {
|
||||
; OPT-LABEL: define amdgpu_kernel void @k01(
|
||||
; OPT-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
|
||||
; OPT-SAME: ) #[[ATTR0]] {{.*}}.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
|
||||
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]]
|
||||
; OPT-NEXT: call void @f0()
|
||||
; OPT-NEXT: call void @f1()
|
||||
@ -268,7 +260,7 @@ define amdgpu_kernel void @k01() {
|
||||
|
||||
define amdgpu_kernel void @k23() {
|
||||
; OPT-LABEL: define amdgpu_kernel void @k23(
|
||||
; OPT-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META8:![0-9]+]] {
|
||||
; OPT-SAME: ) #[[ATTR1:[0-9]+]] {{.*}}.amdgcn.lds.kernel.id [[META8:![0-9]+]] {
|
||||
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
|
||||
; OPT-NEXT: call void @f2()
|
||||
; OPT-NEXT: call void @f3()
|
||||
@ -319,7 +311,7 @@ define amdgpu_kernel void @k23() {
|
||||
; Access and allocate three variables
|
||||
define amdgpu_kernel void @k123() {
|
||||
; OPT-LABEL: define amdgpu_kernel void @k123(
|
||||
; OPT-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META14:![0-9]+]] {
|
||||
; OPT-SAME: ) #[[ATTR1]] {{.*}}.amdgcn.lds.kernel.id [[META14:![0-9]+]] {
|
||||
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META15:![0-9]+]], !noalias [[META18:![0-9]+]]
|
||||
; OPT-NEXT: call void @f1()
|
||||
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope [[META21:![0-9]+]], !noalias [[META22:![0-9]+]]
|
||||
|
||||
@ -8,15 +8,14 @@
|
||||
|
||||
;.
|
||||
; CHECK: @llvm.amdgcn.kernelA.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META0:![0-9]+]]
|
||||
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds to i32)]
|
||||
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds]
|
||||
;.
|
||||
define void @fn(float %val, i32 %idx) #0 {
|
||||
; CHECK-LABEL: define void @fn(
|
||||
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[VAR0]], align 4
|
||||
; CHECK-NEXT: [[VAR01:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[VAR01:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[VAR0]], align 4
|
||||
; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr addrspace(3) [[VAR01]], i32 [[IDX]]
|
||||
; CHECK-NEXT: store float [[VAL]], ptr addrspace(3) [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
@ -12,20 +12,18 @@
|
||||
; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]]
|
||||
; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]]
|
||||
; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]]
|
||||
; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]]
|
||||
; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x ptr addrspace(3)]]
|
||||
;.
|
||||
define internal void @lds_use_through_indirect() {
|
||||
; CHECK-LABEL: define internal void @lds_use_through_indirect(
|
||||
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
|
||||
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7
|
||||
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
|
||||
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4
|
||||
; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -71,14 +69,12 @@ define internal void @f0() {
|
||||
; CHECK-LABEL: define internal void @f0(
|
||||
; CHECK-SAME: ) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
|
||||
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS2]], align 4
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4
|
||||
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
|
||||
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[FUNCTION_LDS]], align 4
|
||||
; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
|
||||
; CHECK-NEXT: call void @no_lds_global_use_leaf()
|
||||
; CHECK-NEXT: ret void
|
||||
@ -150,9 +146,8 @@ define internal i16 @mutual_recursion_0(i16 %arg) {
|
||||
; CHECK-LABEL: define internal i16 @mutual_recursion_0(
|
||||
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
||||
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4
|
||||
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x ptr addrspace(3)]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[RECURSIVE_KERNEL_LDS1]], align 2
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7
|
||||
; CHECK-NEXT: [[RET:%.*]] = call i16 @mutual_recursion_1(i16 [[LD]])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user