AMDGPU: Add dereferenceable attribute to dispatch ptr intrinsic (#185955)
Stop manually setting it on the callsite in clang.
This commit is contained in:
parent
763de90db4
commit
7cb3005ba2
@ -56,9 +56,6 @@ Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
|
||||
const CallExpr *E = nullptr) {
|
||||
auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
|
||||
auto *Call = CGF.Builder.CreateCall(F);
|
||||
Call->addRetAttr(
|
||||
Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
|
||||
Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
|
||||
if (!E)
|
||||
return Call;
|
||||
QualType BuiltinRetType = E->getType();
|
||||
|
||||
@ -19,7 +19,7 @@
|
||||
// LLVM-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i32 [[TMP7]]
|
||||
// LLVM-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
|
||||
// LLVM-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32
|
||||
// LLVM-NEXT: [[TMP11:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// LLVM-NEXT: [[TMP11:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// LLVM-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP11]], i64 4
|
||||
// LLVM-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG2]], !invariant.load [[META1]], !noundef [[META1]]
|
||||
// LLVM-NEXT: [[TMP14:%.*]] = zext i16 [[TMP13]] to i32
|
||||
|
||||
@ -31,7 +31,7 @@
|
||||
// PRECOV5-NEXT: i32 2, label %[[SW_BB2:.*]]
|
||||
// PRECOV5-NEXT: ]
|
||||
// PRECOV5: [[SW_BB]]:
|
||||
// PRECOV5-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// PRECOV5-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// PRECOV5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP1]], i64 4
|
||||
// PRECOV5-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(4) [[TMP2]], align 2, !range [[RNG3:![0-9]+]], !invariant.load [[META4:![0-9]+]], !noundef [[META4]]
|
||||
// PRECOV5-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
|
||||
@ -39,7 +39,7 @@
|
||||
// PRECOV5-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
|
||||
// PRECOV5-NEXT: br label %[[SW_EPILOG:.*]]
|
||||
// PRECOV5: [[SW_BB1]]:
|
||||
// PRECOV5-NEXT: [[TMP6:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// PRECOV5-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// PRECOV5-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP6]], i64 6
|
||||
// PRECOV5-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]]
|
||||
// PRECOV5-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i32
|
||||
@ -47,7 +47,7 @@
|
||||
// PRECOV5-NEXT: store i32 [[TMP9]], ptr [[TMP10]], align 4
|
||||
// PRECOV5-NEXT: br label %[[SW_EPILOG]]
|
||||
// PRECOV5: [[SW_BB2]]:
|
||||
// PRECOV5-NEXT: [[TMP11:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// PRECOV5-NEXT: [[TMP11:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// PRECOV5-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP11]], i64 8
|
||||
// PRECOV5-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]]
|
||||
// PRECOV5-NEXT: [[TMP14:%.*]] = zext i16 [[TMP13]] to i32
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
|
||||
// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
|
||||
// CHECK-NEXT: store ptr [[TMP1]], ptr [[DISPATCH_PTR_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8
|
||||
@ -150,7 +150,7 @@ __global__ void test_ds_fmin(float src, float *shared) {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
|
||||
// CHECK-NEXT: store ptr [[TMP1]], ptr [[X_ASCAST]], align 8
|
||||
// CHECK-NEXT: ret void
|
||||
@ -241,7 +241,7 @@ __device__ void func(float *x);
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
|
||||
// CHECK-NEXT: store volatile float [[TMP3]], ptr [[X_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP4]]) #[[ATTR7:[0-9]+]]
|
||||
// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP4]]) #[[ATTR8:[0-9]+]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// CHECK-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
|
||||
@ -232,7 +232,7 @@ __device__ void func(float *x);
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
|
||||
// CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR6:[0-9]+]]
|
||||
// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR7:[0-9]+]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
|
||||
|
||||
@ -33,7 +33,7 @@
|
||||
// NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_x(
|
||||
// NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
||||
// NONUNIFORM-V4-NEXT: [[ENTRY:.*:]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
|
||||
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7:![0-9]+]], !invariant.load [[META8:![0-9]+]], !noundef [[META8]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
|
||||
@ -48,7 +48,7 @@
|
||||
// UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_x(
|
||||
// UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
||||
// UNIFORM-V4-NEXT: [[ENTRY:.*:]]
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
|
||||
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7:![0-9]+]], !invariant.load [[META8:![0-9]+]], !noundef [[META8]]
|
||||
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
|
||||
@ -67,7 +67,7 @@
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG7:![0-9]+]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = zext nneg i16 [[TMP8]] to i32
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 4
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32
|
||||
@ -87,7 +87,7 @@
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 12
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG6:![0-9]+]], !invariant.load [[META7:![0-9]+]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 4
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
|
||||
@ -125,7 +125,7 @@ unsigned int test_get_workgroup_size_x()
|
||||
// NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_y(
|
||||
// NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
|
||||
// NONUNIFORM-V4-NEXT: [[ENTRY:.*:]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 6
|
||||
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
|
||||
@ -140,7 +140,7 @@ unsigned int test_get_workgroup_size_x()
|
||||
// UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_y(
|
||||
// UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// UNIFORM-V4-NEXT: [[ENTRY:.*:]]
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 6
|
||||
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
|
||||
@ -160,7 +160,7 @@ unsigned int test_get_workgroup_size_x()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP7]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = zext nneg i16 [[TMP9]] to i32
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP11]], i64 6
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = zext nneg i16 [[TMP13]] to i32
|
||||
@ -180,7 +180,7 @@ unsigned int test_get_workgroup_size_x()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 14
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 6
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
|
||||
@ -218,7 +218,7 @@ unsigned int test_get_workgroup_size_y()
|
||||
// NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_z(
|
||||
// NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR3:[0-9]+]] {
|
||||
// NONUNIFORM-V4-NEXT: [[ENTRY:.*:]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
|
||||
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
|
||||
@ -233,7 +233,7 @@ unsigned int test_get_workgroup_size_y()
|
||||
// UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_z(
|
||||
// UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// UNIFORM-V4-NEXT: [[ENTRY:.*:]]
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
|
||||
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
|
||||
@ -253,7 +253,7 @@ unsigned int test_get_workgroup_size_y()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP7]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = zext nneg i16 [[TMP9]] to i32
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP11]], i64 8
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = zext nneg i16 [[TMP13]] to i32
|
||||
@ -273,7 +273,7 @@ unsigned int test_get_workgroup_size_y()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 16
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 8, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 8
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
|
||||
@ -368,7 +368,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// NONUNIFORM-V4-NEXT: i32 2, label %[[SW_BB2:.*]]
|
||||
// NONUNIFORM-V4-NEXT: ]
|
||||
// NONUNIFORM-V4: [[SW_BB]]:
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
|
||||
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
|
||||
@ -381,7 +381,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// NONUNIFORM-V4-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[TMP9]], 1
|
||||
// NONUNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
|
||||
// NONUNIFORM-V4: [[SW_BB1]]:
|
||||
// NONUNIFORM-V4-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 6
|
||||
// NONUNIFORM-V4-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32
|
||||
@ -393,7 +393,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP19:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP18]], i32 [[TMP13]])
|
||||
// NONUNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
|
||||
// NONUNIFORM-V4: [[SW_BB2]]:
|
||||
// NONUNIFORM-V4-NEXT: [[TMP20:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP20:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-V4-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP20]], i64 8
|
||||
// NONUNIFORM-V4-NEXT: [[TMP22:%.*]] = load i16, ptr addrspace(4) [[TMP21]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// NONUNIFORM-V4-NEXT: [[TMP23:%.*]] = zext nneg i16 [[TMP22]] to i32
|
||||
@ -418,18 +418,18 @@ unsigned int test_get_workgroup_size_z()
|
||||
// UNIFORM-V4-NEXT: i32 2, label %[[SW_BB2:.*]]
|
||||
// UNIFORM-V4-NEXT: ]
|
||||
// UNIFORM-V4: [[SW_BB]]:
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
|
||||
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// UNIFORM-V4-NEXT: [[NARROW:%.*]] = add nuw nsw i16 [[TMP2]], 1
|
||||
// UNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
|
||||
// UNIFORM-V4: [[SW_BB1]]:
|
||||
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP3]], i64 6
|
||||
// UNIFORM-V4-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[TMP4]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// UNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
|
||||
// UNIFORM-V4: [[SW_BB2]]:
|
||||
// UNIFORM-V4-NEXT: [[TMP6:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP6:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-V4-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP6]], i64 8
|
||||
// UNIFORM-V4-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
|
||||
// UNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
|
||||
@ -458,7 +458,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = zext nneg i16 [[TMP8]] to i32
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 4
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32
|
||||
@ -482,7 +482,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP22]], i64 [[TMP27]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP29:%.*]] = load i16, ptr addrspace(4) [[TMP28]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP30:%.*]] = zext nneg i16 [[TMP29]] to i32
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP31:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP31:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP31]], i64 6
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP33:%.*]] = load i16, ptr addrspace(4) [[TMP32]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP34:%.*]] = zext nneg i16 [[TMP33]] to i32
|
||||
@ -505,7 +505,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP43]], i64 [[TMP48]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP50:%.*]] = load i16, ptr addrspace(4) [[TMP49]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP51:%.*]] = zext nneg i16 [[TMP50]] to i32
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP52:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP52:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP52]], i64 8
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP54:%.*]] = load i16, ptr addrspace(4) [[TMP53]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
|
||||
// NONUNIFORM-UNKNOWN-NEXT: [[TMP55:%.*]] = zext nneg i16 [[TMP54]] to i32
|
||||
@ -535,7 +535,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 12
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 4
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[DOTV7:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
|
||||
@ -547,7 +547,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 14
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP13]], i64 6
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP15:%.*]] = load i16, ptr addrspace(4) [[TMP14]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[DOTV6:%.*]] = select i1 [[TMP9]], i16 [[TMP12]], i16 [[TMP15]]
|
||||
@ -558,7 +558,7 @@ unsigned int test_get_workgroup_size_z()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP18:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP18]], i64 16
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP20:%.*]] = load i16, ptr addrspace(4) [[TMP19]], align 8, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP21:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP21:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP21]], i64 8
|
||||
// UNIFORM-UNKNOWN-NEXT: [[TMP23:%.*]] = load i16, ptr addrspace(4) [[TMP22]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
|
||||
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP17]], i16 [[TMP20]], i16 [[TMP23]]
|
||||
|
||||
@ -1051,7 +1051,7 @@ void test_read_exec_hi(global uint* out) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_dispatch_ptr
|
||||
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK: {{.*}}call{{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
#if !defined(__SPIRV__)
|
||||
void test_dispatch_ptr(__constant unsigned char ** out)
|
||||
#else
|
||||
@ -1138,7 +1138,7 @@ void test_get_local_id(int d, global int *out)
|
||||
// CHECK: declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
|
||||
|
||||
// CHECK-LABEL: @test_get_grid_size(
|
||||
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK: {{.*}}call{{.*}}ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
|
||||
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !range [[$GRID_RANGE:![0-9]+]], !invariant.load
|
||||
void test_get_grid_size(int d, global int *out)
|
||||
|
||||
@ -84,7 +84,7 @@ __gpu_kernel void foo() {
|
||||
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_x(
|
||||
// AMDGPU-SAME: ) #[[ATTR0]] {
|
||||
// AMDGPU-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12
|
||||
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]]
|
||||
// AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
@ -103,7 +103,7 @@ __gpu_kernel void foo() {
|
||||
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_y(
|
||||
// AMDGPU-SAME: ) #[[ATTR0]] {
|
||||
// AMDGPU-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 16
|
||||
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2]], !invariant.load [[META3]]
|
||||
// AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
@ -122,7 +122,7 @@ __gpu_kernel void foo() {
|
||||
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_z(
|
||||
// AMDGPU-SAME: ) #[[ATTR0]] {
|
||||
// AMDGPU-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 20
|
||||
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2]], !invariant.load [[META3]]
|
||||
// AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
|
||||
@ -180,7 +180,7 @@ def int_amdgcn_cluster_workgroup_max_flat_id:
|
||||
|
||||
def int_amdgcn_dispatch_ptr :
|
||||
DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
|
||||
[Align<RetIndex, 4>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>;
|
||||
[Align<RetIndex, 4>, Dereferenceable<RetIndex, 64>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
def int_amdgcn_queue_ptr :
|
||||
ClangBuiltin<"__builtin_amdgcn_queue_ptr">,
|
||||
|
||||
@ -2,41 +2,47 @@
|
||||
|
||||
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||
|
||||
|
||||
; Test assumed alignment parameter
|
||||
; CHECK: declare noundef nonnull align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
|
||||
; CHECK: declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #0
|
||||
define ptr addrspace(4) @dispatch_ptr() {
|
||||
%ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
ret ptr addrspace(4) %ptr
|
||||
}
|
||||
|
||||
; CHECK: declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #1
|
||||
define i32 @ds_append(ptr addrspace(3) %ptr) {
|
||||
%ret = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %ptr, i1 false)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Test assumed alignment parameter
|
||||
; CHECK: declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #0
|
||||
; CHECK: declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #1
|
||||
define i32 @ds_consume(ptr addrspace(3) %ptr) {
|
||||
%ret = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %ptr, i1 false)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK: declare void @llvm.amdgcn.s.wait.event(i16 immarg) #1
|
||||
; CHECK: declare void @llvm.amdgcn.s.wait.event(i16 immarg) #2
|
||||
define void @s_wait_event() {
|
||||
call void @llvm.amdgcn.s.wait.event(i16 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: declare void @llvm.amdgcn.s.wait.event.export.ready() #1
|
||||
; CHECK: declare void @llvm.amdgcn.s.wait.event.export.ready() #2
|
||||
define void @s_wait_event_export_ready() {
|
||||
call void @llvm.amdgcn.s.wait.event.export.ready()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test assumed range
|
||||
; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #2
|
||||
; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #0
|
||||
define i32 @wavefrontsize() {
|
||||
%ret = call i32 @llvm.amdgcn.wavefrontsize()
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
; CHECK: attributes #1 = { nocallback nofree nounwind willreturn }
|
||||
; CHNCK: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
; CHECK: attributes #1 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
; CHECK: attributes #2 = { nocallback nofree nounwind willreturn }
|
||||
|
||||
@ -155,7 +155,7 @@ define i32 @bad_offset() {
|
||||
; CHECK-LABEL: define i32 @bad_offset() {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
|
||||
; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 16
|
||||
; CHECK-NEXT: [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[D_GEP_Y]], align 4
|
||||
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
||||
@ -180,7 +180,7 @@ define i32 @dangling() {
|
||||
; CHECK-LABEL: define i32 @dangling() {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
||||
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
|
||||
;
|
||||
@ -199,7 +199,7 @@ define i32 @wrong_cast() {
|
||||
; CHECK-LABEL: define i32 @wrong_cast() {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
||||
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
||||
@ -224,7 +224,7 @@ define i32 @wrong_size() {
|
||||
; CHECK-LABEL: define i32 @wrong_size() {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
||||
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
||||
@ -274,7 +274,7 @@ define i16 @empty_use() {
|
||||
; CHECK-LABEL: define i16 @empty_use() {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
||||
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
||||
; CHECK-NEXT: [[TRUNC_X:%.*]] = trunc i32 [[GRID_SIZE_X]] to i16
|
||||
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user