Summary: We already have a way to get the block count: look up the old grid size and divide it by the number of threads. We did not want to add a new intrinsic that does the same thing, so this optimization pattern-matches that usage and automatically rewrites it to the new form. This should improve the performance of old kernels by converting branches into a simple index lookup and removing the division.
323 lines
15 KiB
LLVM
323 lines
15 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
|
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck %s
|
|
|
|
; Positive test, X dimension.
; Input: an i32 loaded at byte offset 12 from the dispatch pointer
; (%grid_size_x) is unsigned-divided by a zero-extended i16 loaded at byte
; offset 12 from the implicit-argument pointer (%wg_size_x).
; Expected output: the dispatch-pointer load, the i16 load, the zext and the
; udiv are all gone; what remains is one i32 load taken directly from the
; implicit-argument pointer, tagged !invariant.load and !noundef.
define i32 @num_blocks_x() {
|
|
; CHECK-LABEL: define i32 @num_blocks_x() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0:![0-9]+]], !noundef [[META0]]
|
|
; CHECK-NEXT: ret i32 [[TMP0]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
%conv_x = zext i16 %wg_size_x to i32
|
|
; The grid-size / workgroup-size quotient is the pattern under test.
%count_x = udiv i32 %grid_size_x, %conv_x
|
|
ret i32 %count_x
|
|
}
|
|
|
|
; Positive test, Y dimension.
; Input: an i32 loaded at byte offset 16 from the dispatch pointer
; (%grid_size_y) is unsigned-divided by a zero-extended i16 loaded at byte
; offset 14 from the implicit-argument pointer (%wg_size_y).
; Expected output: a single i32 load from the implicit-argument pointer plus
; 4 bytes, tagged !invariant.load and !noundef; the division is gone.
define i32 @num_blocks_y() {
|
|
; CHECK-LABEL: define i32 @num_blocks_y() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4, !invariant.load [[META0]], !noundef [[META0]]
|
|
; CHECK-NEXT: ret i32 [[TMP1]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_y = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
|
|
%grid_size_y = load i32, ptr addrspace(4) %d_gep_y, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_y = getelementptr i8, ptr addrspace(4) %implicitarg, i32 14
|
|
%wg_size_y = load i16, ptr addrspace(4) %i_gep_y, align 2
|
|
%conv_y = zext i16 %wg_size_y to i32
|
|
; The grid-size / workgroup-size quotient is the pattern under test.
%count_y = udiv i32 %grid_size_y, %conv_y
|
|
ret i32 %count_y
|
|
}
|
|
|
|
; Positive test, Z dimension.
; Input: an i32 loaded at byte offset 20 from the dispatch pointer
; (%grid_size_z) is unsigned-divided by a zero-extended i16 loaded at byte
; offset 16 from the implicit-argument pointer (%wg_size_z).
; Expected output: a single i32 load from the implicit-argument pointer plus
; 8 bytes, tagged !invariant.load and !noundef; the division is gone.
define i32 @num_blocks_z() {
|
|
; CHECK-LABEL: define i32 @num_blocks_z() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 8
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4, !invariant.load [[META0]], !noundef [[META0]]
|
|
; CHECK-NEXT: ret i32 [[TMP1]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_z = getelementptr i8, ptr addrspace(4) %dispatch, i32 20
|
|
%grid_size_z = load i32, ptr addrspace(4) %d_gep_z, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_z = getelementptr i8, ptr addrspace(4) %implicitarg, i32 16
|
|
%wg_size_z = load i16, ptr addrspace(4) %i_gep_z, align 2
|
|
%conv_z = zext i16 %wg_size_z to i32
|
|
; The grid-size / workgroup-size quotient is the pattern under test.
%count_z = udiv i32 %grid_size_z, %conv_z
|
|
ret i32 %count_z
|
|
}
|
|
|
|
; Positive test with a runtime-selected dimension.
; Input: a switch on %dim (0, 1, 2; the default block is unreachable) picks one
; of three blocks, each performing the same grid-size / workgroup-size division
; as the single-dimension tests in this file (dispatch offsets 12/16/20,
; implicit-argument offsets 12/14/16). The quotients meet in a phi in %exit.
; Expected output: every division is gone. Each case block only produces a
; pointer (the implicit-argument pointer itself, +4 bytes, or +8 bytes), the
; phi in the exit block merges those pointers, and a single i32 load tagged
; !invariant.load and !noundef reads the result.
define i32 @num_blocks(i32 %dim) {
|
|
; CHECK-LABEL: define i32 @num_blocks(
|
|
; CHECK-SAME: i32 [[DIM:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: switch i32 [[DIM]], label %[[DEFAULT:.*]] [
|
|
; CHECK-NEXT: i32 0, label %[[DIM_X:.*]]
|
|
; CHECK-NEXT: i32 1, label %[[DIM_Y:.*]]
|
|
; CHECK-NEXT: i32 2, label %[[DIM_Z:.*]]
|
|
; CHECK-NEXT: ]
|
|
; CHECK: [[DIM_X]]:
|
|
; CHECK-NEXT: br label %[[EXIT:.*]]
|
|
; CHECK: [[DIM_Y]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 4
|
|
; CHECK-NEXT: br label %[[EXIT]]
|
|
; CHECK: [[DIM_Z]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
|
|
; CHECK-NEXT: br label %[[EXIT]]
|
|
; CHECK: [[DEFAULT]]:
|
|
; CHECK-NEXT: unreachable
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RETVAL_IN:%.*]] = phi ptr addrspace(4) [ [[TMP1]], %[[DIM_X]] ], [ [[TMP0]], %[[DIM_Y]] ], [ [[TMP2]], %[[DIM_Z]] ]
|
|
; CHECK-NEXT: [[RETVAL_0_I:%.*]] = load i32, ptr addrspace(4) [[RETVAL_IN]], align 4, !invariant.load [[META0]], !noundef [[META0]]
|
|
; CHECK-NEXT: ret i32 [[RETVAL_0_I]]
|
|
;
|
|
entry:
|
|
; Both base pointers are obtained once and shared by all three case blocks.
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
|
|
switch i32 %dim, label %default [
|
|
i32 0, label %dim_x
|
|
i32 1, label %dim_y
|
|
i32 2, label %dim_z
|
|
]
|
|
|
|
; X: dispatch offset 12 divided by implicit-argument offset 12.
dim_x:
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
%conv_x = zext i16 %wg_size_x to i32
|
|
%count_x = udiv i32 %grid_size_x, %conv_x
|
|
br label %exit
|
|
|
|
; Y: dispatch offset 16 divided by implicit-argument offset 14.
dim_y:
|
|
%d_gep_y = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
|
|
%grid_size_y = load i32, ptr addrspace(4) %d_gep_y, align 4
|
|
%i_gep_y = getelementptr i8, ptr addrspace(4) %implicitarg, i32 14
|
|
%wg_size_y = load i16, ptr addrspace(4) %i_gep_y, align 2
|
|
%conv_y = zext i16 %wg_size_y to i32
|
|
%count_y = udiv i32 %grid_size_y, %conv_y
|
|
br label %exit
|
|
|
|
; Z: dispatch offset 20 divided by implicit-argument offset 16.
dim_z:
|
|
%d_gep_z = getelementptr i8, ptr addrspace(4) %dispatch, i32 20
|
|
%grid_size_z = load i32, ptr addrspace(4) %d_gep_z, align 4
|
|
%i_gep_z = getelementptr i8, ptr addrspace(4) %implicitarg, i32 16
|
|
%wg_size_z = load i16, ptr addrspace(4) %i_gep_z, align 2
|
|
%conv_z = zext i16 %wg_size_z to i32
|
|
%count_z = udiv i32 %grid_size_z, %conv_z
|
|
br label %exit
|
|
|
|
default:
|
|
unreachable
|
|
|
|
exit:
|
|
%retval = phi i32 [ %count_x, %dim_x ], [ %count_y, %dim_y ], [ %count_z, %dim_z ]
|
|
ret i32 %retval
|
|
}
|
|
|
|
; Positive test where the division is performed at i64 rather than i32.
; Input: the i16 load (implicit-argument offset 12) is zero-extended straight
; to i64, the i32 load (dispatch offset 12) is zero-extended to i64 as well,
; and the two are divided with a 64-bit udiv.
; Expected output: an i32 load directly from the implicit-argument pointer
; (tagged !invariant.load and !noundef) followed by a zext to i64; the
; division is gone.
define i64 @larger() {
|
|
; CHECK-LABEL: define i64 @larger() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0]], !noundef [[META0]]
|
|
; CHECK-NEXT: [[CONV_GRID_X:%.*]] = zext i32 [[GRID_SIZE_X]] to i64
|
|
; CHECK-NEXT: ret i64 [[CONV_GRID_X]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
; Both operands are widened to i64 before dividing.
%conv_x = zext i16 %wg_size_x to i64
|
|
%conv_grid_x = zext i32 %grid_size_x to i64
|
|
%count_x = udiv i64 %conv_grid_x, %conv_x
|
|
ret i64 %count_x
|
|
}
|
|
|
|
; Negative test: the two loads come from different dimensions.
; Input: the dividend is loaded from dispatch offset 16 (the offset the Y test
; in this file uses) while the divisor is loaded from implicit-argument offset
; 12 (the offset the X test uses).
; Expected output: both loads, the zext and the udiv are all still present;
; the only visible difference from the input is that the getelementptr
; indices are printed as i64 instead of i32.
define i32 @bad_offset() {
|
|
; CHECK-LABEL: define i32 @bad_offset() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
|
|
; CHECK-NEXT: [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[D_GEP_Y]], align 4
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
|
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
|
|
; CHECK-NEXT: [[CONV_X:%.*]] = zext i16 [[WG_SIZE_X]] to i32
|
|
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_Y]], [[CONV_X]]
|
|
; CHECK-NEXT: ret i32 [[COUNT_X]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_y = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
|
|
%grid_size_y = load i32, ptr addrspace(4) %d_gep_y, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
%conv_x = zext i16 %wg_size_x to i32
|
|
; Mismatched pair: Y-offset dividend, X-offset divisor.
%count_x = udiv i32 %grid_size_y, %conv_x
|
|
ret i32 %count_x
|
|
}
|
|
|
|
; Negative test: there is no division at all.
; Input: the i16 load from implicit-argument offset 12 is zero-extended, but
; %conv_x has no user; the function returns the raw i32 loaded from dispatch
; offset 12.
; Expected output: the dispatch-pointer load is still what gets returned, and
; the unused implicit-argument call, getelementptr, load and zext are gone.
define i32 @dangling() {
|
|
; CHECK-LABEL: define i32 @dangling() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
|
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
|
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
; Deliberately unused: the zext never feeds a udiv.
%conv_x = zext i16 %wg_size_x to i32
|
|
ret i32 %grid_size_x
|
|
}
|
|
|
|
; Negative test: the divisor is sign-extended instead of zero-extended.
; Input: identical to the X-dimension positive test in this file (dispatch
; offset 12, implicit-argument offset 12) except that %conv_x is a sext.
; Expected output: both loads, the sext and the udiv are all still present;
; the only visible difference from the input is that the getelementptr
; indices are printed as i64 instead of i32.
define i32 @wrong_cast() {
|
|
; CHECK-LABEL: define i32 @wrong_cast() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
|
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
|
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
|
|
; CHECK-NEXT: [[CONV_X:%.*]] = sext i16 [[WG_SIZE_X]] to i32
|
|
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
|
|
; CHECK-NEXT: ret i32 [[COUNT_X]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
; sext, not zext: this is the single deviation from the positive test.
%conv_x = sext i16 %wg_size_x to i32
|
|
%count_x = udiv i32 %grid_size_x, %conv_x
|
|
ret i32 %count_x
|
|
}
|
|
|
|
; Negative test: the divisor is loaded with the wrong width.
; Input: identical to the X-dimension positive test in this file (dispatch
; offset 12, implicit-argument offset 12) except that the implicit-argument
; load is an i8 instead of an i16.
; Expected output: both loads, the zext and the udiv are all still present;
; the only visible difference from the input is that the getelementptr
; indices are printed as i64 instead of i32.
define i32 @wrong_size() {
|
|
; CHECK-LABEL: define i32 @wrong_size() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
|
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
|
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i8, ptr addrspace(4) [[I_GEP_X]], align 2
|
|
; CHECK-NEXT: [[CONV_X:%.*]] = zext i8 [[WG_SIZE_X]] to i32
|
|
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
|
|
; CHECK-NEXT: ret i32 [[COUNT_X]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
; i8 load, not i16: this is the single deviation from the positive test.
%wg_size_x = load i8, ptr addrspace(4) %i_gep_x, align 2
|
|
%conv_x = zext i8 %wg_size_x to i32
|
|
%count_x = udiv i32 %grid_size_x, %conv_x
|
|
ret i32 %count_x
|
|
}
|
|
|
|
; Negative test: the dividend does not come from the dispatch pointer.
; Input: the value named %dispatch is actually produced by
; @llvm.amdgcn.implicitarg.ptr, so both the i32 dividend (offset 16) and the
; i16 divisor (offset 12) are loaded relative to implicit-argument pointers.
; Expected output: both calls, both loads, the zext and the udiv are all still
; present; the only visible difference from the input is that the
; getelementptr indices are printed as i64 instead of i32.
; NOTE(review): the dividend offset here is 16, whereas the other X-dimension
; tests in this file use 12 for %d_gep_x. Confirm whether that is intentional
; or whether 12 was meant, so that the intrinsic is the only variable changed.
define i32 @wrong_intrinsic() {
|
|
; CHECK-LABEL: define i32 @wrong_intrinsic() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
|
|
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
|
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
|
|
; CHECK-NEXT: [[CONV_X:%.*]] = zext i16 [[WG_SIZE_X]] to i32
|
|
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
|
|
; CHECK-NEXT: ret i32 [[COUNT_X]]
|
|
;
|
|
entry:
|
|
; Despite its name, %dispatch is an implicit-argument pointer in this test.
%dispatch = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
%conv_x = zext i16 %wg_size_x to i32
|
|
%count_x = udiv i32 %grid_size_x, %conv_x
|
|
ret i32 %count_x
|
|
}
|
|
|
|
; Negative test: the division is done at i16 with no zero-extension.
; Input: the i32 loaded from dispatch offset 12 is truncated to i16 and
; divided directly by the i16 loaded from implicit-argument offset 12; no zext
; of the i16 load exists.
; Expected output: both loads, the trunc and the i16 udiv are all still
; present; the only visible difference from the input is that the
; getelementptr indices are printed as i64 instead of i32.
; NOTE(review): the name presumably refers to the i16 load having no zext
; user -- confirm against the pass implementation.
define i16 @empty_use() {
|
|
; CHECK-LABEL: define i16 @empty_use() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
|
|
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
|
|
; CHECK-NEXT: [[TRUNC_X:%.*]] = trunc i32 [[GRID_SIZE_X]] to i16
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
|
|
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
|
|
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i16 [[TRUNC_X]], [[WG_SIZE_X]]
|
|
; CHECK-NEXT: ret i16 [[COUNT_X]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
; The dividend is narrowed to i16 instead of the divisor being widened.
%trunc_x = trunc i32 %grid_size_x to i16
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
%count_x = udiv i16 %trunc_x, %wg_size_x
|
|
ret i16 %count_x
|
|
}
|
|
|
|
; Positive test: the same pair of loads feeds two separate divisions.
; Input: the i16 load from implicit-argument offset 12 is zero-extended twice
; (%conv_x_1, %conv_x_2); each zext divides the same i32 loaded from dispatch
; offset 12, and the two quotients are added together.
; Expected output: both divisions are gone. A single i32 load directly from
; the implicit-argument pointer (tagged !invariant.load and !noundef) remains,
; and the add of the two identical values appears as a shift left by 1.
define i32 @multiple_use() {
|
|
; CHECK-LABEL: define i32 @multiple_use() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0]], !noundef [[META0]]
|
|
; CHECK-NEXT: [[SUM:%.*]] = shl i32 [[TMP0]], 1
|
|
; CHECK-NEXT: ret i32 [[SUM]]
|
|
;
|
|
entry:
|
|
%dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
|
%d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
|
|
%grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
|
|
%implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
|
%i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
|
|
%wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
|
|
; First zext + udiv pair.
%conv_x_1 = zext i16 %wg_size_x to i32
|
|
%count_x_1 = udiv i32 %grid_size_x, %conv_x_1
|
|
; Second, identical zext + udiv pair built from the same two loads.
%conv_x_2 = zext i16 %wg_size_x to i32
|
|
%count_x_2 = udiv i32 %grid_size_x, %conv_x_2
|
|
%sum = add i32 %count_x_1, %count_x_2
|
|
ret i32 %sum
|
|
}
|
|
;.
|
|
; CHECK: [[META0]] = !{}
|
|
;.
|