; We previously did the same for the grid size when annotated. The group size
; is easier, so it is odd that this was not implemented first.
; (Source listing: 126 lines, 6.2 KiB, LLVM)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-kernel-attributes %s | FileCheck %s
|
|
|
|
; Loads an i32 directly through the pointer returned by
; @llvm.amdgcn.implicitarg.ptr() (byte offset 0) in a function carrying
; "amdgpu-max-num-workgroups"="36,42,89" (attribute group #0).
; The expected output attaches !range [1, 37) to the load, i.e. the upper
; bound is the first component of the attribute (36) plus one.
define i32 @use_grid_size_x_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0:![0-9]+]]
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}
|
|
|
|
; Same offset-0 i32 load as the basic X case, but the input load already
; carries !range !0, which is !{i32 0, i32 -1}.
; The expected output still shows !{i32 0, i32 -1} on the load, so the
; pre-existing annotation is kept as written rather than replaced by a range
; derived from "amdgpu-max-num-workgroups"="36,42,89".
define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range() #0 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG1:![0-9]+]]
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4, !range !0
  ret i32 %grid.size.x
}
|
|
|
|
; Loads an i32 at byte offset 4 from the implicit argument pointer in a
; function carrying "amdgpu-max-num-workgroups"="36,42,89".
; The expected output attaches !range [1, 43) to the load, i.e. the upper
; bound is the second component of the attribute (42) plus one.
define i32 @use_grid_size_y_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_y_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GEP_GRID_SIZE_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 4
; CHECK-NEXT: [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Y]], align 4, !range [[RNG2:![0-9]+]]
; CHECK-NEXT: ret i32 [[GRID_SIZE_Y]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.grid.size.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 4
  %grid.size.y = load i32, ptr addrspace(4) %gep.grid.size.y, align 4
  ret i32 %grid.size.y
}
|
|
|
|
; Loads an i32 at byte offset 8 from the implicit argument pointer in a
; function carrying "amdgpu-max-num-workgroups"="36,42,89".
; The expected output attaches !range [1, 90) to the load, i.e. the upper
; bound is the third component of the attribute (89) plus one.
define i32 @use_grid_size_z_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_z_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GEP_GRID_SIZE_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 8
; CHECK-NEXT: [[GRID_SIZE_Z:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Z]], align 4, !range [[RNG3:![0-9]+]]
; CHECK-NEXT: ret i32 [[GRID_SIZE_Z]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.grid.size.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 8
  %grid.size.z = load i32, ptr addrspace(4) %gep.grid.size.z, align 4
  ret i32 %grid.size.z
}
|
|
|
|
; Negative case: the offset-0 load has type <2 x i16> instead of i32.
; The expected output leaves the load without any !range annotation even
; though the function carries "amdgpu-max-num-workgroups"="36,42,89".
define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type() #0 {
; CHECK-LABEL: define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load <2 x i16>, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT: ret <2 x i16> [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load <2 x i16>, ptr addrspace(4) %implicitarg.ptr, align 4
  ret <2 x i16> %grid.size.x
}
|
|
|
|
; Boundary case: the first attribute component is 4294967294 (2^32 - 2),
; see attribute group #1.
; The expected output attaches !{i32 1, i32 -1} to the offset-0 load; as an
; unsigned 32-bit value the upper bound -1 is 4294967295, i.e. the limit
; plus one.
define i32 @use_grid_size_x_max_num_workgroups_max_minus_1() #1 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max_minus_1(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG4:![0-9]+]]
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}
|
|
|
|
; Boundary case: the first attribute component is 4294967295 (2^32 - 1),
; see attribute group #2.
; The expected output leaves the offset-0 load without a !range annotation.
; NOTE(review): presumably because limit + 1 is not representable as a 32-bit
; upper bound -- confirm against the pass implementation.
define i32 @use_grid_size_x_max_num_workgroups_max() #2 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}
|
|
|
|
; Boundary case: the first attribute component is 0, see attribute group #3.
; The expected output leaves the offset-0 load without a !range annotation.
; NOTE(review): presumably a zero component is treated as "no usable limit"
; -- confirm against the pass implementation.
define i32 @use_grid_size_x_max_num_workgroups_zero() #3 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_zero(
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}
|
|
|
|
; Declaration of the intrinsic every test function calls to obtain the
; addrspace(4) pointer it loads from.
; NOTE(review): `#3` here is the "amdgpu-max-num-workgroups"="0,42,89" group
; used by the _zero test function. The expected output lists a separate
; attribute group containing only nocallback/nofree/nosync/nounwind/
; speculatable/willreturn/memory(none), so the group written here is
; presumably discarded for the intrinsic -- confirm whether `#3` is intended.
declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
|
|
|
|
; Attribute groups used by the test functions. Each sets
; "amdgpu-max-num-workgroups" to a three-component value; the groups differ
; only in the first component.
; #0: ordinary small limits; the expected ranges are [1, 37), [1, 43) and
;     [1, 90) for loads at byte offsets 0, 4 and 8.
attributes #0 = { "amdgpu-max-num-workgroups"="36,42,89" }
; #1: first component is 4294967294 (2^32 - 2); a range is still expected.
attributes #1 = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
; #2: first component is 4294967295 (2^32 - 1); no range is expected.
attributes #2 = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
; #3: first component is 0; no range is expected.
attributes #3 = { "amdgpu-max-num-workgroups"="0,42,89" }
|
|
|
|
; Pre-existing range attached to the load in
; @use_grid_size_x_max_num_workgroups_existing_nonzero_range; the expected
; output contains the same !{i32 0, i32 -1} node for that load.
!0 = !{i32 0, i32 -1}
|
|
|
|
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-max-num-workgroups"="36,42,89" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="0,42,89" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[RNG0]] = !{i32 1, i32 37}
; CHECK: [[RNG1]] = !{i32 0, i32 -1}
; CHECK: [[RNG2]] = !{i32 1, i32 43}
; CHECK: [[RNG3]] = !{i32 1, i32 90}
; CHECK: [[RNG4]] = !{i32 1, i32 -1}
;.
|