[AMDGPU] Make uniform-work-group-size a valueless attribute (#183925)
The "uniform-work-group-size" function attribute previously took a string value of "true" or "false". Since presence alone can convey the "true" semantics and absence can convey "false", the value is unnecessary. This patch converts it to a valueless string attribute: presence indicates true, absence indicates false. For backward compatibility, auto-upgrade logic is added in both UpgradeAttributes (bitcode) and UpgradeFunctionAttributes: if the old value is "true", the attribute is kept without a value; if "false", the attribute is removed.
This commit is contained in:
parent
e2ef93fc57
commit
f05d2e8a39
@ -2619,22 +2619,21 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
|
||||
// OpenCL Kernel Stub
|
||||
if (getLangOpts().OpenCLVersion <= 120) {
|
||||
// OpenCL v1.2 Work groups are always uniform
|
||||
FuncAttrs.addAttribute("uniform-work-group-size", "true");
|
||||
FuncAttrs.addAttribute("uniform-work-group-size");
|
||||
} else {
|
||||
// OpenCL v2.0 Work groups may be whether uniform or not.
|
||||
// '-cl-uniform-work-group-size' compile option gets a hint
|
||||
// to the compiler that the global work-size be a multiple of
|
||||
// the work-group size specified to clEnqueueNDRangeKernel
|
||||
// (i.e. work groups are uniform).
|
||||
FuncAttrs.addAttribute(
|
||||
"uniform-work-group-size",
|
||||
llvm::toStringRef(getLangOpts().OffloadUniformBlock));
|
||||
if (getLangOpts().OffloadUniformBlock)
|
||||
FuncAttrs.addAttribute("uniform-work-group-size");
|
||||
}
|
||||
}
|
||||
|
||||
if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
|
||||
getLangOpts().OffloadUniformBlock)
|
||||
FuncAttrs.addAttribute("uniform-work-group-size", "true");
|
||||
FuncAttrs.addAttribute("uniform-work-group-size");
|
||||
|
||||
if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
|
||||
FuncAttrs.addAttribute("aarch64_pstate_sm_body");
|
||||
|
||||
@ -103,7 +103,7 @@ template __global__ void template_a_b_c_max_num_work_groups<32, 4, 2>();
|
||||
// NAMD-NOT: "amdgpu-num-sgpr"
|
||||
// NAMD-NOT: "amdgpu-max-num-work-groups"
|
||||
|
||||
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
|
||||
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"
|
||||
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
|
||||
// MAX1024-SPIRV-DAG: [[MAX_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1}
|
||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
|
||||
@ -114,4 +114,4 @@ template __global__ void template_a_b_c_max_num_work_groups<32, 4, 2>();
|
||||
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_4_2]] = {{.*}}"amdgpu-max-num-workgroups"="32,4,2"
|
||||
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}}"amdgpu-max-num-workgroups"="32,1,1"
|
||||
|
||||
// NOUB-NOT: "uniform-work-group-size"="true"
|
||||
// NOUB-NOT: "uniform-work-group-size"
|
||||
|
||||
@ -35,7 +35,7 @@ __global__ void kernel() {
|
||||
//.
|
||||
// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
|
||||
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
|
||||
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
|
||||
//.
|
||||
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
|
||||
@ -26,7 +26,7 @@ kernel void foo(global int *p) { *p = 1; }
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//.
|
||||
// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "uniform-work-group-size"="false" }
|
||||
// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" }
|
||||
// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" }
|
||||
// CHECK: attributes #[[ATTR2]] = { convergent nounwind }
|
||||
//.
|
||||
|
||||
@ -107,13 +107,13 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
|
||||
// NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR10:[0-9]+]]
|
||||
// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]]
|
||||
// NOCPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
|
||||
@ -235,19 +235,19 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define dso_local amdgpu_kernel void @test_target_features_kernel(
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
|
||||
// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR10]]
|
||||
// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR8]]
|
||||
// NOCPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel(
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
@ -268,7 +268,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal void @__test_block_invoke(
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7:[0-9]+]] {
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
@ -287,7 +287,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
||||
@ -298,7 +298,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal void @__test_block_invoke_2(
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] {
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
@ -323,7 +323,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
||||
@ -334,7 +334,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal void @__test_block_invoke_3(
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR7]] {
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
|
||||
@ -365,7 +365,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR8]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] {
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
|
||||
@ -376,7 +376,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal void @__test_block_invoke_4(
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] {
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
@ -388,13 +388,13 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8
|
||||
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
|
||||
// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR10]]
|
||||
// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]]
|
||||
// NOCPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
||||
@ -405,7 +405,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal void @__test_target_features_kernel_block_invoke(
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] {
|
||||
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
@ -419,7 +419,7 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign)
|
||||
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_target_features_kernel_block_invoke_kernel(
|
||||
// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
|
||||
@ -805,19 +805,17 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//.
|
||||
// NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
|
||||
// NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
|
||||
// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// NOCPU: attributes #[[ATTR4]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
|
||||
// NOCPU: attributes #[[ATTR5]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" }
|
||||
// NOCPU: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
// NOCPU: attributes #[[ATTR7]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// NOCPU: attributes #[[ATTR8]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// NOCPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
|
||||
// NOCPU: attributes #[[ATTR10]] = { convergent nounwind }
|
||||
// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" }
|
||||
// NOCPU: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
// NOCPU: attributes #[[ATTR5]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// NOCPU: attributes #[[ATTR6]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
|
||||
// NOCPU: attributes #[[ATTR8]] = { convergent nounwind }
|
||||
//.
|
||||
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
|
||||
// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" }
|
||||
// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" "uniform-work-group-size"="false" }
|
||||
// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" }
|
||||
// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" }
|
||||
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
|
||||
// GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
|
||||
@ -1,19 +1,20 @@
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s --check-prefix=CHECK-UNIFORM
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s --check-prefix=CHECK-NONUNIFORM
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s | FileCheck %s --check-prefix=CHECK-UNIFORM
|
||||
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s | FileCheck %s --check-prefix=CHECK-UNIFORM
|
||||
|
||||
// CHECK-UNIFORM: define dso_local spir_kernel void @ker(){{.*}}[[ATTR0:#[0-9]+]]
|
||||
// CHECK-UNIFORM: define dso_local void @__clang_ocl_kern_imp_ker(){{.*}}[[ATTR1:#[0-9]+]]
|
||||
// CHECK-UNIFORM: define dso_local void @foo{{.*}}[[ATTR1]]
|
||||
|
||||
// CHECK-NONUNIFORM: define dso_local spir_kernel void @ker(){{.*}}[[ATTR0:#[0-9]+]]
|
||||
// CHECK-NONUNIFORM: define dso_local void @__clang_ocl_kern_imp_ker(){{.*}}[[ATTR0]]
|
||||
// CHECK-NONUNIFORM: define dso_local void @foo{{.*}}[[ATTR0]]
|
||||
kernel void ker() {};
|
||||
// CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]]
|
||||
|
||||
// CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]]
|
||||
|
||||
void foo() {};
|
||||
// CHECK: define{{.*}}@foo() #[[ATTR1:[0-9]+]]
|
||||
|
||||
// CHECK: attributes #[[ATTR0]]
|
||||
// CHECK-UNIFORM: "uniform-work-group-size"="true"
|
||||
// CHECK-NONUNIFORM: "uniform-work-group-size"="false"
|
||||
// CHECK-UNIFORM: attributes [[ATTR0]] {{.*}} "uniform-work-group-size"
|
||||
// CHECK-UNIFORM-NOT: attributes [[ATTR1]] {{.*}} "uniform-work-group-size"
|
||||
|
||||
// CHECK: attributes #[[ATTR1]]
|
||||
// CHECK-NOT: uniform-work-group-size
|
||||
// CHECK-NONUNIFORM-NOT: attributes [[ATTR0]] {{.*}} "uniform-work-group-size"
|
||||
|
||||
@ -196,19 +196,19 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
// STRICTFP-NEXT: ret void
|
||||
//
|
||||
//.
|
||||
// SPIR32: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
|
||||
// SPIR32: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
|
||||
// SPIR32: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// SPIR32: attributes #[[ATTR3:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
// SPIR32: attributes #[[ATTR4]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size"="true" }
|
||||
// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size" }
|
||||
//.
|
||||
// STRICTFP: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone strictfp "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
|
||||
// STRICTFP: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone strictfp "stack-protector-buffer-size"="8" }
|
||||
// STRICTFP: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
// STRICTFP: attributes #[[ATTR2]] = { convergent noinline nounwind optnone strictfp "stack-protector-buffer-size"="8" }
|
||||
// STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
|
||||
// STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" }
|
||||
// STRICTFP: attributes #[[ATTR5]] = { convergent nounwind strictfp "uniform-work-group-size"="false" }
|
||||
// STRICTFP: attributes #[[ATTR5]] = { convergent nounwind strictfp }
|
||||
// STRICTFP: attributes #[[ATTR6]] = { strictfp }
|
||||
//.
|
||||
// SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
|
||||
@ -31,9 +31,9 @@ int callable(int x) {
|
||||
return x + 1;
|
||||
}
|
||||
|
||||
// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
|
||||
// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "uniform-work-group-size"="true" }
|
||||
// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
|
||||
// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
|
||||
// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "uniform-work-group-size" }
|
||||
// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
|
||||
|
||||
// DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" }
|
||||
|
||||
@ -11065,7 +11065,7 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
|
||||
// Add a function attribute for the kernel.
|
||||
Fn->addFnAttr("kernel");
|
||||
if (T.isAMDGCN())
|
||||
Fn->addFnAttr("uniform-work-group-size", "true");
|
||||
Fn->addFnAttr("uniform-work-group-size");
|
||||
Fn->addFnAttr(Attribute::MustProgress);
|
||||
}
|
||||
|
||||
|
||||
@ -6352,6 +6352,16 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
|
||||
AddingAttrs = RemovingAttrs = true;
|
||||
}
|
||||
|
||||
if (Attribute A = F.getFnAttribute("uniform-work-group-size");
|
||||
A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
|
||||
AttrsToRemove.addAttribute("uniform-work-group-size");
|
||||
RemovingAttrs = true;
|
||||
if (A.getValueAsString() == "true") {
|
||||
AttrsToAdd.addAttribute("uniform-work-group-size");
|
||||
AddingAttrs = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!F.empty()) {
|
||||
// For some reason this is called twice, and the first time is before any
|
||||
// instructions are loaded into the body.
|
||||
@ -6749,6 +6759,17 @@ void llvm::UpgradeAttributes(AttrBuilder &B) {
|
||||
if (NullPointerIsValid)
|
||||
B.addAttribute(Attribute::NullPointerIsValid);
|
||||
}
|
||||
|
||||
A = B.getAttribute("uniform-work-group-size");
|
||||
if (A.isValid()) {
|
||||
StringRef Val = A.getValueAsString();
|
||||
if (!Val.empty()) {
|
||||
bool IsTrue = Val == "true";
|
||||
B.removeAttribute("uniform-work-group-size");
|
||||
if (IsTrue)
|
||||
B.addAttribute("uniform-work-group-size");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
|
||||
|
||||
@ -362,11 +362,7 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
|
||||
if (CC != CallingConv::AMDGPU_KERNEL)
|
||||
return;
|
||||
|
||||
bool InitialValue = false;
|
||||
if (F->hasFnAttribute("uniform-work-group-size"))
|
||||
InitialValue =
|
||||
F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
|
||||
"true";
|
||||
bool InitialValue = F->hasFnAttribute("uniform-work-group-size");
|
||||
|
||||
if (InitialValue)
|
||||
indicateOptimisticFixpoint();
|
||||
@ -405,10 +401,9 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
|
||||
return ChangeStatus::UNCHANGED;
|
||||
|
||||
LLVMContext &Ctx = getAssociatedFunction()->getContext();
|
||||
return A.manifestAttrs(
|
||||
getIRPosition(),
|
||||
{Attribute::get(Ctx, "uniform-work-group-size", "true")},
|
||||
/*ForceReplace=*/true);
|
||||
return A.manifestAttrs(getIRPosition(),
|
||||
{Attribute::get(Ctx, "uniform-work-group-size")},
|
||||
/*ForceReplace=*/true);
|
||||
}
|
||||
|
||||
bool isValidState() const override {
|
||||
|
||||
@ -731,7 +731,7 @@ void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
|
||||
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);
|
||||
|
||||
const Function &Func = MF.getFunction();
|
||||
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
|
||||
if (Func.hasFnAttribute("uniform-work-group-size"))
|
||||
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
|
||||
}
|
||||
|
||||
|
||||
@ -103,7 +103,7 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
|
||||
const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3;
|
||||
|
||||
const bool HasUniformWorkGroupSize =
|
||||
F->getFnAttribute("uniform-work-group-size").getValueAsBool();
|
||||
F->hasFnAttribute("uniform-work-group-size");
|
||||
|
||||
SmallVector<unsigned> MaxNumWorkgroups =
|
||||
AMDGPU::getIntegerVecAttribute(*F, "amdgpu-max-num-workgroups",
|
||||
|
||||
@ -157,7 +157,7 @@ define hidden void @g() #3 !dbg !43 {
|
||||
}
|
||||
|
||||
attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
|
||||
attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="256" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="256" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size" }
|
||||
attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
|
||||
attributes #3 = { convergent noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
|
||||
attributes #4 = { convergent }
|
||||
|
||||
21
llvm/test/Bitcode/upgrade-uniform-work-group-size.ll
Normal file
21
llvm/test/Bitcode/upgrade-uniform-work-group-size.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llvm-as < %s | llvm-dis - | FileCheck %s
|
||||
|
||||
; "uniform-work-group-size"="true" should be upgraded to a valueless attribute.
|
||||
; CHECK: define void @true_val() #[[ATTR_TRUE:[0-9]+]]
|
||||
define void @true_val() "uniform-work-group-size"="true" {
|
||||
ret void
|
||||
}
|
||||
|
||||
; "uniform-work-group-size"="false" should be removed entirely.
|
||||
; CHECK: define void @false_val() {
|
||||
define void @false_val() "uniform-work-group-size"="false" {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Already-upgraded valueless attribute should be left alone.
|
||||
; CHECK: define void @no_val() #[[ATTR_TRUE]]
|
||||
define void @no_val() "uniform-work-group-size" {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: attributes #[[ATTR_TRUE]] = { "uniform-work-group-size" }
|
||||
@ -70,4 +70,4 @@ define amdgpu_kernel void @amdhsa_kernarg_preload_1_implicit_2(i32 inreg) #0 { r
|
||||
|
||||
define amdgpu_kernel void @amdhsa_kernarg_preload_0_implicit_2(i32) #0 { ret void }
|
||||
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
|
||||
@ -14,7 +14,7 @@ bb:
|
||||
; CHECK: - .args:
|
||||
; CHECK-LABEL: .name: kernel_non_uniform_workgroup
|
||||
; CHECK-NOT: .uniform_work_group_size:
|
||||
define amdgpu_kernel void @kernel_non_uniform_workgroup() #1 {
|
||||
define amdgpu_kernel void @kernel_non_uniform_workgroup() {
|
||||
bb:
|
||||
ret void
|
||||
}
|
||||
@ -26,8 +26,7 @@ define amdgpu_kernel void @kernel_no_attr() {
|
||||
bb:
|
||||
ret void
|
||||
}
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "uniform-work-group-size"="false" }
|
||||
attributes #0 = { "uniform-work-group-size" }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
|
||||
|
||||
@ -197,7 +197,7 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #1
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
|
||||
attributes #0 = { nounwind "uniform-work-group-size"="true" }
|
||||
attributes #0 = { nounwind "uniform-work-group-size" }
|
||||
attributes #1 = { nounwind readnone speculatable }
|
||||
!0 = !{i32 8, i32 16, i32 2}
|
||||
!1 = !{i32 1, !"amdhsa_code_object_version", i32 500}
|
||||
|
||||
@ -35,17 +35,17 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "uniform-work-group-size"="false"}
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind "less-precise-fpmad"="true" "no-nans-fp-math"="true" }
|
||||
|
||||
; NOINFS: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; NOINFS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="false" "uniform-work-group-size"="false" }
|
||||
; NOINFS: attributes #[[ATTR0]] = { nounwind }
|
||||
; NOINFS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="false" }
|
||||
; NOINFS: [[META0]] = !{}
|
||||
;.
|
||||
; UNSAFE: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; UNSAFE: attributes #[[ATTR0]] = { nounwind }
|
||||
; UNSAFE: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="false" }
|
||||
;.
|
||||
; NONANS: attributes #[[ATTR0]] = { nounwind "no-nans-fp-math"="true" "uniform-work-group-size"="false" }
|
||||
; NONANS: attributes #[[ATTR0]] = { nounwind "no-nans-fp-math"="true" }
|
||||
; NONANS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="true" }
|
||||
;.
|
||||
; UNSAFE: [[META0]] = !{}
|
||||
|
||||
@ -172,7 +172,7 @@ declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
declare void @llvm.amdgcn.sched.group.barrier(i32 immarg, i32 immarg, i32 immarg) #2
|
||||
|
||||
attributes #0 = { convergent mustprogress norecurse nounwind "amdgpu-agpr-alloc"="1" "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx942" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { convergent mustprogress norecurse nounwind "amdgpu-agpr-alloc"="1" "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx942" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" "uniform-work-group-size" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #2 = { convergent nocallback nofree nounwind willreturn }
|
||||
attributes #3 = { convergent nounwind memory(none) }
|
||||
|
||||
@ -18,4 +18,4 @@ define amdgpu_kernel void @preloadremainder_z_no_preload_diag(ptr addrspace(1) i
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
|
||||
@ -1098,7 +1098,7 @@ declare i64 @llvm.fshl.i64(i64, i64, i64) #3
|
||||
|
||||
attributes #0 = { convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
|
||||
attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
|
||||
attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="64,64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="64,64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" "uniform-work-group-size" }
|
||||
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #4 = { convergent nounwind willreturn memory(none) }
|
||||
attributes #5 = { convergent nounwind }
|
||||
|
||||
@ -212,4 +212,4 @@ cleanup.cont: ; preds = %if.end15, %if.end
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "uniform-work-group-size" }
|
||||
|
||||
@ -265,6 +265,6 @@ declare float @llvm.fmuladd.f32(float, float, float) #1
|
||||
|
||||
attributes #0 = { nounwind willreturn denormal_fpenv(float: preservesign) }
|
||||
attributes #1 = { nounwind readnone speculatable }
|
||||
attributes #2 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" denormal_fpenv(float: preservesign) }
|
||||
attributes #2 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" denormal_fpenv(float: preservesign) }
|
||||
|
||||
!0 = !{float 2.500000e+00}
|
||||
|
||||
@ -401,7 +401,7 @@ bb.1:
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable }
|
||||
attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
|
||||
|
||||
@ -1105,4 +1105,4 @@ define amdgpu_kernel void @preload_block_count_z_workgroup_size_z_remainder_z(pt
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
|
||||
@ -1687,4 +1687,4 @@ define amdgpu_kernel void @ptr1_i8_trailing_unused(ptr addrspace(1) inreg %out,
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
|
||||
@ -2671,4 +2671,4 @@ end:
|
||||
}
|
||||
|
||||
|
||||
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
|
||||
|
||||
@ -444,10 +444,10 @@ declare i32 @llvm.umin.i32(i32, i32) #1
|
||||
declare i32 @llvm.smin.i32(i32, i32) #1
|
||||
declare i32 @llvm.umax.i32(i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind "uniform-work-group-size"="true" }
|
||||
attributes #0 = { nounwind "uniform-work-group-size" }
|
||||
attributes #1 = { nounwind readnone speculatable }
|
||||
attributes #2 = { nounwind "uniform-work-group-size"="true" }
|
||||
attributes #3 = { nounwind "uniform-work-group-size"="false" }
|
||||
attributes #2 = { nounwind "uniform-work-group-size" }
|
||||
attributes #3 = { nounwind }
|
||||
|
||||
!0 = !{i32 8, i32 16, i32 2}
|
||||
!1 = !{i32 8, i32 16}
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
ret [13 x i32] poison
|
||||
}
|
||||
|
||||
attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" denormal_fpenv(float: preservesign) "target-cpu"="gfx1030" "target-features"="+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" denormal_fpenv(float: preservesign) "target-cpu"="gfx1030" "target-features"="+wavefrontsize32,+cumode,+enable-flat-scratch" }
|
||||
...
|
||||
---
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
unreachable
|
||||
}
|
||||
|
||||
attributes #0 = { "target-cpu"="gfx1100" "target-features"="+wavefrontsize64,+cumode" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { "target-cpu"="gfx1100" "target-features"="+wavefrontsize64,+cumode" }
|
||||
...
|
||||
---
|
||||
name: _amdgpu_ps_main
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" "uniform-work-group-size"="false" }
|
||||
attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" }
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3}
|
||||
|
||||
|
||||
@ -594,7 +594,7 @@ declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32,
|
||||
|
||||
attributes #0 = { cold noreturn nounwind memory(inaccessiblemem: write) }
|
||||
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
|
||||
attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1201" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1201" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size" }
|
||||
attributes #3 = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(read) }
|
||||
|
||||
@ -29,8 +29,8 @@ define amdgpu_kernel void @kernel1() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "uniform-work-group-size" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
;.
|
||||
|
||||
@ -94,9 +94,9 @@ define amdgpu_kernel void @kernel2() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "uniform-work-group-size" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
|
||||
;.
|
||||
|
||||
@ -39,8 +39,8 @@ define amdgpu_kernel void @kernel3() #2 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
attributes #2 = { "uniform-work-group-size" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
|
||||
;.
|
||||
|
||||
@ -39,8 +39,8 @@ define amdgpu_kernel void @kernel2() #2 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "uniform-work-group-size" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
|
||||
;.
|
||||
|
||||
@ -17,7 +17,7 @@ define void @func() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
define amdgpu_kernel void @kernel1() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @func()
|
||||
@ -38,7 +38,7 @@ define weak_odr void @weak_func() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel2() #2 {
|
||||
define amdgpu_kernel void @kernel2() #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @weak_func()
|
||||
@ -49,11 +49,10 @@ define amdgpu_kernel void @kernel2() #2 {
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { "uniform-work-group-size"="false" }
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "uniform-work-group-size" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
; CHECK: attributes #[[ATTR2]] = { nounwind }
|
||||
; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size" }
|
||||
;.
|
||||
|
||||
@ -99,9 +99,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
|
||||
|
||||
; nounwind and readnone are added to match attributor results.
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "uniform-work-group-size" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
|
||||
;.
|
||||
|
||||
@ -25,9 +25,9 @@ define void @func4() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func2() #0 {
|
||||
define void @func2() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func4()
|
||||
; CHECK-NEXT: call void @func1()
|
||||
; CHECK-NEXT: ret void
|
||||
@ -47,9 +47,9 @@ define void @func3() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel3() #0 {
|
||||
define amdgpu_kernel void @kernel3() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; CHECK-SAME: () #[[ATTR1]] {
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func2()
|
||||
; CHECK-NEXT: call void @func3()
|
||||
; CHECK-NEXT: ret void
|
||||
@ -59,8 +59,6 @@ define amdgpu_kernel void @kernel3() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@ -99,7 +99,7 @@
|
||||
; Function Attrs: convergent nocallback nofree nounwind willreturn
|
||||
declare void @llvm.amdgcn.end.cf.i64(i64) #2
|
||||
|
||||
attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #2 = { convergent nocallback nofree nounwind willreturn }
|
||||
attributes #3 = { convergent nocallback nofree nounwind willreturn memory(none) }
|
||||
|
||||
@ -936,7 +936,7 @@ define void @device_func(ptr byval(i32) align 4 %input) {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "target-cpu"="sm_60" "target-features"="+ptx78,+sm_60" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "target-cpu"="sm_60" "target-features"="+ptx78,+sm_60" "uniform-work-group-size" }
|
||||
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||||
|
||||
|
||||
@ -64,7 +64,7 @@ declare dso_local spir_func void @_Z31intel_work_group_barrier_arrivej(i32 nound
|
||||
; Function Attrs: convergent
|
||||
declare dso_local spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef) local_unnamed_addr #1
|
||||
|
||||
attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
|
||||
attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
|
||||
@ -126,7 +126,7 @@ declare dso_local spir_func void @_Z31intel_work_group_barrier_arrivej12memory_s
|
||||
; Function Attrs: convergent
|
||||
declare dso_local spir_func void @_Z29intel_work_group_barrier_waitj12memory_scope(i32 noundef, i32 noundef) local_unnamed_addr #1
|
||||
|
||||
attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
|
||||
@ -120,7 +120,7 @@ declare dso_local spir_func void @_Z33__spirv_ControlBarrierArriveINTELiii(i32 n
|
||||
; Function Attrs: convergent
|
||||
declare dso_local spir_func void @_Z31__spirv_ControlBarrierWaitINTELiii(i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #1
|
||||
|
||||
attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
|
||||
@ -171,7 +171,7 @@ declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addr
|
||||
; Function Attrs: convergent
|
||||
declare spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1), <2 x i64>) local_unnamed_addr #1
|
||||
|
||||
attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
|
||||
@ -85,7 +85,7 @@ declare void @llvm.stackrestore.p0(ptr) #4
|
||||
|
||||
declare i64 @_Z20__spirv_SpecConstantix(i32, i64)
|
||||
|
||||
attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/intel/vla_spec_const.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/intel/vla_spec_const.cpp" "uniform-work-group-size" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { argmemonly nounwind willreturn }
|
||||
attributes #2 = { inlinehint norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #3 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
@ -329,7 +329,7 @@ declare spir_func double @_Z16sub_group_rotatedi(double noundef, i32 noundef) #1
|
||||
; Function Attrs: convergent nounwind
|
||||
declare spir_func double @_Z26sub_group_clustered_rotatedij(double noundef, i32 noundef, i32 noundef) #1
|
||||
|
||||
attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
|
||||
attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
|
||||
@ -44,7 +44,7 @@ declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, meta
|
||||
; Function Attrs: inaccessiblememonly nounwind willreturn
|
||||
declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) #1
|
||||
|
||||
attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { inaccessiblememonly nounwind willreturn }
|
||||
attributes #2 = { strictfp }
|
||||
|
||||
|
||||
@ -101,7 +101,7 @@ public:
|
||||
// assumption. This assumption may be overridden by setting
|
||||
// `rocdl.uniform_work_group_size` on a given function.
|
||||
if (!llvmFunc->hasFnAttribute("uniform-work-group-size"))
|
||||
llvmFunc->addFnAttr("uniform-work-group-size", "true");
|
||||
llvmFunc->addFnAttr("uniform-work-group-size");
|
||||
}
|
||||
// Override flat-work-group-size
|
||||
// TODO: update clients to rocdl.flat_work_group_size instead,
|
||||
@ -170,8 +170,10 @@ public:
|
||||
" must be a boolean");
|
||||
llvm::Function *llvmFunc =
|
||||
moduleTranslation.lookupFunction(func.getName());
|
||||
llvmFunc->addFnAttr("uniform-work-group-size",
|
||||
value.getValue() ? "true" : "false");
|
||||
if (value.getValue())
|
||||
llvmFunc->addFnAttr("uniform-work-group-size");
|
||||
else
|
||||
llvmFunc->removeFnAttr("uniform-work-group-size");
|
||||
}
|
||||
if (dialect->getUnsafeFpAtomicsAttrHelper().getName() ==
|
||||
attribute.getName()) {
|
||||
|
||||
@ -2115,10 +2115,10 @@ llvm.func @rocdl.cvt.scalef32.sr.pk16(%v16xf32: vector<16xf32>,
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
|
||||
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size" }
|
||||
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
|
||||
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
|
||||
// CHECK-DAG: attributes #[[$KERNEL_NO_UNIFORM_WORK_GROUPS_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="false" }
|
||||
// CHECK-DAG: attributes #[[$KERNEL_NO_UNIFORM_WORK_GROUPS_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" }
|
||||
// CHECK-DAG: ![[$REQD_WORK_GROUP_SIZE]] = !{i32 16, i32 4, i32 2}
|
||||
// CHECK-DAG: attributes #[[$KERNEL_WAVES_PER_EU_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "uniform-work-group-size"="true" }
|
||||
// CHECK-DAG: attributes #[[$KERNEL_UNSAFE_FP_ATOMICS_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-unsafe-fp-atomics"="true" "uniform-work-group-size"="true" }
|
||||
// CHECK-DAG: attributes #[[$KERNEL_WAVES_PER_EU_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "uniform-work-group-size" }
|
||||
// CHECK-DAG: attributes #[[$KERNEL_UNSAFE_FP_ATOMICS_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-unsafe-fp-atomics"="true" "uniform-work-group-size" }
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user