This change adds two builtins for AMDGPU: - `__builtin_amdgcn_processor_is`, which is similar in observable behaviour to `__builtin_cpu_is`, except that it is never "evaluated" at run time; - `__builtin_amdgcn_is_invocable`, which is behaviourally similar to `__has_builtin`, except that it is not a macro (i.e. not evaluated at preprocessing time). Neither of these is `constexpr`, even though when compiling for concrete (i.e. `gfxXXX` / `gfxXXX-generic`) targets they get evaluated in Clang, so they shouldn't tear the AST too badly / at all for multi-pass compilation cases like HIP. They can only be used in specific contexts (as args to control structures). The motivation for adding these is two-fold: - as a nice to have, it provides an AST-visible way to incorporate architecture specific code, rather than having to rely on macros and the preprocessor, which burn in the choice quite early; - as a must have, it allows featureful AMDGCN flavoured SPIR-V to be produced, where target specific capability is guarded and chosen or discarded when finalising compilation for a concrete target; this is built atop the Specialisation Constant concept which is described in the SPIR-V specification under section [2.12 Specialization](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_specialization_2). I've tried to keep the overall footprint of the change small. The changes to Sema are a bit unpleasant, but there was a strong desire to have Clang validate these, and to constrain their uses, and this was the most compact solution I could come up with (suggestions welcome). --------- Co-authored-by: Juan Manuel Martinez Caamaño <jmartinezcaamao@gmail.com> Co-authored-by: Voicu <avoicu@amd.com>
73 lines
5.3 KiB
C
73 lines
5.3 KiB
C
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
|
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX900 %s
|
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX1010 %s
|
|
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCNSPIRV %s
|
|
|
|
// Test that, depending on triple and, if applicable, target-cpu, one of three
|
|
// things happens:
|
|
// 1) for gfx900 we emit an empty kernel (concrete target, lacks feature)
|
|
// 2) for gfx1010 we emit a call to trap (concrete target, has feature)
|
|
// 3) for AMDGCNSPIRV we emit a boolean specialisation constant, via a call
|
|
// to llvm.spv.named.boolean.spec.constant, with the id of UINT32_MAX, and the boolean
|
|
// value of false, which will yield an OpSpecConstantFalse in SPIR-V
|
|
|
|
// AMDGCN-GFX900-LABEL: define dso_local void @foo(
|
|
// AMDGCN-GFX900-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// AMDGCN-GFX900-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-GFX900-NEXT: ret void
|
|
//
|
|
// AMDGCN-GFX1010-LABEL: define dso_local void @foo(
|
|
// AMDGCN-GFX1010-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// AMDGCN-GFX1010-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-GFX1010-NEXT: call void @llvm.trap()
|
|
// AMDGCN-GFX1010-NEXT: ret void
|
|
//
|
|
// AMDGCNSPIRV-LABEL: define spir_func void @foo(
|
|
// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
|
|
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META2:![0-9]+]])
|
|
// AMDGCNSPIRV-NEXT: [[TOBOOL:%.*]] = icmp ne i1 [[TMP0]], false
|
|
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[LOR_LHS_FALSE:.*]]
|
|
// AMDGCNSPIRV: [[LOR_LHS_FALSE]]:
|
|
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META3:![0-9]+]])
|
|
// AMDGCNSPIRV-NEXT: [[TOBOOL1:%.*]] = icmp ne i1 [[TMP1]], false
|
|
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL1]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE2:.*]]
|
|
// AMDGCNSPIRV: [[LOR_LHS_FALSE2]]:
|
|
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META4:![0-9]+]])
|
|
// AMDGCNSPIRV-NEXT: [[TOBOOL3:%.*]] = icmp ne i1 [[TMP2]], false
|
|
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL3]], label %[[IF_THEN]], label %[[IF_END:.*]]
|
|
// AMDGCNSPIRV: [[IF_THEN]]:
|
|
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.trap()
|
|
// AMDGCNSPIRV-NEXT: br label %[[IF_END]]
|
|
// AMDGCNSPIRV: [[IF_END]]:
|
|
// AMDGCNSPIRV-NEXT: ret void
|
|
//
|
|
// Test subject: traps when at least one of the three queried builtins is
// invocable on the compilation target, otherwise returns without doing
// anything.
//
// The is_invocable queries are written directly inside the `if` condition;
// per the change description these builtins can only be used as arguments to
// control structures, so they are not hoisted into locals.
//
// In the SPIR-V run each query is expected to become its own boolean
// specialisation constant, short-circuited left to right; the checked
// metadata names them has.gfx10-insts, has.gfx12-insts and has.ashr-pk-insts
// in that order.
void foo() {
|
|
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16) ||
|
|
(__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16_var)) ||
|
|
(__builtin_amdgcn_is_invocable(__builtin_amdgcn_ashr_pk_i8_i32)))
|
|
// NOTE(review): `return` with a void expression in a void function is
// accepted by Clang but is not strict ISO C. The autogenerated checks were
// produced from this exact statement, so it is intentionally left as is.
return __builtin_trap();
|
|
}
|
|
//.
|
|
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" }
|
|
//.
|
|
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" }
|
|
// AMDGCN-GFX1010: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
|
|
//.
|
|
// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
|
|
// AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind }
|
|
// AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
|
|
//.
|
|
// AMDGCN-GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
|
// AMDGCN-GFX900: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
|
//.
|
|
// AMDGCN-GFX1010: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
|
// AMDGCN-GFX1010: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
|
//.
|
|
// AMDGCNSPIRV: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
|
// AMDGCNSPIRV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
|
// AMDGCNSPIRV: [[META2]] = !{!"has.gfx10-insts"}
|
|
// AMDGCNSPIRV: [[META3]] = !{!"has.gfx12-insts"}
|
|
// AMDGCNSPIRV: [[META4]] = !{!"has.ashr-pk-insts"}
|
|
//.
|